diff --git a/.clang-tidy b/.clang-tidy index a30f9e592..c02b7152f 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -64,8 +64,8 @@ Checks: '*, -readability-identifier-length, -misc-no-recursion, -concurrency-mt-unsafe, - -bugprone-easily-swappable-parameters' - + -bugprone-easily-swappable-parameters, + -bugprone-unchecked-optional-access' WarningsAsErrors: '' HeaderFilterRegex: 'src/.*' AnalyzeTemporaryDtors: false diff --git a/.github/workflows/diff.yaml b/.github/workflows/diff.yaml index a2dc0aef2..49b7d4273 100644 --- a/.github/workflows/diff.yaml +++ b/.github/workflows/diff.yaml @@ -19,11 +19,16 @@ on: jobs: community_build: name: "Community build" - runs-on: [self-hosted, Linux, X64, Diff] + runs-on: [self-hosted, Linux, X64, DockerMgBuild] + timeout-minutes: 60 env: THREADS: 24 MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }} MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }} + OS: debian-11 + TOOLCHAIN: v5 + ARCH: amd + BUILD_TYPE: RelWithDebInfo steps: - name: Set up repository @@ -33,35 +38,56 @@ jobs: # branches and tags. (default: 1) fetch-depth: 0 - - name: Build community binaries + - name: Spin up mgbuild container run: | - # Activate toolchain. - source /opt/toolchain-v4/activate + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + run - # Initialize dependencies. - ./init - - # Build community binaries. - cd build - cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo -DMG_ENTERPRISE=OFF .. - make -j$THREADS + - name: Build release binaries + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --build-type $BUILD_TYPE \ + --threads $THREADS \ + build-memgraph --community - name: Run unit tests run: | - # Activate toolchain. 
- source /opt/toolchain-v4/activate + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --threads $THREADS \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph unit - # Run unit tests. - cd build - ctest -R memgraph__unit --output-on-failure -j$THREADS + - name: Stop mgbuild container + if: always() + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + stop --remove code_analysis: name: "Code analysis" - runs-on: [self-hosted, Linux, X64, Diff] + runs-on: [self-hosted, Linux, X64, DockerMgBuild] + timeout-minutes: 60 env: THREADS: 24 MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }} MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }} + OS: debian-11 + TOOLCHAIN: v5 + ARCH: amd + BUILD_TYPE: Debug steps: - name: Set up repository @@ -71,6 +97,14 @@ jobs: # branches and tags. (default: 1) fetch-depth: 0 + - name: Spin up mgbuild container + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + run + # This is also needed if we want do to comparison against other branches # See https://github.community/t/checkout-code-fails-when-it-runs-lerna-run-test-since-master/17920 - name: Fetch all history for all tags and branches @@ -78,11 +112,13 @@ jobs: - name: Initialize deps run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - - # Initialize dependencies. - ./init + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --build-type $BUILD_TYPE \ + --threads $THREADS \ + build-memgraph --init-only - name: Set base branch if: ${{ github.event_name == 'pull_request' }} @@ -96,45 +132,43 @@ jobs: - name: Python code analysis run: | - CHANGED_FILES=$(git diff -U0 ${{ env.BASE_BRANCH }}... 
--name-only --diff-filter=d) - for file in ${CHANGED_FILES}; do - echo ${file} - if [[ ${file} == *.py ]]; then - python3 -m black --check --diff ${file} - python3 -m isort --profile black --check-only --diff ${file} - fi - done + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph code-analysis --base-branch "${{ env.BASE_BRANCH }}" - name: Build combined ASAN, UBSAN and coverage binaries run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - - cd build - cmake -DTEST_COVERAGE=ON -DASAN=ON -DUBSAN=ON .. - make -j$THREADS memgraph__unit + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --build-type $BUILD_TYPE \ + --threads $THREADS \ + build-memgraph --coverage --asan --ubsan - name: Run unit tests run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - - # Run unit tests. It is restricted to 2 threads intentionally, because higher concurrency makes the timing related tests unstable. - cd build - LSAN_OPTIONS=suppressions=$PWD/../tools/lsan.supp UBSAN_OPTIONS=halt_on_error=1 ctest -R memgraph__unit --output-on-failure -j2 + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph unit-coverage - name: Compute code coverage run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - - # Compute code coverage. - cd tools/github - ./coverage_convert - - # Package code coverage. 
- cd generated - tar -czf code_coverage.tar.gz coverage.json html report.json summary.rmu + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph code-coverage - name: Save code coverage uses: actions/upload-artifact@v4 @@ -144,21 +178,36 @@ jobs: - name: Run clang-tidy run: | - source /opt/toolchain-v4/activate + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --threads $THREADS \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph clang-tidy --base-branch "${{ env.BASE_BRANCH }}" - # Restrict clang-tidy results only to the modified parts - git diff -U0 ${{ env.BASE_BRANCH }}... -- src | ./tools/github/clang-tidy/clang-tidy-diff.py -p 1 -j $THREADS -path build -regex ".+\.cpp" | tee ./build/clang_tidy_output.txt - - # Fail if any warning is reported - ! cat ./build/clang_tidy_output.txt | ./tools/github/clang-tidy/grep_error_lines.sh > /dev/null + - name: Stop mgbuild container + if: always() + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + stop --remove debug_build: name: "Debug build" - runs-on: [self-hosted, Linux, X64, Diff] + runs-on: [self-hosted, Linux, X64, DockerMgBuild] + timeout-minutes: 100 env: THREADS: 24 MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }} MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }} + OS: debian-11 + TOOLCHAIN: v5 + ARCH: amd + BUILD_TYPE: Debug steps: - name: Set up repository @@ -168,58 +217,95 @@ jobs: # branches and tags. (default: 1) fetch-depth: 0 - - name: Build debug binaries + - name: Spin up mgbuild container run: | - # Activate toolchain. 
- source /opt/toolchain-v4/activate + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + run - # Initialize dependencies. - ./init - - # Build debug binaries. - cd build - cmake .. - make -j$THREADS + - name: Build debug binaries + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --build-type $BUILD_TYPE \ + --threads $THREADS \ + build-memgraph - name: Run leftover CTest tests run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - - # Run leftover CTest tests (all except unit and benchmark tests). - cd build - ctest -E "(memgraph__unit|memgraph__benchmark)" --output-on-failure + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --threads $THREADS \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph leftover-CTest - name: Run drivers tests run: | - ./tests/drivers/run.sh + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --threads $THREADS \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph drivers - name: Run integration tests run: | - tests/integration/run.sh + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --threads $THREADS \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph integration - name: Run cppcheck and clang-format run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - - # Run cppcheck and clang-format. 
- cd tools/github - ./cppcheck_and_clang_format diff - + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --threads $THREADS \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph cppcheck-and-clang-format + - name: Save cppcheck and clang-format errors uses: actions/upload-artifact@v4 with: name: "Code coverage(Debug build)" path: tools/github/cppcheck_and_clang_format.txt + - name: Stop mgbuild container + if: always() + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + stop --remove + release_build: name: "Release build" - runs-on: [self-hosted, Linux, X64, Diff] + runs-on: [self-hosted, Linux, X64, DockerMgBuild] + timeout-minutes: 100 env: THREADS: 24 MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }} MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }} + OS: debian-11 + TOOLCHAIN: v5 + ARCH: amd + BUILD_TYPE: Release steps: - name: Set up repository @@ -229,26 +315,33 @@ jobs: # branches and tags. (default: 1) fetch-depth: 0 + - name: Spin up mgbuild container + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + run - name: Build release binaries run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - - # Initialize dependencies. - ./init - - # Build release binaries. - cd build - cmake -DCMAKE_BUILD_TYPE=Release .. 
- make -j$THREADS + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --build-type $BUILD_TYPE \ + --threads $THREADS \ + build-memgraph - name: Run GQL Behave tests run: | - cd tests - ./setup.sh /opt/toolchain-v4/activate - cd gql_behave - ./continuous_integration + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph gql-behave - name: Save quality assurance status uses: actions/upload-artifact@v4 @@ -260,14 +353,19 @@ jobs: - name: Run unit tests run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - - # Run unit tests. - cd build - ctest -R memgraph__unit --output-on-failure -j$THREADS + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --threads $THREADS \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph unit + # This step will be skipped because the e2e stream tests have been disabled + # We need to fix this as soon as possible - name: Ensure Kafka and Pulsar are up + if: false run: | cd tests/e2e/streams/kafka docker-compose up -d @@ -276,13 +374,17 @@ jobs: - name: Run e2e tests run: | - cd tests - ./setup.sh /opt/toolchain-v4/activate - source ve3/bin/activate_e2e - cd e2e - ./run.sh + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph e2e + # Same as two steps prior - name: Ensure Kafka and Pulsar are down + if: false run: | cd tests/e2e/streams/kafka docker-compose down @@ -291,59 +393,92 @@ jobs: - name: Run stress test (plain) run: | - cd tests/stress - source ve3/bin/activate - ./continuous_integration + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + 
--arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph stress-plain - name: Run stress test (SSL) run: | - cd tests/stress - source ve3/bin/activate - ./continuous_integration --use-ssl + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph stress-ssl - name: Run durability test run: | - cd tests/stress - source ve3/bin/activate - python3 durability --num-steps 5 + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph durability - name: Create enterprise DEB package run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - cd build + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + package-memgraph - # create mgconsole - # we use the -B to force the build - make -j$THREADS -B mgconsole - - # Create enterprise DEB package. 
- mkdir output && cd output - cpack -G DEB --config ../CPackConfig.cmake + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + copy --package - name: Save enterprise DEB package uses: actions/upload-artifact@v4 with: name: "Enterprise DEB package" - path: build/output/memgraph*.deb + path: build/output/${{ env.OS }}/memgraph*.deb + + - name: Copy build logs + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + copy --build-logs - name: Save test data uses: actions/upload-artifact@v4 if: always() with: name: "Test data(Release build)" - path: | - # multiple paths could be defined - build/logs + path: build/logs + + - name: Stop mgbuild container + if: always() + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + stop --remove release_jepsen_test: name: "Release Jepsen Test" - runs-on: [self-hosted, Linux, X64, Debian10, JepsenControl] - #continue-on-error: true + runs-on: [self-hosted, Linux, X64, DockerMgBuild] + timeout-minutes: 80 env: THREADS: 24 MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }} MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }} + OS: debian-12 + TOOLCHAIN: v5 + ARCH: amd + BUILD_TYPE: RelWithDebInfo steps: - name: Set up repository @@ -353,16 +488,31 @@ jobs: # branches and tags. (default: 1) fetch-depth: 0 + - name: Spin up mgbuild container + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + run + - name: Build release binaries run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - # Initialize dependencies. - ./init - # Build only memgraph release binarie. - cd build - cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo .. 
- make -j$THREADS memgraph + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --build-type $BUILD_TYPE \ + --threads $THREADS \ + build-memgraph + + - name: Copy memgraph binary + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + copy --binary - name: Refresh Jepsen Cluster run: | @@ -381,13 +531,27 @@ jobs: name: "Jepsen Report" path: tests/jepsen/Jepsen.tar.gz + - name: Stop mgbuild container + if: always() + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + stop --remove + release_benchmarks: name: "Release benchmarks" - runs-on: [self-hosted, Linux, X64, Diff, Gen7] + runs-on: [self-hosted, Linux, X64, DockerMgBuild, Gen7] + timeout-minutes: 60 env: THREADS: 24 MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }} MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }} + OS: debian-11 + TOOLCHAIN: v5 + ARCH: amd + BUILD_TYPE: Release steps: - name: Set up repository @@ -397,25 +561,33 @@ jobs: # branches and tags. (default: 1) fetch-depth: 0 + - name: Spin up mgbuild container + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + run + - name: Build release binaries run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - - # Initialize dependencies. - ./init - - # Build only memgraph release binaries. - cd build - cmake -DCMAKE_BUILD_TYPE=release .. 
- make -j$THREADS + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --build-type $BUILD_TYPE \ + --threads $THREADS \ + build-memgraph - name: Run macro benchmarks run: | - cd tests/macro_benchmark - ./harness QuerySuite MemgraphRunner \ - --groups aggregation 1000_create unwind_create dense_expand match \ - --no-strict + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph macro-benchmark - name: Get branch name (merge) if: github.event_name != 'pull_request' @@ -429,30 +601,50 @@ jobs: - name: Upload macro benchmark results run: | - cd tools/bench-graph-client - virtualenv -p python3 ve3 - source ve3/bin/activate - pip install -r requirements.txt - ./main.py --benchmark-name "macro_benchmark" \ - --benchmark-results "../../tests/macro_benchmark/.harness_summary" \ - --github-run-id "${{ github.run_id }}" \ - --github-run-number "${{ github.run_number }}" \ - --head-branch-name "${{ env.BRANCH_NAME }}" + # Keep the interpolated values quoted (as in the mgbench upload step) so each one reaches mgbuild.sh as a single argument. + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph upload-to-bench-graph \ + --benchmark-name "macro_benchmark" \ + --benchmark-results "../../tests/macro_benchmark/.harness_summary" \ + --github-run-id "${{ github.run_id }}" \ + --github-run-number "${{ github.run_number }}" \ + --head-branch-name "${{ env.BRANCH_NAME }}" - # TODO (andi) No need for path flags and for --disk-storage and --in-memory-analytical - name: Run mgbench run: | - cd tests/mgbench - ./benchmark.py vendor-native --num-workers-for-benchmark 12 --export-results benchmark_result.json pokec/medium/*/* + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + 
--organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph mgbench - name: Upload mgbench results run: | - cd tools/bench-graph-client - virtualenv -p python3 ve3 - source ve3/bin/activate - pip install -r requirements.txt - ./main.py --benchmark-name "mgbench" \ - --benchmark-results "../../tests/mgbench/benchmark_result.json" \ - --github-run-id "${{ github.run_id }}" \ - --github-run-number "${{ github.run_number }}" \ - --head-branch-name "${{ env.BRANCH_NAME }}" + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph upload-to-bench-graph \ + --benchmark-name "mgbench" \ + --benchmark-results "../../tests/mgbench/benchmark_result.json" \ + --github-run-id "${{ github.run_id }}" \ + --github-run-number "${{ github.run_number }}" \ + --head-branch-name "${{ env.BRANCH_NAME }}" + + - name: Stop mgbuild container + if: always() + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + stop --remove diff --git a/CMakeLists.txt b/CMakeLists.txt index 028406447..c02039497 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -300,6 +300,19 @@ endif() option(ENABLE_JEMALLOC "Use jemalloc" ON) +option(MG_MEMORY_PROFILE "If build should be setup for memory profiling" OFF) +if (MG_MEMORY_PROFILE AND ENABLE_JEMALLOC) + message(STATUS "Jemalloc has been disabled because MG_MEMORY_PROFILE is enabled") + set(ENABLE_JEMALLOC OFF) +endif () +if (MG_MEMORY_PROFILE AND ASAN) + message(STATUS "ASAN has been disabled because MG_MEMORY_PROFILE is enabled") + set(ASAN OFF) +endif () +if (MG_MEMORY_PROFILE) + add_compile_definitions(MG_MEMORY_PROFILE) +endif () + if (ASAN) message(WARNING "Disabling jemalloc as it doesn't work well with ASAN") set(ENABLE_JEMALLOC OFF) diff --git a/environment/os/rocky-9.3.sh b/environment/os/rocky-9.3.sh index 571278654..6ec045b42 100755 --- 
a/environment/os/rocky-9.3.sh +++ b/environment/os/rocky-9.3.sh @@ -59,7 +59,7 @@ MEMGRAPH_BUILD_DEPS=( doxygen graphviz # source documentation generators which nodejs golang custom-golang1.18.9 # for driver tests zip unzip java-11-openjdk-devel java-17-openjdk java-17-openjdk-devel custom-maven3.9.3 # for driver tests - sbcl # for custom Lisp C++ preprocessing + cl-asdf common-lisp-controller sbcl # for custom Lisp C++ preprocessing autoconf # for jemalloc code generation libtool # for protobuf code generation cyrus-sasl-devel @@ -162,6 +162,30 @@ install() { fi continue fi + if [ "$pkg" == doxygen ]; then + if ! dnf list installed doxygen >/dev/null 2>/dev/null; then + dnf install -y https://dl.rockylinux.org/pub/rocky/9/CRB/x86_64/os/Packages/d/doxygen-1.9.1-11.el9.x86_64.rpm + fi + continue + fi + if [ "$pkg" == cl-asdf ]; then + if ! dnf list installed cl-asdf >/dev/null 2>/dev/null; then + dnf install -y https://pkgs.sysadmins.ws/el8/base/x86_64/cl-asdf-20101028-18.el8.noarch.rpm + fi + continue + fi + if [ "$pkg" == common-lisp-controller ]; then + if ! dnf list installed common-lisp-controller >/dev/null 2>/dev/null; then + dnf install -y https://pkgs.sysadmins.ws/el8/base/x86_64/common-lisp-controller-7.4-20.el8.noarch.rpm + fi + continue + fi + if [ "$pkg" == sbcl ]; then + if ! dnf list installed sbcl >/dev/null 2>/dev/null; then + dnf install -y https://pkgs.sysadmins.ws/el8/base/x86_64/sbcl-2.0.1-4.el8.x86_64.rpm + fi + continue + fi if [ "$pkg" == PyYAML ]; then if [ -z ${SUDO_USER+x} ]; then # Running as root (e.g. Docker). pip3 install --user PyYAML diff --git a/environment/os/run.sh b/environment/os/run.sh index e7c370f62..654636a5f 100755 --- a/environment/os/run.sh +++ b/environment/os/run.sh @@ -5,17 +5,20 @@ IFS=' ' # NOTE: docker_image_name could be local image build based on release/package images. # NOTE: each line has to be under quotes, docker_container_type, script_name and docker_image_name separate with a space. 
# "docker_container_type script_name docker_image_name" +# docker_container_type OPTIONS: +# * mgrun -> running plain/empty operating system for the purposes of testing native memgraph package +# * mgbuild -> running the builder container to build memgraph inside it -> it's possible create builder images using release/package/run.sh OPERATING_SYSTEMS=( - "mgrun amzn-2 amazonlinux:2" - "mgrun centos-7 centos:7" - "mgrun centos-9 dokken/centos-stream-9" - "mgrun debian-10 debian:10" - "mgrun debian-11 debian:11" - "mgrun fedora-36 fedora:36" - "mgrun ubuntu-18.04 ubuntu:18.04" - "mgrun ubuntu-20.04 ubuntu:20.04" - "mgrun ubuntu-22.04 ubuntu:22.04" - # "mgbuild centos-7 package-mgbuild_centos-7" + # "mgrun amzn-2 amazonlinux:2" + # "mgrun centos-7 centos:7" + # "mgrun centos-9 dokken/centos-stream-9" + # "mgrun debian-10 debian:10" + # "mgrun debian-11 debian:11" + # "mgrun fedora-36 fedora:36" + # "mgrun ubuntu-18.04 ubuntu:18.04" + # "mgrun ubuntu-20.04 ubuntu:20.04" + # "mgrun ubuntu-22.04 ubuntu:22.04" + # "mgbuild debian-12 memgraph/memgraph-builder:v5_debian-12" ) if [ ! "$(docker info)" ]; then @@ -33,14 +36,24 @@ print_help () { # NOTE: This is an idempotent operation! # TODO(gitbuda): Consider making docker_run always delete + start a new container or add a new function. docker_run () { - cnt_name="$1" - cnt_image="$2" + cnt_type="$1" + if [[ "$cnt_type" != "mgbuild" && "$cnt_type" != "mgrun" ]]; then + echo "ERROR: Wrong docker_container_type -> valid options are mgbuild, mgrun" + exit 1 + fi + cnt_name="$2" + cnt_image="$3" if [ ! "$(docker ps -q -f name=$cnt_name)" ]; then if [ "$(docker ps -aq -f status=exited -f name=$cnt_name)" ]; then echo "Cleanup of the old exited container..." 
docker rm $cnt_name fi - docker run -d --volume "$SCRIPT_DIR/../../:/memgraph" --network host --name "$cnt_name" "$cnt_image" sleep infinity + if [[ "$cnt_type" == "mgbuild" ]]; then + docker run -d --volume "$SCRIPT_DIR/../../:/memgraph" --network host --name "$cnt_name" "$cnt_image" + fi + if [[ "$cnt_type" == "mgrun" ]]; then + docker run -d --volume "$SCRIPT_DIR/../../:/memgraph" --network host --name "$cnt_name" "$cnt_image" sleep infinity + fi fi echo "The $cnt_image container is active under $cnt_name name!" } @@ -55,9 +68,9 @@ docker_stop_and_rm () { cnt_name="$1" if [ "$(docker ps -q -f name=$cnt_name)" ]; then docker stop "$1" - if [ "$(docker ps -aq -f status=exited -f name=$cnt_name)" ]; then - docker rm "$1" - fi + fi + if [ "$(docker ps -aq -f status=exited -f name=$cnt_name)" ]; then + docker rm "$1" fi } @@ -71,7 +84,7 @@ start_all () { docker_name="${docker_container_type}_$script_name" echo "" echo "~~~~ OPERATING ON $docker_image as $docker_name..." - docker_run "$docker_name" "$docker_image" + docker_run "$docker_container_type" "$docker_name" "$docker_image" docker_exec "$docker_name" "/memgraph/environment/os/$script_name.sh install NEW_DEPS" echo "---- DONE EVERYHING FOR $docker_image as $docker_name..." 
echo "" diff --git a/include/_mgp.hpp b/include/_mgp.hpp index 8b67bc36a..b1d9e26d5 100644 --- a/include/_mgp.hpp +++ b/include/_mgp.hpp @@ -326,6 +326,21 @@ inline mgp_vertex *graph_get_vertex_by_id(mgp_graph *g, mgp_vertex_id id, mgp_me return MgInvoke<mgp_vertex *>(mgp_graph_get_vertex_by_id, g, id, memory); } +inline bool graph_has_text_index(mgp_graph *graph, const char *index_name) { + return MgInvoke<int>(mgp_graph_has_text_index, graph, index_name); +} + +inline mgp_map *graph_search_text_index(mgp_graph *graph, const char *index_name, const char *search_query, + text_search_mode search_mode, mgp_memory *memory) { + return MgInvoke<mgp_map *>(mgp_graph_search_text_index, graph, index_name, search_query, search_mode, memory); +} + +inline mgp_map *graph_aggregate_over_text_index(mgp_graph *graph, const char *index_name, const char *search_query, + const char *aggregation_query, mgp_memory *memory) { + return MgInvoke<mgp_map *>(mgp_graph_aggregate_over_text_index, graph, index_name, search_query, aggregation_query, + memory); +} + inline mgp_vertices_iterator *graph_iter_vertices(mgp_graph *g, mgp_memory *memory) { return MgInvoke<mgp_vertices_iterator *>(mgp_graph_iter_vertices, g, memory); } diff --git a/include/mg_procedure.h b/include/mg_procedure.h index 93ef241d8..117dc66ab 100644 --- a/include/mg_procedure.h +++ b/include/mg_procedure.h @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -891,6 +891,36 @@ enum mgp_error mgp_edge_iter_properties(struct mgp_edge *e, struct mgp_memory *m enum mgp_error mgp_graph_get_vertex_by_id(struct mgp_graph *g, struct mgp_vertex_id id, struct mgp_memory *memory, struct mgp_vertex **result); +/// Result is non-zero if the index with the given name exists. 
+/// The current implementation always returns without errors. +enum mgp_error mgp_graph_has_text_index(struct mgp_graph *graph, const char *index_name, int *result); + +/// Available modes of searching text indices. +MGP_ENUM_CLASS text_search_mode{ + SPECIFIED_PROPERTIES, + REGEX, + ALL_PROPERTIES, +}; + +/// Search the named text index for the given query. The result is a map with the "search_results" and "error_msg" keys. +/// The "search_results" key contains the vertices whose text-indexed properties match the given query. +/// In case of a Tantivy error, the "search_results" key is absent, and "error_msg" contains the error message. +/// Return mgp_error::MGP_ERROR_UNABLE_TO_ALLOCATE if there is an allocation error while constructing the results map. +/// Return mgp_error::MGP_ERROR_KEY_ALREADY_EXISTS if the same key is being created in the results map more than once. +enum mgp_error mgp_graph_search_text_index(struct mgp_graph *graph, const char *index_name, const char *search_query, + enum text_search_mode search_mode, struct mgp_memory *memory, + struct mgp_map **result); + +/// Aggregate over the results of a search over the named text index. The result is a map with the "aggregation_results" +/// and "error_msg" keys. +/// The "aggregation_results" key contains the result of the aggregation (mgp::AggregateOverTextIndex expects a string). +/// In case of a Tantivy error, the "aggregation_results" key is absent, and "error_msg" contains the error message. +/// Return mgp_error::MGP_ERROR_UNABLE_TO_ALLOCATE if there is an allocation error while constructing the results map. +/// Return mgp_error::MGP_ERROR_KEY_ALREADY_EXISTS if the same key is being created in the results map more than once. +enum mgp_error mgp_graph_aggregate_over_text_index(struct mgp_graph *graph, const char *index_name, + const char *search_query, const char *aggregation_query, + struct mgp_memory *memory, struct mgp_map **result); + /// Creates label index for given label. 
/// mgp_error::MGP_ERROR_NO_ERROR is always returned. /// if label index already exists, result will be 0, otherwise 1. diff --git a/include/mgp.hpp b/include/mgp.hpp index 3f7ed591e..f35231062 100644 --- a/include/mgp.hpp +++ b/include/mgp.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -32,6 +32,15 @@ namespace mgp { +class TextSearchException : public std::exception { + public: + explicit TextSearchException(std::string message) : message_(std::move(message)) {} + const char *what() const noexcept override { return message_.c_str(); } + + private: + std::string message_; +}; + class IndexException : public std::exception { public: explicit IndexException(std::string message) : message_(std::move(message)) {} @@ -4306,12 +4315,12 @@ inline void AddParamsReturnsToProc(mgp_proc *proc, std::vector<Parameter> ¶m } } // namespace detail -inline bool CreateLabelIndex(mgp_graph *memgaph_graph, const std::string_view label) { - return create_label_index(memgaph_graph, label.data()); +inline bool CreateLabelIndex(mgp_graph *memgraph_graph, const std::string_view label) { + return create_label_index(memgraph_graph, label.data()); } -inline bool DropLabelIndex(mgp_graph *memgaph_graph, const std::string_view label) { - return drop_label_index(memgaph_graph, label.data()); +inline bool DropLabelIndex(mgp_graph *memgraph_graph, const std::string_view label) { + return drop_label_index(memgraph_graph, label.data()); } inline List ListAllLabelIndices(mgp_graph *memgraph_graph) { @@ -4322,14 +4331,14 @@ inline List ListAllLabelIndices(mgp_graph *memgraph_graph) { return List(label_indices); } -inline bool CreateLabelPropertyIndex(mgp_graph *memgaph_graph, const std::string_view label, +inline bool CreateLabelPropertyIndex(mgp_graph *memgraph_graph, 
const std::string_view label, const std::string_view property) { - return create_label_property_index(memgaph_graph, label.data(), property.data()); + return create_label_property_index(memgraph_graph, label.data(), property.data()); } -inline bool DropLabelPropertyIndex(mgp_graph *memgaph_graph, const std::string_view label, +inline bool DropLabelPropertyIndex(mgp_graph *memgraph_graph, const std::string_view label, const std::string_view property) { - return drop_label_property_index(memgaph_graph, label.data(), property.data()); + return drop_label_property_index(memgraph_graph, label.data(), property.data()); } inline List ListAllLabelPropertyIndices(mgp_graph *memgraph_graph) { @@ -4340,6 +4349,58 @@ inline List ListAllLabelPropertyIndices(mgp_graph *memgraph_graph) { return List(label_property_indices); } +namespace { +constexpr std::string_view kErrorMsgKey = "error_msg"; +constexpr std::string_view kSearchResultsKey = "search_results"; +constexpr std::string_view kAggregationResultsKey = "aggregation_results"; +} // namespace + +inline List SearchTextIndex(mgp_graph *memgraph_graph, std::string_view index_name, std::string_view search_query, + text_search_mode search_mode) { + auto results_or_error = Map(mgp::MemHandlerCallback(graph_search_text_index, memgraph_graph, index_name.data(), + search_query.data(), search_mode)); + if (results_or_error.KeyExists(kErrorMsgKey)) { + if (!results_or_error.At(kErrorMsgKey).IsString()) { + throw TextSearchException{"The error message is not a string!"}; + } + throw TextSearchException(results_or_error.At(kErrorMsgKey).ValueString().data()); + } + + if (!results_or_error.KeyExists(kSearchResultsKey)) { + throw TextSearchException{"Incomplete text index search results!"}; + } + + if (!results_or_error.At(kSearchResultsKey).IsList()) { + throw TextSearchException{"Text index search results have wrong type!"}; + } + + return results_or_error.At(kSearchResultsKey).ValueList(); +} + +inline std::string_view 
AggregateOverTextIndex(mgp_graph *memgraph_graph, std::string_view index_name, + std::string_view search_query, std::string_view aggregation_query) { + auto results_or_error = + Map(mgp::MemHandlerCallback(graph_aggregate_over_text_index, memgraph_graph, index_name.data(), + search_query.data(), aggregation_query.data())); + + if (results_or_error.KeyExists(kErrorMsgKey)) { + if (!results_or_error.At(kErrorMsgKey).IsString()) { + throw TextSearchException{"The error message is not a string!"}; + } + throw TextSearchException(results_or_error.At(kErrorMsgKey).ValueString().data()); + } + + if (!results_or_error.KeyExists(kAggregationResultsKey)) { + throw TextSearchException{"Incomplete text index aggregation results!"}; + } + + if (!results_or_error.At(kAggregationResultsKey).IsString()) { + throw TextSearchException{"Text index aggregation results have wrong type!"}; + } + + return results_or_error.At(kAggregationResultsKey).ValueString(); +} + inline bool CreateExistenceConstraint(mgp_graph *memgraph_graph, const std::string_view label, const std::string_view property) { return create_existence_constraint(memgraph_graph, label.data(), property.data()); diff --git a/init b/init index 9187ee5aa..5d1799237 100755 --- a/init +++ b/init @@ -14,6 +14,7 @@ function print_help () { echo "Optional arguments:" echo -e " -h\tdisplay this help and exit" echo -e " --without-libs-setup\tskip the step for setting up libs" + echo -e " --ci\tscript is being run inside ci" } function setup_virtualenv () { @@ -35,6 +36,7 @@ function setup_virtualenv () { } setup_libs=true +ci=false if [[ $# -eq 1 && "$1" == "-h" ]]; then print_help exit 0 @@ -45,6 +47,10 @@ else shift setup_libs=false ;; + --ci) + shift + ci=true + ;; *) # unknown option echo "Invalid argument provided: $1" @@ -76,11 +82,13 @@ if [[ "$setup_libs" == "true" ]]; then fi # Fix for centos 7 during release -if [ "${DISTRO}" = "centos-7" ] || [ "${DISTRO}" = "debian-11" ] || [ "${DISTRO}" = "amzn-2" ]; then - if python3 
-m pip show virtualenv >/dev/null 2>/dev/null; then - python3 -m pip uninstall -y virtualenv +if [[ "$ci" == "false" ]]; then + if [ "${DISTRO}" = "centos-7" ] || [ "${DISTRO}" = "debian-11" ] || [ "${DISTRO}" = "amzn-2" ]; then + if python3 -m pip show virtualenv >/dev/null 2>/dev/null; then + python3 -m pip uninstall -y virtualenv + fi + python3 -m pip install virtualenv fi - python3 -m pip install virtualenv fi # setup gql_behave dependencies @@ -119,14 +127,16 @@ fi # Install precommit hook except on old operating systems because we don't # develop on them -> pre-commit hook not required -> we can use latest # packages. -if [ "${DISTRO}" != "centos-7" ] && [ "$DISTRO" != "debian-10" ] && [ "${DISTRO}" != "ubuntu-18.04" ] && [ "${DISTRO}" != "amzn-2" ]; then - python3 -m pip install pre-commit - python3 -m pre_commit install - # Install py format tools for usage during the development. - echo "Install black formatter" - python3 -m pip install black==23.1.* - echo "Install isort" - python3 -m pip install isort==5.12.* +if [[ "$ci" == "false" ]]; then + if [ "${DISTRO}" != "centos-7" ] && [ "$DISTRO" != "debian-10" ] && [ "${DISTRO}" != "ubuntu-18.04" ] && [ "${DISTRO}" != "amzn-2" ]; then + python3 -m pip install pre-commit + python3 -m pre_commit install + # Install py format tools for usage during the development. 
+ echo "Install black formatter" + python3 -m pip install black==23.1.* + echo "Install isort" + python3 -m pip install isort==5.12.* + fi fi # Link `include/mgp.py` with `release/mgp/mgp.py` diff --git a/libs/CMakeLists.txt b/libs/CMakeLists.txt index 7d568d548..ab6a313f1 100644 --- a/libs/CMakeLists.txt +++ b/libs/CMakeLists.txt @@ -295,6 +295,32 @@ set_path_external_library(jemalloc STATIC import_header_library(rangev3 ${CMAKE_CURRENT_SOURCE_DIR}/rangev3/include) +ExternalProject_Add(mgcxx-proj + PREFIX mgcxx-proj + GIT_REPOSITORY https://github.com/memgraph/mgcxx + GIT_TAG "v0.0.4" + CMAKE_ARGS + "-DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>" + "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}" + "-DENABLE_TESTS=OFF" + INSTALL_DIR "${PROJECT_BINARY_DIR}/mgcxx" +) +ExternalProject_Get_Property(mgcxx-proj install_dir) +set(MGCXX_ROOT ${install_dir}) + +add_library(tantivy_text_search STATIC IMPORTED GLOBAL) +add_dependencies(tantivy_text_search mgcxx-proj) +set_property(TARGET tantivy_text_search PROPERTY IMPORTED_LOCATION ${MGCXX_ROOT}/lib/libtantivy_text_search.a) + +add_library(mgcxx_text_search STATIC IMPORTED GLOBAL) +add_dependencies(mgcxx_text_search mgcxx-proj) +set_property(TARGET mgcxx_text_search PROPERTY IMPORTED_LOCATION ${MGCXX_ROOT}/lib/libmgcxx_text_search.a) +# We need to create the include directory first in order to be able to add it +# as an include directory. The header files in the include directory will be +# generated later during the build process. 
+file(MAKE_DIRECTORY ${MGCXX_ROOT}/include) +set_property(TARGET mgcxx_text_search PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${MGCXX_ROOT}/include) + # Setup NuRaft import_external_library(nuraft STATIC ${CMAKE_CURRENT_SOURCE_DIR}/nuraft/lib/libnuraft.a diff --git a/libs/setup.sh b/libs/setup.sh index 9c2a38c47..e23c5efef 100755 --- a/libs/setup.sh +++ b/libs/setup.sh @@ -127,6 +127,7 @@ declare -A primary_urls=( ["jemalloc"]="http://$local_cache_host/git/jemalloc.git" ["range-v3"]="http://$local_cache_host/git/range-v3.git" ["nuraft"]="http://$local_cache_host/git/NuRaft.git" + ["asio"]="http://$local_cache_host/git/asio.git" ) # The goal of secondary urls is to have links to the "source of truth" of @@ -157,6 +158,7 @@ declare -A secondary_urls=( ["jemalloc"]="https://github.com/jemalloc/jemalloc.git" ["range-v3"]="https://github.com/ericniebler/range-v3.git" ["nuraft"]="https://github.com/eBay/NuRaft.git" + ["asio"]="https://github.com/chriskohlhoff/asio.git" ) # antlr @@ -266,13 +268,13 @@ repo_clone_try_double "${primary_urls[jemalloc]}" "${secondary_urls[jemalloc]}" pushd jemalloc ./autogen.sh -MALLOC_CONF="retain:false,percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000" \ +MALLOC_CONF="background_thread:true,retain:false,percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000" \ ./configure \ --disable-cxx \ --with-lg-page=12 \ --with-lg-hugepage=21 \ --enable-shared=no --prefix=$working_dir \ - --with-malloc-conf="retain:false,percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000" + --with-malloc-conf="background_thread:true,retain:false,percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000" make -j$CPUS install popd @@ -286,5 +288,7 @@ nuraft_tag="v2.1.0" repo_clone_try_double "${primary_urls[nuraft]}" "${secondary_urls[nuraft]}" "nuraft" "$nuraft_tag" true pushd nuraft git apply ../nuraft2.1.0.patch +asio_tag="asio-1-29-0" +repo_clone_try_double 
"${primary_urls[asio]}" "${secondary_urls[asio]}" "asio" "$asio_tag" true ./prepare.sh popd diff --git a/query_modules/CMakeLists.txt b/query_modules/CMakeLists.txt index 41dbb495c..1336f3eb0 100644 --- a/query_modules/CMakeLists.txt +++ b/query_modules/CMakeLists.txt @@ -6,6 +6,8 @@ project(memgraph_query_modules) disallow_in_source_build() +find_package(fmt REQUIRED) + # Everything that is installed here, should be under the "query_modules" component. set(CMAKE_INSTALL_DEFAULT_COMPONENT_NAME "query_modules") string(TOLOWER ${CMAKE_BUILD_TYPE} lower_build_type) @@ -58,6 +60,22 @@ install(PROGRAMS $<TARGET_FILE:schema> # Also install the source of the example, so user can read it. install(FILES schema.cpp DESTINATION lib/memgraph/query_modules/src) +add_library(text SHARED text_search_module.cpp) +target_include_directories(text PRIVATE ${CMAKE_SOURCE_DIR}/include) +target_compile_options(text PRIVATE -Wall) +target_link_libraries(text PRIVATE -static-libgcc -static-libstdc++ fmt::fmt) +# Strip C++ example in release build. +if (lower_build_type STREQUAL "release") + add_custom_command(TARGET text POST_BUILD + COMMAND strip -s $<TARGET_FILE:text> + COMMENT "Stripping symbols and sections from the C++ text_search module") +endif() +install(PROGRAMS $<TARGET_FILE:text> + DESTINATION lib/memgraph/query_modules + RENAME text.so) +# Also install the source of the example, so user can read it. +install(FILES text_search_module.cpp DESTINATION lib/memgraph/query_modules/src) + # Install the Python example and modules install(FILES example.py DESTINATION lib/memgraph/query_modules RENAME py_example.py) install(FILES graph_analyzer.py DESTINATION lib/memgraph/query_modules) diff --git a/query_modules/text_search_module.cpp b/query_modules/text_search_module.cpp new file mode 100644 index 000000000..8e4405058 --- /dev/null +++ b/query_modules/text_search_module.cpp @@ -0,0 +1,149 @@ +// Copyright 2024 Memgraph Ltd. 
+// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#include <string> +#include <string_view> + +#include <fmt/format.h> + +#include <mgp.hpp> + +namespace TextSearch { +constexpr std::string_view kProcedureSearch = "search"; +constexpr std::string_view kProcedureRegexSearch = "regex_search"; +constexpr std::string_view kProcedureSearchAllProperties = "search_all"; +constexpr std::string_view kProcedureAggregate = "aggregate"; +constexpr std::string_view kParameterIndexName = "index_name"; +constexpr std::string_view kParameterSearchQuery = "search_query"; +constexpr std::string_view kParameterAggregationQuery = "aggregation_query"; +constexpr std::string_view kReturnNode = "node"; +constexpr std::string_view kReturnAggregation = "aggregation"; +const std::string kSearchAllPrefix = "all"; + +void Search(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory); +void RegexSearch(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory); +void SearchAllProperties(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory); +void Aggregate(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory); +} // namespace TextSearch + +void TextSearch::Search(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory) { + mgp::MemoryDispatcherGuard guard{memory}; + const auto record_factory = mgp::RecordFactory(result); + auto arguments = mgp::List(args); + + try { + const auto *index_name = 
arguments[0].ValueString().data(); + const auto *search_query = arguments[1].ValueString().data(); + for (const auto &node : + mgp::SearchTextIndex(memgraph_graph, index_name, search_query, text_search_mode::SPECIFIED_PROPERTIES)) { + auto record = record_factory.NewRecord(); + record.Insert(TextSearch::kReturnNode.data(), node.ValueNode()); + } + } catch (const std::exception &e) { + record_factory.SetErrorMessage(e.what()); + } +} + +void TextSearch::RegexSearch(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory) { + mgp::MemoryDispatcherGuard guard{memory}; + const auto record_factory = mgp::RecordFactory(result); + auto arguments = mgp::List(args); + + try { + const auto *index_name = arguments[0].ValueString().data(); + const auto *search_query = arguments[1].ValueString().data(); + for (const auto &node : mgp::SearchTextIndex(memgraph_graph, index_name, search_query, text_search_mode::REGEX)) { + auto record = record_factory.NewRecord(); + record.Insert(TextSearch::kReturnNode.data(), node.ValueNode()); + } + } catch (const std::exception &e) { + record_factory.SetErrorMessage(e.what()); + } +} + +void TextSearch::SearchAllProperties(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, + mgp_memory *memory) { + mgp::MemoryDispatcherGuard guard{memory}; + const auto record_factory = mgp::RecordFactory(result); + auto arguments = mgp::List(args); + + try { + const auto *index_name = arguments[0].ValueString().data(); + const auto search_query = fmt::format("{}:{}", kSearchAllPrefix, arguments[1].ValueString()); // owned std::string: a pointer into the fmt::format temporary would dangle + for (const auto &node : + mgp::SearchTextIndex(memgraph_graph, index_name, search_query, text_search_mode::ALL_PROPERTIES)) { + auto record = record_factory.NewRecord(); + record.Insert(TextSearch::kReturnNode.data(), node.ValueNode()); + } + } catch (const std::exception &e) { + record_factory.SetErrorMessage(e.what()); + } +} + +void TextSearch::Aggregate(mgp_list *args, mgp_graph *memgraph_graph, 
mgp_result *result, mgp_memory *memory) { + mgp::MemoryDispatcherGuard guard{memory}; + const auto record_factory = mgp::RecordFactory(result); + auto arguments = mgp::List(args); + + try { + const auto *index_name = arguments[0].ValueString().data(); + const auto *search_query = arguments[1].ValueString().data(); + const auto *aggregation_query = arguments[2].ValueString().data(); + const auto aggregation_result = + mgp::AggregateOverTextIndex(memgraph_graph, index_name, search_query, aggregation_query); + auto record = record_factory.NewRecord(); + record.Insert(TextSearch::kReturnAggregation.data(), aggregation_result.data()); + } catch (const std::exception &e) { + record_factory.SetErrorMessage(e.what()); + } +} + +extern "C" int mgp_init_module(struct mgp_module *module, struct mgp_memory *memory) { + try { + mgp::MemoryDispatcherGuard guard{memory}; + + AddProcedure(TextSearch::Search, TextSearch::kProcedureSearch, mgp::ProcedureType::Read, + { + mgp::Parameter(TextSearch::kParameterIndexName, mgp::Type::String), + mgp::Parameter(TextSearch::kParameterSearchQuery, mgp::Type::String), + }, + {mgp::Return(TextSearch::kReturnNode, mgp::Type::Node)}, module, memory); + + AddProcedure(TextSearch::RegexSearch, TextSearch::kProcedureRegexSearch, mgp::ProcedureType::Read, + { + mgp::Parameter(TextSearch::kParameterIndexName, mgp::Type::String), + mgp::Parameter(TextSearch::kParameterSearchQuery, mgp::Type::String), + }, + {mgp::Return(TextSearch::kReturnNode, mgp::Type::Node)}, module, memory); + + AddProcedure(TextSearch::SearchAllProperties, TextSearch::kProcedureSearchAllProperties, mgp::ProcedureType::Read, + { + mgp::Parameter(TextSearch::kParameterIndexName, mgp::Type::String), + mgp::Parameter(TextSearch::kParameterSearchQuery, mgp::Type::String), + }, + {mgp::Return(TextSearch::kReturnNode, mgp::Type::Node)}, module, memory); + + AddProcedure(TextSearch::Aggregate, TextSearch::kProcedureAggregate, mgp::ProcedureType::Read, + { + 
mgp::Parameter(TextSearch::kParameterIndexName, mgp::Type::String), + mgp::Parameter(TextSearch::kParameterSearchQuery, mgp::Type::String), + mgp::Parameter(TextSearch::kParameterAggregationQuery, mgp::Type::String), + }, + {mgp::Return(TextSearch::kReturnAggregation, mgp::Type::String)}, module, memory); + } catch (const std::exception &e) { + std::cerr << "Error while initializing query module: " << e.what() << std::endl; + return 1; + } + + return 0; +} + +extern "C" int mgp_shutdown_module() { return 0; } diff --git a/release/package/amd-builders-v4.yml b/release/package/amd-builders-v4.yml new file mode 100644 index 000000000..2e3d4c5d2 --- /dev/null +++ b/release/package/amd-builders-v4.yml @@ -0,0 +1,73 @@ +version: "3" +services: + mgbuild_v4_amzn-2: + image: "memgraph/mgbuild:v4_amzn-2" + build: + context: amzn-2 + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_amzn-2" + + mgbuild_v4_centos-7: + image: "memgraph/mgbuild:v4_centos-7" + build: + context: centos-7 + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_centos-7" + + mgbuild_v4_centos-9: + image: "memgraph/mgbuild:v4_centos-9" + build: + context: centos-9 + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_centos-9" + + mgbuild_v4_debian-10: + image: "memgraph/mgbuild:v4_debian-10" + build: + context: debian-10 + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_debian-10" + + mgbuild_v4_debian-11: + image: "memgraph/mgbuild:v4_debian-11" + build: + context: debian-11 + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_debian-11" + + mgbuild_v4_fedora-36: + image: "memgraph/mgbuild:v4_fedora-36" + build: + context: fedora-36 + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_fedora-36" + + mgbuild_v4_ubuntu-18.04: + image: "memgraph/mgbuild:v4_ubuntu-18.04" + build: + context: ubuntu-18.04 + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_ubuntu-18.04" + + mgbuild_v4_ubuntu-20.04: + image: 
"memgraph/mgbuild:v4_ubuntu-20.04" + build: + context: ubuntu-20.04 + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_ubuntu-20.04" + + mgbuild_v4_ubuntu-22.04: + image: "memgraph/mgbuild:v4_ubuntu-22.04" + build: + context: ubuntu-22.04 + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_ubuntu-22.04" diff --git a/release/package/amd-builders-v5.yml b/release/package/amd-builders-v5.yml new file mode 100644 index 000000000..d37bc6092 --- /dev/null +++ b/release/package/amd-builders-v5.yml @@ -0,0 +1,81 @@ +version: "3" +services: + mgbuild_v5_amzn-2: + image: "memgraph/mgbuild:v5_amzn-2" + build: + context: amzn-2 + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_amzn-2" + + mgbuild_v5_centos-7: + image: "memgraph/mgbuild:v5_centos-7" + build: + context: centos-7 + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_centos-7" + + mgbuild_v5_centos-9: + image: "memgraph/mgbuild:v5_centos-9" + build: + context: centos-9 + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_centos-9" + + mgbuild_v5_debian-11: + image: "memgraph/mgbuild:v5_debian-11" + build: + context: debian-11 + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_debian-11" + + mgbuild_v5_debian-12: + image: "memgraph/mgbuild:v5_debian-12" + build: + context: debian-12 + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_debian-12" + + mgbuild_v5_fedora-38: + image: "memgraph/mgbuild:v5_fedora-38" + build: + context: fedora-38 + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_fedora-38" + + mgbuild_v5_fedora-39: + image: "memgraph/mgbuild:v5_fedora-39" + build: + context: fedora-39 + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_fedora-39" + + mgbuild_v5_rocky-9.3: + image: "memgraph/mgbuild:v5_rocky-9.3" + build: + context: rocky-9.3 + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_rocky-9.3" + + mgbuild_v5_ubuntu-20.04: + image: "memgraph/mgbuild:v5_ubuntu-20.04" + build: 
+ context: ubuntu-20.04 + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_ubuntu-20.04" + + mgbuild_v5_ubuntu-22.04: + image: "memgraph/mgbuild:v5_ubuntu-22.04" + build: + context: ubuntu-22.04 + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_ubuntu-22.04" diff --git a/release/package/amzn-2/Dockerfile b/release/package/amzn-2/Dockerfile index ba3ad34b9..80fdb9893 100644 --- a/release/package/amzn-2/Dockerfile +++ b/release/package/amzn-2/Dockerfile @@ -7,9 +7,34 @@ RUN yum -y update \ # Do NOT be smart here and clean the cache because the container is used in the # stateful context. -RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-amzn-2-x86_64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-amzn-2-x86_64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-amzn-2-x86_64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-amzn-2-x86_64.tar.gz +# Download and install toolchain +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-amzn-2-x86_64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-amzn-2-x86_64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-amzn-2-x86_64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-amzn-2-x86_64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/amzn-2.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/amzn-2.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash + +# Install PyYAML (only for amzn-2, centos-7, centos-9 and rocky-9) +RUN pip3 install --user PyYAML ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/arm-builders-v4.yml b/release/package/arm-builders-v4.yml new file mode 100644 index 000000000..18cd5c4e9 --- /dev/null +++ b/release/package/arm-builders-v4.yml @@ -0,0 +1,18 @@ +version: "3" + +services: + mgbuild_v4_debian-11-arm: + image: "memgraph/mgbuild:v4_debian-11-arm" + build: + context: debian-11-arm + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_debian-11-arm" + + mgbuild_v4_ubuntu_v4_22.04-arm: + image: "memgraph/mgbuild:v4_ubuntu-22.04-arm" + build: + context: ubuntu-22.04-arm + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_ubuntu-22.04-arm" diff --git a/release/package/arm-builders-v5.yml b/release/package/arm-builders-v5.yml new file mode 100644 index 000000000..c276a49a8 --- /dev/null +++ b/release/package/arm-builders-v5.yml @@ -0,0 +1,18 @@ +version: "3" + +services: + debian-12-arm: + image: "memgraph/mgbuild:v5_debian-12-arm" + build: + context: debian-12-arm + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_debian-12-arm" + + ubuntu-22.04-arm: + image: "memgraph/mgbuild:v5_ubuntu-22.04-arm" + build: + context: ubuntu-22.04-arm + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_ubuntu-22.04-arm" diff --git a/release/package/arm-builders.yml b/release/package/arm-builders.yml deleted file mode 100644 index d52f3bb26..000000000 --- a/release/package/arm-builders.yml +++ /dev/null @@ -1,11 +0,0 @@ -version: "3" - 
-services: - debian-11-arm: - build: - context: debian-11-arm - container_name: "mgbuild_debian-11-arm" - ubuntu-2204-arm: - build: - context: ubuntu-22.04-arm - container_name: "mgbuild_ubuntu-22.04-arm" diff --git a/release/package/centos-7/Dockerfile b/release/package/centos-7/Dockerfile index ff386c551..d34dfee77 100644 --- a/release/package/centos-7/Dockerfile +++ b/release/package/centos-7/Dockerfile @@ -7,9 +7,33 @@ RUN yum -y update \ # Do NOT be smart here and clean the cache because the container is used in the # stateful context. -RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-centos-7-x86_64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-centos-7-x86_64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-centos-7-x86_64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-centos-7-x86_64.tar.gz +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-centos-7-x86_64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-centos-7-x86_64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-centos-7-x86_64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-centos-7-x86_64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/centos-7.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/centos-7.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash + +# Install PyYAML (only for amzn-2, centos-7, centos-9 and rocky-9) +RUN pip3 install --user PyYAML ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/centos-9/Dockerfile b/release/package/centos-9/Dockerfile index 1c4c9541e..7c559a932 100644 --- a/release/package/centos-9/Dockerfile +++ b/release/package/centos-9/Dockerfile @@ -7,9 +7,33 @@ RUN yum -y update \ # Do NOT be smart here and clean the cache because the container is used in the # stateful context. -RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-centos-9-x86_64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-centos-9-x86_64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-centos-9-x86_64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-centos-9-x86_64.tar.gz +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-centos-9-x86_64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-centos-9-x86_64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-centos-9-x86_64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-centos-9-x86_64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/centos-9.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/centos-9.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash + +# Install PyYAML (only for amzn-2, centos-7, centos-9 and rocky-9) +RUN pip3 install --user PyYAML ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/debian-10/Dockerfile b/release/package/debian-10/Dockerfile index abd4a7f0e..a3ef12cc6 100644 --- a/release/package/debian-10/Dockerfile +++ b/release/package/debian-10/Dockerfile @@ -10,9 +10,30 @@ RUN apt update && apt install -y \ # Do NOT be smart here and clean the cache because the container is used in the # stateful context. -RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-debian-10-amd64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-debian-10-amd64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-debian-10-amd64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-debian-10-amd64.tar.gz +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-debian-10-amd64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-debian-10-amd64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-debian-10-amd64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-debian-10-amd64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/debian-10.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/debian-10.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/debian-11-arm/Dockerfile b/release/package/debian-11-arm/Dockerfile index 5040d8041..9b3aa3c8c 100644 --- a/release/package/debian-11-arm/Dockerfile +++ b/release/package/debian-11-arm/Dockerfile @@ -10,9 +10,30 @@ RUN apt update && apt install -y \ # Do NOT be smart here and clean the cache because the container is used in the # stateful context. -RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-debian-11-arm64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-debian-11-arm64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-debian-11-arm64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-debian-11-arm64.tar.gz +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-debian-11-arm64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-debian-11-arm64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-debian-11-arm64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-debian-11-arm64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/debian-11-arm.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/debian-11-arm.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/debian-11/Dockerfile b/release/package/debian-11/Dockerfile index cf47f253e..62070eccf 100644 --- a/release/package/debian-11/Dockerfile +++ b/release/package/debian-11/Dockerfile @@ -10,9 +10,30 @@ RUN apt update && apt install -y \ # Do NOT be smart here and clean the cache because the container is used in the # stateful context. -RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-debian-11-amd64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-debian-11-amd64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-debian-11-amd64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-debian-11-amd64.tar.gz +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-debian-11-amd64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-debian-11-amd64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-debian-11-amd64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-debian-11-amd64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/debian-11.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/debian-11.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/debian-12-arm/Dockerfile b/release/package/debian-12-arm/Dockerfile new file mode 100644 index 000000000..6d4200391 --- /dev/null +++ b/release/package/debian-12-arm/Dockerfile @@ -0,0 +1,39 @@ +FROM debian:12 + +ARG TOOLCHAIN_VERSION + +# Stops tzdata interactive configuration. +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt update && apt install -y \ + ca-certificates wget git +# Do NOT be smart here and clean the cache because the container is used in the +# stateful context. + +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-debian-12-arm64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-debian-12-arm64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-debian-12-arm64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-debian-12-arm64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/debian-12-arm.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/debian-12-arm.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash + +ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/debian-12/Dockerfile b/release/package/debian-12/Dockerfile new file mode 100644 index 000000000..c35640eb3 --- /dev/null +++ b/release/package/debian-12/Dockerfile @@ -0,0 +1,39 @@ +FROM debian:12 + +ARG TOOLCHAIN_VERSION + +# Stops tzdata interactive configuration. +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt update && apt install -y \ + ca-certificates wget git +# Do NOT be smart here and clean the cache because the container is used in the +# stateful context. + +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-debian-12-amd64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-debian-12-amd64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-debian-12-amd64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-debian-12-amd64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/debian-12.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/debian-12.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash + +ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/docker-compose.yml b/release/package/docker-compose.yml deleted file mode 100644 index 4da0526ba..000000000 --- a/release/package/docker-compose.yml +++ /dev/null @@ -1,38 +0,0 @@ -version: "3" -services: - mgbuild_centos-7: - build: - context: centos-7 - container_name: "mgbuild_centos-7" - mgbuild_centos-9: - build: - context: centos-9 - container_name: "mgbuild_centos-9" - mgbuild_debian-10: - build: - context: debian-10 - container_name: "mgbuild_debian-10" - mgbuild_debian-11: - build: - context: debian-11 - container_name: "mgbuild_debian-11" - mgbuild_ubuntu-18.04: - build: - context: ubuntu-18.04 - container_name: "mgbuild_ubuntu-18.04" - mgbuild_ubuntu-20.04: - build: - context: ubuntu-20.04 - container_name: "mgbuild_ubuntu-20.04" - mgbuild_ubuntu-22.04: - build: - context: ubuntu-22.04 - container_name: "mgbuild_ubuntu-22.04" - mgbuild_fedora-36: - build: - context: fedora-36 - container_name: "mgbuild_fedora-36" - mgbuild_amzn-2: - build: - context: amzn-2 - container_name: "mgbuild_amzn-2" diff --git a/release/package/fedora-36/Dockerfile b/release/package/fedora-36/Dockerfile index f84af132f..954a5b2bf 100644 --- a/release/package/fedora-36/Dockerfile +++ b/release/package/fedora-36/Dockerfile @@ -8,9 +8,30 @@ RUN yum -y update \ # Do NOT be smart here and clean the cache because the container is used in the # stateful context. 
-RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-fedora-36-x86_64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-fedora-36-x86_64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-fedora-36-x86_64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-fedora-36-x86_64.tar.gz +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-36-x86_64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-36-x86_64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-36-x86_64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-36-x86_64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/fedora-36.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/fedora-36.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. && rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/fedora-38/Dockerfile b/release/package/fedora-38/Dockerfile new file mode 100644 index 000000000..cf7454ae4 --- /dev/null +++ b/release/package/fedora-38/Dockerfile @@ -0,0 +1,37 @@ +FROM fedora:38 + +ARG TOOLCHAIN_VERSION + +# Stops tzdata interactive configuration. +RUN yum -y update \ + && yum install -y wget git +# Do NOT be smart here and clean the cache because the container is used in the +# stateful context. 
+ +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-38-amd64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-38-amd64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-38-amd64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-38-amd64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/fedora-38.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/fedora-38.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. && rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash + +ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/fedora-39/Dockerfile b/release/package/fedora-39/Dockerfile new file mode 100644 index 000000000..263017a3d --- /dev/null +++ b/release/package/fedora-39/Dockerfile @@ -0,0 +1,37 @@ +FROM fedora:39 + +ARG TOOLCHAIN_VERSION + +# Stops tzdata interactive configuration. +RUN yum -y update \ + && yum install -y wget git +# Do NOT be smart here and clean the cache because the container is used in the +# stateful context. 
+ +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-39-amd64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-39-amd64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-39-amd64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-39-amd64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/fedora-39.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/fedora-39.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. && rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash + +ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/mgbuild.sh b/release/package/mgbuild.sh new file mode 100755 index 000000000..e24776f60 --- /dev/null +++ b/release/package/mgbuild.sh @@ -0,0 +1,667 @@ +#!/bin/bash +set -Eeuo pipefail +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +SCRIPT_NAME=${0##*/} +PROJECT_ROOT="$SCRIPT_DIR/../.." 
+MGBUILD_HOME_DIR="/home/mg" +MGBUILD_ROOT_DIR="$MGBUILD_HOME_DIR/memgraph" + +DEFAULT_TOOLCHAIN="v5" +SUPPORTED_TOOLCHAINS=( + v4 v5 +) +DEFAULT_OS="all" +SUPPORTED_OS=( + all + amzn-2 + centos-7 centos-9 + debian-10 debian-11 debian-11-arm debian-12 debian-12-arm + fedora-36 fedora-38 fedora-39 + rocky-9.3 + ubuntu-18.04 ubuntu-20.04 ubuntu-22.04 ubuntu-22.04-arm +) +SUPPORTED_OS_V4=( + amzn-2 + centos-7 centos-9 + debian-10 debian-11 debian-11-arm + fedora-36 + ubuntu-18.04 ubuntu-20.04 ubuntu-22.04 ubuntu-22.04-arm +) +SUPPORTED_OS_V5=( + amzn-2 + centos-7 centos-9 + debian-11 debian-11-arm debian-12 debian-12-arm + fedora-38 fedora-39 + rocky-9.3 + ubuntu-20.04 ubuntu-22.04 ubuntu-22.04-arm +) +DEFAULT_BUILD_TYPE="Release" +SUPPORTED_BUILD_TYPES=( + Debug + Release + RelWithDebInfo +) +DEFAULT_ARCH="amd" +SUPPORTED_ARCHS=( + amd + arm +) +SUPPORTED_TESTS=( + clang-tidy cppcheck-and-clang-format code-analysis + code-coverage drivers durability e2e gql-behave + integration leftover-CTest macro-benchmark + mgbench stress-plain stress-ssl + unit unit-coverage upload-to-bench-graph + +) +DEFAULT_THREADS=0 +DEFAULT_ENTERPRISE_LICENSE="" +DEFAULT_ORGANIZATION_NAME="memgraph" + +print_help () { + echo -e "\nUsage: $SCRIPT_NAME [GLOBAL OPTIONS] COMMAND [COMMAND OPTIONS]" + echo -e "\nInteract with mgbuild containers" + + echo -e "\nCommands:" + echo -e " build Build mgbuild image" + echo -e " build-memgraph [OPTIONS] Build memgraph binary inside mgbuild container" + echo -e " copy OPTIONS Copy an artifact from mgbuild container to host" + echo -e " package-memgraph Create memgraph package from built binary inside mgbuild container" + echo -e " pull Pull mgbuild image from dockerhub" + echo -e " push [OPTIONS] Push mgbuild image to dockerhub" + echo -e " run [OPTIONS] Run mgbuild container" + echo -e " stop [OPTIONS] Stop mgbuild container" + echo -e " test-memgraph TEST Run a selected test TEST (see supported tests below) inside mgbuild container" + + echo -e 
"\nSupported tests:" + echo -e " \"${SUPPORTED_TESTS[*]}\"" + + echo -e "\nGlobal options:" + echo -e " --arch string Specify target architecture (\"${SUPPORTED_ARCHS[*]}\") (default \"$DEFAULT_ARCH\")" + echo -e " --build-type string Specify build type (\"${SUPPORTED_BUILD_TYPES[*]}\") (default \"$DEFAULT_BUILD_TYPE\")" + echo -e " --enterprise-license string Specify the enterprise license (default \"\")" + echo -e " --organization-name string Specify the organization name (default \"memgraph\")" + echo -e " --os string Specify operating system (\"${SUPPORTED_OS[*]}\") (default \"$DEFAULT_OS\")" + echo -e " --threads int Specify the number of threads a command will use (default \"\$(nproc)\" for container)" + echo -e " --toolchain string Specify toolchain version (\"${SUPPORTED_TOOLCHAINS[*]}\") (default \"$DEFAULT_TOOLCHAIN\")" + + echo -e "\nbuild-memgraph options:" + echo -e " --asan Build with ASAN" + echo -e " --community Build community version" + echo -e " --coverage Build with code coverage" + echo -e " --for-docker Add flag -DMG_TELEMETRY_ID_OVERRIDE=DOCKER to cmake" + echo -e " --for-platform Add flag -DMG_TELEMETRY_ID_OVERRIDE=DOCKER-PLATFORM to cmake" + echo -e " --init-only Only run init script" + echo -e " --no-copy Don't copy the memgraph repo from host." 
+ echo -e " Use this option with caution, be sure that memgraph source code is in correct location inside mgbuild container" + echo -e " --ubsan Build with UBSAN" + + echo -e "\ncopy options:" + echo -e " --binary Copy memgraph binary from mgbuild container to host" + echo -e " --build-logs Copy build logs from mgbuild container to host" + echo -e " --package Copy memgraph package from mgbuild container to host" + + echo -e "\npush options:" + echo -e " -p, --password string Specify password for docker login" + echo -e " -u, --username string Specify username for docker login" + + echo -e "\nrun options:" + echo -e " --pull Pull the mgbuild image before running" + + echo -e "\nstop options:" + echo -e " --remove Remove the stopped mgbuild container" + + echo -e "\nToolchain v4 supported OSs:" + echo -e " \"${SUPPORTED_OS_V4[*]}\"" + + echo -e "\nToolchain v5 supported OSs:" + echo -e " \"${SUPPORTED_OS_V5[*]}\"" + + echo -e "\nExample usage:" + echo -e " $SCRIPT_NAME --os debian-12 --toolchain v5 --arch amd run" + echo -e " $SCRIPT_NAME --os debian-12 --toolchain v5 --arch amd --build-type RelWithDebInfo build-memgraph --community" + echo -e " $SCRIPT_NAME --os debian-12 --toolchain v5 --arch amd --build-type RelWithDebInfo test-memgraph unit" + echo -e " $SCRIPT_NAME --os debian-12 --toolchain v5 --arch amd package" + echo -e " $SCRIPT_NAME --os debian-12 --toolchain v5 --arch amd copy --package" + echo -e " $SCRIPT_NAME --os debian-12 --toolchain v5 --arch amd stop --remove" +} + +check_support() { + local is_supported=false + case "$1" in + arch) + for e in "${SUPPORTED_ARCHS[@]}"; do + if [[ "$e" == "$2" ]]; then + is_supported=true + break + fi + done + if [[ "$is_supported" == false ]]; then + echo -e "Error: Architecture $2 isn't supported!\nChoose from ${SUPPORTED_ARCHS[*]}" + exit 1 + fi + ;; + build_type) + for e in "${SUPPORTED_BUILD_TYPES[@]}"; do + if [[ "$e" == "$2" ]]; then + is_supported=true + break + fi + done + if [[ "$is_supported" == false ]]; 
then + echo -e "Error: Build type $2 isn't supported!\nChoose from ${SUPPORTED_BUILD_TYPES[*]}" + exit 1 + fi + ;; + os) + for e in "${SUPPORTED_OS[@]}"; do + if [[ "$e" == "$2" ]]; then + is_supported=true + break + fi + done + if [[ "$is_supported" == false ]]; then + echo -e "Error: OS $2 isn't supported!\nChoose from ${SUPPORTED_OS[*]}" + exit 1 + fi + ;; + toolchain) + for e in "${SUPPORTED_TOOLCHAINS[@]}"; do + if [[ "$e" == "$2" ]]; then + is_supported=true + break + fi + done + if [[ "$is_supported" == false ]]; then + echo -e "TError: oolchain version $2 isn't supported!\nChoose from ${SUPPORTED_TOOLCHAINS[*]}" + exit 1 + fi + ;; + os_toolchain_combo) + if [[ "$3" == "v4" ]]; then + local SUPPORTED_OS_TOOLCHAIN=("${SUPPORTED_OS_V4[@]}") + elif [[ "$3" == "v5" ]]; then + local SUPPORTED_OS_TOOLCHAIN=("${SUPPORTED_OS_V5[@]}") + else + echo -e "Error: $3 isn't a supported toolchain_version!\nChoose from ${SUPPORTED_TOOLCHAINS[*]}" + exit 1 + fi + for e in "${SUPPORTED_OS_TOOLCHAIN[@]}"; do + if [[ "$e" == "$2" ]]; then + is_supported=true + break + fi + done + if [[ "$is_supported" == false ]]; then + echo -e "Error: Toolchain version $3 doesn't support OS $2!\nChoose from ${SUPPORTED_OS_TOOLCHAIN[*]}" + exit 1 + fi + ;; + *) + echo -e "Error: This function can only check arch, build_type, os, toolchain version and os toolchain combination" + exit 1 + ;; + esac +} + + +################################################## +######## BUILD, COPY AND PACKAGE MEMGRAPH ######## +################################################## +build_memgraph () { + local build_container="mgbuild_${toolchain_version}_${os}" + local ACTIVATE_TOOLCHAIN="source /opt/toolchain-${toolchain_version}/activate" + local ACTIVATE_CARGO="source $MGBUILD_HOME_DIR/.cargo/env" + local container_build_dir="$MGBUILD_ROOT_DIR/build" + local container_output_dir="$container_build_dir/output" + local arm_flag="" + if [[ "$arch" == "arm" ]] || [[ "$os" =~ "-arm" ]]; then + arm_flag="-DMG_ARCH="ARM64"" 
+ fi + local build_type_flag="-DCMAKE_BUILD_TYPE=$build_type" + local telemetry_id_override_flag="" + local community_flag="" + local coverage_flag="" + local asan_flag="" + local ubsan_flag="" + local init_only=false + local for_docker=false + local for_platform=false + local copy_from_host=true + while [[ "$#" -gt 0 ]]; do + case "$1" in + --community) + community_flag="-DMG_ENTERPRISE=OFF" + shift 1 + ;; + --init-only) + init_only=true + shift 1 + ;; + --for-docker) + for_docker=true + if [[ "$for_platform" == "true" ]]; then + echo "Error: Cannot combine --for-docker and --for-platform flags" + exit 1 + fi + telemetry_id_override_flag=" -DMG_TELEMETRY_ID_OVERRIDE=DOCKER " + shift 1 + ;; + --for-platform) + for_platform=true + if [[ "$for_docker" == "true" ]]; then + echo "Error: Cannot combine --for-docker and --for-platform flags" + exit 1 + fi + telemetry_id_override_flag=" -DMG_TELEMETRY_ID_OVERRIDE=DOCKER-PLATFORM " + shift 1 + ;; + --coverage) + coverage_flag="-DTEST_COVERAGE=ON" + shift 1 + ;; + --asan) + asan_flag="-DASAN=ON" + shift 1 + ;; + --ubsan) + ubsan_flag="-DUBSAN=ON" + shift 1 + ;; + --no-copy) + copy_from_host=false + shift 1 + ;; + *) + echo "Error: Unknown flag '$1'" + exit 1 + ;; + esac + done + + echo "Initializing deps ..." + # If master is not the current branch, fetch it, because the get_version + # script depends on it. If we are on master, the fetch command is going to + # fail so that's why there is the explicit check. + # Required here because Docker build container can't access remote. + cd "$PROJECT_ROOT" + if [[ "$(git rev-parse --abbrev-ref HEAD)" != "master" ]]; then + git fetch origin master:master + fi + + if [[ "$copy_from_host" == "true" ]]; then + # Ensure we have a clean build directory + docker exec -u mg "$build_container" bash -c "rm -rf $MGBUILD_ROOT_DIR && mkdir -p $MGBUILD_ROOT_DIR" + echo "Copying project files..." + docker cp "$PROJECT_ROOT/." 
"$build_container:$MGBUILD_ROOT_DIR/" + fi + # Change ownership of copied files so the mg user inside container can access them + docker exec -u root $build_container bash -c "chown -R mg:mg $MGBUILD_ROOT_DIR" + + echo "Installing dependencies using '/memgraph/environment/os/$os.sh' script..." + docker exec -u root "$build_container" bash -c "$MGBUILD_ROOT_DIR/environment/os/$os.sh check TOOLCHAIN_RUN_DEPS || /environment/os/$os.sh install TOOLCHAIN_RUN_DEPS" + docker exec -u root "$build_container" bash -c "$MGBUILD_ROOT_DIR/environment/os/$os.sh check MEMGRAPH_BUILD_DEPS || /environment/os/$os.sh install MEMGRAPH_BUILD_DEPS" + + echo "Building targeted package..." + # Fix issue with git marking directory as not safe + docker exec -u mg "$build_container" bash -c "cd $MGBUILD_ROOT_DIR && git config --global --add safe.directory '*'" + docker exec -u mg "$build_container" bash -c "cd $MGBUILD_ROOT_DIR && $ACTIVATE_TOOLCHAIN && ./init --ci" + if [[ "$init_only" == "true" ]]; then + return + fi + + echo "Building Memgraph for $os on $build_container..." + docker exec -u mg "$build_container" bash -c "cd $container_build_dir && rm -rf ./*" + # Fix cmake failing locally if remote is clone via ssh + docker exec -u mg "$build_container" bash -c "cd $MGBUILD_ROOT_DIR && git remote set-url origin https://github.com/memgraph/memgraph.git" + + # Define cmake command + local cmake_cmd="cmake $build_type_flag $arm_flag $community_flag $telemetry_id_override_flag $coverage_flag $asan_flag $ubsan_flag .." + docker exec -u mg "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN && $ACTIVATE_CARGO && $cmake_cmd" + + # ' is used instead of " because we need to run make within the allowed + # container resources. + # Default value for $threads is 0 instead of $(nproc) because macos + # doesn't support the nproc command. 
+ # 0 is set for default value and checked here because mgbuild containers + # support nproc + # shellcheck disable=SC2016 + if [[ "$threads" == 0 ]]; then + docker exec -u mg "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN && $ACTIVATE_CARGO "'&& make -j$(nproc)' + docker exec -u mg "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN && $ACTIVATE_CARGO "'&& make -j$(nproc) -B mgconsole' + else + # $threads exists only in this host script, so it has to be expanded here (double quotes); + # left in single quotes it reaches the container unset and make runs with an unlimited -j. + docker exec -u mg "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN && $ACTIVATE_CARGO && make -j$threads" + docker exec -u mg "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN && $ACTIVATE_CARGO && make -j$threads -B mgconsole" + fi +} + +package_memgraph() { + local ACTIVATE_TOOLCHAIN="source /opt/toolchain-${toolchain_version}/activate" + local build_container="mgbuild_${toolchain_version}_${os}" + local container_output_dir="$MGBUILD_ROOT_DIR/build/output" + local package_command="" + if [[ "$os" =~ ^"centos".* ]] || [[ "$os" =~ ^"fedora".* ]] || [[ "$os" =~ ^"amzn".* ]] || [[ "$os" =~ ^"rocky".* ]]; then + docker exec -u root "$build_container" bash -c "yum -y update" + package_command=" cpack -G RPM --config ../CPackConfig.cmake && rpmlint --file='../../release/rpm/rpmlintrc' memgraph*.rpm " + fi + if [[ "$os" =~ ^"debian".* ]]; then + docker exec -u root "$build_container" bash -c "apt --allow-releaseinfo-change -y update" + package_command=" cpack -G DEB --config ../CPackConfig.cmake " + fi + if [[ "$os" =~ ^"ubuntu".* ]]; then + docker exec -u root "$build_container" bash -c "apt update" + package_command=" cpack -G DEB --config ../CPackConfig.cmake " + fi + docker exec -u mg "$build_container" bash -c "mkdir -p $container_output_dir && cd $container_output_dir && $ACTIVATE_TOOLCHAIN && $package_command" +} + +copy_memgraph() { + local build_container="mgbuild_${toolchain_version}_${os}" + case "$1" in + --binary) + echo "Copying memgraph binary to host..."
+ local container_output_path="$MGBUILD_ROOT_DIR/build/memgraph" + local host_output_path="$PROJECT_ROOT/build/memgraph" + mkdir -p "$PROJECT_ROOT/build" + docker cp -L $build_container:$container_output_path $host_output_path + echo "Binary saved to $host_output_path" + ;; + --build-logs) + echo "Copying memgraph build logs to host..." + local container_output_path="$MGBUILD_ROOT_DIR/build/logs" + local host_output_path="$PROJECT_ROOT/build/logs" + mkdir -p "$PROJECT_ROOT/build" + docker cp -L $build_container:$container_output_path $host_output_path + echo "Build logs saved to $host_output_path" + ;; + --package) + echo "Copying memgraph package to host..." + local container_output_dir="$MGBUILD_ROOT_DIR/build/output" + local host_output_dir="$PROJECT_ROOT/build/output/$os" + local last_package_name=$(docker exec -u mg "$build_container" bash -c "cd $container_output_dir && ls -t memgraph* | head -1") + mkdir -p "$host_output_dir" + docker cp "$build_container:$container_output_dir/$last_package_name" "$host_output_dir/$last_package_name" + echo "Package saved to $host_output_dir/$last_package_name" + ;; + *) + echo "Error: Unknown flag '$1'" + exit 1 + ;; + esac +} + + +################################################## +##################### TESTS ###################### +################################################## +test_memgraph() { + local ACTIVATE_TOOLCHAIN="source /opt/toolchain-${toolchain_version}/activate" + local ACTIVATE_VENV="./setup.sh /opt/toolchain-${toolchain_version}/activate" + local ACTIVATE_CARGO="source $MGBUILD_HOME_DIR/.cargo/env" + local EXPORT_LICENSE="export MEMGRAPH_ENTERPRISE_LICENSE=$enterprise_license" + local EXPORT_ORG_NAME="export MEMGRAPH_ORGANIZATION_NAME=$organization_name" + local BUILD_DIR="$MGBUILD_ROOT_DIR/build" + local build_container="mgbuild_${toolchain_version}_${os}" + echo "Running $1 test on $build_container..." 
+ + case "$1" in + unit) + # $threads exists only in this host script, so resolve the job count before the command + # reaches the container shell. 0 (the default) means "use every core of the container", + # mirroring build_memgraph; the literal $(nproc) is evaluated inside the container. + # shellcheck disable=SC2016 + local ctest_jobs="$threads" + if [[ "$threads" == 0 ]]; then + ctest_jobs='$(nproc)' + fi + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $BUILD_DIR && $ACTIVATE_TOOLCHAIN && ctest -R memgraph__unit --output-on-failure -j$ctest_jobs" + ;; + unit-coverage) + local setup_lsan_ubsan="export LSAN_OPTIONS=suppressions=$BUILD_DIR/../tools/lsan.supp && export UBSAN_OPTIONS=halt_on_error=1" + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $BUILD_DIR && $ACTIVATE_TOOLCHAIN && $setup_lsan_ubsan "'&& ctest -R memgraph__unit --output-on-failure -j2' + ;; + leftover-CTest) + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $BUILD_DIR && $ACTIVATE_TOOLCHAIN "'&& ctest -E "(memgraph__unit|memgraph__benchmark)" --output-on-failure' + ;; + drivers) + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR "'&& ./tests/drivers/run.sh' + ;; + integration) + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR "'&& tests/integration/run.sh' + ;; + cppcheck-and-clang-format) + local test_output_path="$MGBUILD_ROOT_DIR/tools/github/cppcheck_and_clang_format.txt" + local test_output_host_dest="$PROJECT_ROOT/tools/github/cppcheck_and_clang_format.txt" + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR/tools/github && $ACTIVATE_TOOLCHAIN "'&& ./cppcheck_and_clang_format diff' + docker cp $build_container:$test_output_path $test_output_host_dest + ;; + stress-plain) + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR/tests/stress && source ve3/bin/activate "'&& ./continuous_integration' + ;; + stress-ssl) + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR/tests/stress && source ve3/bin/activate "'&& ./continuous_integration --use-ssl' + ;; + durability) + docker exec -u mg
$build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR/tests/stress && source ve3/bin/activate "'&& python3 durability --num-steps 5' + ;; + gql-behave) + local test_output_dir="$MGBUILD_ROOT_DIR/tests/gql_behave" + local test_output_host_dest="$PROJECT_ROOT/tests/gql_behave" + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR/tests && $ACTIVATE_VENV && cd $MGBUILD_ROOT_DIR/tests/gql_behave "'&& ./continuous_integration' + docker cp $build_container:$test_output_dir/gql_behave_status.csv $test_output_host_dest/gql_behave_status.csv + docker cp $build_container:$test_output_dir/gql_behave_status.html $test_output_host_dest/gql_behave_status.html + ;; + macro-benchmark) + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && export USER=mg && export LANG=$(echo $LANG) && cd $MGBUILD_ROOT_DIR/tests/macro_benchmark "'&& ./harness QuerySuite MemgraphRunner --groups aggregation 1000_create unwind_create dense_expand match --no-strict' + ;; + mgbench) + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR/tests/mgbench "'&& ./benchmark.py vendor-native --num-workers-for-benchmark 12 --export-results benchmark_result.json pokec/medium/*/*' + ;; + upload-to-bench-graph) + shift 1 + local SETUP_PASSED_ARGS="export PASSED_ARGS=\"$@\"" + local SETUP_VE3_ENV="virtualenv -p python3 ve3 && source ve3/bin/activate && pip install -r requirements.txt" + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR/tools/bench-graph-client && $SETUP_VE3_ENV && $SETUP_PASSED_ARGS "'&& ./main.py $PASSED_ARGS' + ;; + code-analysis) + shift 1 + local SETUP_PASSED_ARGS="export PASSED_ARGS=\"$@\"" + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR/tests/code_analysis && $SETUP_PASSED_ARGS "'&& ./python_code_analysis.sh $PASSED_ARGS' 
+ ;; + code-coverage) + local test_output_path="$MGBUILD_ROOT_DIR/tools/github/generated/code_coverage.tar.gz" + local test_output_host_dest="$PROJECT_ROOT/tools/github/generated/code_coverage.tar.gz" + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && $ACTIVATE_TOOLCHAIN && cd $MGBUILD_ROOT_DIR/tools/github "'&& ./coverage_convert' + docker exec -u mg $build_container bash -c "cd $MGBUILD_ROOT_DIR/tools/github/generated && tar -czf code_coverage.tar.gz coverage.json html report.json summary.rmu" + mkdir -p $PROJECT_ROOT/tools/github/generated + docker cp $build_container:$test_output_path $test_output_host_dest + ;; + clang-tidy) + shift 1 + local SETUP_PASSED_ARGS="export PASSED_ARGS=\"$@\"" + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && export THREADS=$threads && $ACTIVATE_TOOLCHAIN && cd $MGBUILD_ROOT_DIR/tests/code_analysis && $SETUP_PASSED_ARGS "'&& ./clang_tidy.sh $PASSED_ARGS' + ;; + e2e) + # local kafka_container="kafka_kafka_1" + # local kafka_hostname="kafka" + # local pulsar_container="pulsar_pulsar_1" + # local pulsar_hostname="pulsar" + # local setup_hostnames="export KAFKA_HOSTNAME=$kafka_hostname && PULSAR_HOSTNAME=$pulsar_hostname" + # local build_container_network=$(docker inspect $build_container --format='{{ .HostConfig.NetworkMode }}') + # docker network connect --alias $kafka_hostname $build_container_network $kafka_container > /dev/null 2>&1 || echo "Kafka container already inside correct network or something went wrong ..." + # docker network connect --alias $pulsar_hostname $build_container_network $pulsar_container > /dev/null 2>&1 || echo "Kafka container already inside correct network or something went wrong ..." 
+ docker exec -u mg $build_container bash -c "pip install --user networkx && pip3 install --user networkx" + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && $ACTIVATE_CARGO && cd $MGBUILD_ROOT_DIR/tests && $ACTIVATE_VENV && source ve3/bin/activate_e2e && cd $MGBUILD_ROOT_DIR/tests/e2e "'&& ./run.sh' + ;; + *) + echo "Error: Unknown test '$1'" + exit 1 + ;; + esac +} + + +################################################## +################### PARSE ARGS ################### +################################################## +if [ "$#" -eq 0 ] || [ "$1" == "-h" ] || [ "$1" == "--help" ]; then + print_help + exit 0 +fi +arch=$DEFAULT_ARCH +build_type=$DEFAULT_BUILD_TYPE +enterprise_license=$DEFAULT_ENTERPRISE_LICENSE +organization_name=$DEFAULT_ORGANIZATION_NAME +os=$DEFAULT_OS +threads=$DEFAULT_THREADS +toolchain_version=$DEFAULT_TOOLCHAIN +command="" +while [[ $# -gt 0 ]]; do + case "$1" in + --arch) + arch=$2 + check_support arch $arch + shift 2 + ;; + --build-type) + build_type=$2 + check_support build_type $build_type + shift 2 + ;; + --enterprise-license) + enterprise_license=$2 + shift 2 + ;; + --organization-name) + organization_name=$2 + shift 2 + ;; + --os) + os=$2 + check_support os $os + shift 2 + ;; + --threads) + threads=$2 + shift 2 + ;; + --toolchain) + toolchain_version=$2 + check_support toolchain $toolchain_version + shift 2 + ;; + *) + if [[ "$1" =~ ^--.* ]]; then + echo -e "Error: Unknown option '$1'" + exit 1 + else + command=$1 + shift 1 + break + fi + ;; + esac +done +check_support os_toolchain_combo $os $toolchain_version + +if [[ "$command" == "" ]]; then + echo -e "Error: Command not provided, please provide command" + print_help + exit 1 +fi + +if docker compose version > /dev/null 2>&1; then + docker_compose_cmd="docker compose" +elif which docker-compose > /dev/null 2>&1; then + docker_compose_cmd="docker-compose" +else + echo -e "Missing command: There has to be installed either 'docker-compose' or 
'docker compose'" + exit 1 +fi +echo "Using $docker_compose_cmd" + +################################################## +################# PARSE COMMAND ################## +################################################## +case $command in + build) + cd $SCRIPT_DIR + if [[ "$os" == "all" ]]; then + $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml build + else + $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml build mgbuild_${toolchain_version}_${os} + fi + ;; + run) + cd $SCRIPT_DIR + pull=false + if [[ "$#" -gt 0 ]]; then + if [[ "$1" == "--pull" ]]; then + pull=true + else + echo "Error: Unknown flag '$1'" + exit 1 + fi + fi + if [[ "$os" == "all" ]]; then + if [[ "$pull" == "true" ]]; then + $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml pull --ignore-pull-failures + elif [[ "$docker_compose_cmd" == "docker compose" ]]; then + $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml pull --ignore-pull-failures --policy missing + fi + $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml up -d + else + if [[ "$pull" == "true" ]]; then + $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml pull mgbuild_${toolchain_version}_${os} + elif ! 
docker image inspect memgraph/mgbuild:${toolchain_version}_${os} > /dev/null 2>&1; then + $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml pull --ignore-pull-failures mgbuild_${toolchain_version}_${os} + fi + $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml up -d mgbuild_${toolchain_version}_${os} + fi + ;; + stop) + cd $SCRIPT_DIR + remove=false + if [[ "$#" -gt 0 ]]; then + if [[ "$1" == "--remove" ]]; then + remove=true + else + echo "Error: Unknown flag '$1'" + exit 1 + fi + fi + if [[ "$os" == "all" ]]; then + $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml down + else + docker stop mgbuild_${toolchain_version}_${os} + if [[ "$remove" == "true" ]]; then + docker rm mgbuild_${toolchain_version}_${os} + fi + fi + ;; + pull) + cd $SCRIPT_DIR + if [[ "$os" == "all" ]]; then + $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml pull --ignore-pull-failures + else + $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml pull mgbuild_${toolchain_version}_${os} + fi + ;; + push) + docker login $@ + cd $SCRIPT_DIR + if [[ "$os" == "all" ]]; then + $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml push --ignore-push-failures + else + $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml push mgbuild_${toolchain_version}_${os} + fi + ;; + build-memgraph) + build_memgraph $@ + ;; + package-memgraph) + package_memgraph + ;; + test-memgraph) + test_memgraph $@ + ;; + copy) + copy_memgraph $@ + ;; + *) + echo "Error: Unknown command '$command'" + exit 1 + ;; +esac diff --git a/release/package/rocky-9.3/Dockerfile b/release/package/rocky-9.3/Dockerfile new file mode 100644 index 000000000..4ee3a0d78 --- /dev/null +++ b/release/package/rocky-9.3/Dockerfile @@ -0,0 +1,40 @@ +FROM rockylinux:9.3 + +ARG TOOLCHAIN_VERSION + +# Stops tzdata interactive configuration. 
+RUN yum -y update \ + && yum install -y wget git +# Do NOT be smart here and clean the cache because the container is used in the +# stateful context. + +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-rocky-9.3-amd64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-rocky-9.3-amd64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-rocky-9.3-amd64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-rocky-9.3-amd64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/rocky-9.3.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/rocky-9.3.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. && rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash + +# Install PyYAML (only for amzn-2, centos-7, cento-9 and rocky-9.3) +RUN pip3 install --user PyYAML + +ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/run.sh b/release/package/run.sh deleted file mode 100755 index bbd5ff48a..000000000 --- a/release/package/run.sh +++ /dev/null @@ -1,208 +0,0 @@ -#!/bin/bash - -set -Eeuo pipefail - -SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -SUPPORTED_OS=( - centos-7 centos-9 - debian-10 debian-11 debian-11-arm - ubuntu-18.04 ubuntu-20.04 ubuntu-22.04 ubuntu-22.04-arm - fedora-36 - amzn-2 -) - -SUPPORTED_BUILD_TYPES=( - Debug - Release - RelWithDebInfo -) - -PROJECT_ROOT="$SCRIPT_DIR/../.." 
-TOOLCHAIN_VERSION="toolchain-v4" -ACTIVATE_TOOLCHAIN="source /opt/${TOOLCHAIN_VERSION}/activate" -HOST_OUTPUT_DIR="$PROJECT_ROOT/build/output" - -print_help () { - # TODO(gitbuda): Update the release/package/run.sh help - echo "$0 init|package|docker|test {os} {build_type} [--for-docker|--for-platform]" - echo "" - echo " OSs: ${SUPPORTED_OS[*]}" - echo " Build types: ${SUPPORTED_BUILD_TYPES[*]}" - exit 1 -} - -make_package () { - os="$1" - build_type="$2" - - build_container="mgbuild_$os" - echo "Building Memgraph for $os on $build_container..." - - package_command="" - if [[ "$os" =~ ^"centos".* ]] || [[ "$os" =~ ^"fedora".* ]] || [[ "$os" =~ ^"amzn".* ]]; then - docker exec "$build_container" bash -c "yum -y update" - package_command=" cpack -G RPM --config ../CPackConfig.cmake && rpmlint --file='../../release/rpm/rpmlintrc' memgraph*.rpm " - fi - if [[ "$os" =~ ^"debian".* ]]; then - docker exec "$build_container" bash -c "apt --allow-releaseinfo-change -y update" - package_command=" cpack -G DEB --config ../CPackConfig.cmake " - fi - if [[ "$os" =~ ^"ubuntu".* ]]; then - docker exec "$build_container" bash -c "apt update" - package_command=" cpack -G DEB --config ../CPackConfig.cmake " - fi - telemetry_id_override_flag="" - if [[ "$#" -gt 2 ]]; then - if [[ "$3" == "--for-docker" ]]; then - telemetry_id_override_flag=" -DMG_TELEMETRY_ID_OVERRIDE=DOCKER " - elif [[ "$3" == "--for-platform" ]]; then - telemetry_id_override_flag=" -DMG_TELEMETRY_ID_OVERRIDE=DOCKER-PLATFORM" - else - print_help - exit - fi - fi - - echo "Copying project files..." - # If master is not the current branch, fetch it, because the get_version - # script depends on it. If we are on master, the fetch command is going to - # fail so that's why there is the explicit check. - # Required here because Docker build container can't access remote. 
- cd "$PROJECT_ROOT" - if [[ "$(git rev-parse --abbrev-ref HEAD)" != "master" ]]; then - git fetch origin master:master - fi - - # Ensure we have a clean build directory - docker exec "$build_container" rm -rf /memgraph - - docker exec "$build_container" mkdir -p /memgraph - # TODO(gitbuda): Revisit copying the whole repo -> makese sense under CI. - docker cp "$PROJECT_ROOT/." "$build_container:/memgraph/" - - container_build_dir="/memgraph/build" - container_output_dir="$container_build_dir/output" - - # TODO(gitbuda): TOOLCHAIN_RUN_DEPS should be installed during the Docker - # image build phase, but that is not easy at this point because the - # environment/os/{os}.sh does not come within the toolchain package. When - # migrating to the next version of toolchain do that, and remove the - # TOOLCHAIN_RUN_DEPS installation from here. - # TODO(gitbuda): On the other side, having this here allows updating deps - # wihout reruning the build containers. - echo "Installing dependencies using '/memgraph/environment/os/$os.sh' script..." - docker exec "$build_container" bash -c "/memgraph/environment/os/$os.sh install TOOLCHAIN_RUN_DEPS" - docker exec "$build_container" bash -c "/memgraph/environment/os/$os.sh install MEMGRAPH_BUILD_DEPS" - - echo "Building targeted package..." - # Fix issue with git marking directory as not safe - docker exec "$build_container" bash -c "cd /memgraph && git config --global --add safe.directory '*'" - docker exec "$build_container" bash -c "cd /memgraph && $ACTIVATE_TOOLCHAIN && ./init" - docker exec "$build_container" bash -c "cd $container_build_dir && rm -rf ./*" - # TODO(gitbuda): cmake fails locally if remote is clone via ssh because of the key -> FIX - if [[ "$os" =~ "-arm" ]]; then - docker exec "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN && cmake -DCMAKE_BUILD_TYPE=$build_type -DMG_ARCH="ARM64" $telemetry_id_override_flag .." 
- else - docker exec "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN && cmake -DCMAKE_BUILD_TYPE=$build_type $telemetry_id_override_flag .." - fi - # ' is used instead of " because we need to run make within the allowed - # container resources. - # shellcheck disable=SC2016 - docker exec "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN "'&& make -j$(nproc)' - docker exec "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN "'&& make -j$(nproc) -B mgconsole' - docker exec "$build_container" bash -c "mkdir -p $container_output_dir && cd $container_output_dir && $ACTIVATE_TOOLCHAIN && $package_command" - - echo "Copying targeted package to host..." - last_package_name=$(docker exec "$build_container" bash -c "cd $container_output_dir && ls -t memgraph* | head -1") - # The operating system folder is introduced because multiple different - # packages could be preserved during the same build "session". - mkdir -p "$HOST_OUTPUT_DIR/$os" - package_host_destination="$HOST_OUTPUT_DIR/$os/$last_package_name" - docker cp "$build_container:$container_output_dir/$last_package_name" "$package_host_destination" - echo "Package saved to $package_host_destination." -} - -case "$1" in - init) - cd "$SCRIPT_DIR" - if ! which "docker-compose" >/dev/null; then - docker_compose_cmd="docker compose" - else - docker_compose_cmd="docker-compose" - fi - $docker_compose_cmd build --build-arg TOOLCHAIN_VERSION="${TOOLCHAIN_VERSION}" - $docker_compose_cmd up -d - ;; - - docker) - # NOTE: Docker is build on top of Debian 11 package. 
- based_on_os="debian-11" - # shellcheck disable=SC2012 - last_package_name=$(cd "$HOST_OUTPUT_DIR/$based_on_os" && ls -t memgraph* | head -1) - docker_build_folder="$PROJECT_ROOT/release/docker" - cd "$docker_build_folder" - ./package_docker --latest "$HOST_OUTPUT_DIR/$based_on_os/$last_package_name" - # shellcheck disable=SC2012 - docker_image_name=$(cd "$docker_build_folder" && ls -t memgraph* | head -1) - docker_host_folder="$HOST_OUTPUT_DIR/docker" - docker_host_image_path="$docker_host_folder/$docker_image_name" - mkdir -p "$docker_host_folder" - cp "$docker_build_folder/$docker_image_name" "$docker_host_image_path" - echo "Docker images saved to $docker_host_image_path." - ;; - - package) - shift 1 - if [[ "$#" -lt 2 ]]; then - print_help - fi - os="$1" - build_type="$2" - shift 2 - is_os_ok=false - for supported_os in "${SUPPORTED_OS[@]}"; do - if [[ "$supported_os" == "${os}" ]]; then - is_os_ok=true - break - fi - done - is_build_type_ok=false - for supported_build_type in "${SUPPORTED_BUILD_TYPES[@]}"; do - if [[ "$supported_build_type" == "${build_type}" ]]; then - is_build_type_ok=true - break - fi - done - if [[ "$is_os_ok" == true && "$is_build_type_ok" == true ]]; then - make_package "$os" "$build_type" "$@" - else - if [[ "$is_os_ok" == false ]]; then - echo "Unsupported OS: $os" - elif [[ "$is_build_type_ok" == false ]]; then - echo "Unsupported build type: $build_type" - fi - print_help - fi - ;; - - build) - shift 1 - if [[ "$#" -ne 2 ]]; then - print_help - fi - # in the vX format, e.g. v5 - toolchain_version="$1" - # a name of the os folder, e.g. ubuntu-22.04-arm - os="$2" - cd "$SCRIPT_DIR/$os" - docker build -f Dockerfile --build-arg TOOLCHAIN_VERSION="toolchain-$toolchain_version" -t "memgraph/memgraph-builder:${toolchain_version}_$os" . - ;; - - test) - echo "TODO(gitbuda): Test all packages on mgtest containers." 
- ;; - - *) - print_help - ;; -esac diff --git a/release/package/ubuntu-18.04/Dockerfile b/release/package/ubuntu-18.04/Dockerfile index 97eceead4..b4b090984 100644 --- a/release/package/ubuntu-18.04/Dockerfile +++ b/release/package/ubuntu-18.04/Dockerfile @@ -10,9 +10,30 @@ RUN apt update && apt install -y \ # Do NOT be smart here and clean the cache because the container is used in the # stateful context. -RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-ubuntu-18.04-amd64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-ubuntu-18.04-amd64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-ubuntu-18.04-amd64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-ubuntu-18.04-amd64.tar.gz +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-18.04-amd64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-18.04-amd64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-18.04-amd64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-18.04-amd64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/ubuntu-18.04.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/ubuntu-18.04.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/ubuntu-20.04/Dockerfile b/release/package/ubuntu-20.04/Dockerfile index 9fea7fd79..30d6cda8e 100644 --- a/release/package/ubuntu-20.04/Dockerfile +++ b/release/package/ubuntu-20.04/Dockerfile @@ -10,9 +10,30 @@ RUN apt update && apt install -y \ # Do NOT be smart here and clean the cache because the container is used in the # stateful context. -RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-ubuntu-20.04-amd64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-ubuntu-20.04-amd64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-ubuntu-20.04-amd64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-ubuntu-20.04-amd64.tar.gz +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-20.04-amd64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-20.04-amd64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-20.04-amd64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-20.04-amd64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/ubuntu-20.04.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/ubuntu-20.04.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/ubuntu-22.04-arm/Dockerfile b/release/package/ubuntu-22.04-arm/Dockerfile index 56cac8f76..aa7b5b63f 100644 --- a/release/package/ubuntu-22.04-arm/Dockerfile +++ b/release/package/ubuntu-22.04-arm/Dockerfile @@ -10,9 +10,30 @@ RUN apt update && apt install -y \ # Do NOT be smart here and clean the cache because the container is used in the # stateful context. -RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-arm64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-arm64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-arm64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-arm64.tar.gz +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-arm64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-arm64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-arm64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-arm64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/ubuntu-22.04-arm.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/ubuntu-22.04-arm.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/ubuntu-22.04/Dockerfile b/release/package/ubuntu-22.04/Dockerfile index 6bd22589b..652de0f5c 100644 --- a/release/package/ubuntu-22.04/Dockerfile +++ b/release/package/ubuntu-22.04/Dockerfile @@ -10,9 +10,30 @@ RUN apt update && apt install -y \ # Do NOT be smart here and clean the cache because the container is used in the # stateful context. -RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-amd64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-amd64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-amd64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-amd64.tar.gz +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-amd64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-amd64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-amd64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-amd64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/ubuntu-22.04.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/ubuntu-22.04.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash ENTRYPOINT ["sleep", "infinity"] diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4d5d523c6..af88e624a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -45,7 +45,7 @@ set(mg_single_node_v2_sources add_executable(memgraph ${mg_single_node_v2_sources}) target_include_directories(memgraph PUBLIC ${CMAKE_SOURCE_DIR}/include) target_link_libraries(memgraph stdc++fs Threads::Threads - mg-telemetry mg-communication mg-communication-metrics mg-memory mg-utils mg-license mg-settings mg-glue mg-flags mg::system mg::replication_handler) + mg-telemetry mgcxx_text_search tantivy_text_search mg-communication mg-communication-metrics mg-memory mg-utils mg-license mg-settings mg-glue mg-flags mg::system mg::replication_handler) # NOTE: `include/mg_procedure.syms` describes a pattern match for symbols which # should be dynamically exported, so that `dlopen` can correctly link th diff --git a/src/coordination/raft_state.cpp b/src/coordination/raft_state.cpp index 38acfd85e..fd93160b6 100644 --- a/src/coordination/raft_state.cpp +++ b/src/coordination/raft_state.cpp @@ -12,6 +12,7 @@ #ifdef MG_ENTERPRISE #include <chrono> +#include <spdlog/spdlog.h> #include "coordination/coordinator_config.hpp" #include "coordination/coordinator_exceptions.hpp" #include "coordination/raft_state.hpp" @@ -123,7 +124,7 @@ auto RaftState::AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_po spdlog::info("Request to add server {} to the cluster accepted", endpoint); } else { throw RaftAddServerException("Failed to accept request to add server {} to the 
cluster with error code {}", - endpoint, cmd_result->get_result_code()); + endpoint, int(cmd_result->get_result_code())); } // Waiting for server to join @@ -173,7 +174,8 @@ auto RaftState::AppendRegisterReplicationInstanceLog(CoordinatorClientConfig con spdlog::info("Request for registering instance {} accepted", config.instance_name); if (res->get_result_code() != nuraft::cmd_result_code::OK) { - spdlog::error("Failed to register instance {} with error code {}", config.instance_name, res->get_result_code()); + spdlog::error("Failed to register instance {} with error code {}", config.instance_name, + int(res->get_result_code())); return false; } @@ -194,7 +196,7 @@ auto RaftState::AppendUnregisterReplicationInstanceLog(std::string_view instance spdlog::info("Request for unregistering instance {} accepted", instance_name); if (res->get_result_code() != nuraft::cmd_result_code::OK) { - spdlog::error("Failed to unregister instance {} with error code {}", instance_name, res->get_result_code()); + spdlog::error("Failed to unregister instance {} with error code {}", instance_name, int(res->get_result_code())); return false; } return true; @@ -214,7 +216,7 @@ auto RaftState::AppendSetInstanceAsMainLog(std::string_view instance_name) -> bo spdlog::info("Request for promoting instance {} accepted", instance_name); if (res->get_result_code() != nuraft::cmd_result_code::OK) { - spdlog::error("Failed to promote instance {} with error code {}", instance_name, res->get_result_code()); + spdlog::error("Failed to promote instance {} with error code {}", instance_name, int(res->get_result_code())); return false; } return true; @@ -233,7 +235,7 @@ auto RaftState::AppendSetInstanceAsReplicaLog(std::string_view instance_name) -> spdlog::info("Request for demoting instance {} accepted", instance_name); if (res->get_result_code() != nuraft::cmd_result_code::OK) { - spdlog::error("Failed to promote instance {} with error code {}", instance_name, res->get_result_code()); + 
spdlog::error("Failed to promote instance {} with error code {}", instance_name, int(res->get_result_code())); return false; } @@ -252,7 +254,7 @@ auto RaftState::AppendUpdateUUIDLog(utils::UUID const &uuid) -> bool { spdlog::info("Request for updating UUID accepted"); if (res->get_result_code() != nuraft::cmd_result_code::OK) { - spdlog::error("Failed to update UUID with error code {}", res->get_result_code()); + spdlog::error("Failed to update UUID with error code {}", int(res->get_result_code())); return false; } diff --git a/src/csv/include/csv/parsing.hpp b/src/csv/include/csv/parsing.hpp index 66f2913c8..0accc616d 100644 --- a/src/csv/include/csv/parsing.hpp +++ b/src/csv/include/csv/parsing.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -119,6 +119,8 @@ class Reader { auto GetHeader() const -> Header const &; auto GetNextRow(utils::MemoryResource *mem) -> std::optional<Row>; + void Reset(); + private: // Some implementation issues that need clearing up, but this is mainly because // I don't want `boost/iostreams/filtering_stream.hpp` included in this header file diff --git a/src/csv/parsing.cpp b/src/csv/parsing.cpp index 6d03dc7fd..6961a42e4 100644 --- a/src/csv/parsing.cpp +++ b/src/csv/parsing.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -34,6 +34,10 @@ struct Reader::impl { [[nodiscard]] bool HasHeader() const { return read_config_.with_header; } [[nodiscard]] auto Header() const -> Header const & { return header_; } + void Reset() { + line_buffer_.clear(); + line_buffer_.shrink_to_fit(); + } auto GetNextRow(utils::MemoryResource *mem) -> std::optional<Reader::Row>; @@ -42,7 +46,7 @@ struct Reader::impl { void TryInitializeHeader(); - std::optional<utils::pmr::string> GetNextLine(utils::MemoryResource *mem); + bool GetNextLine(); ParsingResult ParseHeader(); @@ -55,6 +59,8 @@ struct Reader::impl { Config read_config_; uint64_t line_count_{1}; uint16_t number_of_columns_{0}; + uint64_t estimated_number_of_columns_{0}; + utils::pmr::string line_buffer_{memory_}; Reader::Header header_{memory_}; }; @@ -129,17 +135,16 @@ void Reader::impl::InitializeStream() { MG_ASSERT(csv_stream_.is_complete(), "Should be 'complete' for correct operation"); } -std::optional<utils::pmr::string> Reader::impl::GetNextLine(utils::MemoryResource *mem) { - utils::pmr::string line(mem); - if (!std::getline(csv_stream_, line)) { +bool Reader::impl::GetNextLine() { + if (!std::getline(csv_stream_, line_buffer_)) { // reached end of file or an I/0 error occurred if (!csv_stream_.good()) { csv_stream_.reset(); // this will close the file_stream_ and clear the chain } - return std::nullopt; + return false; } ++line_count_; - return std::move(line); + return true; } Reader::ParsingResult Reader::impl::ParseHeader() { @@ -170,6 +175,8 @@ void Reader::impl::TryInitializeHeader() { const Reader::Header &Reader::GetHeader() const { return pimpl->Header(); } +void Reader::Reset() { pimpl->Reset(); } + namespace { enum class CsvParserState : uint8_t { INITIAL_FIELD, NEXT_FIELD, QUOTING, EXPECT_DELIMITER, DONE }; @@ -179,6 +186,8 @@ 
Reader::ParsingResult Reader::impl::ParseRow(utils::MemoryResource *mem) { utils::pmr::vector<utils::pmr::string> row(mem); if (number_of_columns_ != 0) { row.reserve(number_of_columns_); + } else if (estimated_number_of_columns_ != 0) { + row.reserve(estimated_number_of_columns_); } utils::pmr::string column(memory_); @@ -186,13 +195,12 @@ Reader::ParsingResult Reader::impl::ParseRow(utils::MemoryResource *mem) { auto state = CsvParserState::INITIAL_FIELD; do { - const auto maybe_line = GetNextLine(mem); - if (!maybe_line) { + if (!GetNextLine()) { // The whole file was processed. break; } - std::string_view line_string_view = *maybe_line; + std::string_view line_string_view = line_buffer_; // remove '\r' from the end in case we have dos file format if (line_string_view.back() == '\r') { @@ -312,6 +320,11 @@ Reader::ParsingResult Reader::impl::ParseRow(utils::MemoryResource *mem) { fmt::format("Expected {:d} columns in row {:d}, but got {:d}", number_of_columns_, line_count_ - 1, row.size())); } + // To avoid unessisary dynamic growth of the row, remember the number of + // columns for future calls + if (number_of_columns_ == 0 && estimated_number_of_columns_ == 0) { + estimated_number_of_columns_ = row.size(); + } return std::move(row); } @@ -319,7 +332,7 @@ Reader::ParsingResult Reader::impl::ParseRow(utils::MemoryResource *mem) { std::optional<Reader::Row> Reader::impl::GetNextRow(utils::MemoryResource *mem) { auto row = ParseRow(mem); - if (row.HasError()) { + if (row.HasError()) [[unlikely]] { if (!read_config_.ignore_bad) { throw CsvReadException("CSV Reader: Bad row at line {:d}: {}", line_count_ - 1, row.GetError().message); } @@ -333,7 +346,7 @@ std::optional<Reader::Row> Reader::impl::GetNextRow(utils::MemoryResource *mem) } while (row.HasError()); } - if (row->empty()) { + if (row->empty()) [[unlikely]] { // reached end of file return std::nullopt; } diff --git a/src/dbms/dbms_handler.hpp b/src/dbms/dbms_handler.hpp index b0bbd5758..482423ebf 100644 --- 
a/src/dbms/dbms_handler.hpp +++ b/src/dbms/dbms_handler.hpp @@ -311,7 +311,7 @@ class DbmsHandler { stats.triggers += info.triggers; stats.streams += info.streams; ++stats.num_databases; - stats.indices += storage_info.label_indices + storage_info.label_property_indices; + stats.indices += storage_info.label_indices + storage_info.label_property_indices + storage_info.text_indices; stats.constraints += storage_info.existence_constraints + storage_info.unique_constraints; ++stats.storage_modes[(int)storage_info.storage_mode]; ++stats.isolation_levels[(int)storage_info.isolation_level]; diff --git a/src/dbms/inmemory/replication_handlers.cpp b/src/dbms/inmemory/replication_handlers.cpp index 3e4a31884..f9ce7a9d8 100644 --- a/src/dbms/inmemory/replication_handlers.cpp +++ b/src/dbms/inmemory/replication_handlers.cpp @@ -589,7 +589,6 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage if (timestamp < storage->timestamp_) { continue; } - SPDLOG_INFO(" Delta {}", applied_deltas); switch (delta.type) { case WalDeltaData::Type::VERTEX_CREATE: { @@ -616,6 +615,7 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage auto vertex = transaction->FindVertex(delta.vertex_add_remove_label.gid, View::NEW); if (!vertex) throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); + // NOTE: Text search doesn’t have replication in scope yet (Phases 1 and 2) auto ret = vertex->AddLabel(transaction->NameToLabel(delta.vertex_add_remove_label.label)); if (ret.HasError() || !ret.GetValue()) throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); @@ -628,18 +628,21 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage auto vertex = transaction->FindVertex(delta.vertex_add_remove_label.gid, View::NEW); if (!vertex) throw utils::BasicException("Invalid transaction! 
Please raise an issue, {}:{}", __FILE__, __LINE__); + // NOTE: Text search doesn’t have replication in scope yet (Phases 1 and 2) auto ret = vertex->RemoveLabel(transaction->NameToLabel(delta.vertex_add_remove_label.label)); if (ret.HasError() || !ret.GetValue()) throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); break; } case WalDeltaData::Type::VERTEX_SET_PROPERTY: { - spdlog::trace(" Vertex {} set property {} to {}", delta.vertex_edge_set_property.gid.AsUint(), - delta.vertex_edge_set_property.property, delta.vertex_edge_set_property.value); + spdlog::trace(" Vertex {} set property", delta.vertex_edge_set_property.gid.AsUint()); + // NOLINTNEXTLINE auto *transaction = get_transaction(timestamp); + // NOLINTNEXTLINE auto vertex = transaction->FindVertex(delta.vertex_edge_set_property.gid, View::NEW); if (!vertex) throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); + // NOTE: Phase 1 of the text search feature doesn't have replication in scope auto ret = vertex->SetProperty(transaction->NameToProperty(delta.vertex_edge_set_property.property), delta.vertex_edge_set_property.value); if (ret.HasError()) @@ -684,8 +687,7 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage break; } case WalDeltaData::Type::EDGE_SET_PROPERTY: { - spdlog::trace(" Edge {} set property {} to {}", delta.vertex_edge_set_property.gid.AsUint(), - delta.vertex_edge_set_property.property, delta.vertex_edge_set_property.value); + spdlog::trace(" Edge {} set property", delta.vertex_edge_set_property.gid.AsUint()); if (!storage->config_.salient.items.properties_on_edges) throw utils::BasicException( "Can't set properties on edges because properties on edges " @@ -854,6 +856,14 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage throw utils::BasicException("Invalid transaction! 
Please raise an issue, {}:{}", __FILE__, __LINE__); break; } + case WalDeltaData::Type::TEXT_INDEX_CREATE: { + // NOTE: Text search doesn’t have replication in scope yet (Phases 1 and 2) + break; + } + case WalDeltaData::Type::TEXT_INDEX_DROP: { + // NOTE: Text search doesn’t have replication in scope yet (Phases 1 and 2) + break; + } case WalDeltaData::Type::EXISTENCE_CONSTRAINT_CREATE: { spdlog::trace(" Create existence constraint on :{} ({})", delta.operation_label_property.label, delta.operation_label_property.property); @@ -917,5 +927,4 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage spdlog::debug("Applied {} deltas", applied_deltas); return applied_deltas; } - } // namespace memgraph::dbms diff --git a/src/flags/experimental.cpp b/src/flags/experimental.cpp index 123903c96..8c29142a1 100644 --- a/src/flags/experimental.cpp +++ b/src/flags/experimental.cpp @@ -18,14 +18,15 @@ // Bolt server flags. // NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables) -DEFINE_string(experimental_enabled, "", - "Experimental features to be used, comma seperated. Options [system-replication, high-availability]"); - +DEFINE_string( + experimental_enabled, "", + "Experimental features to be used, comma-separated. 
Options [system-replication, text-search, high-availability]"); using namespace std::string_view_literals; namespace memgraph::flags { auto const mapping = std::map{std::pair{"system-replication"sv, Experiments::SYSTEM_REPLICATION}, + std::pair{"text-search"sv, Experiments::TEXT_SEARCH}, std::pair{"high-availability"sv, Experiments::HIGH_AVAILABILITY}}; auto ExperimentsInstance() -> Experiments & { @@ -45,7 +46,7 @@ bool AreExperimentsEnabled(Experiments experiments) { void InitializeExperimental() { namespace rv = ranges::views; - auto const connonicalize_string = [](auto &&rng) { + auto const canonicalize_string = [](auto &&rng) { auto const is_space = [](auto c) { return c == ' '; }; auto const to_lower = [](unsigned char c) { return std::tolower(c); }; @@ -56,7 +57,7 @@ void InitializeExperimental() { auto const mapping_end = mapping.cend(); using underlying_type = std::underlying_type_t<Experiments>; auto to_set = underlying_type{}; - for (auto &&experiment : FLAGS_experimental_enabled | rv::split(',') | rv::transform(connonicalize_string)) { + for (auto &&experiment : FLAGS_experimental_enabled | rv::split(',') | rv::transform(canonicalize_string)) { if (auto it = mapping.find(experiment); it != mapping_end) { to_set |= static_cast<underlying_type>(it->second); } diff --git a/src/flags/experimental.hpp b/src/flags/experimental.hpp index 5a19889fe..0b209a4e8 100644 --- a/src/flags/experimental.hpp +++ b/src/flags/experimental.hpp @@ -23,7 +23,8 @@ namespace memgraph::flags { // old experiments can be reused once code cleanup has happened enum class Experiments : uint8_t { SYSTEM_REPLICATION = 1 << 0, - HIGH_AVAILABILITY = 1 << 1, + TEXT_SEARCH = 1 << 1, + HIGH_AVAILABILITY = 1 << 2, }; bool AreExperimentsEnabled(Experiments experiments); diff --git a/src/flags/general.cpp b/src/flags/general.cpp index cd2c95c60..37fa17b36 100644 --- a/src/flags/general.cpp +++ b/src/flags/general.cpp @@ -131,6 +131,10 @@ DEFINE_uint64(storage_recovery_thread_count, 
DEFINE_bool(storage_enable_schema_metadata, false, "Controls whether metadata should be collected about the resident labels and edge types."); +// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) +DEFINE_bool(storage_delta_on_identical_property_update, true, + "Controls whether updating a property with the same value should create a delta object."); + // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) DEFINE_bool(telemetry_enabled, false, "Set to true to enable telemetry. We collect information about the " diff --git a/src/flags/general.hpp b/src/flags/general.hpp index a1e8729ab..52f51471d 100644 --- a/src/flags/general.hpp +++ b/src/flags/general.hpp @@ -84,6 +84,8 @@ DECLARE_bool(storage_parallel_schema_recovery); DECLARE_uint64(storage_recovery_thread_count); // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) DECLARE_bool(storage_enable_schema_metadata); +// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) +DECLARE_bool(storage_delta_on_identical_property_update); // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) DECLARE_bool(telemetry_enabled); diff --git a/src/flags/run_time_configurable.cpp b/src/flags/run_time_configurable.cpp index a42ebd3d0..6c0fc54ac 100644 --- a/src/flags/run_time_configurable.cpp +++ b/src/flags/run_time_configurable.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -73,11 +73,11 @@ constexpr auto kLogToStderrGFlagsKey = "also_log_to_stderr"; constexpr auto kCartesianProductEnabledSettingKey = "cartesian-product-enabled"; constexpr auto kCartesianProductEnabledGFlagsKey = "cartesian-product-enabled"; -// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) -std::atomic<double> execution_timeout_sec_; // Local cache-like thing - -// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) -std::atomic<bool> cartesian_product_enabled_{true}; // Local cache-like thing +// NOLINTBEGIN(cppcoreguidelines-avoid-non-const-global-variables) +// Local cache-like thing +std::atomic<double> execution_timeout_sec_; +std::atomic<bool> cartesian_product_enabled_{true}; +// NOLINTEND(cppcoreguidelines-avoid-non-const-global-variables) auto ToLLEnum(std::string_view val) { const auto ll_enum = memgraph::flags::LogLevelToEnum(val); diff --git a/src/flags/run_time_configurable.hpp b/src/flags/run_time_configurable.hpp index 944a0539f..b215d6540 100644 --- a/src/flags/run_time_configurable.hpp +++ b/src/flags/run_time_configurable.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source diff --git a/src/glue/SessionHL.cpp b/src/glue/SessionHL.cpp index 6c901516c..6a48f15ca 100644 --- a/src/glue/SessionHL.cpp +++ b/src/glue/SessionHL.cpp @@ -59,12 +59,14 @@ class TypedValueResultStreamBase { public: explicit TypedValueResultStreamBase(memgraph::storage::Storage *storage); - std::vector<memgraph::communication::bolt::Value> DecodeValues( - const std::vector<memgraph::query::TypedValue> &values) const; + void DecodeValues(const std::vector<memgraph::query::TypedValue> &values); + + auto AccessValues() const -> std::vector<memgraph::communication::bolt::Value> const & { return decoded_values_; } protected: // NOTE: Needed only for ToBoltValue conversions memgraph::storage::Storage *storage_; + std::vector<memgraph::communication::bolt::Value> decoded_values_; }; /// Wrapper around TEncoder which converts TypedValue to Value @@ -75,16 +77,18 @@ class TypedValueResultStream : public TypedValueResultStreamBase { TypedValueResultStream(TEncoder *encoder, memgraph::storage::Storage *storage) : TypedValueResultStreamBase{storage}, encoder_(encoder) {} - void Result(const std::vector<memgraph::query::TypedValue> &values) { encoder_->MessageRecord(DecodeValues(values)); } + void Result(const std::vector<memgraph::query::TypedValue> &values) { + DecodeValues(values); + encoder_->MessageRecord(AccessValues()); + } private: TEncoder *encoder_; }; -std::vector<memgraph::communication::bolt::Value> TypedValueResultStreamBase::DecodeValues( - const std::vector<memgraph::query::TypedValue> &values) const { - std::vector<memgraph::communication::bolt::Value> decoded_values; - decoded_values.reserve(values.size()); +void TypedValueResultStreamBase::DecodeValues(const std::vector<memgraph::query::TypedValue> &values) { + decoded_values_.reserve(values.size()); + 
decoded_values_.clear(); for (const auto &v : values) { auto maybe_value = memgraph::glue::ToBoltValue(v, storage_, memgraph::storage::View::NEW); if (maybe_value.HasError()) { @@ -99,9 +103,8 @@ std::vector<memgraph::communication::bolt::Value> TypedValueResultStreamBase::De throw memgraph::communication::bolt::ClientError("Unexpected storage error when streaming results."); } } - decoded_values.emplace_back(std::move(*maybe_value)); + decoded_values_.emplace_back(std::move(*maybe_value)); } - return decoded_values; } TypedValueResultStreamBase::TypedValueResultStreamBase(memgraph::storage::Storage *storage) : storage_(storage) {} diff --git a/src/memgraph.cpp b/src/memgraph.cpp index d896bcc4c..9bf50131d 100644 --- a/src/memgraph.cpp +++ b/src/memgraph.cpp @@ -332,7 +332,8 @@ int main(int argc, char **argv) { .durability_directory = FLAGS_data_directory + "/rocksdb_durability", .wal_directory = FLAGS_data_directory + "/rocksdb_wal"}, .salient.items = {.properties_on_edges = FLAGS_storage_properties_on_edges, - .enable_schema_metadata = FLAGS_storage_enable_schema_metadata}, + .enable_schema_metadata = FLAGS_storage_enable_schema_metadata, + .delta_on_identical_property_update = FLAGS_storage_delta_on_identical_property_update}, .salient.storage_mode = memgraph::flags::ParseStorageMode()}; spdlog::info("config recover on startup {}, flags {} {}", db_config.durability.recover_on_startup, FLAGS_storage_recover_on_startup, FLAGS_data_recovery_on_startup); diff --git a/src/query/db_accessor.hpp b/src/query/db_accessor.hpp index 915ea9936..ee4988e4a 100644 --- a/src/query/db_accessor.hpp +++ b/src/query/db_accessor.hpp @@ -634,6 +634,24 @@ class DbAccessor final { bool EdgeTypeIndexExists(storage::EdgeTypeId edge_type) const { return accessor_->EdgeTypeIndexExists(edge_type); } + bool TextIndexExists(const std::string &index_name) const { return accessor_->TextIndexExists(index_name); } + + void TextIndexAddVertex(const VertexAccessor &vertex) { 
accessor_->TextIndexAddVertex(vertex.impl_); } + + void TextIndexUpdateVertex(const VertexAccessor &vertex, const std::vector<storage::LabelId> &removed_labels = {}) { + accessor_->TextIndexUpdateVertex(vertex.impl_, removed_labels); + } + + std::vector<storage::Gid> TextIndexSearch(const std::string &index_name, const std::string &search_query, + text_search_mode search_mode) const { + return accessor_->TextIndexSearch(index_name, search_query, search_mode); + } + + std::string TextIndexAggregate(const std::string &index_name, const std::string &search_query, + const std::string &aggregation_query) const { + return accessor_->TextIndexAggregate(index_name, search_query, aggregation_query); + } + std::optional<storage::LabelIndexStats> GetIndexStats(const storage::LabelId &label) const { return accessor_->GetIndexStats(label); } @@ -717,6 +735,12 @@ class DbAccessor final { return accessor_->DropIndex(edge_type); } + void CreateTextIndex(const std::string &index_name, storage::LabelId label) { + accessor_->CreateTextIndex(index_name, label, this); + } + + void DropTextIndex(const std::string &index_name) { accessor_->DropTextIndex(index_name); } + utils::BasicResult<storage::StorageExistenceConstraintDefinitionError, void> CreateExistenceConstraint( storage::LabelId label, storage::PropertyId property) { return accessor_->CreateExistenceConstraint(label, property); diff --git a/src/query/dump.cpp b/src/query/dump.cpp index f1dd08c8d..abc147ee8 100644 --- a/src/query/dump.cpp +++ b/src/query/dump.cpp @@ -252,6 +252,10 @@ void DumpLabelPropertyIndex(std::ostream *os, query::DbAccessor *dba, storage::L << ");"; } +void DumpTextIndex(std::ostream *os, query::DbAccessor *dba, const std::string &index_name, storage::LabelId label) { + *os << "CREATE TEXT INDEX " << EscapeName(index_name) << " ON :" << EscapeName(dba->LabelToName(label)) << ";"; +} + void DumpExistenceConstraint(std::ostream *os, query::DbAccessor *dba, storage::LabelId label, storage::PropertyId 
property) { *os << "CREATE CONSTRAINT ON (u:" << EscapeName(dba->LabelToName(label)) << ") ASSERT EXISTS (u." @@ -286,6 +290,8 @@ PullPlanDump::PullPlanDump(DbAccessor *dba, dbms::DatabaseAccess db_acc) CreateLabelIndicesPullChunk(), // Dump all label property indices CreateLabelPropertyIndicesPullChunk(), + // Dump all text indices + CreateTextIndicesPullChunk(), // Dump all existence constraints CreateExistenceConstraintsPullChunk(), // Dump all unique constraints @@ -412,6 +418,34 @@ PullPlanDump::PullChunk PullPlanDump::CreateLabelPropertyIndicesPullChunk() { }; } +PullPlanDump::PullChunk PullPlanDump::CreateTextIndicesPullChunk() { + // Dump all text indices + return [this, global_index = 0U](AnyStream *stream, std::optional<int> n) mutable -> std::optional<size_t> { + // Delay the construction of indices vectors + if (!indices_info_) { + indices_info_.emplace(dba_->ListAllIndices()); + } + const auto &text = indices_info_->text_indices; + + size_t local_counter = 0; + while (global_index < text.size() && (!n || local_counter < *n)) { + std::ostringstream os; + const auto &text_index = text[global_index]; + DumpTextIndex(&os, dba_, text_index.first, text_index.second); + stream->Result({TypedValue(os.str())}); + + ++global_index; + ++local_counter; + } + + if (global_index == text.size()) { + return local_counter; + } + + return std::nullopt; + }; +} + PullPlanDump::PullChunk PullPlanDump::CreateExistenceConstraintsPullChunk() { return [this, global_index = 0U](AnyStream *stream, std::optional<int> n) mutable -> std::optional<size_t> { // Delay the construction of constraint vectors diff --git a/src/query/dump.hpp b/src/query/dump.hpp index 05bd42967..0cf4a82a6 100644 --- a/src/query/dump.hpp +++ b/src/query/dump.hpp @@ -55,6 +55,7 @@ struct PullPlanDump { PullChunk CreateLabelIndicesPullChunk(); PullChunk CreateLabelPropertyIndicesPullChunk(); + PullChunk CreateTextIndicesPullChunk(); PullChunk CreateExistenceConstraintsPullChunk(); PullChunk 
CreateUniqueConstraintsPullChunk(); PullChunk CreateInternalIndexPullChunk(); diff --git a/src/query/exceptions.hpp b/src/query/exceptions.hpp index 147dc8710..a4c25fbae 100644 --- a/src/query/exceptions.hpp +++ b/src/query/exceptions.hpp @@ -433,4 +433,17 @@ class MultiDatabaseQueryInMulticommandTxException : public QueryException { SPECIALIZE_GET_EXCEPTION_NAME(MultiDatabaseQueryInMulticommandTxException) }; +class TextSearchException : public QueryException { + using QueryException::QueryException; + SPECIALIZE_GET_EXCEPTION_NAME(TextSearchException) +}; + +class TextSearchDisabledException : public TextSearchException { + public: + TextSearchDisabledException() + : TextSearchException( + "To use text indices and text search, start Memgraph with the experimental text search feature enabled.") {} + SPECIALIZE_GET_EXCEPTION_NAME(TextSearchDisabledException) +}; + } // namespace memgraph::query diff --git a/src/query/frontend/ast/ast.cpp b/src/query/frontend/ast/ast.cpp index 7da5c09a0..f0d09d453 100644 --- a/src/query/frontend/ast/ast.cpp +++ b/src/query/frontend/ast/ast.cpp @@ -189,6 +189,9 @@ constexpr utils::TypeInfo query::IndexQuery::kType{utils::TypeId::AST_INDEX_QUER constexpr utils::TypeInfo query::EdgeIndexQuery::kType{utils::TypeId::AST_EDGE_INDEX_QUERY, "EdgeIndexQuery", &query::Query::kType}; +constexpr utils::TypeInfo query::TextIndexQuery::kType{utils::TypeId::AST_TEXT_INDEX_QUERY, "TextIndexQuery", + &query::Query::kType}; + constexpr utils::TypeInfo query::Create::kType{utils::TypeId::AST_CREATE, "Create", &query::Clause::kType}; constexpr utils::TypeInfo query::CallProcedure::kType{utils::TypeId::AST_CALL_PROCEDURE, "CallProcedure", diff --git a/src/query/frontend/ast/ast.hpp b/src/query/frontend/ast/ast.hpp index 29f7be3cf..e3d7bc0b2 100644 --- a/src/query/frontend/ast/ast.hpp +++ b/src/query/frontend/ast/ast.hpp @@ -2273,6 +2273,37 @@ class EdgeIndexQuery : public memgraph::query::Query { friend class AstStorage; }; +class TextIndexQuery : 
public memgraph::query::Query { + public: + static const utils::TypeInfo kType; + const utils::TypeInfo &GetTypeInfo() const override { return kType; } + + enum class Action { CREATE, DROP }; + + TextIndexQuery() = default; + + DEFVISITABLE(QueryVisitor<void>); + + memgraph::query::TextIndexQuery::Action action_; + memgraph::query::LabelIx label_; + std::string index_name_; + + TextIndexQuery *Clone(AstStorage *storage) const override { + TextIndexQuery *object = storage->Create<TextIndexQuery>(); + object->action_ = action_; + object->label_ = storage->GetLabelIx(label_.name); + object->index_name_ = index_name_; + return object; + } + + protected: + TextIndexQuery(Action action, LabelIx label, std::string index_name) + : action_(action), label_(std::move(label)), index_name_(index_name) {} + + private: + friend class AstStorage; +}; + class Create : public memgraph::query::Clause { public: static const utils::TypeInfo kType; diff --git a/src/query/frontend/ast/ast_visitor.hpp b/src/query/frontend/ast/ast_visitor.hpp index bf11878da..cc6aed138 100644 --- a/src/query/frontend/ast/ast_visitor.hpp +++ b/src/query/frontend/ast/ast_visitor.hpp @@ -83,6 +83,7 @@ class ExplainQuery; class ProfileQuery; class IndexQuery; class EdgeIndexQuery; +class TextIndexQuery; class DatabaseInfoQuery; class SystemInfoQuery; class ConstraintQuery; @@ -144,11 +145,11 @@ class ExpressionVisitor template <class TResult> class QueryVisitor - : public utils::Visitor<TResult, CypherQuery, ExplainQuery, ProfileQuery, IndexQuery, EdgeIndexQuery, AuthQuery, - DatabaseInfoQuery, SystemInfoQuery, ConstraintQuery, DumpQuery, ReplicationQuery, - LockPathQuery, FreeMemoryQuery, TriggerQuery, IsolationLevelQuery, CreateSnapshotQuery, - StreamQuery, SettingQuery, VersionQuery, ShowConfigQuery, TransactionQueueQuery, - StorageModeQuery, AnalyzeGraphQuery, MultiDatabaseQuery, ShowDatabasesQuery, - EdgeImportModeQuery, CoordinatorQuery> {}; + : public utils::Visitor<TResult, CypherQuery, ExplainQuery, 
ProfileQuery, IndexQuery, EdgeIndexQuery, + TextIndexQuery, AuthQuery, DatabaseInfoQuery, SystemInfoQuery, ConstraintQuery, DumpQuery, + ReplicationQuery, LockPathQuery, FreeMemoryQuery, TriggerQuery, IsolationLevelQuery, + CreateSnapshotQuery, StreamQuery, SettingQuery, VersionQuery, ShowConfigQuery, + TransactionQueueQuery, StorageModeQuery, AnalyzeGraphQuery, MultiDatabaseQuery, + ShowDatabasesQuery, EdgeImportModeQuery, CoordinatorQuery> {}; } // namespace memgraph::query diff --git a/src/query/frontend/ast/cypher_main_visitor.cpp b/src/query/frontend/ast/cypher_main_visitor.cpp index 6da48c97c..35ccb3670 100644 --- a/src/query/frontend/ast/cypher_main_visitor.cpp +++ b/src/query/frontend/ast/cypher_main_visitor.cpp @@ -243,6 +243,13 @@ antlrcpp::Any CypherMainVisitor::visitIndexQuery(MemgraphCypher::IndexQueryConte return index_query; } +antlrcpp::Any CypherMainVisitor::visitTextIndexQuery(MemgraphCypher::TextIndexQueryContext *ctx) { + MG_ASSERT(ctx->children.size() == 1, "TextIndexQuery should have exactly one child!"); + auto *text_index_query = std::any_cast<TextIndexQuery *>(ctx->children[0]->accept(this)); + query_ = text_index_query; + return text_index_query; +} + antlrcpp::Any CypherMainVisitor::visitCreateIndex(MemgraphCypher::CreateIndexContext *ctx) { auto *index_query = storage_->Create<IndexQuery>(); index_query->action_ = IndexQuery::Action::CREATE; @@ -286,6 +293,21 @@ antlrcpp::Any CypherMainVisitor::visitDropEdgeIndex(MemgraphCypher::DropEdgeInde return index_query; } +antlrcpp::Any CypherMainVisitor::visitCreateTextIndex(MemgraphCypher::CreateTextIndexContext *ctx) { + auto *index_query = storage_->Create<TextIndexQuery>(); + index_query->index_name_ = std::any_cast<std::string>(ctx->indexName()->accept(this)); + index_query->action_ = TextIndexQuery::Action::CREATE; + index_query->label_ = AddLabel(std::any_cast<std::string>(ctx->labelName()->accept(this))); + return index_query; +} + +antlrcpp::Any 
CypherMainVisitor::visitDropTextIndex(MemgraphCypher::DropTextIndexContext *ctx) { + auto *index_query = storage_->Create<TextIndexQuery>(); + index_query->index_name_ = std::any_cast<std::string>(ctx->indexName()->accept(this)); + index_query->action_ = TextIndexQuery::Action::DROP; + return index_query; +} + antlrcpp::Any CypherMainVisitor::visitAuthQuery(MemgraphCypher::AuthQueryContext *ctx) { MG_ASSERT(ctx->children.size() == 1, "AuthQuery should have exactly one child!"); auto *auth_query = std::any_cast<AuthQuery *>(ctx->children[0]->accept(this)); diff --git a/src/query/frontend/ast/cypher_main_visitor.hpp b/src/query/frontend/ast/cypher_main_visitor.hpp index 8c65345c8..53738af61 100644 --- a/src/query/frontend/ast/cypher_main_visitor.hpp +++ b/src/query/frontend/ast/cypher_main_visitor.hpp @@ -153,6 +153,11 @@ class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor { */ antlrcpp::Any visitEdgeIndexQuery(MemgraphCypher::EdgeIndexQueryContext *ctx) override; + /** + * @return TextIndexQuery* + */ + antlrcpp::Any visitTextIndexQuery(MemgraphCypher::TextIndexQueryContext *ctx) override; + /** * @return ExplainQuery* */ @@ -500,7 +505,7 @@ class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor { antlrcpp::Any visitCreateIndex(MemgraphCypher::CreateIndexContext *ctx) override; /** - * @return DropIndex* + * @return IndexQuery* */ antlrcpp::Any visitDropIndex(MemgraphCypher::DropIndexContext *ctx) override; @@ -514,6 +519,16 @@ class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor { */ antlrcpp::Any visitDropEdgeIndex(MemgraphCypher::DropEdgeIndexContext *ctx) override; + /** + * @return TextIndexQuery* + */ + antlrcpp::Any visitCreateTextIndex(MemgraphCypher::CreateTextIndexContext *ctx) override; + + /** + * @return TextIndexQuery* + */ + antlrcpp::Any visitDropTextIndex(MemgraphCypher::DropTextIndexContext *ctx) override; + /** * @return AuthQuery* */ diff --git 
a/src/query/frontend/opencypher/grammar/Cypher.g4 b/src/query/frontend/opencypher/grammar/Cypher.g4 index 7fa218598..911615314 100644 --- a/src/query/frontend/opencypher/grammar/Cypher.g4 +++ b/src/query/frontend/opencypher/grammar/Cypher.g4 @@ -25,6 +25,7 @@ statement : query ; query : cypherQuery | indexQuery + | textIndexQuery | explainQuery | profileQuery | databaseInfoQuery @@ -65,6 +66,8 @@ cypherQuery : singleQuery ( cypherUnion )* ( queryMemoryLimit )? ; indexQuery : createIndex | dropIndex; +textIndexQuery : createTextIndex | dropTextIndex; + singleQuery : clause ( clause )* ; cypherUnion : ( UNION ALL singleQuery ) @@ -342,6 +345,12 @@ createIndex : CREATE INDEX ON ':' labelName ( '(' propertyKeyName ')' )? ; dropIndex : DROP INDEX ON ':' labelName ( '(' propertyKeyName ')' )? ; +indexName : symbolicName ; + +createTextIndex : CREATE TEXT INDEX indexName ON ':' labelName ; + +dropTextIndex : DROP TEXT INDEX indexName ; + doubleLiteral : FloatingLiteral ; cypherKeyword : ALL diff --git a/src/query/frontend/opencypher/grammar/CypherLexer.g4 b/src/query/frontend/opencypher/grammar/CypherLexer.g4 index 3e3c640d6..fb8a30b0f 100644 --- a/src/query/frontend/opencypher/grammar/CypherLexer.g4 +++ b/src/query/frontend/opencypher/grammar/CypherLexer.g4 @@ -131,6 +131,7 @@ SHOW : S H O W ; SINGLE : S I N G L E ; STARTS : S T A R T S ; STORAGE : S T O R A G E ; +TEXT : T E X T ; THEN : T H E N ; TRUE : T R U E ; UNION : U N I O N ; diff --git a/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 b/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 index 378310c22..ad15d6213 100644 --- a/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 +++ b/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 @@ -134,6 +134,7 @@ symbolicName : UnescapedSymbolicName query : cypherQuery | indexQuery | edgeIndexQuery + | textIndexQuery | explainQuery | profileQuery | databaseInfoQuery diff --git a/src/query/frontend/semantic/required_privileges.cpp 
b/src/query/frontend/semantic/required_privileges.cpp index 15726e3e2..d87fcb10e 100644 --- a/src/query/frontend/semantic/required_privileges.cpp +++ b/src/query/frontend/semantic/required_privileges.cpp @@ -29,6 +29,8 @@ class PrivilegeExtractor : public QueryVisitor<void>, public HierarchicalTreeVis void Visit(EdgeIndexQuery & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::INDEX); } + void Visit(TextIndexQuery & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::INDEX); } + void Visit(AnalyzeGraphQuery & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::INDEX); } void Visit(AuthQuery & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::AUTH); } diff --git a/src/query/frontend/semantic/symbol.hpp b/src/query/frontend/semantic/symbol.hpp index 0cfb86608..1a5aa2756 100644 --- a/src/query/frontend/semantic/symbol.hpp +++ b/src/query/frontend/semantic/symbol.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source diff --git a/src/query/frontend/stripped.cpp b/src/query/frontend/stripped.cpp index 9740cd463..5ea26b041 100644 --- a/src/query/frontend/stripped.cpp +++ b/src/query/frontend/stripped.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source diff --git a/src/query/interpret/eval.cpp b/src/query/interpret/eval.cpp index 8bd308420..7c5d838a5 100644 --- a/src/query/interpret/eval.cpp +++ b/src/query/interpret/eval.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -13,12 +13,12 @@ namespace memgraph::query { -int64_t EvaluateInt(ExpressionEvaluator *evaluator, Expression *expr, const std::string &what) { +int64_t EvaluateInt(ExpressionEvaluator *evaluator, Expression *expr, std::string_view what) { TypedValue value = expr->Accept(*evaluator); try { return value.ValueInt(); } catch (TypedValueException &e) { - throw QueryRuntimeException(what + " must be an int"); + throw QueryRuntimeException(std::string(what) + " must be an int"); } } diff --git a/src/query/interpret/eval.hpp b/src/query/interpret/eval.hpp index 19fb34d3a..92c410b75 100644 --- a/src/query/interpret/eval.hpp +++ b/src/query/interpret/eval.hpp @@ -1216,7 +1216,7 @@ class ExpressionEvaluator : public ExpressionVisitor<TypedValue> { /// @param what - Name of what's getting evaluated. Used for user feedback (via /// exception) when the evaluated value is not an int. /// @throw QueryRuntimeException if expression doesn't evaluate to an int. 
-int64_t EvaluateInt(ExpressionEvaluator *evaluator, Expression *expr, const std::string &what); +int64_t EvaluateInt(ExpressionEvaluator *evaluator, Expression *expr, std::string_view what); std::optional<size_t> EvaluateMemoryLimit(ExpressionVisitor<TypedValue> &eval, Expression *memory_limit, size_t memory_scale); diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index a5c81cc72..332054485 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -39,6 +39,7 @@ #include "dbms/dbms_handler.hpp" #include "dbms/global.hpp" #include "dbms/inmemory/storage_helper.hpp" +#include "flags/experimental.hpp" #include "flags/replication.hpp" #include "flags/run_time_configurable.hpp" #include "glue/communication.hpp" @@ -246,27 +247,6 @@ std::optional<std::string> GetOptionalStringValue(query::Expression *expression, return {}; }; -bool IsAllShortestPathsQuery(const std::vector<memgraph::query::Clause *> &clauses) { - for (const auto &clause : clauses) { - if (clause->GetTypeInfo() != Match::kType) { - continue; - } - auto *match_clause = utils::Downcast<Match>(clause); - for (const auto &pattern : match_clause->patterns_) { - for (const auto &atom : pattern->atoms_) { - if (atom->GetTypeInfo() != EdgeAtom::kType) { - continue; - } - auto *edge_atom = utils::Downcast<EdgeAtom>(atom); - if (edge_atom->type_ == EdgeAtom::Type::ALL_SHORTEST_PATHS) { - return true; - } - } - } - } - return false; -} - inline auto convertFromCoordinatorToReplicationMode(const CoordinatorQuery::SyncMode &sync_mode) -> replication_coordination_glue::ReplicationMode { switch (sync_mode) { @@ -1733,8 +1713,7 @@ struct PullPlan { std::shared_ptr<QueryUserOrRole> user_or_role, std::atomic<TransactionStatus> *transaction_status, std::shared_ptr<utils::AsyncTimer> tx_timer, TriggerContextCollector *trigger_context_collector = nullptr, - std::optional<size_t> memory_limit = {}, bool use_monotonic_memory = true, - FrameChangeCollector *frame_change_collector_ = nullptr); + 
std::optional<size_t> memory_limit = {}, FrameChangeCollector *frame_change_collector_ = nullptr); std::optional<plan::ProfilingStatsWithTotalTime> Pull(AnyStream *stream, std::optional<int> n, const std::vector<Symbol> &output_symbols, @@ -1759,26 +1738,17 @@ struct PullPlan { // we have to keep track of any unsent results from previous `PullPlan::Pull` // manually by using this flag. bool has_unsent_results_ = false; - - // In the case of LOAD CSV, we want to use only PoolResource without MonotonicMemoryResource - // to reuse allocated memory. As LOAD CSV is processing row by row - // it is possible to reduce memory usage significantly if MemoryResource deals with memory allocation - // can reuse memory that was allocated on processing the first row on all subsequent rows. - // This flag signals to `PullPlan::Pull` which MemoryResource to use - bool use_monotonic_memory_; }; PullPlan::PullPlan(const std::shared_ptr<PlanWrapper> plan, const Parameters &parameters, const bool is_profile_query, DbAccessor *dba, InterpreterContext *interpreter_context, utils::MemoryResource *execution_memory, std::shared_ptr<QueryUserOrRole> user_or_role, std::atomic<TransactionStatus> *transaction_status, std::shared_ptr<utils::AsyncTimer> tx_timer, TriggerContextCollector *trigger_context_collector, - const std::optional<size_t> memory_limit, bool use_monotonic_memory, - FrameChangeCollector *frame_change_collector) + const std::optional<size_t> memory_limit, FrameChangeCollector *frame_change_collector) : plan_(plan), cursor_(plan->plan().MakeCursor(execution_memory)), frame_(plan->symbol_table().max_position(), execution_memory), - memory_limit_(memory_limit), - use_monotonic_memory_(use_monotonic_memory) { + memory_limit_(memory_limit) { ctx_.db_accessor = dba; ctx_.symbol_table = plan->symbol_table(); ctx_.evaluation_context.timestamp = QueryTimestamp(); @@ -1804,6 +1774,7 @@ PullPlan::PullPlan(const std::shared_ptr<PlanWrapper> plan, const Parameters &pa ctx_.is_profile_query =
is_profile_query; ctx_.trigger_context_collector = trigger_context_collector; ctx_.frame_change_collector = frame_change_collector; + ctx_.evaluation_context.memory = execution_memory; } std::optional<plan::ProfilingStatsWithTotalTime> PullPlan::Pull(AnyStream *stream, std::optional<int> n, @@ -1827,43 +1798,14 @@ std::optional<plan::ProfilingStatsWithTotalTime> PullPlan::Pull(AnyStream *strea } }}; - // Set up temporary memory for a single Pull. Initial memory comes from the - // stack. 256 KiB should fit on the stack and should be more than enough for a - // single `Pull`. - static constexpr size_t stack_size = 256UL * 1024UL; - char stack_data[stack_size]; - - utils::ResourceWithOutOfMemoryException resource_with_exception; - utils::MonotonicBufferResource monotonic_memory{&stack_data[0], stack_size, &resource_with_exception}; - std::optional<utils::PoolResource> pool_memory; - static constexpr auto kMaxBlockPerChunks = 128; - - if (!use_monotonic_memory_) { - pool_memory.emplace(kMaxBlockPerChunks, kExecutionPoolMaxBlockSize, &resource_with_exception, - &resource_with_exception); - } else { - // We can throw on every query because a simple queries for deleting will use only - // the stack allocated buffer. - // Also, we want to throw only when the query engine requests more memory and not the storage - // so we add the exception to the allocator. - // TODO (mferencevic): Tune the parameters accordingly. - pool_memory.emplace(kMaxBlockPerChunks, 1024, &monotonic_memory, &resource_with_exception); - } - - ctx_.evaluation_context.memory = &*pool_memory; - // Returns true if a result was pulled. const auto pull_result = [&]() -> bool { return cursor_->Pull(frame_, ctx_); }; - const auto stream_values = [&]() { - // TODO: The streamed values should also probably use the above memory. 
- std::vector<TypedValue> values; - values.reserve(output_symbols.size()); - - for (const auto &symbol : output_symbols) { - values.emplace_back(frame_[symbol]); + auto values = std::vector<TypedValue>(output_symbols.size()); + const auto stream_values = [&] { + for (auto const i : ranges::views::iota(0UL, output_symbols.size())) { + values[i] = frame_[output_symbols[i]]; } - stream->Result(values); }; @@ -1973,7 +1915,6 @@ PreparedQuery Interpreter::PrepareTransactionQuery(std::string_view query_upper, std::function<void()> handler; if (query_upper == "BEGIN") { - ResetInterpreter(); // TODO: Evaluate doing move(extras). Currently the extras is very small, but this will be important if it ever // becomes large. handler = [this, extras = extras] { @@ -2051,30 +1992,6 @@ inline static void TryCaching(const AstStorage &ast_storage, FrameChangeCollecto } } -bool IsLoadCsvQuery(const std::vector<memgraph::query::Clause *> &clauses) { - return std::any_of(clauses.begin(), clauses.end(), - [](memgraph::query::Clause const *clause) { return clause->GetTypeInfo() == LoadCsv::kType; }); -} - -bool IsCallBatchedProcedureQuery(const std::vector<memgraph::query::Clause *> &clauses) { - EvaluationContext evaluation_context; - - return std::ranges::any_of(clauses, [&evaluation_context](memgraph::query::Clause *clause) -> bool { - if (!(clause->GetTypeInfo() == CallProcedure::kType)) return false; - auto *call_procedure_clause = utils::Downcast<CallProcedure>(clause); - - const auto &maybe_found = memgraph::query::procedure::FindProcedure( - procedure::gModuleRegistry, call_procedure_clause->procedure_name_, evaluation_context.memory); - if (!maybe_found) { - throw QueryRuntimeException("There is no procedure named '{}'.", call_procedure_clause->procedure_name_); - } - const auto &[module, proc] = *maybe_found; - if (!proc->info.is_batched) return false; - spdlog::trace("Using PoolResource for batched query procedure"); - return true; - }); -} - PreparedQuery 
PrepareCypherQuery(ParsedQuery parsed_query, std::map<std::string, TypedValue> *summary, InterpreterContext *interpreter_context, CurrentDB &current_db, utils::MemoryResource *execution_memory, std::vector<Notification> *notifications, @@ -2094,7 +2011,6 @@ PreparedQuery PrepareCypherQuery(ParsedQuery parsed_query, std::map<std::string, spdlog::info("Running query with memory limit of {}", utils::GetReadableSize(*memory_limit)); } auto clauses = cypher_query->single_query_->clauses_; - bool contains_csv = false; if (std::any_of(clauses.begin(), clauses.end(), [](const auto *clause) { return clause->GetTypeInfo() == LoadCsv::kType; })) { notifications->emplace_back( @@ -2102,13 +2018,8 @@ PreparedQuery PrepareCypherQuery(ParsedQuery parsed_query, std::map<std::string, "It's important to note that the parser parses the values as strings. It's up to the user to " "convert the parsed row values to the appropriate type. This can be done using the built-in " "conversion functions such as ToInteger, ToFloat, ToBoolean etc."); - contains_csv = true; } - // If this is LOAD CSV query, use PoolResource without MonotonicMemoryResource as we want to reuse allocated memory - auto use_monotonic_memory = - !contains_csv && !IsCallBatchedProcedureQuery(clauses) && !IsAllShortestPathsQuery(clauses); - MG_ASSERT(current_db.execution_db_accessor_, "Cypher query expects a current DB transaction"); auto *dba = &*current_db @@ -2147,7 +2058,7 @@ PreparedQuery PrepareCypherQuery(ParsedQuery parsed_query, std::map<std::string, current_db.trigger_context_collector_ ? &*current_db.trigger_context_collector_ : nullptr; auto pull_plan = std::make_shared<PullPlan>( plan, parsed_query.parameters, false, dba, interpreter_context, execution_memory, std::move(user_or_role), - transaction_status, std::move(tx_timer), trigger_context_collector, memory_limit, use_monotonic_memory, + transaction_status, std::move(tx_timer), trigger_context_collector, memory_limit, frame_change_collector->IsTrackingValues() ? 
frame_change_collector : nullptr); return PreparedQuery{std::move(header), std::move(parsed_query.required_privileges), [pull_plan = std::move(pull_plan), output_symbols = std::move(output_symbols), summary]( @@ -2261,18 +2172,6 @@ PreparedQuery PrepareProfileQuery(ParsedQuery parsed_query, bool in_explicit_tra auto *cypher_query = utils::Downcast<CypherQuery>(parsed_inner_query.query); - bool contains_csv = false; - auto clauses = cypher_query->single_query_->clauses_; - if (std::any_of(clauses.begin(), clauses.end(), - [](const auto *clause) { return clause->GetTypeInfo() == LoadCsv::kType; })) { - contains_csv = true; - } - - // If this is LOAD CSV, BatchedProcedure or AllShortest query, use PoolResource without MonotonicMemoryResource as we - // want to reuse allocated memory - auto use_monotonic_memory = - !contains_csv && !IsCallBatchedProcedureQuery(clauses) && !IsAllShortestPathsQuery(clauses); - MG_ASSERT(cypher_query, "Cypher grammar should not allow other queries in PROFILE"); EvaluationContext evaluation_context; evaluation_context.timestamp = QueryTimestamp(); @@ -2306,14 +2205,14 @@ PreparedQuery PrepareProfileQuery(ParsedQuery parsed_query, bool in_explicit_tra // We want to execute the query we are profiling lazily, so we delay // the construction of the corresponding context. stats_and_total_time = std::optional<plan::ProfilingStatsWithTotalTime>{}, - pull_plan = std::shared_ptr<PullPlanVector>(nullptr), transaction_status, use_monotonic_memory, - frame_change_collector, tx_timer = std::move(tx_timer)]( - AnyStream *stream, std::optional<int> n) mutable -> std::optional<QueryHandlerResult> { + pull_plan = std::shared_ptr<PullPlanVector>(nullptr), transaction_status, frame_change_collector, + tx_timer = std::move(tx_timer)](AnyStream *stream, + std::optional<int> n) mutable -> std::optional<QueryHandlerResult> { // No output symbols are given so that nothing is streamed. 
if (!stats_and_total_time) { stats_and_total_time = PullPlan(plan, parameters, true, dba, interpreter_context, execution_memory, std::move(user_or_role), - transaction_status, std::move(tx_timer), nullptr, memory_limit, use_monotonic_memory, + transaction_status, std::move(tx_timer), nullptr, memory_limit, frame_change_collector->IsTrackingValues() ? frame_change_collector : nullptr) .Pull(stream, {}, {}, summary); pull_plan = std::make_shared<PullPlanVector>(ProfilingStatsToTable(*stats_and_total_time)); @@ -2811,6 +2710,75 @@ PreparedQuery PrepareEdgeIndexQuery(ParsedQuery parsed_query, bool in_explicit_t RWType::W}; } +PreparedQuery PrepareTextIndexQuery(ParsedQuery parsed_query, bool in_explicit_transaction, + std::vector<Notification> *notifications, CurrentDB &current_db) { + if (in_explicit_transaction) { + throw IndexInMulticommandTxException(); + } + + auto *text_index_query = utils::Downcast<TextIndexQuery>(parsed_query.query); + std::function<void(Notification &)> handler; + + // TODO: we will need transaction for replication + MG_ASSERT(current_db.db_acc_, "Text index query expects a current DB"); + auto &db_acc = *current_db.db_acc_; + + MG_ASSERT(current_db.db_transactional_accessor_, "Text index query expects a current DB transaction"); + auto *dba = &*current_db.execution_db_accessor_; + + // Creating an index influences computed plan costs. 
+ auto invalidate_plan_cache = [plan_cache = db_acc->plan_cache()] { + plan_cache->WithLock([&](auto &cache) { cache.reset(); }); + }; + + auto *storage = db_acc->storage(); + auto label = storage->NameToLabel(text_index_query->label_.name); + auto &index_name = text_index_query->index_name_; + + Notification index_notification(SeverityLevel::INFO); + switch (text_index_query->action_) { + case TextIndexQuery::Action::CREATE: { + index_notification.code = NotificationCode::CREATE_INDEX; + index_notification.title = fmt::format("Created text index on label {}.", text_index_query->label_.name); + // TODO: not just storage + invalidate_plan_cache. Need a DB transaction (for replication) + handler = [dba, label, index_name, + invalidate_plan_cache = std::move(invalidate_plan_cache)](Notification &index_notification) { + if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + throw TextSearchDisabledException(); + } + dba->CreateTextIndex(index_name, label); + utils::OnScopeExit invalidator(invalidate_plan_cache); + }; + break; + } + case TextIndexQuery::Action::DROP: { + index_notification.code = NotificationCode::DROP_INDEX; + index_notification.title = fmt::format("Dropped text index on label {}.", text_index_query->label_.name); + // TODO: not just storage + invalidate_plan_cache. 
Need a DB transaction (for replication) + handler = [dba, index_name, + invalidate_plan_cache = std::move(invalidate_plan_cache)](Notification &index_notification) { + if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + throw TextSearchDisabledException(); + } + dba->DropTextIndex(index_name); + utils::OnScopeExit invalidator(invalidate_plan_cache); + }; + break; + } + } + + return PreparedQuery{ + {}, + std::move(parsed_query.required_privileges), + [handler = std::move(handler), notifications, index_notification = std::move(index_notification)]( + AnyStream * /*stream*/, std::optional<int> /*unused*/) mutable { + handler(index_notification); + notifications->push_back(index_notification); + return QueryHandlerResult::COMMIT; // TODO: Will need to become COMMIT when we fix replication + }, + RWType::W}; +} + PreparedQuery PrepareAuthQuery(ParsedQuery parsed_query, bool in_explicit_transaction, InterpreterContext *interpreter_context, Interpreter &interpreter) { if (in_explicit_transaction) { @@ -3601,7 +3569,7 @@ PreparedQuery PrepareDatabaseInfoQuery(ParsedQuery parsed_query, bool in_explici } MG_ASSERT(current_db.db_acc_, "Database info query expects a current DB"); - MG_ASSERT(current_db.db_transactional_accessor_, "Database ifo query expects a current DB transaction"); + MG_ASSERT(current_db.db_transactional_accessor_, "Database info query expects a current DB transaction"); auto *dba = &*current_db.execution_db_accessor_; auto *info_query = utils::Downcast<DatabaseInfoQuery>(parsed_query.query); @@ -3616,10 +3584,11 @@ PreparedQuery PrepareDatabaseInfoQuery(ParsedQuery parsed_query, bool in_explici const std::string_view label_index_mark{"label"}; const std::string_view label_property_index_mark{"label+property"}; const std::string_view edge_type_index_mark{"edge-type"}; + const std::string_view text_index_mark{"text"}; auto info = dba->ListAllIndices(); auto storage_acc = database->Access(); std::vector<std::vector<TypedValue>> results; - 
results.reserve(info.label.size() + info.label_property.size()); + results.reserve(info.label.size() + info.label_property.size() + info.text_indices.size()); for (const auto &item : info.label) { results.push_back({TypedValue(label_index_mark), TypedValue(storage->LabelToName(item)), TypedValue(), TypedValue(static_cast<int>(storage_acc->ApproximateVertexCount(item)))}); @@ -3634,6 +3603,10 @@ PreparedQuery PrepareDatabaseInfoQuery(ParsedQuery parsed_query, bool in_explici results.push_back({TypedValue(edge_type_index_mark), TypedValue(storage->EdgeTypeToName(item)), TypedValue(), TypedValue(static_cast<int>(storage_acc->ApproximateEdgeCount(item)))}); } + for (const auto &[index_name, label] : info.text_indices) { + results.push_back({TypedValue(fmt::format("{} (name: {})", text_index_mark, index_name)), + TypedValue(storage->LabelToName(label)), TypedValue(), TypedValue()}); + } std::sort(results.begin(), results.end(), [&label_index_mark](const auto &record_1, const auto &record_2) { const auto type_1 = record_1[0].ValueString(); const auto type_2 = record_2[0].ValueString(); @@ -4276,6 +4249,7 @@ PreparedQuery PrepareShowDatabasesQuery(ParsedQuery parsed_query, InterpreterCon std::optional<uint64_t> Interpreter::GetTransactionId() const { return current_transaction_; } void Interpreter::BeginTransaction(QueryExtras const &extras) { + ResetInterpreter(); const auto prepared_query = PrepareTransactionQuery("BEGIN", extras); prepared_query.query_handler(nullptr, {}); } @@ -4310,12 +4284,12 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, const auto upper_case_query = utils::ToUpperCase(query_string); const auto trimmed_query = utils::Trim(upper_case_query); if (trimmed_query == "BEGIN" || trimmed_query == "COMMIT" || trimmed_query == "ROLLBACK") { - auto resource = utils::MonotonicBufferResource(kExecutionMemoryBlockSize); - auto prepared_query = PrepareTransactionQuery(trimmed_query, extras); - auto &query_execution = - 
query_executions_.emplace_back(QueryExecution::Create(std::move(resource), std::move(prepared_query))); - std::optional<int> qid = - in_explicit_transaction_ ? static_cast<int>(query_executions_.size() - 1) : std::optional<int>{}; + if (trimmed_query == "BEGIN") { + ResetInterpreter(); + } + auto &query_execution = query_executions_.emplace_back(QueryExecution::Create()); + query_execution->prepared_query = PrepareTransactionQuery(trimmed_query, extras); + auto qid = in_explicit_transaction_ ? static_cast<int>(query_executions_.size() - 1) : std::optional<int>{}; return {query_execution->prepared_query->header, query_execution->prepared_query->privileges, qid, {}}; } @@ -4345,35 +4319,8 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, ParseQuery(query_string, params, &interpreter_context_->ast_cache, interpreter_context_->config.query); auto parsing_time = parsing_timer.Elapsed().count(); - CypherQuery const *const cypher_query = [&]() -> CypherQuery * { - if (auto *cypher_query = utils::Downcast<CypherQuery>(parsed_query.query)) { - return cypher_query; - } - if (auto *profile_query = utils::Downcast<ProfileQuery>(parsed_query.query)) { - return profile_query->cypher_query_; - } - return nullptr; - }(); // IILE - - auto const [usePool, hasAllShortestPaths] = [&]() -> std::pair<bool, bool> { - if (!cypher_query) { - return {false, false}; - } - auto const &clauses = cypher_query->single_query_->clauses_; - bool hasAllShortestPaths = IsAllShortestPathsQuery(clauses); - // Using PoolResource without MonotonicMemoryResouce for LOAD CSV reduces memory usage. 
- bool usePool = hasAllShortestPaths || IsCallBatchedProcedureQuery(clauses) || IsLoadCsvQuery(clauses); - return {usePool, hasAllShortestPaths}; - }(); // IILE - // Setup QueryExecution - // its MemoryResource is mostly used for allocations done on Frame and storing `row`s - if (usePool) { - query_executions_.emplace_back(QueryExecution::Create(utils::PoolResource(128, kExecutionPoolMaxBlockSize))); - } else { - query_executions_.emplace_back(QueryExecution::Create(utils::MonotonicBufferResource(kExecutionMemoryBlockSize))); - } - + query_executions_.emplace_back(QueryExecution::Create()); auto &query_execution = query_executions_.back(); query_execution_ptr = &query_execution; @@ -4421,13 +4368,15 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, utils::Downcast<ProfileQuery>(parsed_query.query) || utils::Downcast<DumpQuery>(parsed_query.query) || utils::Downcast<TriggerQuery>(parsed_query.query) || utils::Downcast<AnalyzeGraphQuery>(parsed_query.query) || utils::Downcast<IndexQuery>(parsed_query.query) || utils::Downcast<EdgeIndexQuery>(parsed_query.query) || - utils::Downcast<DatabaseInfoQuery>(parsed_query.query) || utils::Downcast<ConstraintQuery>(parsed_query.query); + utils::Downcast<TextIndexQuery>(parsed_query.query) || utils::Downcast<DatabaseInfoQuery>(parsed_query.query) || + utils::Downcast<ConstraintQuery>(parsed_query.query); if (!in_explicit_transaction_ && requires_db_transaction) { // TODO: ATM only a single database, will change when we have multiple database transactions bool could_commit = utils::Downcast<CypherQuery>(parsed_query.query) != nullptr; bool unique = utils::Downcast<IndexQuery>(parsed_query.query) != nullptr || utils::Downcast<EdgeIndexQuery>(parsed_query.query) != nullptr || + utils::Downcast<TextIndexQuery>(parsed_query.query) != nullptr || utils::Downcast<ConstraintQuery>(parsed_query.query) != nullptr || upper_case_query.find(kSchemaAssert) != std::string::npos; 
SetupDatabaseTransaction(could_commit, unique); @@ -4442,9 +4391,7 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, utils::Timer planning_timer; PreparedQuery prepared_query; - utils::MemoryResource *memory_resource = - std::visit([](auto &execution_memory) -> utils::MemoryResource * { return &execution_memory; }, - query_execution->execution_memory); + utils::MemoryResource *memory_resource = query_execution->execution_memory.resource(); frame_change_collector_.reset(); frame_change_collector_.emplace(); if (utils::Downcast<CypherQuery>(parsed_query.query)) { @@ -4455,10 +4402,10 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, prepared_query = PrepareExplainQuery(std::move(parsed_query), &query_execution->summary, &query_execution->notifications, interpreter_context_, current_db_); } else if (utils::Downcast<ProfileQuery>(parsed_query.query)) { - prepared_query = PrepareProfileQuery(std::move(parsed_query), in_explicit_transaction_, &query_execution->summary, - &query_execution->notifications, interpreter_context_, current_db_, - &query_execution->execution_memory_with_exception, user_or_role_, - &transaction_status_, current_timeout_timer_, &*frame_change_collector_); + prepared_query = + PrepareProfileQuery(std::move(parsed_query), in_explicit_transaction_, &query_execution->summary, + &query_execution->notifications, interpreter_context_, current_db_, memory_resource, + user_or_role_, &transaction_status_, current_timeout_timer_, &*frame_change_collector_); } else if (utils::Downcast<DumpQuery>(parsed_query.query)) { prepared_query = PrepareDumpQuery(std::move(parsed_query), current_db_); } else if (utils::Downcast<IndexQuery>(parsed_query.query)) { @@ -4467,6 +4414,9 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, } else if (utils::Downcast<EdgeIndexQuery>(parsed_query.query)) { prepared_query = PrepareEdgeIndexQuery(std::move(parsed_query), 
in_explicit_transaction_, &query_execution->notifications, current_db_); + } else if (utils::Downcast<TextIndexQuery>(parsed_query.query)) { + prepared_query = PrepareTextIndexQuery(std::move(parsed_query), in_explicit_transaction_, + &query_execution->notifications, current_db_); } else if (utils::Downcast<AnalyzeGraphQuery>(parsed_query.query)) { prepared_query = PrepareAnalyzeGraphQuery(std::move(parsed_query), in_explicit_transaction_, current_db_); } else if (utils::Downcast<AuthQuery>(parsed_query.query)) { @@ -4660,7 +4610,7 @@ void RunTriggersAfterCommit(dbms::DatabaseAccess db_acc, InterpreterContext *int std::atomic<TransactionStatus> *transaction_status) { // Run the triggers for (const auto &trigger : db_acc->trigger_store()->AfterCommitTriggers().access()) { - utils::MonotonicBufferResource execution_memory{kExecutionMemoryBlockSize}; + QueryAllocator execution_memory{}; // create a new transaction for each trigger auto tx_acc = db_acc->Access(); @@ -4671,7 +4621,7 @@ void RunTriggersAfterCommit(dbms::DatabaseAccess db_acc, InterpreterContext *int auto trigger_context = original_trigger_context; trigger_context.AdaptForAccessor(&db_accessor); try { - trigger.Execute(&db_accessor, &execution_memory, flags::run_time::GetExecutionTimeout(), + trigger.Execute(&db_accessor, execution_memory.resource(), flags::run_time::GetExecutionTimeout(), &interpreter_context->is_shutting_down, transaction_status, trigger_context); } catch (const utils::BasicException &exception) { spdlog::warn("Trigger '{}' failed with exception:\n{}", trigger.Name(), exception.what()); @@ -4825,11 +4775,12 @@ void Interpreter::Commit() { if (trigger_context) { // Run the triggers for (const auto &trigger : db->trigger_store()->BeforeCommitTriggers().access()) { - utils::MonotonicBufferResource execution_memory{kExecutionMemoryBlockSize}; + QueryAllocator execution_memory{}; AdvanceCommand(); try { - trigger.Execute(&*current_db_.execution_db_accessor_, &execution_memory, 
flags::run_time::GetExecutionTimeout(), - &interpreter_context_->is_shutting_down, &transaction_status_, *trigger_context); + trigger.Execute(&*current_db_.execution_db_accessor_, execution_memory.resource(), + flags::run_time::GetExecutionTimeout(), &interpreter_context_->is_shutting_down, + &transaction_status_, *trigger_context); } catch (const utils::BasicException &e) { throw utils::BasicException( fmt::format("Trigger '{}' caused the transaction to fail.\nException: {}", trigger.Name(), e.what())); diff --git a/src/query/interpreter.hpp b/src/query/interpreter.hpp index f18bd6721..5366b4472 100644 --- a/src/query/interpreter.hpp +++ b/src/query/interpreter.hpp @@ -65,6 +65,54 @@ extern const Event SuccessfulQuery; namespace memgraph::query { +struct QueryAllocator { + QueryAllocator() = default; + QueryAllocator(QueryAllocator const &) = delete; + QueryAllocator &operator=(QueryAllocator const &) = delete; + + // No move addresses to pool & monotonic fields must be stable + QueryAllocator(QueryAllocator &&) = delete; + QueryAllocator &operator=(QueryAllocator &&) = delete; + + auto resource() -> utils::MemoryResource * { +#ifndef MG_MEMORY_PROFILE + return &pool; +#else + return upstream_resource(); +#endif + } + auto resource_without_pool() -> utils::MemoryResource * { +#ifndef MG_MEMORY_PROFILE + return &monotonic; +#else + return upstream_resource(); +#endif + } + auto resource_without_pool_or_mono() -> utils::MemoryResource * { return upstream_resource(); } + + private: + // At least one page to ensure not sharing page with other subsystems + static constexpr auto kMonotonicInitialSize = 4UL * 1024UL; + // TODO: need to profile to check for good defaults, also maybe PoolResource + // needs to be smarter. We expect more reuse of smaller objects than larger + // objects. 64*1024B is maybe wasteful, whereas 256*32B maybe sensible. + // Depends on number of small objects expected. 
+ static constexpr auto kPoolBlockPerChunk = 64UL; + static constexpr auto kPoolMaxBlockSize = 1024UL; + + static auto upstream_resource() -> utils::MemoryResource * { + // singleton ResourceWithOutOfMemoryException + // explicitly backed by NewDeleteResource + static auto upstream = utils::ResourceWithOutOfMemoryException{utils::NewDeleteResource()}; + return &upstream; + } + +#ifndef MG_MEMORY_PROFILE + memgraph::utils::MonotonicBufferResource monotonic{kMonotonicInitialSize, upstream_resource()}; + memgraph::utils::PoolResource pool{kPoolBlockPerChunk, &monotonic, upstream_resource()}; +#endif +}; + struct InterpreterContext; inline constexpr size_t kExecutionMemoryBlockSize = 1UL * 1024UL * 1024UL; @@ -304,45 +352,25 @@ class Interpreter final { } struct QueryExecution { - std::variant<utils::MonotonicBufferResource, utils::PoolResource> execution_memory; - utils::ResourceWithOutOfMemoryException execution_memory_with_exception; - std::optional<PreparedQuery> prepared_query; + QueryAllocator execution_memory; // NOTE: before all other fields which uses this memory + std::optional<PreparedQuery> prepared_query; std::map<std::string, TypedValue> summary; std::vector<Notification> notifications; - static auto Create(std::variant<utils::MonotonicBufferResource, utils::PoolResource> memory_resource, - std::optional<PreparedQuery> prepared_query = std::nullopt) -> std::unique_ptr<QueryExecution> { - return std::make_unique<QueryExecution>(std::move(memory_resource), std::move(prepared_query)); - } + static auto Create() -> std::unique_ptr<QueryExecution> { return std::make_unique<QueryExecution>(); } - explicit QueryExecution(std::variant<utils::MonotonicBufferResource, utils::PoolResource> memory_resource, - std::optional<PreparedQuery> prepared_query) - : execution_memory(std::move(memory_resource)), prepared_query{std::move(prepared_query)} { - std::visit( - [&](auto &memory_resource) { - execution_memory_with_exception = 
utils::ResourceWithOutOfMemoryException(&memory_resource); - }, - execution_memory); - }; + explicit QueryExecution() = default; QueryExecution(const QueryExecution &) = delete; - QueryExecution(QueryExecution &&) = default; + QueryExecution(QueryExecution &&) = delete; QueryExecution &operator=(const QueryExecution &) = delete; - QueryExecution &operator=(QueryExecution &&) = default; + QueryExecution &operator=(QueryExecution &&) = delete; - ~QueryExecution() { - // We should always release the execution memory AFTER we - // destroy the prepared query which is using that instance - // of execution memory. - prepared_query.reset(); - std::visit([](auto &memory_resource) { memory_resource.Release(); }, execution_memory); - } + ~QueryExecution() = default; void CleanRuntimeData() { - if (prepared_query.has_value()) { - prepared_query.reset(); - } + prepared_query.reset(); notifications.clear(); } }; @@ -413,9 +441,7 @@ std::map<std::string, TypedValue> Interpreter::Pull(TStream *result_stream, std: try { // Wrap the (statically polymorphic) stream type into a common type which // the handler knows. 
- AnyStream stream{result_stream, - std::visit([](auto &execution_memory) -> utils::MemoryResource * { return &execution_memory; }, - query_execution->execution_memory)}; + AnyStream stream{result_stream, query_execution->execution_memory.resource()}; const auto maybe_res = query_execution->prepared_query->query_handler(&stream, n); // Stream is using execution memory of the query_execution which // can be deleted after its execution so the stream should be cleared diff --git a/src/query/plan/operator.cpp b/src/query/plan/operator.cpp index d39ade99f..e6dca1b12 100644 --- a/src/query/plan/operator.cpp +++ b/src/query/plan/operator.cpp @@ -32,6 +32,7 @@ #include "spdlog/spdlog.h" #include "csv/parsing.hpp" +#include "flags/experimental.hpp" #include "license/license.hpp" #include "query/auth_checker.hpp" #include "query/context.hpp" @@ -69,6 +70,7 @@ #include "utils/pmr/vector.hpp" #include "utils/readable_size.hpp" #include "utils/string.hpp" +#include "utils/tag.hpp" #include "utils/temporal.hpp" #include "utils/typeinfo.hpp" @@ -266,6 +268,10 @@ VertexAccessor &CreateLocalVertex(const NodeCreationInfo &node_info, Frame *fram } MultiPropsInitChecked(&new_node, properties); + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + context.db_accessor->TextIndexAddVertex(new_node); + } + (*frame)[node_info.symbol] = new_node; return (*frame)[node_info.symbol].ValueVertex(); } @@ -329,7 +335,7 @@ CreateExpand::CreateExpand(NodeCreationInfo node_info, EdgeCreationInfo edge_inf ACCEPT_WITH_INPUT(CreateExpand) UniqueCursorPtr CreateExpand::MakeCursor(utils::MemoryResource *mem) const { - memgraph::metrics::IncrementCounter(memgraph::metrics::CreateNodeOperator); + memgraph::metrics::IncrementCounter(memgraph::metrics::CreateExpandOperator); return MakeUniqueCursorPtr<CreateExpandCursor>(mem, *this, mem); } @@ -865,17 +871,15 @@ bool Expand::ExpandCursor::Pull(Frame &frame, ExecutionContext &context) { SCOPED_PROFILE_OP_BY_REF(self_); // A helper function 
for expanding a node from an edge. - auto pull_node = [this, &frame](const EdgeAccessor &new_edge, EdgeAtom::Direction direction) { + auto pull_node = [this, &frame]<EdgeAtom::Direction direction>(const EdgeAccessor &new_edge, + utils::tag_value<direction>) { if (self_.common_.existing_node) return; - switch (direction) { - case EdgeAtom::Direction::IN: - frame[self_.common_.node_symbol] = new_edge.From(); - break; - case EdgeAtom::Direction::OUT: - frame[self_.common_.node_symbol] = new_edge.To(); - break; - case EdgeAtom::Direction::BOTH: - LOG_FATAL("Must indicate exact expansion direction here"); + if constexpr (direction == EdgeAtom::Direction::IN) { + frame[self_.common_.node_symbol] = new_edge.From(); + } else if constexpr (direction == EdgeAtom::Direction::OUT) { + frame[self_.common_.node_symbol] = new_edge.To(); + } else { + LOG_FATAL("Must indicate exact expansion direction here"); } }; @@ -894,7 +898,7 @@ bool Expand::ExpandCursor::Pull(Frame &frame, ExecutionContext &context) { #endif frame[self_.common_.edge_symbol] = edge; - pull_node(edge, EdgeAtom::Direction::IN); + pull_node(edge, utils::tag_v<EdgeAtom::Direction::IN>); return true; } @@ -914,7 +918,7 @@ bool Expand::ExpandCursor::Pull(Frame &frame, ExecutionContext &context) { } #endif frame[self_.common_.edge_symbol] = edge; - pull_node(edge, EdgeAtom::Direction::OUT); + pull_node(edge, utils::tag_v<EdgeAtom::Direction::OUT>); return true; } @@ -1008,12 +1012,12 @@ bool Expand::ExpandCursor::InitEdges(Frame &frame, ExecutionContext &context) { auto existing_node = *expansion_info_.existing_node; auto edges_result = UnwrapEdgesResult(vertex.InEdges(self_.view_, self_.common_.edge_types, existing_node)); - in_edges_.emplace(edges_result.edges); + in_edges_.emplace(std::move(edges_result.edges)); num_expanded_first = edges_result.expanded_count; } } else { auto edges_result = UnwrapEdgesResult(vertex.InEdges(self_.view_, self_.common_.edge_types)); - in_edges_.emplace(edges_result.edges); + 
in_edges_.emplace(std::move(edges_result.edges)); num_expanded_first = edges_result.expanded_count; } if (in_edges_) { @@ -1027,12 +1031,12 @@ bool Expand::ExpandCursor::InitEdges(Frame &frame, ExecutionContext &context) { if (expansion_info_.existing_node) { auto existing_node = *expansion_info_.existing_node; auto edges_result = UnwrapEdgesResult(vertex.OutEdges(self_.view_, self_.common_.edge_types, existing_node)); - out_edges_.emplace(edges_result.edges); + out_edges_.emplace(std::move(edges_result.edges)); num_expanded_second = edges_result.expanded_count; } } else { auto edges_result = UnwrapEdgesResult(vertex.OutEdges(self_.view_, self_.common_.edge_types)); - out_edges_.emplace(edges_result.edges); + out_edges_.emplace(std::move(edges_result.edges)); num_expanded_second = edges_result.expanded_count; } if (out_edges_) { @@ -1118,14 +1122,14 @@ auto ExpandFromVertex(const VertexAccessor &vertex, EdgeAtom::Direction directio if (direction != EdgeAtom::Direction::OUT) { auto edges = UnwrapEdgesResult(vertex.InEdges(view, edge_types)).edges; - if (edges.begin() != edges.end()) { + if (!edges.empty()) { chain_elements.emplace_back(wrapper(EdgeAtom::Direction::IN, std::move(edges))); } } if (direction != EdgeAtom::Direction::IN) { auto edges = UnwrapEdgesResult(vertex.OutEdges(view, edge_types)).edges; - if (edges.begin() != edges.end()) { + if (!edges.empty()) { chain_elements.emplace_back(wrapper(EdgeAtom::Direction::OUT, std::move(edges))); } } @@ -1245,8 +1249,13 @@ class ExpandVariableCursor : public Cursor { } // reset the frame value to an empty edge list - auto *pull_memory = context.evaluation_context.memory; - frame[self_.common_.edge_symbol] = TypedValue::TVector(pull_memory); + if (frame[self_.common_.edge_symbol].IsList()) { + // Preserve the list capacity if possible + frame[self_.common_.edge_symbol].ValueList().clear(); + } else { + auto *pull_memory = context.evaluation_context.memory; + frame[self_.common_.edge_symbol] = 
TypedValue::TVector(pull_memory); + } return true; } @@ -2988,6 +2997,9 @@ bool SetProperty::SetPropertyCursor::Pull(Frame &frame, ExecutionContext &contex context.trigger_context_collector->RegisterSetObjectProperty(lhs.ValueVertex(), self_.property_, TypedValue{std::move(old_value)}, TypedValue{rhs}); } + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + context.db_accessor->TextIndexUpdateVertex(lhs.ValueVertex()); + } break; } case TypedValue::Type::Edge: { @@ -3144,6 +3156,9 @@ void SetPropertiesOnRecord(TRecordAccessor *record, const TypedValue &rhs, SetPr case TypedValue::Type::Vertex: { PropertiesMap new_properties = get_props(rhs.ValueVertex()); update_props(new_properties); + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + context->db_accessor->TextIndexUpdateVertex(rhs.ValueVertex()); + } break; } case TypedValue::Type::Map: { @@ -3201,6 +3216,9 @@ bool SetProperties::SetPropertiesCursor::Pull(Frame &frame, ExecutionContext &co } #endif SetPropertiesOnRecord(&lhs.ValueVertex(), rhs, self_.op_, &context, cached_name_id_); + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + context.db_accessor->TextIndexUpdateVertex(lhs.ValueVertex()); + } break; case TypedValue::Type::Edge: #ifdef MG_ENTERPRISE @@ -3292,6 +3310,10 @@ bool SetLabels::SetLabelsCursor::Pull(Frame &frame, ExecutionContext &context) { } } + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + context.db_accessor->TextIndexUpdateVertex(vertex); + } + return true; } @@ -3363,6 +3385,9 @@ bool RemoveProperty::RemovePropertyCursor::Pull(Frame &frame, ExecutionContext & } #endif remove_prop(&lhs.ValueVertex()); + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + context.db_accessor->TextIndexUpdateVertex(lhs.ValueVertex()); + } break; case TypedValue::Type::Edge: #ifdef MG_ENTERPRISE @@ -3455,6 +3480,10 @@ bool RemoveLabels::RemoveLabelsCursor::Pull(Frame &frame, ExecutionContext &cont } } + if 
(flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + context.db_accessor->TextIndexUpdateVertex(vertex, EvaluateLabels(self_.labels_, evaluator, context.db_accessor)); + } + return true; } @@ -4475,9 +4504,8 @@ class UnwindCursor : public Cursor { TypedValue input_value = self_.input_expression_->Accept(evaluator); if (input_value.type() != TypedValue::Type::List) throw QueryRuntimeException("Argument of UNWIND must be a list, but '{}' was provided.", input_value.type()); - // Copy the evaluted input_value_list to our vector. - // eval memory != query memory - input_value_ = input_value.ValueList(); + // Move the evaluated input_value_list to our vector. + input_value_ = std::move(input_value.ValueList()); input_value_it_ = input_value_.begin(); } @@ -5337,6 +5365,7 @@ class LoadCsvCursor : public Cursor { "1"); } did_pull_ = true; + reader_->Reset(); } auto row = reader_->GetNextRow(context.evaluation_context.memory); diff --git a/src/query/plan/operator.hpp b/src/query/plan/operator.hpp index fa736cd86..a716fa645 100644 --- a/src/query/plan/operator.hpp +++ b/src/query/plan/operator.hpp @@ -76,18 +76,13 @@ using UniqueCursorPtr = std::unique_ptr<Cursor, std::function<void(Cursor *)>>; template <class TCursor, class... TArgs> std::unique_ptr<Cursor, std::function<void(Cursor *)>> MakeUniqueCursorPtr(utils::Allocator<TCursor> allocator, TArgs &&...args) { - auto *ptr = allocator.allocate(1); - try { - auto *cursor = new (ptr) TCursor(std::forward<TArgs>(args)...); - return std::unique_ptr<Cursor, std::function<void(Cursor *)>>(cursor, [allocator](Cursor *base_ptr) mutable { - auto *p = static_cast<TCursor *>(base_ptr); - p->~TCursor(); - allocator.deallocate(p, 1); - }); - } catch (...)
{ - allocator.deallocate(ptr, 1); - throw; - } + auto *cursor = allocator.template new_object<TCursor>(std::forward<TArgs>(args)...); + auto dtr = [allocator](Cursor *base_ptr) mutable { + auto *p = static_cast<TCursor *>(base_ptr); + allocator.delete_object(p); + }; + // TODO: not std::function + return std::unique_ptr<Cursor, std::function<void(Cursor *)>>(cursor, std::move(dtr)); } class Once; diff --git a/src/query/plan/vertex_count_cache.hpp b/src/query/plan/vertex_count_cache.hpp index 802f4e09f..69e002c0a 100644 --- a/src/query/plan/vertex_count_cache.hpp +++ b/src/query/plan/vertex_count_cache.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source diff --git a/src/query/procedure/mg_procedure_impl.cpp b/src/query/procedure/mg_procedure_impl.cpp index d6ce3c7b7..a2bc23aa3 100644 --- a/src/query/procedure/mg_procedure_impl.cpp +++ b/src/query/procedure/mg_procedure_impl.cpp @@ -23,6 +23,8 @@ #include <utility> #include <variant> +#include "flags/experimental.hpp" +#include "flags/run_time_configurable.hpp" #include "license/license.hpp" #include "mg_procedure.h" #include "module.hpp" @@ -32,6 +34,7 @@ #include "query/procedure/fmt.hpp" #include "query/procedure/mg_procedure_helpers.hpp" #include "query/stream/common.hpp" +#include "storage/v2/indices/text_index.hpp" #include "storage/v2/property_value.hpp" #include "storage/v2/storage_mode.hpp" #include "storage/v2/view.hpp" @@ -1843,6 +1846,11 @@ mgp_error mgp_vertex_set_property(struct mgp_vertex *v, const char *property_nam const auto result = std::visit( [prop_key, property_value](auto &impl) { return impl.SetProperty(prop_key, ToPropertyValue(*property_value)); }, v->impl); + if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::TEXT_SEARCH) && !result.HasError()) { + 
auto v_impl = v->getImpl(); + v->graph->getImpl()->TextIndexUpdateVertex(v_impl); + } + if (result.HasError()) { switch (result.GetError()) { case memgraph::storage::Error::DELETED_OBJECT: @@ -1899,6 +1907,11 @@ mgp_error mgp_vertex_set_properties(struct mgp_vertex *v, struct mgp_map *proper } const auto result = v->getImpl().UpdateProperties(props); + if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::TEXT_SEARCH) && !result.HasError()) { + auto v_impl = v->getImpl(); + v->graph->getImpl()->TextIndexUpdateVertex(v_impl); + } + if (result.HasError()) { switch (result.GetError()) { case memgraph::storage::Error::DELETED_OBJECT: @@ -1956,6 +1969,10 @@ mgp_error mgp_vertex_add_label(struct mgp_vertex *v, mgp_label label) { } const auto result = std::visit([label_id](auto &impl) { return impl.AddLabel(label_id); }, v->impl); + if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::TEXT_SEARCH) && !result.HasError()) { + auto v_impl = v->getImpl(); + v->graph->getImpl()->TextIndexUpdateVertex(v_impl); + } if (result.HasError()) { switch (result.GetError()) { @@ -1998,6 +2015,10 @@ mgp_error mgp_vertex_remove_label(struct mgp_vertex *v, mgp_label label) { throw ImmutableObjectException{"Cannot remove a label from an immutable vertex!"}; } const auto result = std::visit([label_id](auto &impl) { return impl.RemoveLabel(label_id); }, v->impl); + if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::TEXT_SEARCH) && !result.HasError()) { + auto v_impl = v->getImpl(); + v->graph->getImpl()->TextIndexUpdateVertex(v_impl, {label_id}); + } if (result.HasError()) { switch (result.GetError()) { @@ -2590,7 +2611,7 @@ mgp_error mgp_edge_iter_properties(mgp_edge *e, mgp_memory *memory, mgp_properti mgp_error mgp_graph_get_vertex_by_id(mgp_graph *graph, mgp_vertex_id id, mgp_memory *memory, mgp_vertex **result) { return WrapExceptions( [graph, id, memory]() -> mgp_vertex * { - std::optional<memgraph::query::VertexAccessor> 
maybe_vertex = std::visit( + auto maybe_vertex = std::visit( [graph, id](auto *impl) { return impl->FindVertex(memgraph::storage::Gid::FromInt(id.as_int), graph->view); }, @@ -2967,6 +2988,10 @@ mgp_error mgp_graph_create_vertex(struct mgp_graph *graph, mgp_memory *memory, m } auto *vertex = std::visit( [=](auto *impl) { return NewRawMgpObject<mgp_vertex>(memory, impl->InsertVertex(), graph); }, graph->impl); + if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::TEXT_SEARCH)) { + auto v_impl = vertex->getImpl(); + vertex->graph->getImpl()->TextIndexAddVertex(v_impl); + } auto &ctx = graph->ctx; ctx->execution_stats[memgraph::query::ExecutionStats::Key::CREATED_NODES] += 1; @@ -3324,6 +3349,140 @@ mgp_error mgp_graph_delete_edge(struct mgp_graph *graph, mgp_edge *edge) { }); } +mgp_error mgp_graph_has_text_index(mgp_graph *graph, const char *index_name, int *result) { + return WrapExceptions([graph, index_name, result]() { + std::visit(memgraph::utils::Overloaded{ + [&](memgraph::query::DbAccessor *impl) { *result = impl->TextIndexExists(index_name); }, + [&](memgraph::query::SubgraphDbAccessor *impl) { + *result = impl->GetAccessor()->TextIndexExists(index_name); + }}, + graph->impl); + }); +} + +mgp_vertex *GetVertexByGid(mgp_graph *graph, memgraph::storage::Gid id, mgp_memory *memory) { + auto get_vertex_by_gid = memgraph::utils::Overloaded{ + [graph, id, memory](memgraph::query::DbAccessor *impl) -> mgp_vertex * { + auto maybe_vertex = impl->FindVertex(id, graph->view); + if (!maybe_vertex) return nullptr; + return NewRawMgpObject<mgp_vertex>(memory, *maybe_vertex, graph); + }, + [graph, id, memory](memgraph::query::SubgraphDbAccessor *impl) -> mgp_vertex * { + auto maybe_vertex = impl->FindVertex(id, graph->view); + if (!maybe_vertex) return nullptr; + return NewRawMgpObject<mgp_vertex>( + memory, memgraph::query::SubgraphVertexAccessor(*maybe_vertex, impl->getGraph()), graph); + }}; + return std::visit(get_vertex_by_gid, graph->impl); +} + 
+void WrapTextSearch(mgp_graph *graph, mgp_memory *memory, mgp_map **result, + const std::vector<memgraph::storage::Gid> &vertex_ids = {}, + const std::optional<std::string> &error_msg = std::nullopt) { + if (const auto err = mgp_map_make_empty(memory, result); err != mgp_error::MGP_ERROR_NO_ERROR) { + throw std::logic_error("Retrieving text search results failed during creation of a mgp_map"); + } + + mgp_value *error_value; + if (error_msg.has_value()) { + if (const auto err = mgp_value_make_string(error_msg.value().data(), memory, &error_value); + err != mgp_error::MGP_ERROR_NO_ERROR) { + throw std::logic_error("Retrieving text search results failed during creation of a string mgp_value"); + } + } + + mgp_list *search_results{}; + if (const auto err = mgp_list_make_empty(vertex_ids.size(), memory, &search_results); + err != mgp_error::MGP_ERROR_NO_ERROR) { + throw std::logic_error("Retrieving text search results failed during creation of a mgp_list"); + } + + for (const auto &vertex_id : vertex_ids) { + mgp_value *vertex; + if (const auto err = mgp_value_make_vertex(GetVertexByGid(graph, vertex_id, memory), &vertex); + err != mgp_error::MGP_ERROR_NO_ERROR) { + throw std::logic_error("Retrieving text search results failed during creation of a vertex mgp_value"); + } + if (const auto err = mgp_list_append(search_results, vertex); err != mgp_error::MGP_ERROR_NO_ERROR) { + throw std::logic_error( + "Retrieving text search results failed during insertion of the mgp_value into the result list"); + } + } + + mgp_value *search_results_value; + if (const auto err = mgp_value_make_list(search_results, &search_results_value); + err != mgp_error::MGP_ERROR_NO_ERROR) { + throw std::logic_error("Retrieving text search results failed during creation of a list mgp_value"); + } + + if (error_msg.has_value()) { + if (const auto err = mgp_map_insert(*result, "error_msg", error_value); err != mgp_error::MGP_ERROR_NO_ERROR) { + throw std::logic_error("Retrieving text index search 
error failed during insertion into mgp_map"); + } + return; + } + + if (const auto err = mgp_map_insert(*result, "search_results", search_results_value); + err != mgp_error::MGP_ERROR_NO_ERROR) { + throw std::logic_error("Retrieving text index search results failed during insertion into mgp_map"); + } +} + +void WrapTextIndexAggregation(mgp_memory *memory, mgp_map **result, const std::string &aggregation_result, + const std::optional<std::string> &error_msg = std::nullopt) { + if (const auto err = mgp_map_make_empty(memory, result); err != mgp_error::MGP_ERROR_NO_ERROR) { + throw std::logic_error("Retrieving text search results failed during creation of a mgp_map"); + } + + mgp_value *aggregation_result_or_error_value; + if (const auto err = mgp_value_make_string(error_msg.value_or(aggregation_result).data(), memory, + &aggregation_result_or_error_value); + err != mgp_error::MGP_ERROR_NO_ERROR) { + throw std::logic_error("Retrieving text search results failed during creation of a string mgp_value"); + } + + if (error_msg.has_value()) { + if (const auto err = mgp_map_insert(*result, "error_msg", aggregation_result_or_error_value); + err != mgp_error::MGP_ERROR_NO_ERROR) { + throw std::logic_error("Retrieving text index aggregation error failed during insertion into mgp_map"); + } + return; + } + + if (const auto err = mgp_map_insert(*result, "aggregation_results", aggregation_result_or_error_value); + err != mgp_error::MGP_ERROR_NO_ERROR) { + throw std::logic_error("Retrieving text index aggregation results failed during insertion into mgp_map"); + } +} + +mgp_error mgp_graph_search_text_index(mgp_graph *graph, const char *index_name, const char *search_query, + text_search_mode search_mode, mgp_memory *memory, mgp_map **result) { + return WrapExceptions([graph, memory, index_name, search_query, search_mode, result]() { + std::vector<memgraph::storage::Gid> found_vertices_ids; + std::optional<std::string> error_msg = std::nullopt; + try { + found_vertices_ids = 
graph->getImpl()->TextIndexSearch(index_name, search_query, search_mode); + } catch (memgraph::query::QueryException &e) { + error_msg = e.what(); + } + WrapTextSearch(graph, memory, result, found_vertices_ids, error_msg); + }); +} + +mgp_error mgp_graph_aggregate_over_text_index(mgp_graph *graph, const char *index_name, const char *search_query, + const char *aggregation_query, mgp_memory *memory, mgp_map **result) { + return WrapExceptions([graph, memory, index_name, search_query, aggregation_query, result]() { + std::string search_results; + std::optional<std::string> error_msg = std::nullopt; + try { + search_results = graph->getImpl()->TextIndexAggregate(index_name, search_query, aggregation_query); + } catch (memgraph::query::QueryException &e) { + error_msg = e.what(); + } + WrapTextIndexAggregation(memory, result, search_results, error_msg); + }); +} + #ifdef MG_ENTERPRISE namespace { void NextPermitted(mgp_vertices_iterator &it) { diff --git a/src/query/procedure/mg_procedure_impl.hpp b/src/query/procedure/mg_procedure_impl.hpp index 17cac4eca..a91b4386c 100644 --- a/src/query/procedure/mg_procedure_impl.hpp +++ b/src/query/procedure/mg_procedure_impl.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -562,6 +562,13 @@ struct mgp_graph { memgraph::query::ExecutionContext *ctx; memgraph::storage::StorageMode storage_mode; + memgraph::query::DbAccessor *getImpl() const { + return std::visit( + memgraph::utils::Overloaded{[](memgraph::query::DbAccessor *impl) { return impl; }, + [](memgraph::query::SubgraphDbAccessor *impl) { return impl->GetAccessor(); }}, + this->impl); + } + static mgp_graph WritableGraph(memgraph::query::DbAccessor &acc, memgraph::storage::View view, memgraph::query::ExecutionContext &ctx) { return mgp_graph{&acc, view, &ctx, acc.GetStorageMode()}; diff --git a/src/query/trigger.cpp b/src/query/trigger.cpp index 437389128..151a33dad 100644 --- a/src/query/trigger.cpp +++ b/src/query/trigger.cpp @@ -191,9 +191,9 @@ std::shared_ptr<Trigger::TriggerPlan> Trigger::GetPlan(DbAccessor *db_accessor) return trigger_plan_; } -void Trigger::Execute(DbAccessor *dba, utils::MonotonicBufferResource *execution_memory, - const double max_execution_time_sec, std::atomic<bool> *is_shutting_down, - std::atomic<TransactionStatus> *transaction_status, const TriggerContext &context) const { +void Trigger::Execute(DbAccessor *dba, utils::MemoryResource *execution_memory, const double max_execution_time_sec, + std::atomic<bool> *is_shutting_down, std::atomic<TransactionStatus> *transaction_status, + const TriggerContext &context) const { if (!context.ShouldEventTrigger(event_type_)) { return; } @@ -214,22 +214,7 @@ void Trigger::Execute(DbAccessor *dba, utils::MonotonicBufferResource *execution ctx.is_shutting_down = is_shutting_down; ctx.transaction_status = transaction_status; ctx.is_profile_query = false; - - // Set up temporary memory for a single Pull. Initial memory comes from the - // stack. 
256 KiB should fit on the stack and should be more than enough for a - // single `Pull`. - static constexpr size_t stack_size = 256UL * 1024UL; - char stack_data[stack_size]; - - // We can throw on every query because a simple queries for deleting will use only - // the stack allocated buffer. - // Also, we want to throw only when the query engine requests more memory and not the storage - // so we add the exception to the allocator. - utils::ResourceWithOutOfMemoryException resource_with_exception; - utils::MonotonicBufferResource monotonic_memory(&stack_data[0], stack_size, &resource_with_exception); - // TODO (mferencevic): Tune the parameters accordingly. - utils::PoolResource pool_memory(128, 1024, &monotonic_memory); - ctx.evaluation_context.memory = &pool_memory; + ctx.evaluation_context.memory = execution_memory; auto cursor = plan.plan().MakeCursor(execution_memory); Frame frame{plan.symbol_table().max_position(), execution_memory}; diff --git a/src/query/trigger.hpp b/src/query/trigger.hpp index 91c74579e..24bbf50ee 100644 --- a/src/query/trigger.hpp +++ b/src/query/trigger.hpp @@ -39,7 +39,7 @@ struct Trigger { utils::SkipList<QueryCacheEntry> *query_cache, DbAccessor *db_accessor, const InterpreterConfig::Query &query_config, std::shared_ptr<QueryUserOrRole> owner); - void Execute(DbAccessor *dba, utils::MonotonicBufferResource *execution_memory, double max_execution_time_sec, + void Execute(DbAccessor *dba, utils::MemoryResource *execution_memory, double max_execution_time_sec, std::atomic<bool> *is_shutting_down, std::atomic<TransactionStatus> *transaction_status, const TriggerContext &context) const; diff --git a/src/storage/v2/CMakeLists.txt b/src/storage/v2/CMakeLists.txt index ec5108d63..49601eb54 100644 --- a/src/storage/v2/CMakeLists.txt +++ b/src/storage/v2/CMakeLists.txt @@ -20,6 +20,7 @@ add_library(mg-storage-v2 STATIC vertex_info_cache.cpp storage.cpp indices/indices.cpp + indices/text_index.cpp all_vertices_iterable.cpp edges_iterable.cpp 
vertices_iterable.cpp @@ -45,4 +46,5 @@ add_library(mg-storage-v2 STATIC inmemory/replication/recovery.cpp ) -target_link_libraries(mg-storage-v2 mg::replication Threads::Threads mg-utils gflags absl::flat_hash_map mg-rpc mg-slk mg-events mg-memory) +target_include_directories(mg-storage-v2 PUBLIC ${CMAKE_SOURCE_DIR}/include) +target_link_libraries(mg-storage-v2 mg::replication Threads::Threads mg-utils mg-flags gflags absl::flat_hash_map mg-rpc mg-slk mg-events mg-memory mgcxx_text_search tantivy_text_search) diff --git a/src/storage/v2/config.hpp b/src/storage/v2/config.hpp index b2a55a40a..419f29b85 100644 --- a/src/storage/v2/config.hpp +++ b/src/storage/v2/config.hpp @@ -37,6 +37,7 @@ struct SalientConfig { struct Items { bool properties_on_edges{true}; bool enable_schema_metadata{false}; + bool delta_on_identical_property_update{true}; friend bool operator==(const Items &lrh, const Items &rhs) = default; } items; diff --git a/src/storage/v2/disk/durable_metadata.cpp b/src/storage/v2/disk/durable_metadata.cpp index fe2c558ae..13d515af2 100644 --- a/src/storage/v2/disk/durable_metadata.cpp +++ b/src/storage/v2/disk/durable_metadata.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -26,6 +26,7 @@ constexpr const char *kVertexCountDescr = "vertex_count"; constexpr const char *kEdgeDountDescr = "edge_count"; constexpr const char *kLabelIndexStr = "label_index"; constexpr const char *kLabelPropertyIndexStr = "label_property_index"; +constexpr const char *kTextIndexStr = "text_index"; constexpr const char *kExistenceConstraintsStr = "existence_constraints"; constexpr const char *kUniqueConstraintsStr = "unique_constraints"; } // namespace @@ -144,6 +145,31 @@ bool DurableMetadata::PersistLabelPropertyIndexAndExistenceConstraintDeletion(La return true; } +bool DurableMetadata::PersistTextIndexCreation(const std::string &index_name, LabelId label) { + const std::string index_name_label_pair = index_name + "," + label.ToString(); + if (auto text_index_store = durability_kvstore_.Get(kTextIndexStr); text_index_store.has_value()) { + std::string &value = text_index_store.value(); + value += "|"; + value += index_name_label_pair; + return durability_kvstore_.Put(kTextIndexStr, value); + } + return durability_kvstore_.Put(kTextIndexStr, index_name_label_pair); +} + +bool DurableMetadata::PersistTextIndexDeletion(const std::string &index_name, LabelId label) { + const std::string index_name_label_pair = index_name + "," + label.ToString(); + if (auto text_index_store = durability_kvstore_.Get(kTextIndexStr); text_index_store.has_value()) { + const std::string &value = text_index_store.value(); + std::vector<std::string> text_indices = utils::Split(value, "|"); + std::erase(text_indices, index_name_label_pair); + if (text_indices.empty()) { + return durability_kvstore_.Delete(kTextIndexStr); + } + return durability_kvstore_.Put(kTextIndexStr, utils::Join(text_indices, "|")); + } + return true; +} + bool DurableMetadata::PersistUniqueConstraintCreation(LabelId label, 
const std::set<PropertyId> &properties) { const std::string entry = utils::GetKeyForUniqueConstraintsDurability(label, properties); diff --git a/src/storage/v2/disk/durable_metadata.hpp b/src/storage/v2/disk/durable_metadata.hpp index 168cce469..4aaa8a707 100644 --- a/src/storage/v2/disk/durable_metadata.hpp +++ b/src/storage/v2/disk/durable_metadata.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -53,6 +53,10 @@ class DurableMetadata { bool PersistLabelPropertyIndexAndExistenceConstraintDeletion(LabelId label, PropertyId property, const std::string &key); + bool PersistTextIndexCreation(const std::string &index_name, LabelId label); + + bool PersistTextIndexDeletion(const std::string &index_name, LabelId label); + bool PersistUniqueConstraintCreation(LabelId label, const std::set<PropertyId> &properties); bool PersistUniqueConstraintDeletion(LabelId label, const std::set<PropertyId> &properties); diff --git a/src/storage/v2/disk/storage.cpp b/src/storage/v2/disk/storage.cpp index 21fa5ecc7..4dbd248f7 100644 --- a/src/storage/v2/disk/storage.cpp +++ b/src/storage/v2/disk/storage.cpp @@ -29,6 +29,8 @@ #include <rocksdb/utilities/transaction.h> #include <rocksdb/utilities/transaction_db.h> +#include "flags/experimental.hpp" +#include "flags/run_time_configurable.hpp" #include "kvstore/kvstore.hpp" #include "spdlog/spdlog.h" #include "storage/v2/constraints/unique_constraints.hpp" @@ -856,6 +858,7 @@ StorageInfo DiskStorage::GetInfo(memgraph::replication_coordination_glue::Replic const auto &lbl = access->ListAllIndices(); info.label_indices = lbl.label.size(); info.label_property_indices = lbl.label_property.size(); + info.text_indices = lbl.text_indices.size(); const auto &con = access->ListAllConstraints(); info.existence_constraints = 
con.existence.size(); info.unique_constraints = con.unique.size(); @@ -1670,6 +1673,18 @@ utils::BasicResult<StorageManipulationError, void> DiskStorage::DiskAccessor::Co case MetadataDelta::Action::LABEL_PROPERTY_INDEX_STATS_CLEAR: { throw utils::NotYetImplemented("ClearIndexStats(stats) is not implemented for DiskStorage."); } break; + case MetadataDelta::Action::TEXT_INDEX_CREATE: { + const auto &info = md_delta.text_index; + if (!disk_storage->durable_metadata_.PersistTextIndexCreation(info.index_name, info.label)) { + return StorageManipulationError{PersistenceError{}}; + } + } break; + case MetadataDelta::Action::TEXT_INDEX_DROP: { + const auto &info = md_delta.text_index; + if (!disk_storage->durable_metadata_.PersistTextIndexDeletion(info.index_name, info.label)) { + return StorageManipulationError{PersistenceError{}}; + } + } break; case MetadataDelta::Action::EXISTENCE_CONSTRAINT_CREATE: { const auto &info = md_delta.label_property; if (!disk_storage->durable_metadata_.PersistLabelPropertyIndexAndExistenceConstraintCreation( @@ -1768,6 +1783,9 @@ utils::BasicResult<StorageManipulationError, void> DiskStorage::DiskAccessor::Co return StorageManipulationError{SerializationError{}}; } spdlog::trace("rocksdb: Commit successful"); + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + disk_storage->indices_.text_index_.Commit(); + } is_transaction_active_ = false; @@ -1886,6 +1904,9 @@ void DiskStorage::DiskAccessor::Abort() { // query_plan_accumulate_aggregate.cpp transaction_.disk_transaction_->Rollback(); transaction_.disk_transaction_->ClearSnapshot(); + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + storage_->indices_.text_index_.Rollback(); + } delete transaction_.disk_transaction_; transaction_.disk_transaction_ = nullptr; is_transaction_active_ = false; @@ -2092,7 +2113,11 @@ IndicesInfo DiskStorage::DiskAccessor::ListAllIndices() const { auto *disk_label_index = static_cast<DiskLabelIndex 
*>(on_disk->indices_.label_index_.get()); auto *disk_label_property_index = static_cast<DiskLabelPropertyIndex *>(on_disk->indices_.label_property_index_.get()); - return {disk_label_index->ListIndices(), disk_label_property_index->ListIndices()}; + auto &text_index = storage_->indices_.text_index_; + return {disk_label_index->ListIndices(), + disk_label_property_index->ListIndices(), + {/* edge type indices */}, + text_index.ListIndices()}; } ConstraintsInfo DiskStorage::DiskAccessor::ListAllConstraints() const { auto *disk_storage = static_cast<DiskStorage *>(storage_); diff --git a/src/storage/v2/durability/durability.cpp b/src/storage/v2/durability/durability.cpp index fbbedbee5..db8bcd93b 100644 --- a/src/storage/v2/durability/durability.cpp +++ b/src/storage/v2/durability/durability.cpp @@ -151,7 +151,8 @@ void RecoverConstraints(const RecoveredIndicesAndConstraints::ConstraintsMetadat void RecoverIndicesAndStats(const RecoveredIndicesAndConstraints::IndicesMetadata &indices_metadata, Indices *indices, utils::SkipList<Vertex> *vertices, NameIdMapper *name_id_mapper, - const std::optional<ParallelizedSchemaCreationInfo> &parallel_exec_info) { + const std::optional<ParallelizedSchemaCreationInfo> &parallel_exec_info, + const std::optional<std::filesystem::path> &storage_dir) { spdlog::info("Recreating indices from metadata."); // Recover label indices. @@ -211,6 +212,26 @@ void RecoverIndicesAndStats(const RecoveredIndicesAndConstraints::IndicesMetadat } spdlog::info("Edge-type indices are recreated."); + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + // Recover text indices.
+ spdlog::info("Recreating {} text indices from metadata.", indices_metadata.text_indices.size()); + auto &mem_text_index = indices->text_index_; + for (const auto &[index_name, label] : indices_metadata.text_indices) { + try { + if (!storage_dir.has_value()) { + throw RecoveryFailure("There must exist a storage directory in order to recover text indices!"); + } + + mem_text_index.RecoverIndex(storage_dir.value(), index_name, label, vertices->access(), name_id_mapper); + } catch (...) { + throw RecoveryFailure("The text index must be created here!"); + } + spdlog::info("Text index {} on :{} is recreated from metadata", index_name, + name_id_mapper->IdToName(label.AsUint())); + } + spdlog::info("Text indices are recreated."); + } + spdlog::info("Indices are recreated."); } @@ -331,8 +352,13 @@ std::optional<RecoveryInfo> Recovery::RecoverData(std::string *uuid, Replication repl_storage_state.epoch_.SetEpoch(std::move(recovered_snapshot->snapshot_info.epoch_id)); if (!utils::DirExists(wal_directory_)) { + std::optional<std::filesystem::path> storage_dir = std::nullopt; + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + storage_dir = config.durability.storage_directory; + } + RecoverIndicesAndStats(indices_constraints.indices, indices, vertices, name_id_mapper, - GetParallelExecInfoIndices(recovery_info, config)); + GetParallelExecInfoIndices(recovery_info, config), storage_dir); RecoverConstraints(indices_constraints.constraints, constraints, vertices, name_id_mapper, GetParallelExecInfo(recovery_info, config)); return recovered_snapshot->recovery_info; @@ -467,8 +493,13 @@ std::optional<RecoveryInfo> Recovery::RecoverData(std::string *uuid, Replication spdlog::info("All necessary WAL files are loaded successfully."); } + std::optional<std::filesystem::path> storage_dir = std::nullopt; + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + storage_dir = config.durability.storage_directory; + } + 
RecoverIndicesAndStats(indices_constraints.indices, indices, vertices, name_id_mapper, - GetParallelExecInfoIndices(recovery_info, config)); + GetParallelExecInfoIndices(recovery_info, config), storage_dir); RecoverConstraints(indices_constraints.constraints, constraints, vertices, name_id_mapper, GetParallelExecInfo(recovery_info, config)); diff --git a/src/storage/v2/durability/durability.hpp b/src/storage/v2/durability/durability.hpp index 97e2c7efc..5170b3b04 100644 --- a/src/storage/v2/durability/durability.hpp +++ b/src/storage/v2/durability/durability.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -102,7 +102,8 @@ std::optional<std::vector<WalDurabilityInfo>> GetWalFiles(const std::filesystem: /// @throw RecoveryFailure void RecoverIndicesAndStats(const RecoveredIndicesAndConstraints::IndicesMetadata &indices_metadata, Indices *indices, utils::SkipList<Vertex> *vertices, NameIdMapper *name_id_mapper, - const std::optional<ParallelizedSchemaCreationInfo> &parallel_exec_info = std::nullopt); + const std::optional<ParallelizedSchemaCreationInfo> &parallel_exec_info = std::nullopt, + const std::optional<std::filesystem::path> &storage_dir = std::nullopt); // Helper function used to recover all discovered constraints.
The // constraints must be recovered after the data recovery is done diff --git a/src/storage/v2/durability/marker.hpp b/src/storage/v2/durability/marker.hpp index ac0cc074d..18d693e51 100644 --- a/src/storage/v2/durability/marker.hpp +++ b/src/storage/v2/durability/marker.hpp @@ -64,6 +64,8 @@ enum class Marker : uint8_t { DELTA_LABEL_PROPERTY_INDEX_STATS_CLEAR = 0x64, DELTA_EDGE_TYPE_INDEX_CREATE = 0x65, DELTA_EDGE_TYPE_INDEX_DROP = 0x66, + DELTA_TEXT_INDEX_CREATE = 0x67, + DELTA_TEXT_INDEX_DROP = 0x68, VALUE_FALSE = 0x00, VALUE_TRUE = 0xff, @@ -110,6 +112,8 @@ static const Marker kMarkersAll[] = { Marker::DELTA_LABEL_PROPERTY_INDEX_DROP, Marker::DELTA_EDGE_TYPE_INDEX_CREATE, Marker::DELTA_EDGE_TYPE_INDEX_DROP, + Marker::DELTA_TEXT_INDEX_CREATE, + Marker::DELTA_TEXT_INDEX_DROP, Marker::DELTA_EXISTENCE_CONSTRAINT_CREATE, Marker::DELTA_EXISTENCE_CONSTRAINT_DROP, Marker::DELTA_UNIQUE_CONSTRAINT_CREATE, diff --git a/src/storage/v2/durability/metadata.hpp b/src/storage/v2/durability/metadata.hpp index c8ee27b2f..f36fc068d 100644 --- a/src/storage/v2/durability/metadata.hpp +++ b/src/storage/v2/durability/metadata.hpp @@ -44,6 +44,7 @@ struct RecoveredIndicesAndConstraints { std::vector<std::pair<LabelId, LabelIndexStats>> label_stats; std::vector<std::pair<LabelId, std::pair<PropertyId, LabelPropertyIndexStats>>> label_property_stats; std::vector<EdgeTypeId> edge; + std::vector<std::pair<std::string, LabelId>> text_indices; } indices; struct ConstraintsMetadata { diff --git a/src/storage/v2/durability/serialization.cpp b/src/storage/v2/durability/serialization.cpp index 28ba64943..becfa7f34 100644 --- a/src/storage/v2/durability/serialization.cpp +++ b/src/storage/v2/durability/serialization.cpp @@ -353,6 +353,8 @@ std::optional<PropertyValue> Decoder::ReadPropertyValue() { case Marker::DELTA_LABEL_PROPERTY_INDEX_DROP: case Marker::DELTA_EDGE_TYPE_INDEX_CREATE: case Marker::DELTA_EDGE_TYPE_INDEX_DROP: + case Marker::DELTA_TEXT_INDEX_CREATE: + case 
Marker::DELTA_TEXT_INDEX_DROP: case Marker::DELTA_EXISTENCE_CONSTRAINT_CREATE: case Marker::DELTA_EXISTENCE_CONSTRAINT_DROP: case Marker::DELTA_UNIQUE_CONSTRAINT_CREATE: @@ -459,6 +461,8 @@ bool Decoder::SkipPropertyValue() { case Marker::DELTA_LABEL_PROPERTY_INDEX_DROP: case Marker::DELTA_EDGE_TYPE_INDEX_CREATE: case Marker::DELTA_EDGE_TYPE_INDEX_DROP: + case Marker::DELTA_TEXT_INDEX_CREATE: + case Marker::DELTA_TEXT_INDEX_DROP: case Marker::DELTA_EXISTENCE_CONSTRAINT_CREATE: case Marker::DELTA_EXISTENCE_CONSTRAINT_DROP: case Marker::DELTA_UNIQUE_CONSTRAINT_CREATE: diff --git a/src/storage/v2/durability/snapshot.cpp b/src/storage/v2/durability/snapshot.cpp index 5fea3dfa5..0e3bb96e3 100644 --- a/src/storage/v2/durability/snapshot.cpp +++ b/src/storage/v2/durability/snapshot.cpp @@ -13,6 +13,8 @@ #include <thread> +#include "flags/experimental.hpp" +#include "flags/run_time_configurable.hpp" #include "spdlog/spdlog.h" #include "storage/v2/durability/exceptions.hpp" #include "storage/v2/durability/paths.hpp" @@ -2004,6 +2006,24 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis spdlog::info("Metadata of edge-type indices are recovered."); } + // Recover text indices. 
+ if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + auto size = snapshot.ReadUint(); + if (!size) throw RecoveryFailure("Couldn't recover the number of text indices!"); + spdlog::info("Recovering metadata of {} text indices.", *size); + for (uint64_t i = 0; i < *size; ++i) { + auto index_name = snapshot.ReadString(); + if (!index_name.has_value()) throw RecoveryFailure("Couldn't read text index name!"); + auto label = snapshot.ReadUint(); + if (!label) throw RecoveryFailure("Couldn't read text index label!"); + AddRecoveredIndexConstraint(&indices_constraints.indices.text_indices, + {index_name.value(), get_label_from_id(*label)}, "The text index already exists!"); + SPDLOG_TRACE("Recovered metadata of text index {} for :{}", index_name.value(), + name_id_mapper->IdToName(snapshot_id_map.at(*label))); + } + spdlog::info("Metadata of text indices are recovered."); + } + spdlog::info("Metadata of indices are recovered."); } @@ -2493,6 +2513,16 @@ void CreateSnapshot(Storage *storage, Transaction *transaction, const std::files write_mapping(item); } } + + // Write text indices. + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + auto text_indices = storage->indices_.text_index_.ListIndices(); + snapshot.WriteUint(text_indices.size()); + for (const auto &[index_name, label] : text_indices) { + snapshot.WriteString(index_name); + write_mapping(label); + } + } } // Write constraints. 
diff --git a/src/storage/v2/durability/storage_global_operation.hpp b/src/storage/v2/durability/storage_global_operation.hpp index 7dd635e9d..d9c77b6c6 100644 --- a/src/storage/v2/durability/storage_global_operation.hpp +++ b/src/storage/v2/durability/storage_global_operation.hpp @@ -25,6 +25,8 @@ enum class StorageMetadataOperation { LABEL_PROPERTY_INDEX_STATS_CLEAR, EDGE_TYPE_INDEX_CREATE, EDGE_TYPE_INDEX_DROP, + TEXT_INDEX_CREATE, + TEXT_INDEX_DROP, EXISTENCE_CONSTRAINT_CREATE, EXISTENCE_CONSTRAINT_DROP, UNIQUE_CONSTRAINT_CREATE, diff --git a/src/storage/v2/durability/wal.cpp b/src/storage/v2/durability/wal.cpp index 5c40ab1c5..c684d818c 100644 --- a/src/storage/v2/durability/wal.cpp +++ b/src/storage/v2/durability/wal.cpp @@ -99,6 +99,10 @@ Marker OperationToMarker(StorageMetadataOperation operation) { return Marker::DELTA_EDGE_TYPE_INDEX_CREATE; case StorageMetadataOperation::EDGE_TYPE_INDEX_DROP: return Marker::DELTA_EDGE_TYPE_INDEX_DROP; + case StorageMetadataOperation::TEXT_INDEX_CREATE: + return Marker::DELTA_TEXT_INDEX_CREATE; + case StorageMetadataOperation::TEXT_INDEX_DROP: + return Marker::DELTA_TEXT_INDEX_DROP; case StorageMetadataOperation::EXISTENCE_CONSTRAINT_CREATE: return Marker::DELTA_EXISTENCE_CONSTRAINT_CREATE; case StorageMetadataOperation::EXISTENCE_CONSTRAINT_DROP: @@ -172,6 +176,10 @@ WalDeltaData::Type MarkerToWalDeltaDataType(Marker marker) { return WalDeltaData::Type::LABEL_PROPERTY_INDEX_CREATE; case Marker::DELTA_LABEL_PROPERTY_INDEX_DROP: return WalDeltaData::Type::LABEL_PROPERTY_INDEX_DROP; + case Marker::DELTA_TEXT_INDEX_CREATE: + return WalDeltaData::Type::TEXT_INDEX_CREATE; + case Marker::DELTA_TEXT_INDEX_DROP: + return WalDeltaData::Type::TEXT_INDEX_DROP; case Marker::DELTA_LABEL_PROPERTY_INDEX_STATS_SET: return WalDeltaData::Type::LABEL_PROPERTY_INDEX_STATS_SET; case Marker::DELTA_LABEL_PROPERTY_INDEX_STATS_CLEAR: @@ -382,6 +390,21 @@ WalDeltaData ReadSkipWalDeltaData(BaseDecoder *decoder) { if (!decoder->SkipString()) throw 
RecoveryFailure("Invalid WAL data!"); } } + break; + } + case WalDeltaData::Type::TEXT_INDEX_CREATE: + case WalDeltaData::Type::TEXT_INDEX_DROP: { + if constexpr (read_data) { + auto index_name = decoder->ReadString(); + if (!index_name) throw RecoveryFailure("Invalid WAL data!"); + delta.operation_text.index_name = std::move(*index_name); + auto label = decoder->ReadString(); + if (!label) throw RecoveryFailure("Invalid WAL data!"); + delta.operation_text.label = std::move(*label); + } else { + if (!decoder->SkipString() || !decoder->SkipString()) throw RecoveryFailure("Invalid WAL data!"); + } + break; } } @@ -529,6 +552,12 @@ bool operator==(const WalDeltaData &a, const WalDeltaData &b) { + case WalDeltaData::Type::TEXT_INDEX_CREATE: + return a.operation_text.index_name == b.operation_text.index_name && + a.operation_text.label == b.operation_text.label; + case WalDeltaData::Type::TEXT_INDEX_DROP: + return a.operation_text.index_name == b.operation_text.index_name && + a.operation_text.label == b.operation_text.label; case WalDeltaData::Type::LABEL_PROPERTY_INDEX_CREATE: case WalDeltaData::Type::LABEL_PROPERTY_INDEX_DROP: case WalDeltaData::Type::EXISTENCE_CONSTRAINT_CREATE: case WalDeltaData::Type::EXISTENCE_CONSTRAINT_DROP: return a.operation_label_property.label == b.operation_label_property.label && @@ -675,7 +704,8 @@ void EncodeTransactionEnd(BaseEncoder *encoder, uint64_t timestamp) { } void EncodeOperation(BaseEncoder *encoder, NameIdMapper *name_id_mapper, StorageMetadataOperation operation, - LabelId label, const std::set<PropertyId> &properties, const LabelIndexStats &stats, + const std::optional<std::string> text_index_name, LabelId label, + const std::set<PropertyId> &properties, const LabelIndexStats &stats, const LabelPropertyIndexStats &property_stats, uint64_t timestamp) { encoder->WriteMarker(Marker::SECTION_DELTA); encoder->WriteUint(timestamp); @@ -731,6 +761,14 @@ void EncodeOperation(BaseEncoder *encoder, NameIdMapper *name_id_mapper,
Storage case StorageMetadataOperation::EDGE_TYPE_INDEX_DROP: { MG_ASSERT(false, "Invalid function call!"); } + case StorageMetadataOperation::TEXT_INDEX_CREATE: + case StorageMetadataOperation::TEXT_INDEX_DROP: { + MG_ASSERT(text_index_name.has_value(), "Text indices must be named!"); + encoder->WriteMarker(OperationToMarker(operation)); + encoder->WriteString(text_index_name.value()); + encoder->WriteString(name_id_mapper->IdToName(label.AsUint())); + break; + } } } @@ -752,6 +790,8 @@ void EncodeOperation(BaseEncoder *encoder, NameIdMapper *name_id_mapper, Storage case StorageMetadataOperation::LABEL_INDEX_STATS_SET: case StorageMetadataOperation::LABEL_PROPERTY_INDEX_CREATE: case StorageMetadataOperation::LABEL_PROPERTY_INDEX_DROP: + case StorageMetadataOperation::TEXT_INDEX_CREATE: + case StorageMetadataOperation::TEXT_INDEX_DROP: case StorageMetadataOperation::EXISTENCE_CONSTRAINT_CREATE: case StorageMetadataOperation::EXISTENCE_CONSTRAINT_DROP: case StorageMetadataOperation::LABEL_PROPERTY_INDEX_STATS_SET: @@ -1000,6 +1040,20 @@ RecoveryInfo LoadWal(const std::filesystem::path &path, RecoveredIndicesAndConst "The label index stats doesn't exist!"); break; } + case WalDeltaData::Type::TEXT_INDEX_CREATE: { + auto index_name = delta.operation_text.index_name; + auto label = LabelId::FromUint(name_id_mapper->NameToId(delta.operation_text.label)); + AddRecoveredIndexConstraint(&indices_constraints->indices.text_indices, {index_name, label}, + "The text index already exists!"); + break; + } + case WalDeltaData::Type::TEXT_INDEX_DROP: { + auto index_name = delta.operation_text.index_name; + auto label = LabelId::FromUint(name_id_mapper->NameToId(delta.operation_text.label)); + RemoveRecoveredIndexConstraint(&indices_constraints->indices.text_indices, {index_name, label}, + "The text index doesn't exist!"); + break; + } case WalDeltaData::Type::EXISTENCE_CONSTRAINT_CREATE: { auto label_id = 
LabelId::FromUint(name_id_mapper->NameToId(delta.operation_label_property.label)); auto property_id = PropertyId::FromUint(name_id_mapper->NameToId(delta.operation_label_property.property)); @@ -1148,10 +1202,11 @@ void WalFile::AppendTransactionEnd(uint64_t timestamp) { UpdateStats(timestamp); } -void WalFile::AppendOperation(StorageMetadataOperation operation, LabelId label, const std::set<PropertyId> &properties, - const LabelIndexStats &stats, const LabelPropertyIndexStats &property_stats, - uint64_t timestamp) { - EncodeOperation(&wal_, name_id_mapper_, operation, label, properties, stats, property_stats, timestamp); +void WalFile::AppendOperation(StorageMetadataOperation operation, const std::optional<std::string> text_index_name, + LabelId label, const std::set<PropertyId> &properties, const LabelIndexStats &stats, + const LabelPropertyIndexStats &property_stats, uint64_t timestamp) { + EncodeOperation(&wal_, name_id_mapper_, operation, text_index_name, label, properties, stats, property_stats, + timestamp); UpdateStats(timestamp); } diff --git a/src/storage/v2/durability/wal.hpp b/src/storage/v2/durability/wal.hpp index 516487e0d..4990e6979 100644 --- a/src/storage/v2/durability/wal.hpp +++ b/src/storage/v2/durability/wal.hpp @@ -69,6 +69,8 @@ struct WalDeltaData { LABEL_PROPERTY_INDEX_STATS_CLEAR, EDGE_INDEX_CREATE, EDGE_INDEX_DROP, + TEXT_INDEX_CREATE, + TEXT_INDEX_DROP, EXISTENCE_CONSTRAINT_CREATE, EXISTENCE_CONSTRAINT_DROP, UNIQUE_CONSTRAINT_CREATE, @@ -127,6 +129,11 @@ struct WalDeltaData { std::string property; std::string stats; } operation_label_property_stats; + + struct { + std::string index_name; + std::string label; + } operation_text; }; bool operator==(const WalDeltaData &a, const WalDeltaData &b); @@ -163,6 +170,8 @@ constexpr bool IsWalDeltaDataTypeTransactionEndVersion15(const WalDeltaData::Typ case WalDeltaData::Type::LABEL_PROPERTY_INDEX_STATS_CLEAR: case WalDeltaData::Type::EDGE_INDEX_CREATE: case WalDeltaData::Type::EDGE_INDEX_DROP: + 
case WalDeltaData::Type::TEXT_INDEX_CREATE: + case WalDeltaData::Type::TEXT_INDEX_DROP: case WalDeltaData::Type::EXISTENCE_CONSTRAINT_CREATE: case WalDeltaData::Type::EXISTENCE_CONSTRAINT_DROP: case WalDeltaData::Type::UNIQUE_CONSTRAINT_CREATE: @@ -213,7 +222,8 @@ void EncodeTransactionEnd(BaseEncoder *encoder, uint64_t timestamp); /// Function used to encode non-transactional operation. void EncodeOperation(BaseEncoder *encoder, NameIdMapper *name_id_mapper, StorageMetadataOperation operation, - LabelId label, const std::set<PropertyId> &properties, const LabelIndexStats &stats, + const std::optional<std::string> text_index_name, LabelId label, + const std::set<PropertyId> &properties, const LabelIndexStats &stats, const LabelPropertyIndexStats &property_stats, uint64_t timestamp); void EncodeOperation(BaseEncoder *encoder, NameIdMapper *name_id_mapper, StorageMetadataOperation operation, @@ -248,8 +258,9 @@ class WalFile { void AppendTransactionEnd(uint64_t timestamp); - void AppendOperation(StorageMetadataOperation operation, LabelId label, const std::set<PropertyId> &properties, - const LabelIndexStats &stats, const LabelPropertyIndexStats &property_stats, uint64_t timestamp); + void AppendOperation(StorageMetadataOperation operation, const std::optional<std::string> text_index_name, + LabelId label, const std::set<PropertyId> &properties, const LabelIndexStats &stats, + const LabelPropertyIndexStats &property_stats, uint64_t timestamp); void AppendOperation(StorageMetadataOperation operation, EdgeTypeId edge_type, uint64_t timestamp); diff --git a/src/storage/v2/edge_accessor.cpp b/src/storage/v2/edge_accessor.cpp index 62a9f4bcd..ba354371e 100644 --- a/src/storage/v2/edge_accessor.cpp +++ b/src/storage/v2/edge_accessor.cpp @@ -130,9 +130,13 @@ Result<storage::PropertyValue> EdgeAccessor::SetProperty(PropertyId property, co if (edge_.ptr->deleted) return Error::DELETED_OBJECT; using ReturnType = decltype(edge_.ptr->properties.GetProperty(property)); 
std::optional<ReturnType> current_value; + const bool skip_duplicate_write = !storage_->config_.salient.items.delta_on_identical_property_update; utils::AtomicMemoryBlock atomic_memory_block{ - [&current_value, &property, &value, transaction = transaction_, edge = edge_]() { + [&current_value, &property, &value, transaction = transaction_, edge = edge_, skip_duplicate_write]() { current_value.emplace(edge.ptr->properties.GetProperty(property)); + if (skip_duplicate_write && current_value == value) { + return; + } // We could skip setting the value if the previous one is the same to the new // one. This would save some memory as a delta would not be created as well as // avoid copying the value. The reason we are not doing that is because the @@ -184,12 +188,14 @@ Result<std::vector<std::tuple<PropertyId, PropertyValue, PropertyValue>>> EdgeAc if (edge_.ptr->deleted) return Error::DELETED_OBJECT; + const bool skip_duplicate_write = !storage_->config_.salient.items.delta_on_identical_property_update; using ReturnType = decltype(edge_.ptr->properties.UpdateProperties(properties)); std::optional<ReturnType> id_old_new_change; utils::AtomicMemoryBlock atomic_memory_block{ - [transaction_ = transaction_, edge_ = edge_, &properties, &id_old_new_change]() { + [transaction_ = transaction_, edge_ = edge_, &properties, &id_old_new_change, skip_duplicate_write]() { id_old_new_change.emplace(edge_.ptr->properties.UpdateProperties(properties)); for (auto &[property, old_value, new_value] : *id_old_new_change) { + if (skip_duplicate_write && old_value == new_value) continue; CreateAndLinkDelta(transaction_, edge_.ptr, Delta::SetPropertyTag(), property, std::move(old_value)); } }}; diff --git a/src/storage/v2/indices/indices.cpp b/src/storage/v2/indices/indices.cpp index 6068f888f..1cbde2362 100644 --- a/src/storage/v2/indices/indices.cpp +++ b/src/storage/v2/indices/indices.cpp @@ -16,6 +16,7 @@ #include "storage/v2/inmemory/edge_type_index.hpp" #include "storage/v2/inmemory/label_index.hpp"
#include "storage/v2/inmemory/label_property_index.hpp" +#include "storage/v2/storage.hpp" namespace memgraph::storage { diff --git a/src/storage/v2/indices/indices.hpp b/src/storage/v2/indices/indices.hpp index 40cff577f..6f1bc44db 100644 --- a/src/storage/v2/indices/indices.hpp +++ b/src/storage/v2/indices/indices.hpp @@ -18,6 +18,7 @@ #include "storage/v2/indices/edge_type_index.hpp" #include "storage/v2/indices/label_index.hpp" #include "storage/v2/indices/label_property_index.hpp" +#include "storage/v2/indices/text_index.hpp" #include "storage/v2/storage_mode.hpp" namespace memgraph::storage { @@ -31,12 +32,12 @@ struct Indices { Indices &operator=(Indices &&) = delete; ~Indices() = default; - /// This function should be called from garbage collection to clean-up the + /// This function should be called from garbage collection to clean up the /// index. /// TODO: unused in disk indices void RemoveObsoleteEntries(uint64_t oldest_active_start_timestamp, std::stop_token token) const; - /// Surgical removal of entries that was inserted this transaction + /// Surgical removal of entries that were inserted in this transaction /// TODO: unused in disk indices void AbortEntries(LabelId labelId, std::span<Vertex *const> vertices, uint64_t exact_start_timestamp) const; void AbortEntries(PropertyId property, std::span<std::pair<PropertyValue, Vertex *> const> vertices, @@ -71,6 +72,7 @@ struct Indices { std::unique_ptr<LabelIndex> label_index_; std::unique_ptr<LabelPropertyIndex> label_property_index_; std::unique_ptr<EdgeTypeIndex> edge_type_index_; + mutable TextIndex text_index_; }; } // namespace memgraph::storage diff --git a/src/storage/v2/indices/text_index.cpp b/src/storage/v2/indices/text_index.cpp new file mode 100644 index 000000000..1c9488097 --- /dev/null +++ b/src/storage/v2/indices/text_index.cpp @@ -0,0 +1,430 @@ +// Copyright 2024 Memgraph Ltd. 
+// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#include "storage/v2/indices/text_index.hpp" +#include "flags/experimental.hpp" +#include "flags/run_time_configurable.hpp" +#include "query/db_accessor.hpp" +#include "storage/v2/view.hpp" +#include "text_search.hpp" + +namespace memgraph::storage { + +std::string GetPropertyName(PropertyId prop_id, memgraph::query::DbAccessor *db) { return db->PropertyToName(prop_id); } + +std::string GetPropertyName(PropertyId prop_id, NameIdMapper *name_id_mapper) { + return name_id_mapper->IdToName(prop_id.AsUint()); +} + +inline std::string TextIndex::MakeIndexPath(const std::filesystem::path &storage_dir, const std::string &index_name) { + return (storage_dir / kTextIndicesDirectory / index_name).string(); +} + +void TextIndex::CreateEmptyIndex(const std::filesystem::path &storage_dir, const std::string &index_name, + LabelId label) { + if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + throw query::TextSearchDisabledException(); + } + + if (index_.contains(index_name)) { + throw query::TextSearchException("Text index \"{}\" already exists.", index_name); + } + + try { + nlohmann::json mappings = {}; + mappings["properties"] = {}; + mappings["properties"]["metadata"] = {{"type", "json"}, {"fast", true}, {"stored", true}, {"text", true}}; + mappings["properties"]["data"] = {{"type", "json"}, {"fast", true}, {"stored", true}, {"text", true}}; + mappings["properties"]["all"] = {{"type", "text"}, {"fast", true}, {"stored", true}, {"text", true}}; 
+ + index_.emplace(index_name, TextIndexData{.context_ = mgcxx::text_search::create_index( + MakeIndexPath(storage_dir, index_name), + mgcxx::text_search::IndexConfig{.mappings = mappings.dump()}), + .scope_ = label}); + } catch (const std::exception &e) { + throw query::TextSearchException("Tantivy error: {}", e.what()); + } + label_to_index_.emplace(label, index_name); +} + +template <typename T> +nlohmann::json TextIndex::SerializeProperties(const std::map<PropertyId, PropertyValue> &properties, T *name_resolver) { + nlohmann::json serialized_properties = nlohmann::json::value_t::object; + for (const auto &[prop_id, prop_value] : properties) { + switch (prop_value.type()) { + case PropertyValue::Type::Bool: + serialized_properties[GetPropertyName(prop_id, name_resolver)] = prop_value.ValueBool(); + break; + case PropertyValue::Type::Int: + serialized_properties[GetPropertyName(prop_id, name_resolver)] = prop_value.ValueInt(); + break; + case PropertyValue::Type::Double: + serialized_properties[GetPropertyName(prop_id, name_resolver)] = prop_value.ValueDouble(); + break; + case PropertyValue::Type::String: + serialized_properties[GetPropertyName(prop_id, name_resolver)] = prop_value.ValueString(); + break; + case PropertyValue::Type::Null: + case PropertyValue::Type::List: + case PropertyValue::Type::Map: + case PropertyValue::Type::TemporalData: + default: + continue; + } + } + + return serialized_properties; +} + +std::string TextIndex::StringifyProperties(const std::map<PropertyId, PropertyValue> &properties) { + std::vector<std::string> indexable_properties_as_string; + for (const auto &[_, prop_value] : properties) { + switch (prop_value.type()) { + case PropertyValue::Type::Bool: + indexable_properties_as_string.push_back(prop_value.ValueBool() ? 
"true" : "false"); + break; + case PropertyValue::Type::Int: + indexable_properties_as_string.push_back(std::to_string(prop_value.ValueInt())); + break; + case PropertyValue::Type::Double: + indexable_properties_as_string.push_back(std::to_string(prop_value.ValueDouble())); + break; + case PropertyValue::Type::String: + indexable_properties_as_string.push_back(prop_value.ValueString()); + break; + // NOTE: As the following types aren‘t indexed in Tantivy, they don’t appear in the property value string either. + case PropertyValue::Type::Null: + case PropertyValue::Type::List: + case PropertyValue::Type::Map: + case PropertyValue::Type::TemporalData: + default: + continue; + } + } + return utils::Join(indexable_properties_as_string, " "); +} + +std::vector<mgcxx::text_search::Context *> TextIndex::GetApplicableTextIndices(const std::vector<LabelId> &labels) { + if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + throw query::TextSearchDisabledException(); + } + + std::vector<mgcxx::text_search::Context *> applicable_text_indices; + for (const auto &label : labels) { + if (label_to_index_.contains(label)) { + applicable_text_indices.push_back(&index_.at(label_to_index_.at(label)).context_); + } + } + return applicable_text_indices; +} + +void TextIndex::LoadNodeToTextIndices(const std::int64_t gid, const nlohmann::json &properties, + const std::string &property_values_as_str, + const std::vector<mgcxx::text_search::Context *> &applicable_text_indices) { + if (applicable_text_indices.empty()) { + return; + } + + // NOTE: Text indexes are presently all-property indices. If we allow text indexes restricted to specific properties, + // an indexable document should be created for each applicable index. 
+ nlohmann::json document = {}; + document["data"] = properties; + document["all"] = property_values_as_str; + document["metadata"] = {}; + document["metadata"]["gid"] = gid; + document["metadata"]["deleted"] = false; + document["metadata"]["is_node"] = true; + + for (auto *index_context : applicable_text_indices) { + try { + mgcxx::text_search::add_document( + *index_context, + mgcxx::text_search::DocumentInput{ + .data = document.dump(-1, ' ', false, nlohmann::json::error_handler_t::replace)}, + kDoSkipCommit); + } catch (const std::exception &e) { + throw query::TextSearchException("Tantivy error: {}", e.what()); + } + } +} + +void TextIndex::CommitLoadedNodes(mgcxx::text_search::Context &index_context) { + // As CREATE TEXT INDEX (...) queries don’t accumulate deltas, db_transactional_accessor_->Commit() does not reach + // the code area where changes to indices are committed. To get around that without needing to commit text indices + // after every such query, we commit here. + try { + mgcxx::text_search::commit(index_context); + } catch (const std::exception &e) { + throw query::TextSearchException("Tantivy error: {}", e.what()); + } +} + +void TextIndex::AddNode( + Vertex *vertex_after_update, NameIdMapper *name_id_mapper, + const std::optional<std::vector<mgcxx::text_search::Context *>> &maybe_applicable_text_indices) { + if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + throw query::TextSearchDisabledException(); + } + + auto applicable_text_indices = + maybe_applicable_text_indices.value_or(GetApplicableTextIndices(vertex_after_update->labels)); + if (applicable_text_indices.empty()) { + return; + } + + auto vertex_properties = vertex_after_update->properties.Properties(); + LoadNodeToTextIndices(vertex_after_update->gid.AsInt(), SerializeProperties(vertex_properties, name_id_mapper), + StringifyProperties(vertex_properties), applicable_text_indices); +} + +void TextIndex::UpdateNode(Vertex *vertex_after_update, NameIdMapper 
*name_id_mapper, + const std::vector<LabelId> &removed_labels) { + if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + throw query::TextSearchDisabledException(); + } + + if (!removed_labels.empty()) { + auto indexes_to_remove_node_from = GetApplicableTextIndices(removed_labels); + RemoveNode(vertex_after_update, indexes_to_remove_node_from); + } + + auto applicable_text_indices = GetApplicableTextIndices(vertex_after_update->labels); + if (applicable_text_indices.empty()) return; + RemoveNode(vertex_after_update, applicable_text_indices); + AddNode(vertex_after_update, name_id_mapper, applicable_text_indices); +} + +void TextIndex::RemoveNode( + Vertex *vertex_after_update, + const std::optional<std::vector<mgcxx::text_search::Context *>> &maybe_applicable_text_indices) { + if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + throw query::TextSearchDisabledException(); + } + + auto search_node_to_be_deleted = + mgcxx::text_search::SearchInput{.search_query = fmt::format("metadata.gid:{}", vertex_after_update->gid.AsInt())}; + + for (auto *index_context : + maybe_applicable_text_indices.value_or(GetApplicableTextIndices(vertex_after_update->labels))) { + try { + mgcxx::text_search::delete_document(*index_context, search_node_to_be_deleted, kDoSkipCommit); + } catch (const std::exception &e) { + throw query::TextSearchException("Tantivy error: {}", e.what()); + } + } +} + +void TextIndex::CreateIndex(const std::filesystem::path &storage_dir, const std::string &index_name, LabelId label, + memgraph::query::DbAccessor *db) { + if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + throw query::TextSearchDisabledException(); + } + + CreateEmptyIndex(storage_dir, index_name, label); + + for (const auto &v : db->Vertices(View::NEW)) { + if (!v.HasLabel(View::NEW, label).GetValue()) { + continue; + } + + auto vertex_properties = v.Properties(View::NEW).GetValue(); + LoadNodeToTextIndices(v.Gid().AsInt(), 
SerializeProperties(vertex_properties, db), + StringifyProperties(vertex_properties), {&index_.at(index_name).context_}); + } + + CommitLoadedNodes(index_.at(index_name).context_); +} + +void TextIndex::RecoverIndex(const std::filesystem::path &storage_dir, const std::string &index_name, LabelId label, + memgraph::utils::SkipList<Vertex>::Accessor vertices, NameIdMapper *name_id_mapper) { + if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + throw query::TextSearchDisabledException(); + } + + // Clear Tantivy-internal files if they exist from previous sessions + std::filesystem::remove_all(storage_dir / kTextIndicesDirectory / index_name); + + CreateEmptyIndex(storage_dir, index_name, label); + + for (const auto &v : vertices) { + if (std::find(v.labels.begin(), v.labels.end(), label) == v.labels.end()) { + continue; + } + + auto vertex_properties = v.properties.Properties(); + LoadNodeToTextIndices(v.gid.AsInt(), SerializeProperties(vertex_properties, name_id_mapper), + StringifyProperties(vertex_properties), {&index_.at(index_name).context_}); + } + + CommitLoadedNodes(index_.at(index_name).context_); +} + +LabelId TextIndex::DropIndex(const std::filesystem::path &storage_dir, const std::string &index_name) { + if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + throw query::TextSearchDisabledException(); + } + + if (!index_.contains(index_name)) { + throw query::TextSearchException("Text index \"{}\" doesn’t exist.", index_name); + } + + try { + mgcxx::text_search::drop_index(MakeIndexPath(storage_dir, index_name)); + } catch (const std::exception &e) { + throw query::TextSearchException("Tantivy error: {}", e.what()); + } + auto deleted_index_label = index_.at(index_name).scope_; + + index_.erase(index_name); + std::erase_if(label_to_index_, [index_name](const auto &item) { return item.second == index_name; }); + + return deleted_index_label; +} + +bool TextIndex::IndexExists(const std::string &index_name) const { return 
index_.contains(index_name); } + +mgcxx::text_search::SearchOutput TextIndex::SearchGivenProperties(const std::string &index_name, + const std::string &search_query) { + try { + return mgcxx::text_search::search( + index_.at(index_name).context_, + mgcxx::text_search::SearchInput{.search_query = search_query, .return_fields = {"metadata"}}); + } catch (const std::exception &e) { + throw query::TextSearchException("Tantivy error: {}", e.what()); + } + + return mgcxx::text_search::SearchOutput{}; +} + +mgcxx::text_search::SearchOutput TextIndex::RegexSearch(const std::string &index_name, + const std::string &search_query) { + try { + return mgcxx::text_search::regex_search( + index_.at(index_name).context_, + mgcxx::text_search::SearchInput{ + .search_fields = {"all"}, .search_query = search_query, .return_fields = {"metadata"}}); + } catch (const std::exception &e) { + throw query::TextSearchException("Tantivy error: {}", e.what()); + } + + return mgcxx::text_search::SearchOutput{}; +} + +mgcxx::text_search::SearchOutput TextIndex::SearchAllProperties(const std::string &index_name, + const std::string &search_query) { + try { + return mgcxx::text_search::search( + index_.at(index_name).context_, + mgcxx::text_search::SearchInput{ + .search_fields = {"all"}, .search_query = search_query, .return_fields = {"metadata"}}); + } catch (const std::exception &e) { + throw query::TextSearchException("Tantivy error: {}", e.what()); + } + + return mgcxx::text_search::SearchOutput{}; +} + +std::vector<Gid> TextIndex::Search(const std::string &index_name, const std::string &search_query, + text_search_mode search_mode) { + if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + throw query::TextSearchDisabledException(); + } + + if (!index_.contains(index_name)) { + throw query::TextSearchException("Text index \"{}\" doesn’t exist.", index_name); + } + + mgcxx::text_search::SearchOutput search_results; + switch (search_mode) { + case 
text_search_mode::SPECIFIED_PROPERTIES: + search_results = SearchGivenProperties(index_name, search_query); + break; + case text_search_mode::REGEX: + search_results = RegexSearch(index_name, search_query); + break; + case text_search_mode::ALL_PROPERTIES: + search_results = SearchAllProperties(index_name, search_query); + break; + default: + throw query::TextSearchException( + "Unsupported search mode: please use one of text_search.search, text_search.search_all, or " + "text_search.regex_search."); + } + + std::vector<Gid> found_nodes; + for (const auto &doc : search_results.docs) { + // The CXX .data() method (https://cxx.rs/binding/string.html) may overestimate string length, causing JSON parsing + // errors downstream. We prevent this by resizing the converted string with the correctly-working .length() method. + std::string doc_string = doc.data.data(); + doc_string.resize(doc.data.length()); + auto doc_json = nlohmann::json::parse(doc_string); + found_nodes.push_back(storage::Gid::FromString(doc_json["metadata"]["gid"].dump())); + } + return found_nodes; +} + +std::string TextIndex::Aggregate(const std::string &index_name, const std::string &search_query, + const std::string &aggregation_query) { + if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + throw query::TextSearchDisabledException(); + } + + if (!index_.contains(index_name)) { + throw query::TextSearchException("Text index \"{}\" doesn’t exist.", index_name); + } + + mgcxx::text_search::DocumentOutput aggregation_result; + try { + aggregation_result = mgcxx::text_search::aggregate( + index_.at(index_name).context_, + mgcxx::text_search::SearchInput{ + .search_fields = {"all"}, .search_query = search_query, .aggregation_query = aggregation_query}); + + } catch (const std::exception &e) { + throw query::TextSearchException("Tantivy error: {}", e.what()); + } + // The CXX .data() method (https://cxx.rs/binding/string.html) may overestimate string length, causing JSON parsing + // 
errors downstream. We prevent this by resizing the converted string with the correctly-working .length() method. + std::string result_string = aggregation_result.data.data(); + result_string.resize(aggregation_result.data.length()); + return result_string; +} + +void TextIndex::Commit() { + if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + throw query::TextSearchDisabledException(); + } + + for (auto &[_, index_data] : index_) { + mgcxx::text_search::commit(index_data.context_); + } +} + +void TextIndex::Rollback() { + if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + throw query::TextSearchDisabledException(); + } + + for (auto &[_, index_data] : index_) { + mgcxx::text_search::rollback(index_data.context_); + } +} + +std::vector<std::pair<std::string, LabelId>> TextIndex::ListIndices() const { + std::vector<std::pair<std::string, LabelId>> ret; + ret.reserve(index_.size()); + for (const auto &[index_name, index_data] : index_) { + ret.emplace_back(index_name, index_data.scope_); + } + return ret; +} + +} // namespace memgraph::storage diff --git a/src/storage/v2/indices/text_index.hpp b/src/storage/v2/indices/text_index.hpp new file mode 100644 index 000000000..af4748c6e --- /dev/null +++ b/src/storage/v2/indices/text_index.hpp @@ -0,0 +1,105 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#pragma once + +#include <json/json.hpp> +#include "mg_procedure.h" +#include "storage/v2/id_types.hpp" +#include "storage/v2/name_id_mapper.hpp" +#include "storage/v2/vertex.hpp" +#include "text_search.hpp" + +namespace memgraph::query { +class DbAccessor; +} + +namespace memgraph::storage { +struct TextIndexData { + mgcxx::text_search::Context context_; + LabelId scope_; +}; + +class TextIndex { + private: + static constexpr bool kDoSkipCommit = true; + static constexpr std::string_view kTextIndicesDirectory = "text_indices"; + + inline std::string MakeIndexPath(const std::filesystem::path &storage_dir, const std::string &index_name); + + void CreateEmptyIndex(const std::filesystem::path &storage_dir, const std::string &index_name, LabelId label); + + template <typename T> + nlohmann::json SerializeProperties(const std::map<PropertyId, PropertyValue> &properties, T *name_resolver); + + std::string StringifyProperties(const std::map<PropertyId, PropertyValue> &properties); + + std::vector<mgcxx::text_search::Context *> GetApplicableTextIndices(const std::vector<LabelId> &labels); + + void LoadNodeToTextIndices(const std::int64_t gid, const nlohmann::json &properties, + const std::string &property_values_as_str, + const std::vector<mgcxx::text_search::Context *> &applicable_text_indices); + + void CommitLoadedNodes(mgcxx::text_search::Context &index_context); + + mgcxx::text_search::SearchOutput SearchGivenProperties(const std::string &index_name, + const std::string &search_query); + + mgcxx::text_search::SearchOutput RegexSearch(const std::string &index_name, const std::string &search_query); + + mgcxx::text_search::SearchOutput SearchAllProperties(const std::string &index_name, const std::string &search_query); + + public: + TextIndex() = default; + + TextIndex(const TextIndex &) = delete; + TextIndex(TextIndex &&) = delete; + TextIndex &operator=(const TextIndex &) = delete; + TextIndex &operator=(TextIndex &&) = delete; + + ~TextIndex() = default; + + 
std::map<std::string, TextIndexData> index_; + std::map<LabelId, std::string> label_to_index_; + + void AddNode( + Vertex *vertex, NameIdMapper *name_id_mapper, + const std::optional<std::vector<mgcxx::text_search::Context *>> &maybe_applicable_text_indices = std::nullopt); + + void UpdateNode(Vertex *vertex, NameIdMapper *name_id_mapper, const std::vector<LabelId> &removed_labels = {}); + + void RemoveNode( + Vertex *vertex, + const std::optional<std::vector<mgcxx::text_search::Context *>> &maybe_applicable_text_indices = std::nullopt); + + void CreateIndex(const std::filesystem::path &storage_dir, const std::string &index_name, LabelId label, + memgraph::query::DbAccessor *db); + + void RecoverIndex(const std::filesystem::path &storage_dir, const std::string &index_name, LabelId label, + memgraph::utils::SkipList<Vertex>::Accessor vertices, NameIdMapper *name_id_mapper); + + LabelId DropIndex(const std::filesystem::path &storage_dir, const std::string &index_name); + + bool IndexExists(const std::string &index_name) const; + + std::vector<Gid> Search(const std::string &index_name, const std::string &search_query, text_search_mode search_mode); + + std::string Aggregate(const std::string &index_name, const std::string &search_query, + const std::string &aggregation_query); + + void Commit(); + + void Rollback(); + + std::vector<std::pair<std::string, LabelId>> ListIndices() const; +}; + +} // namespace memgraph::storage diff --git a/src/storage/v2/inmemory/storage.cpp b/src/storage/v2/inmemory/storage.cpp index dab56750b..dbaa56ce2 100644 --- a/src/storage/v2/inmemory/storage.cpp +++ b/src/storage/v2/inmemory/storage.cpp @@ -15,6 +15,8 @@ #include <functional> #include <optional> #include "dbms/constants.hpp" +#include "flags/experimental.hpp" +#include "flags/run_time_configurable.hpp" #include "memory/global_memory_control.hpp" #include "storage/v2/durability/durability.hpp" #include "storage/v2/durability/snapshot.hpp" @@ -890,6 +892,10 @@ 
utils::BasicResult<StorageManipulationError, void> InMemoryStorage::InMemoryAcce commit_timestamp_.reset(); // We have aborted, hence we have not committed return StorageManipulationError{*unique_constraint_violation}; } + + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + mem_storage->indices_.text_index_.Commit(); + } } is_transaction_active_ = false; @@ -1213,6 +1219,9 @@ void InMemoryStorage::InMemoryAccessor::Abort() { for (auto const &[property, prop_vertices] : property_cleanup) { storage_->indices_.AbortEntries(property, prop_vertices, transaction_.start_timestamp); } + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + storage_->indices_.text_index_.Rollback(); + } // VERTICES { @@ -1846,6 +1855,7 @@ StorageInfo InMemoryStorage::GetInfo(memgraph::replication_coordination_glue::Re const auto &lbl = access->ListAllIndices(); info.label_indices = lbl.label.size(); info.label_property_indices = lbl.label_property.size(); + info.text_indices = lbl.text_indices.size(); const auto &con = access->ListAllConstraints(); info.existence_constraints = con.existence.size(); info.unique_constraints = con.unique.size(); @@ -2107,6 +2117,16 @@ bool InMemoryStorage::AppendToWal(const Transaction &transaction, uint64_t final AppendToWalDataDefinition(durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_STATS_CLEAR, info.label, final_commit_timestamp); } break; + case MetadataDelta::Action::TEXT_INDEX_CREATE: { + const auto &info = md_delta.text_index; + AppendToWalDataDefinition(durability::StorageMetadataOperation::TEXT_INDEX_CREATE, info.index_name, info.label, + final_commit_timestamp); + } break; + case MetadataDelta::Action::TEXT_INDEX_DROP: { + const auto &info = md_delta.text_index; + AppendToWalDataDefinition(durability::StorageMetadataOperation::TEXT_INDEX_DROP, info.index_name, info.label, + final_commit_timestamp); + } break; case MetadataDelta::Action::EXISTENCE_CONSTRAINT_CREATE: { const auto &info = 
md_delta.label_property; AppendToWalDataDefinition(durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_CREATE, info.label, @@ -2137,11 +2157,13 @@ bool InMemoryStorage::AppendToWal(const Transaction &transaction, uint64_t final return repl_storage_state_.FinalizeTransaction(final_commit_timestamp, this, std::move(db_acc)); } -void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label, +void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOperation operation, + const std::optional<std::string> text_index_name, LabelId label, const std::set<PropertyId> &properties, LabelIndexStats stats, LabelPropertyIndexStats property_stats, uint64_t final_commit_timestamp) { - wal_file_->AppendOperation(operation, label, properties, stats, property_stats, final_commit_timestamp); + wal_file_->AppendOperation(operation, text_index_name, label, properties, stats, property_stats, + final_commit_timestamp); repl_storage_state_.AppendOperation(operation, label, properties, stats, property_stats, final_commit_timestamp); } @@ -2155,12 +2177,13 @@ void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOpera const std::set<PropertyId> &properties, LabelPropertyIndexStats property_stats, uint64_t final_commit_timestamp) { - return AppendToWalDataDefinition(operation, label, properties, {}, property_stats, final_commit_timestamp); + return AppendToWalDataDefinition(operation, std::nullopt, label, properties, {}, property_stats, + final_commit_timestamp); } void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label, LabelIndexStats stats, uint64_t final_commit_timestamp) { - return AppendToWalDataDefinition(operation, label, {}, stats, {}, final_commit_timestamp); + return AppendToWalDataDefinition(operation, std::nullopt, label, {}, stats, {}, final_commit_timestamp); } void 
InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label, @@ -2174,6 +2197,12 @@ void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOpera return AppendToWalDataDefinition(operation, label, {}, {}, final_commit_timestamp); } +void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOperation operation, + const std::optional<std::string> text_index_name, LabelId label, + uint64_t final_commit_timestamp) { + return AppendToWalDataDefinition(operation, text_index_name, label, {}, {}, {}, final_commit_timestamp); +} + utils::BasicResult<InMemoryStorage::CreateSnapshotError> InMemoryStorage::CreateSnapshot( memgraph::replication_coordination_glue::ReplicationRole replication_role) { using memgraph::replication_coordination_glue::ReplicationRole; @@ -2301,7 +2330,9 @@ IndicesInfo InMemoryStorage::InMemoryAccessor::ListAllIndices() const { auto *mem_label_property_index = static_cast<InMemoryLabelPropertyIndex *>(in_memory->indices_.label_property_index_.get()); auto *mem_edge_type_index = static_cast<InMemoryEdgeTypeIndex *>(in_memory->indices_.edge_type_index_.get()); - return {mem_label_index->ListIndices(), mem_label_property_index->ListIndices(), mem_edge_type_index->ListIndices()}; + auto &text_index = storage_->indices_.text_index_; + return {mem_label_index->ListIndices(), mem_label_property_index->ListIndices(), mem_edge_type_index->ListIndices(), + text_index.ListIndices()}; } ConstraintsInfo InMemoryStorage::InMemoryAccessor::ListAllConstraints() const { const auto *mem_storage = static_cast<InMemoryStorage *>(storage_); diff --git a/src/storage/v2/inmemory/storage.hpp b/src/storage/v2/inmemory/storage.hpp index 6d10e0fbd..6532471f3 100644 --- a/src/storage/v2/inmemory/storage.hpp +++ b/src/storage/v2/inmemory/storage.hpp @@ -398,7 +398,7 @@ class InMemoryStorage final : public Storage { StorageInfo GetBaseInfo() override; StorageInfo 
GetInfo(memgraph::replication_coordination_glue::ReplicationRole replication_role) override; - /// Return true in all cases excepted if any sync replicas have not sent confirmation. + /// Return true in all cases except if any sync replicas have not sent confirmation. [[nodiscard]] bool AppendToWal(const Transaction &transaction, uint64_t final_commit_timestamp, DatabaseAccessProtector db_acc); void AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label, @@ -412,9 +412,13 @@ class InMemoryStorage final : public Storage { void AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label, const std::set<PropertyId> &properties, LabelPropertyIndexStats property_stats, uint64_t final_commit_timestamp); - void AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label, + void AppendToWalDataDefinition(durability::StorageMetadataOperation operation, + const std::optional<std::string> text_index_name, LabelId label, const std::set<PropertyId> &properties, LabelIndexStats stats, LabelPropertyIndexStats property_stats, uint64_t final_commit_timestamp); + void AppendToWalDataDefinition(durability::StorageMetadataOperation operation, + const std::optional<std::string> text_index_name, LabelId label, + uint64_t final_commit_timestamp); uint64_t CommitTimestamp(std::optional<uint64_t> desired_commit_timestamp = {}); diff --git a/src/storage/v2/metadata_delta.hpp b/src/storage/v2/metadata_delta.hpp index b34966a62..e4616161d 100644 --- a/src/storage/v2/metadata_delta.hpp +++ b/src/storage/v2/metadata_delta.hpp @@ -37,6 +37,8 @@ struct MetadataDelta { LABEL_PROPERTY_INDEX_STATS_CLEAR, EDGE_INDEX_CREATE, EDGE_INDEX_DROP, + TEXT_INDEX_CREATE, + TEXT_INDEX_DROP, EXISTENCE_CONSTRAINT_CREATE, EXISTENCE_CONSTRAINT_DROP, UNIQUE_CONSTRAINT_CREATE, @@ -63,6 +65,10 @@ struct MetadataDelta { } edge_index_create; static constexpr struct EdgeIndexDrop { } edge_index_drop; + static constexpr struct 
TextIndexCreate { + } text_index_create; + static constexpr struct TextIndexDrop { + } text_index_drop; static constexpr struct ExistenceConstraintCreate { } existence_constraint_create; static constexpr struct ExistenceConstraintDrop { @@ -98,6 +104,12 @@ struct MetadataDelta { MetadataDelta(EdgeIndexDrop /*tag*/, EdgeTypeId edge_type) : action(Action::EDGE_INDEX_DROP), edge_type(edge_type) {} + MetadataDelta(TextIndexCreate /*tag*/, std::string index_name, LabelId label) + : action(Action::TEXT_INDEX_CREATE), text_index{index_name, label} {} + + MetadataDelta(TextIndexDrop /*tag*/, std::string index_name, LabelId label) + : action(Action::TEXT_INDEX_DROP), text_index{index_name, label} {} + MetadataDelta(ExistenceConstraintCreate /*tag*/, LabelId label, PropertyId property) : action(Action::EXISTENCE_CONSTRAINT_CREATE), label_property{label, property} {} @@ -127,6 +139,8 @@ struct MetadataDelta { case Action::LABEL_PROPERTY_INDEX_STATS_CLEAR: case Action::EDGE_INDEX_CREATE: case Action::EDGE_INDEX_DROP: + case Action::TEXT_INDEX_CREATE: + case Action::TEXT_INDEX_DROP: case Action::EXISTENCE_CONSTRAINT_CREATE: case Action::EXISTENCE_CONSTRAINT_DROP: break; @@ -164,6 +178,11 @@ struct MetadataDelta { PropertyId property; LabelPropertyIndexStats stats; } label_property_stats; + + struct { + std::string index_name; + LabelId label; + } text_index; }; }; diff --git a/src/storage/v2/property_store.cpp b/src/storage/v2/property_store.cpp index adf3440a2..0cfee0f98 100644 --- a/src/storage/v2/property_store.cpp +++ b/src/storage/v2/property_store.cpp @@ -118,7 +118,7 @@ enum class Type : uint8_t { STRING = 0x50, LIST = 0x60, MAP = 0x70, - TEMPORAL_DATA = 0x80 + TEMPORAL_DATA = 0x80, }; const uint8_t kMaskType = 0xf0; diff --git a/src/storage/v2/property_value.hpp b/src/storage/v2/property_value.hpp index e48be008a..161ad151a 100644 --- a/src/storage/v2/property_value.hpp +++ b/src/storage/v2/property_value.hpp @@ -92,7 +92,28 @@ class PropertyValue { // TODO: Implement 
copy assignment operators for primitive types. // TODO: Implement copy and move assignment operators for non-primitive types. - ~PropertyValue() { DestroyValue(); } + ~PropertyValue() { + switch (type_) { + // destructor for primitive types does nothing + case Type::Null: + case Type::Bool: + case Type::Int: + case Type::Double: + case Type::TemporalData: + return; + + // destructor for non primitive types since we used placement new + case Type::String: + std::destroy_at(&string_v.val_); + return; + case Type::List: + std::destroy_at(&list_v.val_); + return; + case Type::Map: + std::destroy_at(&map_v.val_); + return; + } + } Type type() const { return type_; } @@ -189,8 +210,6 @@ class PropertyValue { } private: - void DestroyValue() noexcept; - // NOTE: this may look strange but it is for better data layout // https://eel.is/c++draft/class.union#general-note-1 union { @@ -357,13 +376,13 @@ inline PropertyValue::PropertyValue(const PropertyValue &other) : type_(other.ty this->double_v.val_ = other.double_v.val_; return; case Type::String: - new (&string_v.val_) std::string(other.string_v.val_); + std::construct_at(&string_v.val_, other.string_v.val_); return; case Type::List: - new (&list_v.val_) std::vector<PropertyValue>(other.list_v.val_); + std::construct_at(&list_v.val_, other.list_v.val_); return; case Type::Map: - new (&map_v.val_) std::map<std::string, PropertyValue>(other.map_v.val_); + std::construct_at(&map_v.val_, other.map_v.val_); return; case Type::TemporalData: this->temporal_data_v.val_ = other.temporal_data_v.val_; @@ -371,7 +390,7 @@ inline PropertyValue::PropertyValue(const PropertyValue &other) : type_(other.ty } } -inline PropertyValue::PropertyValue(PropertyValue &&other) noexcept : type_(std::exchange(other.type_, Type::Null)) { +inline PropertyValue::PropertyValue(PropertyValue &&other) noexcept : type_(other.type_) { switch (type_) { case Type::Null: break; @@ -386,15 +405,12 @@ inline PropertyValue::PropertyValue(PropertyValue &&other) 
noexcept : type_(std: break; case Type::String: std::construct_at(&string_v.val_, std::move(other.string_v.val_)); - std::destroy_at(&other.string_v.val_); break; case Type::List: std::construct_at(&list_v.val_, std::move(other.list_v.val_)); - std::destroy_at(&other.list_v.val_); break; case Type::Map: std::construct_at(&map_v.val_, std::move(other.map_v.val_)); - std::destroy_at(&other.map_v.val_); break; case Type::TemporalData: temporal_data_v.val_ = other.temporal_data_v.val_; @@ -403,38 +419,88 @@ inline PropertyValue::PropertyValue(PropertyValue &&other) noexcept : type_(std: } inline PropertyValue &PropertyValue::operator=(const PropertyValue &other) { - if (this == &other) return *this; + if (type_ == other.type_) { + if (this == &other) return *this; + switch (other.type_) { + case Type::Null: + break; + case Type::Bool: + bool_v.val_ = other.bool_v.val_; + break; + case Type::Int: + int_v.val_ = other.int_v.val_; + break; + case Type::Double: + double_v.val_ = other.double_v.val_; + break; + case Type::String: + string_v.val_ = other.string_v.val_; + break; + case Type::List: + list_v.val_ = other.list_v.val_; + break; + case Type::Map: + map_v.val_ = other.map_v.val_; + break; + case Type::TemporalData: + temporal_data_v.val_ = other.temporal_data_v.val_; + break; + } + return *this; + } else { + // destroy + switch (type_) { + case Type::Null: + break; + case Type::Bool: + break; + case Type::Int: + break; + case Type::Double: + break; + case Type::String: + std::destroy_at(&string_v.val_); + break; + case Type::List: + std::destroy_at(&list_v.val_); + break; + case Type::Map: + std::destroy_at(&map_v.val_); + break; + case Type::TemporalData: + break; + } + // construct + auto *new_this = std::launder(this); + switch (other.type_) { + case Type::Null: + break; + case Type::Bool: + new_this->bool_v.val_ = other.bool_v.val_; + break; + case Type::Int: + new_this->int_v.val_ = other.int_v.val_; + break; + case Type::Double: + new_this->double_v.val_ = 
other.double_v.val_; + break; + case Type::String: + std::construct_at(&new_this->string_v.val_, other.string_v.val_); + break; + case Type::List: + std::construct_at(&new_this->list_v.val_, other.list_v.val_); + break; + case Type::Map: + std::construct_at(&new_this->map_v.val_, other.map_v.val_); + break; + case Type::TemporalData: + new_this->temporal_data_v.val_ = other.temporal_data_v.val_; + break; + } - DestroyValue(); - type_ = other.type_; - - switch (other.type_) { - case Type::Null: - break; - case Type::Bool: - this->bool_v.val_ = other.bool_v.val_; - break; - case Type::Int: - this->int_v.val_ = other.int_v.val_; - break; - case Type::Double: - this->double_v.val_ = other.double_v.val_; - break; - case Type::String: - new (&string_v.val_) std::string(other.string_v.val_); - break; - case Type::List: - new (&list_v.val_) std::vector<PropertyValue>(other.list_v.val_); - break; - case Type::Map: - new (&map_v.val_) std::map<std::string, PropertyValue>(other.map_v.val_); - break; - case Type::TemporalData: - this->temporal_data_v.val_ = other.temporal_data_v.val_; - break; + new_this->type_ = other.type_; + return *new_this; } - - return *this; } inline PropertyValue &PropertyValue::operator=(PropertyValue &&other) noexcept { @@ -456,48 +522,71 @@ inline PropertyValue &PropertyValue::operator=(PropertyValue &&other) noexcept { break; case Type::String: string_v.val_ = std::move(other.string_v.val_); - std::destroy_at(&other.string_v.val_); break; case Type::List: list_v.val_ = std::move(other.list_v.val_); - std::destroy_at(&other.list_v.val_); break; case Type::Map: map_v.val_ = std::move(other.map_v.val_); - std::destroy_at(&other.map_v.val_); break; case Type::TemporalData: temporal_data_v.val_ = other.temporal_data_v.val_; break; } - other.type_ = Type::Null; return *this; } else { - std::destroy_at(this); - return *std::construct_at(std::launder(this), std::move(other)); - } -} + // destroy + switch (type_) { + case Type::Null: + break; + case 
Type::Bool: + break; + case Type::Int: + break; + case Type::Double: + break; + case Type::String: + std::destroy_at(&string_v.val_); + break; + case Type::List: + std::destroy_at(&list_v.val_); + break; + case Type::Map: + std::destroy_at(&map_v.val_); + break; + case Type::TemporalData: + break; + } + // construct (no need to destroy moved from type) + auto *new_this = std::launder(this); + switch (other.type_) { + case Type::Null: + break; + case Type::Bool: + new_this->bool_v.val_ = other.bool_v.val_; + break; + case Type::Int: + new_this->int_v.val_ = other.int_v.val_; + break; + case Type::Double: + new_this->double_v.val_ = other.double_v.val_; + break; + case Type::String: + std::construct_at(&new_this->string_v.val_, std::move(other.string_v.val_)); + break; + case Type::List: + std::construct_at(&new_this->list_v.val_, std::move(other.list_v.val_)); + break; + case Type::Map: + std::construct_at(&new_this->map_v.val_, std::move(other.map_v.val_)); + break; + case Type::TemporalData: + new_this->temporal_data_v.val_ = other.temporal_data_v.val_; + break; + } -inline void PropertyValue::DestroyValue() noexcept { - switch (std::exchange(type_, Type::Null)) { - // destructor for primitive types does nothing - case Type::Null: - case Type::Bool: - case Type::Int: - case Type::Double: - case Type::TemporalData: - return; - - // destructor for non primitive types since we used placement new - case Type::String: - std::destroy_at(&string_v.val_); - return; - case Type::List: - std::destroy_at(&list_v.val_); - return; - case Type::Map: - std::destroy_at(&map_v.val_); - return; + new_this->type_ = other.type_; + return *new_this; } } diff --git a/src/storage/v2/replication/replication_client.cpp b/src/storage/v2/replication/replication_client.cpp index 3c1081206..ee1394fdb 100644 --- a/src/storage/v2/replication/replication_client.cpp +++ b/src/storage/v2/replication/replication_client.cpp @@ -13,6 +13,7 @@ #include "replication/replication_client.hpp" #include 
"storage/v2/inmemory/storage.hpp" +#include "storage/v2/replication/enums.hpp" #include "storage/v2/storage.hpp" #include "utils/exceptions.hpp" #include "utils/on_scope_exit.hpp" @@ -247,11 +248,13 @@ bool ReplicationStorageClient::FinalizeTransactionReplication(Storage *storage, MG_ASSERT(replica_stream_, "Missing stream for transaction deltas"); try { auto response = replica_stream_->Finalize(); - return replica_state_.WithLock([storage, &response, db_acc = std::move(db_acc), this](auto &state) mutable { + // NOLINTNEXTLINE + return replica_state_.WithLock([storage, response, db_acc = std::move(db_acc), this](auto &state) mutable { replica_stream_.reset(); if (!response.success || state == replication::ReplicaState::RECOVERY) { state = replication::ReplicaState::RECOVERY; - client_.thread_pool_.AddTask([storage, &response, db_acc = std::move(db_acc), this] { + // NOLINTNEXTLINE + client_.thread_pool_.AddTask([storage, response, db_acc = std::move(db_acc), this] { this->RecoverReplica(response.current_commit_timestamp, storage); }); return false; @@ -403,8 +406,9 @@ void ReplicaStream::AppendOperation(durability::StorageMetadataOperation operati const std::set<PropertyId> &properties, const LabelIndexStats &stats, const LabelPropertyIndexStats &property_stats, uint64_t timestamp) { replication::Encoder encoder(stream_.GetBuilder()); - EncodeOperation(&encoder, storage_->name_id_mapper_.get(), operation, label, properties, stats, property_stats, - timestamp); + // NOTE: Text search doesn’t have replication in scope yet (Phases 1 and 2) -> text index name not sent here + EncodeOperation(&encoder, storage_->name_id_mapper_.get(), operation, std::nullopt, label, properties, stats, + property_stats, timestamp); } void ReplicaStream::AppendOperation(durability::StorageMetadataOperation operation, EdgeTypeId edge_type, diff --git a/src/storage/v2/storage.cpp b/src/storage/v2/storage.cpp index 536a504a0..db4bec8be 100644 --- a/src/storage/v2/storage.cpp +++ 
b/src/storage/v2/storage.cpp @@ -13,6 +13,8 @@ #include "absl/container/flat_hash_set.h" #include "spdlog/spdlog.h" +#include "flags/experimental.hpp" +#include "flags/run_time_configurable.hpp" #include "storage/v2/disk/name_id_mapper.hpp" #include "storage/v2/storage.hpp" #include "storage/v2/transaction.hpp" @@ -273,6 +275,12 @@ Storage::Accessor::DetachDelete(std::vector<VertexAccessor *> nodes, std::vector return maybe_deleted_vertices.GetError(); } + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + for (auto *node : nodes_to_delete) { + storage_->indices_.text_index_.RemoveNode(node); + } + } + auto deleted_vertices = maybe_deleted_vertices.GetValue(); return std::make_optional<ReturnType>(std::move(deleted_vertices), std::move(deleted_edges)); @@ -543,4 +551,19 @@ void Storage::Accessor::MarkEdgeAsDeleted(Edge *edge) { } } +void Storage::Accessor::CreateTextIndex(const std::string &index_name, LabelId label, query::DbAccessor *db) { + MG_ASSERT(unique_guard_.owns_lock(), "Creating a text index requires unique access to storage!"); + storage_->indices_.text_index_.CreateIndex(storage_->config_.durability.storage_directory, index_name, label, db); + transaction_.md_deltas.emplace_back(MetadataDelta::text_index_create, index_name, label); + memgraph::metrics::IncrementCounter(memgraph::metrics::ActiveTextIndices); +} + +void Storage::Accessor::DropTextIndex(const std::string &index_name) { + MG_ASSERT(unique_guard_.owns_lock(), "Dropping a text index requires unique access to storage!"); + auto deleted_index_label = + storage_->indices_.text_index_.DropIndex(storage_->config_.durability.storage_directory, index_name); + transaction_.md_deltas.emplace_back(MetadataDelta::text_index_drop, index_name, deleted_index_label); + memgraph::metrics::DecrementCounter(memgraph::metrics::ActiveTextIndices); +} + } // namespace memgraph::storage diff --git a/src/storage/v2/storage.hpp b/src/storage/v2/storage.hpp index 58936bd56..a4436b1b7 100644 --- 
a/src/storage/v2/storage.hpp +++ b/src/storage/v2/storage.hpp @@ -20,6 +20,7 @@ #include "io/network/endpoint.hpp" #include "kvstore/kvstore.hpp" +#include "mg_procedure.h" #include "query/exceptions.hpp" #include "replication/config.hpp" #include "replication/replication_server.hpp" @@ -53,6 +54,7 @@ extern const Event SnapshotCreationLatency_us; extern const Event ActiveLabelIndices; extern const Event ActiveLabelPropertyIndices; +extern const Event ActiveTextIndices; } // namespace memgraph::metrics namespace memgraph::storage { @@ -63,6 +65,7 @@ struct IndicesInfo { std::vector<LabelId> label; std::vector<std::pair<LabelId, PropertyId>> label_property; std::vector<EdgeTypeId> edge_type; + std::vector<std::pair<std::string, LabelId>> text_indices; }; struct ConstraintsInfo { @@ -78,6 +81,7 @@ struct StorageInfo { uint64_t disk_usage; uint64_t label_indices; uint64_t label_property_indices; + uint64_t text_indices; uint64_t existence_constraints; uint64_t unique_constraints; StorageMode storage_mode; @@ -95,6 +99,7 @@ static inline nlohmann::json ToJson(const StorageInfo &info) { res["disk"] = info.disk_usage; res["label_indices"] = info.label_indices; res["label_prop_indices"] = info.label_property_indices; + res["text_indices"] = info.text_indices; res["existence_constraints"] = info.existence_constraints; res["unique_constraints"] = info.unique_constraints; res["storage_mode"] = storage::StorageModeToString(info.storage_mode); @@ -232,6 +237,28 @@ class Storage { virtual bool EdgeTypeIndexExists(EdgeTypeId edge_type) const = 0; + bool TextIndexExists(const std::string &index_name) const { + return storage_->indices_.text_index_.IndexExists(index_name); + } + + void TextIndexAddVertex(const VertexAccessor &vertex) { + storage_->indices_.text_index_.AddNode(vertex.vertex_, storage_->name_id_mapper_.get()); + } + + void TextIndexUpdateVertex(const VertexAccessor &vertex, const std::vector<LabelId> &removed_labels = {}) { + 
storage_->indices_.text_index_.UpdateNode(vertex.vertex_, storage_->name_id_mapper_.get(), removed_labels); + } + + std::vector<Gid> TextIndexSearch(const std::string &index_name, const std::string &search_query, + text_search_mode search_mode) const { + return storage_->indices_.text_index_.Search(index_name, search_query, search_mode); + } + + std::string TextIndexAggregate(const std::string &index_name, const std::string &search_query, + const std::string &aggregation_query) const { + return storage_->indices_.text_index_.Aggregate(index_name, search_query, aggregation_query); + } + virtual IndicesInfo ListAllIndices() const = 0; virtual ConstraintsInfo ListAllConstraints() const = 0; @@ -284,6 +311,10 @@ class Storage { virtual utils::BasicResult<StorageIndexDefinitionError, void> DropIndex(EdgeTypeId edge_type) = 0; + void CreateTextIndex(const std::string &index_name, LabelId label, query::DbAccessor *db); + + void DropTextIndex(const std::string &index_name); + virtual utils::BasicResult<StorageExistenceConstraintDefinitionError, void> CreateExistenceConstraint( LabelId label, PropertyId property) = 0; diff --git a/src/storage/v2/vertex_accessor.cpp b/src/storage/v2/vertex_accessor.cpp index 7d78070a8..83dcc003b 100644 --- a/src/storage/v2/vertex_accessor.cpp +++ b/src/storage/v2/vertex_accessor.cpp @@ -261,20 +261,31 @@ Result<PropertyValue> VertexAccessor::SetProperty(PropertyId property, const Pro if (vertex_->deleted) return Error::DELETED_OBJECT; - auto current_value = vertex_->properties.GetProperty(property); - // We could skip setting the value if the previous one is the same to the new - // one. This would save some memory as a delta would not be created as well as - // avoid copying the value. The reason we are not doing that is because the - // current code always follows the logical pattern of "create a delta" and - // "modify in-place". Additionally, the created delta will make other - // transactions get a SERIALIZATION_ERROR. 
- + PropertyValue current_value; + const bool skip_duplicate_write = !storage_->config_.salient.items.delta_on_identical_property_update; utils::AtomicMemoryBlock atomic_memory_block{ - [transaction = transaction_, vertex = vertex_, &value, &property, &current_value]() { + [transaction = transaction_, vertex = vertex_, &value, &property, &current_value, skip_duplicate_write]() { + current_value = vertex->properties.GetProperty(property); + // If deltas on identical property updates are disabled and the stored value + // already equals the new one, skip the write. This saves some memory as a + // delta is not created, and avoids copying the value. Otherwise follow the + // logical pattern of "create a delta" and "modify in-place"; the created + // delta will make other transactions get a SERIALIZATION_ERROR. + // Returns true when the write was skipped, false otherwise. + if (skip_duplicate_write && current_value == value) { + return true; + } + CreateAndLinkDelta(transaction, vertex, Delta::SetPropertyTag(), property, current_value); vertex->properties.SetProperty(property, value); + + return false; }}; - std::invoke(atomic_memory_block); + const bool early_exit = std::invoke(atomic_memory_block); + + if (early_exit) { + return std::move(current_value); + } if (transaction_->constraint_verification_info) { if (!value.IsNull()) { @@ -339,27 +350,29 @@ Result<std::vector<std::tuple<PropertyId, PropertyValue, PropertyValue>>> Vertex if (vertex_->deleted) return Error::DELETED_OBJECT; + const bool skip_duplicate_update = !storage_->config_.salient.items.delta_on_identical_property_update; using ReturnType = decltype(vertex_->properties.UpdateProperties(properties)); std::optional<ReturnType> id_old_new_change; - utils::AtomicMemoryBlock atomic_memory_block{ - [storage = storage_, transaction = transaction_, vertex = vertex_, &properties, &id_old_new_change]() { - id_old_new_change.emplace(vertex->properties.UpdateProperties(properties)); - if (!id_old_new_change.has_value()) { - return; +
utils::AtomicMemoryBlock atomic_memory_block{[storage = storage_, transaction = transaction_, vertex = vertex_, + &properties, &id_old_new_change, skip_duplicate_update]() { + id_old_new_change.emplace(vertex->properties.UpdateProperties(properties)); + if (!id_old_new_change.has_value()) { + return; + } + for (auto &[id, old_value, new_value] : *id_old_new_change) { + storage->indices_.UpdateOnSetProperty(id, new_value, vertex, *transaction); + if (skip_duplicate_update && old_value == new_value) continue; + CreateAndLinkDelta(transaction, vertex, Delta::SetPropertyTag(), id, std::move(old_value)); + transaction->manyDeltasCache.Invalidate(vertex, id); + if (transaction->constraint_verification_info) { + if (!new_value.IsNull()) { + transaction->constraint_verification_info->AddedProperty(vertex); + } else { + transaction->constraint_verification_info->RemovedProperty(vertex); } - for (auto &[id, old_value, new_value] : *id_old_new_change) { - storage->indices_.UpdateOnSetProperty(id, new_value, vertex, *transaction); - CreateAndLinkDelta(transaction, vertex, Delta::SetPropertyTag(), id, std::move(old_value)); - transaction->manyDeltasCache.Invalidate(vertex, id); - if (transaction->constraint_verification_info) { - if (!new_value.IsNull()) { - transaction->constraint_verification_info->AddedProperty(vertex); - } else { - transaction->constraint_verification_info->RemovedProperty(vertex); - } - } - } - }}; + } + } + }}; std::invoke(atomic_memory_block); return id_old_new_change.has_value() ? 
std::move(id_old_new_change.value()) : ReturnType{}; diff --git a/src/utils/atomic_memory_block.hpp b/src/utils/atomic_memory_block.hpp index 31a3cf3a9..5ae2aab35 100644 --- a/src/utils/atomic_memory_block.hpp +++ b/src/utils/atomic_memory_block.hpp @@ -29,12 +29,10 @@ class [[nodiscard]] AtomicMemoryBlock { AtomicMemoryBlock &operator=(AtomicMemoryBlock &&) = delete; ~AtomicMemoryBlock() = default; - void operator()() { - { - utils::MemoryTracker::OutOfMemoryExceptionBlocker oom_blocker; - function_(); - } - total_memory_tracker.DoCheck(); + auto operator()() -> std::invoke_result_t<Callable> { + auto check_on_exit = OnScopeExit{[&] { total_memory_tracker.DoCheck(); }}; + utils::MemoryTracker::OutOfMemoryExceptionBlocker oom_blocker; + return function_(); } private: diff --git a/src/utils/event_counter.cpp b/src/utils/event_counter.cpp index da396ff56..f457a6762 100644 --- a/src/utils/event_counter.cpp +++ b/src/utils/event_counter.cpp @@ -61,6 +61,7 @@ \ M(ActiveLabelIndices, Index, "Number of active label indices in the system.") \ M(ActiveLabelPropertyIndices, Index, "Number of active label property indices in the system.") \ + M(ActiveTextIndices, Index, "Number of active text indices in the system.") \ \ M(StreamsCreated, Stream, "Number of Streams created.") \ M(MessagesConsumed, Stream, "Number of consumed streamed messages.") \ diff --git a/src/utils/memory.cpp b/src/utils/memory.cpp index d09f70fc3..6b1f26c11 100644 --- a/src/utils/memory.cpp +++ b/src/utils/memory.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -150,128 +150,133 @@ void *MonotonicBufferResource::DoAllocate(size_t bytes, size_t alignment) { namespace impl { -Pool::Pool(size_t block_size, unsigned char blocks_per_chunk, MemoryResource *memory) - : blocks_per_chunk_(blocks_per_chunk), block_size_(block_size), chunks_(memory) {} - -Pool::~Pool() { MG_ASSERT(chunks_.empty(), "You need to call Release before destruction!"); } - -void *Pool::Allocate() { - auto allocate_block_from_chunk = [this](Chunk *chunk) { - unsigned char *available_block = chunk->data + (chunk->first_available_block_ix * block_size_); - // Update free-list pointer (index in our case) by reading "next" from the - // available_block. - chunk->first_available_block_ix = *available_block; - --chunk->blocks_available; - return available_block; - }; - if (last_alloc_chunk_ && last_alloc_chunk_->blocks_available > 0U) - return allocate_block_from_chunk(last_alloc_chunk_); - // Find a Chunk with available memory. - for (auto &chunk : chunks_) { - if (chunk.blocks_available > 0U) { - last_alloc_chunk_ = &chunk; - return allocate_block_from_chunk(last_alloc_chunk_); - } - } - // We haven't found a Chunk with available memory, so allocate a new one. - if (block_size_ > std::numeric_limits<size_t>::max() / blocks_per_chunk_) throw BadAlloc("Allocation size overflow"); - size_t data_size = blocks_per_chunk_ * block_size_; +Pool::Pool(size_t block_size, unsigned char blocks_per_chunk, MemoryResource *chunk_memory) + : blocks_per_chunk_(blocks_per_chunk), block_size_(block_size), chunks_(chunk_memory) { // Use the next pow2 of block_size_ as alignment, so that we cover alignment // requests between 1 and block_size_. Users of this class should make sure // that requested alignment of particular blocks is never greater than the // block itself. 
- size_t alignment = Ceil2(block_size_); - if (alignment < block_size_) throw BadAlloc("Allocation alignment overflow"); - auto *data = reinterpret_cast<unsigned char *>(GetUpstreamResource()->Allocate(data_size, alignment)); - // Form a free-list of blocks in data. - for (unsigned char i = 0U; i < blocks_per_chunk_; ++i) { - *(data + (i * block_size_)) = i + 1U; - } - Chunk chunk{data, 0, blocks_per_chunk_}; - // Insert the big block in the sorted position. - auto it = std::lower_bound(chunks_.begin(), chunks_.end(), chunk, - [](const auto &a, const auto &b) { return a.data < b.data; }); - try { - it = chunks_.insert(it, chunk); - } catch (...) { - GetUpstreamResource()->Deallocate(data, data_size, alignment); - throw; - } + if (block_size_ > std::numeric_limits<size_t>::max() / blocks_per_chunk_) throw BadAlloc("Allocation size overflow"); +} - last_alloc_chunk_ = &*it; - last_dealloc_chunk_ = &*it; - return allocate_block_from_chunk(last_alloc_chunk_); +Pool::~Pool() { + if (!chunks_.empty()) { + auto *resource = GetUpstreamResource(); + auto const dataSize = blocks_per_chunk_ * block_size_; + auto const alignment = Ceil2(block_size_); + for (auto &chunk : chunks_) { + resource->Deallocate(chunk.raw_data, dataSize, alignment); + } + chunks_.clear(); + } + free_list_ = nullptr; +} + +void *Pool::Allocate() { + if (!free_list_) [[unlikely]] { + // need new chunk + auto const data_size = blocks_per_chunk_ * block_size_; + auto const alignment = Ceil2(block_size_); + auto *resource = GetUpstreamResource(); + auto *data = reinterpret_cast<std::byte *>(resource->Allocate(data_size, alignment)); + try { + auto &new_chunk = chunks_.emplace_front(data); + free_list_ = new_chunk.build_freelist(block_size_, blocks_per_chunk_); + } catch (...) 
{ + resource->Deallocate(data, data_size, alignment); + throw; + } + } + return std::exchange(free_list_, *reinterpret_cast<std::byte **>(free_list_)); } void Pool::Deallocate(void *p) { - MG_ASSERT(last_dealloc_chunk_, "No chunk to deallocate"); - MG_ASSERT(!chunks_.empty(), - "Expected a call to Deallocate after at least a " - "single Allocate has been done."); - auto is_in_chunk = [this, p](const Chunk &chunk) { - auto ptr = reinterpret_cast<uintptr_t>(p); - size_t data_size = blocks_per_chunk_ * block_size_; - return reinterpret_cast<uintptr_t>(chunk.data) <= ptr && ptr < reinterpret_cast<uintptr_t>(chunk.data + data_size); - }; - auto deallocate_block_from_chunk = [this, p](Chunk *chunk) { - // NOTE: This check is not enough to cover all double-free issues. - MG_ASSERT(chunk->blocks_available < blocks_per_chunk_, - "Deallocating more blocks than a chunk can contain, possibly a " - "double-free situation or we have a bug in the allocator."); - // Link the block into the free-list - auto *block = reinterpret_cast<unsigned char *>(p); - *block = chunk->first_available_block_ix; - chunk->first_available_block_ix = (block - chunk->data) / block_size_; - chunk->blocks_available++; - }; - if (is_in_chunk(*last_dealloc_chunk_)) { - deallocate_block_from_chunk(last_dealloc_chunk_); - return; - } - - // Find the chunk which served this allocation - Chunk chunk{reinterpret_cast<unsigned char *>(p) - blocks_per_chunk_ * block_size_, 0, 0}; - auto it = std::lower_bound(chunks_.begin(), chunks_.end(), chunk, - [](const auto &a, const auto &b) { return a.data <= b.data; }); - MG_ASSERT(it != chunks_.end(), "Failed deallocation in utils::Pool"); - MG_ASSERT(is_in_chunk(*it), "Failed deallocation in utils::Pool"); - - // Update last_alloc_chunk_ as well because it now has a free block. - // Additionally this corresponds with C++ pattern of allocations and - // deallocations being done in reverse order. 
- last_alloc_chunk_ = &*it; - last_dealloc_chunk_ = &*it; - deallocate_block_from_chunk(last_dealloc_chunk_); - // TODO: We could release the Chunk to upstream memory -} - -void Pool::Release() { - for (auto &chunk : chunks_) { - size_t data_size = blocks_per_chunk_ * block_size_; - size_t alignment = Ceil2(block_size_); - GetUpstreamResource()->Deallocate(chunk.data, data_size, alignment); - } - chunks_.clear(); - last_alloc_chunk_ = nullptr; - last_dealloc_chunk_ = nullptr; + *reinterpret_cast<std::byte **>(p) = std::exchange(free_list_, reinterpret_cast<std::byte *>(p)); } } // namespace impl -PoolResource::PoolResource(size_t max_blocks_per_chunk, size_t max_block_size, MemoryResource *memory_pools, - MemoryResource *memory_unpooled) - : pools_(memory_pools), - unpooled_(memory_unpooled), - max_blocks_per_chunk_(std::min(max_blocks_per_chunk, static_cast<size_t>(impl::Pool::MaxBlocksInChunk()))), - max_block_size_(max_block_size) { - MG_ASSERT(max_blocks_per_chunk_ > 0U, "Invalid number of blocks per chunk"); - MG_ASSERT(max_block_size_ > 0U, "Invalid size of block"); +struct NullMemoryResourceImpl final : public MemoryResource { + NullMemoryResourceImpl() = default; + NullMemoryResourceImpl(NullMemoryResourceImpl const &) = default; + NullMemoryResourceImpl &operator=(NullMemoryResourceImpl const &) = default; + NullMemoryResourceImpl(NullMemoryResourceImpl &&) = default; + NullMemoryResourceImpl &operator=(NullMemoryResourceImpl &&) = default; + ~NullMemoryResourceImpl() override = default; + + private: + void *DoAllocate(size_t /*bytes*/, size_t /*alignment*/) override { + throw BadAlloc{"NullMemoryResource doesn't allocate"}; + } + void DoDeallocate(void * /*p*/, size_t /*bytes*/, size_t /*alignment*/) override { + throw BadAlloc{"NullMemoryResource doesn't deallocate"}; + } + bool DoIsEqual(MemoryResource const &other) const noexcept override { + return dynamic_cast<NullMemoryResourceImpl const *>(&other) != nullptr; + } +}; + +MemoryResource 
*NullMemoryResource() noexcept { + static auto res = NullMemoryResourceImpl{}; + return &res; } +namespace impl { + +/// 1 bit sensitivity test +static_assert(bin_index<1>(9U) == 0); +static_assert(bin_index<1>(10U) == 0); +static_assert(bin_index<1>(11U) == 0); +static_assert(bin_index<1>(12U) == 0); +static_assert(bin_index<1>(13U) == 0); +static_assert(bin_index<1>(14U) == 0); +static_assert(bin_index<1>(15U) == 0); +static_assert(bin_index<1>(16U) == 0); + +static_assert(bin_index<1>(17U) == 1); +static_assert(bin_index<1>(18U) == 1); +static_assert(bin_index<1>(19U) == 1); +static_assert(bin_index<1>(20U) == 1); +static_assert(bin_index<1>(21U) == 1); +static_assert(bin_index<1>(22U) == 1); +static_assert(bin_index<1>(23U) == 1); +static_assert(bin_index<1>(24U) == 1); +static_assert(bin_index<1>(25U) == 1); +static_assert(bin_index<1>(26U) == 1); +static_assert(bin_index<1>(27U) == 1); +static_assert(bin_index<1>(28U) == 1); +static_assert(bin_index<1>(29U) == 1); +static_assert(bin_index<1>(30U) == 1); +static_assert(bin_index<1>(31U) == 1); +static_assert(bin_index<1>(32U) == 1); + +/// 2 bit sensitivity test + +static_assert(bin_index<2>(9U) == 0); +static_assert(bin_index<2>(10U) == 0); +static_assert(bin_index<2>(11U) == 0); +static_assert(bin_index<2>(12U) == 0); + +static_assert(bin_index<2>(13U) == 1); +static_assert(bin_index<2>(14U) == 1); +static_assert(bin_index<2>(15U) == 1); +static_assert(bin_index<2>(16U) == 1); + +static_assert(bin_index<2>(17U) == 2); +static_assert(bin_index<2>(18U) == 2); +static_assert(bin_index<2>(19U) == 2); +static_assert(bin_index<2>(20U) == 2); +static_assert(bin_index<2>(21U) == 2); +static_assert(bin_index<2>(22U) == 2); +static_assert(bin_index<2>(23U) == 2); +static_assert(bin_index<2>(24U) == 2); + +} // namespace impl + void *PoolResource::DoAllocate(size_t bytes, size_t alignment) { // Take the max of `bytes` and `alignment` so that we simplify handling // alignment requests. 
- size_t block_size = std::max(bytes, alignment); + size_t block_size = std::max({bytes, alignment, 1UL}); // Check that we have received a regular allocation request with non-padded // structs/classes in play. These will always have // `sizeof(T) % alignof(T) == 0`. Special requests which don't have that @@ -279,80 +284,36 @@ void *PoolResource::DoAllocate(size_t bytes, size_t alignment) { // have to write a general-purpose allocator which has to behave as complex // as malloc/free. if (block_size % alignment != 0) throw BadAlloc("Requested bytes must be a multiple of alignment"); - if (block_size > max_block_size_) { - // Allocate a big block. - BigBlock big_block{bytes, alignment, GetUpstreamResourceBlocks()->Allocate(bytes, alignment)}; - // Insert the big block in the sorted position. - auto it = std::lower_bound(unpooled_.begin(), unpooled_.end(), big_block, - [](const auto &a, const auto &b) { return a.data < b.data; }); - try { - unpooled_.insert(it, big_block); - } catch (...) { - GetUpstreamResourceBlocks()->Deallocate(big_block.data, bytes, alignment); - throw; - } - return big_block.data; - } - // Allocate a regular block, first check if last_alloc_pool_ is suitable. - if (last_alloc_pool_ && last_alloc_pool_->GetBlockSize() == block_size) { - return last_alloc_pool_->Allocate(); - } - // Find the pool with greater or equal block_size. - impl::Pool pool(block_size, max_blocks_per_chunk_, GetUpstreamResource()); - auto it = std::lower_bound(pools_.begin(), pools_.end(), pool, - [](const auto &a, const auto &b) { return a.GetBlockSize() < b.GetBlockSize(); }); - if (it != pools_.end() && it->GetBlockSize() == block_size) { - last_alloc_pool_ = &*it; - last_dealloc_pool_ = &*it; - return it->Allocate(); - } - // We don't have a pool for this block_size, so insert it in the sorted - // position. 
- it = pools_.emplace(it, std::move(pool)); - last_alloc_pool_ = &*it; - last_dealloc_pool_ = &*it; - return it->Allocate(); -} + if (block_size <= 64) { + return mini_pools_[(block_size - 1UL) / 8UL].Allocate(); + } + if (block_size <= 128) { + return pools_3bit_.allocate(block_size); + } + if (block_size <= 512) { + return pools_4bit_.allocate(block_size); + } + if (block_size <= 1024) { + return pools_5bit_.allocate(block_size); + } + return unpooled_memory_->Allocate(bytes, alignment); +} void PoolResource::DoDeallocate(void *p, size_t bytes, size_t alignment) { - size_t block_size = std::max(bytes, alignment); - MG_ASSERT(block_size % alignment == 0, - "PoolResource shouldn't serve allocation requests where bytes aren't " - "a multiple of alignment"); - if (block_size > max_block_size_) { - // Deallocate a big block. - BigBlock big_block{bytes, alignment, p}; - auto it = std::lower_bound(unpooled_.begin(), unpooled_.end(), big_block, - [](const auto &a, const auto &b) { return a.data < b.data; }); - MG_ASSERT(it != unpooled_.end(), "Failed deallocation"); - MG_ASSERT(it->data == p && it->bytes == bytes && it->alignment == alignment, "Failed deallocation"); - unpooled_.erase(it); - GetUpstreamResourceBlocks()->Deallocate(p, bytes, alignment); - return; + size_t block_size = std::max({bytes, alignment, 1UL}); + DMG_ASSERT(block_size % alignment == 0); + + if (block_size <= 64) { + mini_pools_[(block_size - 1UL) / 8UL].Deallocate(p); + } else if (block_size <= 128) { + pools_3bit_.deallocate(p, block_size); + } else if (block_size <= 512) { + pools_4bit_.deallocate(p, block_size); + } else if (block_size <= 1024) { + pools_5bit_.deallocate(p, block_size); + } else { + unpooled_memory_->Deallocate(p, bytes, alignment); } - // Deallocate a regular block, first check if last_dealloc_pool_ is suitable. - if (last_dealloc_pool_ && last_dealloc_pool_->GetBlockSize() == block_size) return last_dealloc_pool_->Deallocate(p); - // Find the pool with equal block_size. 
- impl::Pool pool(block_size, max_blocks_per_chunk_, GetUpstreamResource()); - auto it = std::lower_bound(pools_.begin(), pools_.end(), pool, - [](const auto &a, const auto &b) { return a.GetBlockSize() < b.GetBlockSize(); }); - MG_ASSERT(it != pools_.end(), "Failed deallocation"); - MG_ASSERT(it->GetBlockSize() == block_size, "Failed deallocation"); - last_alloc_pool_ = &*it; - last_dealloc_pool_ = &*it; - return it->Deallocate(p); } - -void PoolResource::Release() { - for (auto &pool : pools_) pool.Release(); - pools_.clear(); - for (auto &big_block : unpooled_) - GetUpstreamResourceBlocks()->Deallocate(big_block.data, big_block.bytes, big_block.alignment); - unpooled_.clear(); - last_alloc_pool_ = nullptr; - last_dealloc_pool_ = nullptr; -} - -// PoolResource END - +bool PoolResource::DoIsEqual(MemoryResource const &other) const noexcept { return this == &other; } } // namespace memgraph::utils diff --git a/src/utils/memory.hpp b/src/utils/memory.hpp index 225a3b6a1..8ff6c3523 100644 --- a/src/utils/memory.hpp +++ b/src/utils/memory.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -15,7 +15,11 @@ #pragma once +#include <climits> #include <cstddef> +#include <cstdint> +#include <forward_list> +#include <list> #include <memory> #include <mutex> #include <new> @@ -248,6 +252,8 @@ bool operator!=(const Allocator<T> &a, const Allocator<U> &b) { return !(a == b); } +auto NullMemoryResource() noexcept -> MemoryResource *; + /// Wraps std::pmr::memory_resource for use with out MemoryResource class StdMemoryResource final : public MemoryResource { public: @@ -380,37 +386,45 @@ class MonotonicBufferResource final : public MemoryResource { namespace impl { +template <class T> +using AList = std::forward_list<T, Allocator<T>>; + template <class T> using AVector = std::vector<T, Allocator<T>>; /// Holds a number of Chunks each serving blocks of particular size. When a -/// Chunk runs out of available blocks, a new Chunk is allocated. The naming is -/// taken from `libstdc++` implementation, but the implementation details are -/// more similar to `FixedAllocator` described in "Small Object Allocation" from -/// "Modern C++ Design". +/// Chunk runs out of available blocks, a new Chunk is allocated. class Pool final { /// Holds a pointer into a chunk of memory which consists of equal sized - /// blocks. Each Chunk can handle `std::numeric_limits<unsigned char>::max()` - /// number of blocks. Blocks form a "free-list", where each unused block has - /// an embedded index to the next unused block. + /// blocks. Blocks form a "free-list" struct Chunk { - unsigned char *data; - unsigned char first_available_block_ix; - unsigned char blocks_available; + // TODO: make blocks_per_chunk a per chunk thing (ie. 
allow chunk growth) + std::byte *raw_data; + explicit Chunk(std::byte *rawData) : raw_data(rawData) {} + std::byte *build_freelist(std::size_t block_size, std::size_t blocks_in_chunk) { + auto current = raw_data; + std::byte *prev = nullptr; + auto end = current + (blocks_in_chunk * block_size); + while (current != end) { + std::byte **list_entry = reinterpret_cast<std::byte **>(current); + *list_entry = std::exchange(prev, current); + current += block_size; + } + DMG_ASSERT(prev != nullptr); + return prev; + } }; - unsigned char blocks_per_chunk_; - size_t block_size_; - AVector<Chunk> chunks_; - Chunk *last_alloc_chunk_{nullptr}; - Chunk *last_dealloc_chunk_{nullptr}; + std::byte *free_list_{nullptr}; + uint8_t blocks_per_chunk_{}; + std::size_t block_size_{}; + + AList<Chunk> chunks_; // TODO: do ourself so we can do fast Release (detect monotonic, do nothing) public: - static constexpr auto MaxBlocksInChunk() { - return std::numeric_limits<decltype(Chunk::first_available_block_ix)>::max(); - } + static constexpr auto MaxBlocksInChunk = std::numeric_limits<decltype(blocks_per_chunk_)>::max(); - Pool(size_t block_size, unsigned char blocks_per_chunk, MemoryResource *memory); + Pool(size_t block_size, unsigned char blocks_per_chunk, MemoryResource *chunk_memory); Pool(const Pool &) = delete; Pool &operator=(const Pool &) = delete; @@ -430,8 +444,147 @@ class Pool final { void *Allocate(); void Deallocate(void *p); +}; - void Release(); +// C++ overloads for clz +constexpr auto clz(unsigned int x) { return __builtin_clz(x); } +constexpr auto clz(unsigned long x) { return __builtin_clzl(x); } +constexpr auto clz(unsigned long long x) { return __builtin_clzll(x); } + +template <typename T> +constexpr auto bits_sizeof = sizeof(T) * CHAR_BIT; + +/// 0-based bit index of the most significant bit assumed that `n` != 0 +template <typename T> +constexpr auto msb_index(T n) { + return bits_sizeof<T> - clz(n) - T(1); +} + +/* This function will in O(1) time provide a bin 
index based on: + * B - the number of most significant bits to be sensitive to + * LB - the value that should be considered below the consideration for bin index of 0 (LB is exclusive) + * + * lets say we were: + * - sensitive to two bits (B == 2) + * - lowest bin is for 8 (LB == 8) + * + * our bin indexes would look like: + * 0 - 0000'1100 12 + * 1 - 0001'0000 16 + * 2 - 0001'1000 24 + * 3 - 0010'0000 32 + * 4 - 0011'0000 48 + * 5 - 0100'0000 64 + * 6 - 0110'0000 96 + * 7 - 1000'0000 128 + * 8 - 1100'0000 192 + * ... + * + * Example: + * Given n == 70, we want to return the bin index to the first value which is + * larger than n. + * bin_index<2,8>(70) => 6, as 64 (index 5) < 70 and 70 <= 96 (index 6) + */ +template <std::size_t B = 2, std::size_t LB = 8> +constexpr std::size_t bin_index(std::size_t n) { + static_assert(B >= 1U, "Needs to be sensitive to at least one bit"); + static_assert(LB != 0U, "Lower bound need to be non-zero"); + DMG_ASSERT(n > LB); + + // We will alway be sensitive to at least the MSB + // exponent tells us how many bits we need to use to select within a level + constexpr auto kExponent = B - 1U; + // 2^exponent gives the size of each level + constexpr auto kSize = 1U << kExponent; + // offset help adjust results down to be inline with bin_index(LB) == 0 + constexpr auto kOffset = msb_index(LB); + + auto const msb_idx = msb_index(n); + DMG_ASSERT(msb_idx != 0); + + auto const mask = (1u << msb_idx) - 1u; + auto const under = n & mask; + auto const selector = under >> (msb_idx - kExponent); + + auto const rest = under & (mask >> kExponent); + auto const no_overflow = rest == 0U; + + auto const msb_level = kSize * (msb_idx - kOffset); + return msb_level + selector - no_overflow; +} + +// This is the inverse opperation for bin_index +// bin_size(bin_index(X)-1) < X <= bin_size(bin_index(X)) +template <std::size_t B = 2, std::size_t LB = 8> +std::size_t bin_size(std::size_t idx) { + constexpr auto kExponent = B - 1U; + constexpr auto kSize = 
1U << kExponent; + constexpr auto kOffset = msb_index(LB); + + // no need to optimise `/` or `%` compiler can see `kSize` is a power of 2 + auto const level = (idx + 1) / kSize; + auto const sub_level = (idx + 1) % kSize; + return (1U << (level + kOffset)) | (sub_level << (level + kOffset - kExponent)); +} + +template <std::size_t Bits, std::size_t LB, std::size_t UB> +struct MultiPool { + static_assert(LB < UB, "lower bound must be less than upper bound"); + static_assert(IsPow2(LB) && IsPow2(UB), "Design untested for non powers of 2"); + static_assert((LB << Bits) % sizeof(void *) == 0, "Smallest pool must have space and alignment for freelist"); + + // upper bound is inclusive + static bool is_size_handled(std::size_t size) { return LB < size && size <= UB; } + static bool is_above_upper_bound(std::size_t size) { return UB < size; } + + static constexpr auto n_bins = bin_index<Bits, LB>(UB) + 1U; + + MultiPool(uint8_t blocks_per_chunk, MemoryResource *memory, MemoryResource *internal_memory) + : blocks_per_chunk_{blocks_per_chunk}, memory_{memory}, internal_memory_{internal_memory} {} + + ~MultiPool() { + if (pools_) { + auto pool_alloc = Allocator<Pool>(internal_memory_); + for (auto i = 0U; i != n_bins; ++i) { + pool_alloc.destroy(&pools_[i]); + } + pool_alloc.deallocate(pools_, n_bins); + } + } + + void *allocate(std::size_t bytes) { + auto idx = bin_index<Bits, LB>(bytes); + if (!pools_) [[unlikely]] { + initialise_pools(); + } + return pools_[idx].Allocate(); + } + + void deallocate(void *ptr, std::size_t bytes) { + auto idx = bin_index<Bits, LB>(bytes); + pools_[idx].Deallocate(ptr); + } + + private: + void initialise_pools() { + auto pool_alloc = Allocator<Pool>(internal_memory_); + auto pools = pool_alloc.allocate(n_bins); + try { + for (auto i = 0U; i != n_bins; ++i) { + auto block_size = bin_size<Bits, LB>(i); + pool_alloc.construct(&pools[i], block_size, blocks_per_chunk_, memory_); + } + pools_ = pools; + } catch (...) 
{ + pool_alloc.deallocate(pools, n_bins); + throw; + } + } + + Pool *pools_{}; + uint8_t blocks_per_chunk_{}; + MemoryResource *memory_{}; + MemoryResource *internal_memory_{}; }; } // namespace impl @@ -442,8 +595,6 @@ class Pool final { /// /// This class has the following properties with regards to memory management. /// -/// * All allocated memory will be freed upon destruction, even if Deallocate -/// has not been called for some of the allocated blocks. /// * It consists of a collection of impl::Pool instances, each serving /// requests for different block sizes. Each impl::Pool manages a collection /// of impl::Pool::Chunk instances which are divided into blocks of uniform @@ -452,91 +603,46 @@ class Pool final { /// arbitrary alignment requests. Each requested block size must be a /// multiple of alignment or smaller than the alignment value. /// * An allocation request within the limits of the maximum block size will -/// find a Pool serving the requested size. If there's no Pool serving such -/// a request, a new one is instantiated. +/// find a Pool serving the requested size. Some requests will share a larger +/// pool size. /// * When a Pool exhausts its Chunk, a new one is allocated with the size for /// the maximum number of blocks. /// * Allocation requests which exceed the maximum block size will be /// forwarded to upstream MemoryResource. -/// * Maximum block size and maximum number of blocks per chunk can be tuned -/// by passing the arguments to the constructor. +/// * Maximum number of blocks per chunk can be tuned by passing the +/// arguments to the constructor. + class PoolResource final : public MemoryResource { public: - /// Construct with given max_blocks_per_chunk, max_block_size and upstream - /// memory. - /// - /// The implementation will use std::min(max_blocks_per_chunk, - /// impl::Pool::MaxBlocksInChunk()) as the real maximum number of blocks per - /// chunk. 
Allocation requests exceeding max_block_size are simply forwarded - /// to upstream memory. - PoolResource(size_t max_blocks_per_chunk, size_t max_block_size, MemoryResource *memory_pools = NewDeleteResource(), - MemoryResource *memory_unpooled = NewDeleteResource()); - - PoolResource(const PoolResource &) = delete; - PoolResource &operator=(const PoolResource &) = delete; - - PoolResource(PoolResource &&) = default; - PoolResource &operator=(PoolResource &&) = default; - - ~PoolResource() override { Release(); } - - MemoryResource *GetUpstreamResource() const { return pools_.get_allocator().GetMemoryResource(); } - MemoryResource *GetUpstreamResourceBlocks() const { return unpooled_.get_allocator().GetMemoryResource(); } - - /// Release all allocated memory. - void Release(); + PoolResource(uint8_t blocks_per_chunk, MemoryResource *memory = NewDeleteResource(), + MemoryResource *internal_memory = NewDeleteResource()) + : mini_pools_{ + impl::Pool{8, blocks_per_chunk, memory}, + impl::Pool{16, blocks_per_chunk, memory}, + impl::Pool{24, blocks_per_chunk, memory}, + impl::Pool{32, blocks_per_chunk, memory}, + impl::Pool{40, blocks_per_chunk, memory}, + impl::Pool{48, blocks_per_chunk, memory}, + impl::Pool{56, blocks_per_chunk, memory}, + impl::Pool{64, blocks_per_chunk, memory}, + }, + pools_3bit_(blocks_per_chunk, memory, internal_memory), + pools_4bit_(blocks_per_chunk, memory, internal_memory), + pools_5bit_(blocks_per_chunk, memory, internal_memory), + unpooled_memory_{internal_memory} {} + ~PoolResource() override = default; private: - // Big block larger than max_block_size_, doesn't go into a pool. - struct BigBlock { - size_t bytes; - size_t alignment; - void *data; - }; - - // TODO: Potential memory optimization is replacing `std::vector` with our - // custom vector implementation which doesn't store a `MemoryResource *`. - // Currently we have vectors for `pools_` and `unpooled_`, as well as each - // `impl::Pool` stores a `chunks_` vector. 
- - // Pools are sorted by bound_size_, ascending. - impl::AVector<impl::Pool> pools_; - impl::Pool *last_alloc_pool_{nullptr}; - impl::Pool *last_dealloc_pool_{nullptr}; - // Unpooled BigBlocks are sorted by data pointer. - impl::AVector<BigBlock> unpooled_; - size_t max_blocks_per_chunk_; - size_t max_block_size_; - void *DoAllocate(size_t bytes, size_t alignment) override; - void DoDeallocate(void *p, size_t bytes, size_t alignment) override; - - bool DoIsEqual(const MemoryResource &other) const noexcept override { return this == &other; } -}; - -/// Like PoolResource but uses SpinLock for thread safe usage. -class SynchronizedPoolResource final : public MemoryResource { - public: - SynchronizedPoolResource(size_t max_blocks_per_chunk, size_t max_block_size, - MemoryResource *memory = NewDeleteResource()) - : pool_memory_(max_blocks_per_chunk, max_block_size, memory) {} + bool DoIsEqual(MemoryResource const &other) const noexcept override; private: - PoolResource pool_memory_; - SpinLock lock_; - - void *DoAllocate(size_t bytes, size_t alignment) override { - std::lock_guard<SpinLock> guard(lock_); - return pool_memory_.Allocate(bytes, alignment); - } - - void DoDeallocate(void *p, size_t bytes, size_t alignment) override { - std::lock_guard<SpinLock> guard(lock_); - pool_memory_.Deallocate(p, bytes, alignment); - } - - bool DoIsEqual(const MemoryResource &other) const noexcept override { return this == &other; } + std::array<impl::Pool, 8> mini_pools_; + impl::MultiPool<3, 64, 128> pools_3bit_; + impl::MultiPool<4, 128, 512> pools_4bit_; + impl::MultiPool<5, 512, 1024> pools_5bit_; + MemoryResource *unpooled_memory_; }; class MemoryTrackingResource final : public utils::MemoryResource { diff --git a/src/utils/on_scope_exit.hpp b/src/utils/on_scope_exit.hpp index a5398b017..114f1c370 100644 --- a/src/utils/on_scope_exit.hpp +++ b/src/utils/on_scope_exit.hpp @@ -35,7 +35,7 @@ namespace memgraph::utils { * // long block of code, might throw an exception * } */ 
-template <typename Callable> +template <std::invocable Callable> class [[nodiscard]] OnScopeExit { public: template <typename U> @@ -46,7 +46,7 @@ class [[nodiscard]] OnScopeExit { OnScopeExit &operator=(OnScopeExit const &) = delete; OnScopeExit &operator=(OnScopeExit &&) = delete; ~OnScopeExit() { - if (doCall_) function_(); + if (doCall_) std::invoke(std::move(function_)); } void Disable() { doCall_ = false; } @@ -57,5 +57,4 @@ class [[nodiscard]] OnScopeExit { }; template <typename Callable> OnScopeExit(Callable &&) -> OnScopeExit<Callable>; - } // namespace memgraph::utils diff --git a/src/utils/tag.hpp b/src/utils/tag.hpp new file mode 100644 index 000000000..dfd8c8f81 --- /dev/null +++ b/src/utils/tag.hpp @@ -0,0 +1,32 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#pragma once + +namespace memgraph::utils { + +template <typename T> +struct tag_type { + using type = T; +}; + +template <auto V> +struct tag_value { + static constexpr auto value = V; +}; + +template <typename T> +auto tag_t = tag_type<T>{}; + +template <auto V> +auto tag_v = tag_value<V>{}; + +} // namespace memgraph::utils diff --git a/src/utils/typeinfo.hpp b/src/utils/typeinfo.hpp index 77910f731..aeb62d2c1 100644 --- a/src/utils/typeinfo.hpp +++ b/src/utils/typeinfo.hpp @@ -187,6 +187,7 @@ enum class TypeId : uint64_t { AST_PROFILE_QUERY, AST_INDEX_QUERY, AST_EDGE_INDEX_QUERY, + AST_TEXT_INDEX_QUERY, AST_CREATE, AST_CALL_PROCEDURE, AST_MATCH, diff --git a/tests/benchmark/query/execution.cpp b/tests/benchmark/query/execution.cpp index d49b14fc3..1d65cdb93 100644 --- a/tests/benchmark/query/execution.cpp +++ b/tests/benchmark/query/execution.cpp @@ -55,12 +55,12 @@ class NewDeleteResource final { }; class PoolResource final { - memgraph::utils::PoolResource memory_{128, 4 * 1024}; + memgraph::utils::PoolResource memory_{128}; public: memgraph::utils::MemoryResource *get() { return &memory_; } - void Reset() { memory_.Release(); } + void Reset() {} }; static void AddVertices(memgraph::storage::Storage *db, int vertex_count) { diff --git a/tests/benchmark/skip_list_vs_stl.cpp b/tests/benchmark/skip_list_vs_stl.cpp index 1a17e56e1..9a856822f 100644 --- a/tests/benchmark/skip_list_vs_stl.cpp +++ b/tests/benchmark/skip_list_vs_stl.cpp @@ -1,4 +1,4 @@ -// Copyright 2022 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -101,8 +101,7 @@ class StdSetWithPoolAllocatorInsertFixture : public benchmark::Fixture { } protected: - memgraph::utils::PoolResource memory_{256U /* max_blocks_per_chunk */, 1024U /* max_block_size */, - memgraph::utils::NewDeleteResource()}; + memgraph::utils::PoolResource memory_{128U /* max_blocks_per_chunk */, memgraph::utils::NewDeleteResource()}; std::set<uint64_t, std::less<>, memgraph::utils::Allocator<uint64_t>> container{&memory_}; memgraph::utils::SpinLock lock; }; @@ -208,8 +207,7 @@ class StdSetWithPoolAllocatorFindFixture : public benchmark::Fixture { } protected: - memgraph::utils::PoolResource memory_{256U /* max_blocks_per_chunk */, 1024U /* max_block_size */, - memgraph::utils::NewDeleteResource()}; + memgraph::utils::PoolResource memory_{128U /* max_blocks_per_chunk */, memgraph::utils::NewDeleteResource()}; std::set<uint64_t, std::less<>, memgraph::utils::Allocator<uint64_t>> container{&memory_}; memgraph::utils::SpinLock lock; }; @@ -325,8 +323,7 @@ class StdMapWithPoolAllocatorInsertFixture : public benchmark::Fixture { } protected: - memgraph::utils::PoolResource memory_{256U /* max_blocks_per_chunk */, 1024U /* max_block_size */, - memgraph::utils::NewDeleteResource()}; + memgraph::utils::PoolResource memory_{128U /* max_blocks_per_chunk */, memgraph::utils::NewDeleteResource()}; std::map<uint64_t, uint64_t, std::less<>, memgraph::utils::Allocator<std::pair<const uint64_t, uint64_t>>> container{ &memory_}; memgraph::utils::SpinLock lock; @@ -433,8 +430,7 @@ class StdMapWithPoolAllocatorFindFixture : public benchmark::Fixture { } protected: - memgraph::utils::PoolResource memory_{256U /* max_blocks_per_chunk */, 1024U /* max_block_size */, - memgraph::utils::NewDeleteResource()}; + memgraph::utils::PoolResource memory_{128U /* max_blocks_per_chunk */, 
memgraph::utils::NewDeleteResource()}; std::map<uint64_t, uint64_t, std::less<>, memgraph::utils::Allocator<std::pair<const uint64_t, uint64_t>>> container{ &memory_}; memgraph::utils::SpinLock lock; diff --git a/tests/code_analysis/clang_tidy.sh b/tests/code_analysis/clang_tidy.sh new file mode 100755 index 000000000..f8bdfc252 --- /dev/null +++ b/tests/code_analysis/clang_tidy.sh @@ -0,0 +1,23 @@ +#!/bin/bash +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +PROJECT_ROOT="$SCRIPT_DIR/../.." +BASE_BRANCH="origin/master" +THREADS=${THREADS:-$(nproc)} + +if [[ "$#" -gt 0 ]]; then + case "$1" in + --base-branch) + BASE_BRANCH=$2 + ;; + *) + echo "Error: Unknown flag '$1'" + exit 1 + ;; + esac +fi + +cd $PROJECT_ROOT +git diff -U0 $BASE_BRANCH... -- src | ./tools/github/clang-tidy/clang-tidy-diff.py -p 1 -j $THREADS -path build -regex ".+\.cpp" | tee ./build/clang_tidy_output.txt +# Fail if any warning is reported +! cat ./build/clang_tidy_output.txt | ./tools/github/clang-tidy/grep_error_lines.sh > /dev/null +cd $SCRIPT_DIR diff --git a/tests/code_analysis/python_code_analysis.sh b/tests/code_analysis/python_code_analysis.sh new file mode 100755 index 000000000..500a873d1 --- /dev/null +++ b/tests/code_analysis/python_code_analysis.sh @@ -0,0 +1,27 @@ +#!/bin/bash +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +PROJECT_ROOT="$SCRIPT_DIR/../.." +BASE_BRANCH="origin/master" + +if [[ "$#" -gt 0 ]]; then + case "$1" in + --base-branch) + BASE_BRANCH=$2 + ;; + *) + echo "Error: Unknown flag '$1'" + exit 1 + ;; + esac +fi + +cd $PROJECT_ROOT +CHANGED_FILES=$(git diff -U0 $BASE_BRANCH... 
--name-only --diff-filter=d) +for file in ${CHANGED_FILES}; do + echo ${file} + if [[ ${file} == *.py ]]; then + python3 -m black --check --diff ${file} + python3 -m isort --profile black --check-only --diff ${file} + fi +done +cd $SCRIPT_DIR diff --git a/tests/e2e/CMakeLists.txt b/tests/e2e/CMakeLists.txt index 1876074ee..60743676d 100644 --- a/tests/e2e/CMakeLists.txt +++ b/tests/e2e/CMakeLists.txt @@ -77,6 +77,7 @@ add_subdirectory(garbage_collection) add_subdirectory(query_planning) add_subdirectory(awesome_functions) add_subdirectory(high_availability) +add_subdirectory(concurrency) add_subdirectory(replication_experimental) diff --git a/tests/e2e/concurrency/CMakeLists.txt b/tests/e2e/concurrency/CMakeLists.txt new file mode 100644 index 000000000..f981a2537 --- /dev/null +++ b/tests/e2e/concurrency/CMakeLists.txt @@ -0,0 +1,6 @@ +function(copy_concurrency_e2e_python_files FILE_NAME) + copy_e2e_python_files(concurrency ${FILE_NAME}) +endfunction() + +copy_concurrency_e2e_python_files(common.py) +copy_concurrency_e2e_python_files(concurrency.py) diff --git a/tests/e2e/concurrency/common.py b/tests/e2e/concurrency/common.py new file mode 100644 index 000000000..208278929 --- /dev/null +++ b/tests/e2e/concurrency/common.py @@ -0,0 +1,60 @@ +# Copyright 2023 Memgraph Ltd. +# +# Use of this software is governed by the Business Source License +# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +# License, and you may not use this file except in compliance with the Business Source License. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0, included in the file +# licenses/APL.txt. 
+ +import typing + +import mgclient +import pytest + + +def execute_and_fetch_all(cursor: mgclient.Cursor, query: str, params: dict = {}) -> typing.List[tuple]: + cursor.execute(query, params) + return cursor.fetchall() + + +def execute_and_fetch_all_with_commit( + connection: mgclient.Connection, query: str, params: dict = {} +) -> typing.List[tuple]: + cursor = connection.cursor() + cursor.execute(query, params) + results = cursor.fetchall() + connection.commit() + return results + + +@pytest.fixture +def first_connection(**kwargs) -> mgclient.Connection: + connection = mgclient.connect(host="localhost", port=7687, **kwargs) + connection.autocommit = True + cursor = connection.cursor() + execute_and_fetch_all(cursor, "USE DATABASE memgraph") + try: + execute_and_fetch_all(cursor, "DROP DATABASE clean") + except: + pass + execute_and_fetch_all(cursor, "MATCH (n) DETACH DELETE n") + connection.autocommit = False + yield connection + + +@pytest.fixture +def second_connection(**kwargs) -> mgclient.Connection: + connection = mgclient.connect(host="localhost", port=7687, **kwargs) + connection.autocommit = True + cursor = connection.cursor() + execute_and_fetch_all(cursor, "USE DATABASE memgraph") + try: + execute_and_fetch_all(cursor, "DROP DATABASE clean") + except: + pass + execute_and_fetch_all(cursor, "MATCH (n) DETACH DELETE n") + connection.autocommit = False + yield connection diff --git a/tests/e2e/concurrency/concurrency.py b/tests/e2e/concurrency/concurrency.py new file mode 100644 index 000000000..7961c1984 --- /dev/null +++ b/tests/e2e/concurrency/concurrency.py @@ -0,0 +1,57 @@ +# Copyright 2023 Memgraph Ltd. +# +# Use of this software is governed by the Business Source License +# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +# License, and you may not use this file except in compliance with the Business Source License. 
+# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0, included in the file +# licenses/APL.txt. + +import sys + +import pytest +from common import execute_and_fetch_all, first_connection, second_connection + + +def test_concurrency_if_no_delta_on_same_node_property_update(first_connection, second_connection): + m1c = first_connection.cursor() + m2c = second_connection.cursor() + + execute_and_fetch_all(m1c, "CREATE (:Node {prop: 1})") + first_connection.commit() + + test_has_error = False + try: + m1c.execute("MATCH (n) SET n.prop = 1") + m2c.execute("MATCH (n) SET n.prop = 1") + first_connection.commit() + second_connection.commit() + except Exception as e: + test_has_error = True + + assert test_has_error is False + + +def test_concurrency_if_no_delta_on_same_edge_property_update(first_connection, second_connection): + m1c = first_connection.cursor() + m2c = second_connection.cursor() + + execute_and_fetch_all(m1c, "CREATE ()-[:TYPE {prop: 1}]->()") + first_connection.commit() + + test_has_error = False + try: + m1c.execute("MATCH (n)-[r]->(m) SET r.prop = 1") + m2c.execute("MATCH (n)-[r]->(m) SET n.prop = 1") + first_connection.commit() + second_connection.commit() + except Exception as e: + test_has_error = True + + assert test_has_error is False + + +if __name__ == "__main__": + sys.exit(pytest.main([__file__, "-rA"])) diff --git a/tests/e2e/concurrency/workloads.yaml b/tests/e2e/concurrency/workloads.yaml new file mode 100644 index 000000000..839090538 --- /dev/null +++ b/tests/e2e/concurrency/workloads.yaml @@ -0,0 +1,14 @@ +concurrency_cluster: &concurrency_cluster + cluster: + main: + args: ["--bolt-port", "7687", "--log-level=TRACE", "--storage-delta-on-identical-property-update=false"] + log_file: "concurrency.log" + setup_queries: [] + validation_queries: [] + + +workloads: + - name: "Concurrency" + binary: 
"tests/e2e/pytest_runner.sh" + args: ["concurrency/concurrency.py"] + <<: *concurrency_cluster diff --git a/tests/e2e/configuration/default_config.py b/tests/e2e/configuration/default_config.py index 65a850f0b..11435da65 100644 --- a/tests/e2e/configuration/default_config.py +++ b/tests/e2e/configuration/default_config.py @@ -141,6 +141,11 @@ startup_config_dict = { "1", "The time duration between two replica checks/pings. If < 1, replicas will NOT be checked at all. NOTE: The MAIN instance allocates a new thread for each REPLICA.", ), + "storage_delta_on_identical_property_update": ( + "true", + "true", + "Controls whether updating a property with the same value should create a delta object.", + ), "storage_gc_cycle_sec": ("30", "30", "Storage garbage collector interval (in seconds)."), "storage_python_gc_cycle_sec": ("180", "180", "Storage python full garbage collection interval (in seconds)."), "storage_items_per_batch": ( @@ -221,6 +226,6 @@ startup_config_dict = { "experimental_enabled": ( "", "", - "Experimental features to be used, comma seperated. Options [system-replication, high-availability]", + "Experimental features to be used, comma-separated. 
Options [system-replication, text-search, high-availability]", ), } diff --git a/tests/e2e/memory/workloads.yaml b/tests/e2e/memory/workloads.yaml index bf29e484c..c043e03d8 100644 --- a/tests/e2e/memory/workloads.yaml +++ b/tests/e2e/memory/workloads.yaml @@ -52,26 +52,26 @@ in_memory_query_limit_cluster: &in_memory_query_limit_cluster setup_queries: [] validation_queries: [] -args_450_MiB_limit: &args_450_MiB_limit +args_350_MiB_limit: &args_350_MiB_limit - "--bolt-port" - *bolt_port - - "--memory-limit=450" + - "--memory-limit=350" - "--storage-gc-cycle-sec=180" - "--log-level=INFO" -in_memory_450_MiB_limit_cluster: &in_memory_450_MiB_limit_cluster +in_memory_350_MiB_limit_cluster: &in_memory_350_MiB_limit_cluster cluster: main: - args: *args_450_MiB_limit + args: *args_350_MiB_limit log_file: "memory-e2e.log" setup_queries: [] validation_queries: [] -disk_450_MiB_limit_cluster: &disk_450_MiB_limit_cluster +disk_350_MiB_limit_cluster: &disk_350_MiB_limit_cluster cluster: main: - args: *args_450_MiB_limit + args: *args_350_MiB_limit log_file: "memory-e2e.log" setup_queries: [] validation_queries: [] @@ -192,22 +192,22 @@ workloads: - name: "Memory control for accumulation" binary: "tests/e2e/memory/memgraph__e2e__memory__limit_accumulation" args: ["--bolt-port", *bolt_port] - <<: *in_memory_450_MiB_limit_cluster + <<: *in_memory_350_MiB_limit_cluster - name: "Memory control for accumulation on disk storage" binary: "tests/e2e/memory/memgraph__e2e__memory__limit_accumulation" args: ["--bolt-port", *bolt_port] - <<: *disk_450_MiB_limit_cluster + <<: *disk_350_MiB_limit_cluster - name: "Memory control for edge create" binary: "tests/e2e/memory/memgraph__e2e__memory__limit_edge_create" args: ["--bolt-port", *bolt_port] - <<: *in_memory_450_MiB_limit_cluster + <<: *in_memory_350_MiB_limit_cluster - name: "Memory control for edge create on disk storage" binary: "tests/e2e/memory/memgraph__e2e__memory__limit_edge_create" args: ["--bolt-port", *bolt_port] - <<: 
*disk_450_MiB_limit_cluster + <<: *disk_350_MiB_limit_cluster - name: "Memory control for memory limit global thread alloc" binary: "tests/e2e/memory/memgraph__e2e__memory_limit_global_thread_alloc_proc" diff --git a/tests/e2e/module_file_manager/module_file_manager.cpp b/tests/e2e/module_file_manager/module_file_manager.cpp index 20641b9d7..73508d81b 100644 --- a/tests/e2e/module_file_manager/module_file_manager.cpp +++ b/tests/e2e/module_file_manager/module_file_manager.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -126,7 +126,7 @@ std::filesystem::path CreateModuleFile(auto &client, const std::string_view file } std::string GetModuleFileQuery(const std::filesystem::path &path) { - return fmt::format("CALL mg.get_module_file({}) YIELD content", path); + return fmt::format("CALL mg.get_module_file('{}') YIELD content", path); } std::string GetModuleFile(auto &client, const std::filesystem::path &path) { @@ -141,7 +141,7 @@ std::string GetModuleFile(auto &client, const std::filesystem::path &path) { } std::string UpdateModuleFileQuery(const std::filesystem::path &path, const std::string_view content) { - return fmt::format("CALL mg.update_module_file({}, '{}')", path, content); + return fmt::format("CALL mg.update_module_file('{}', '{}')", path, content); } void UpdateModuleFile(auto &client, const std::filesystem::path &path, const std::string_view content) { @@ -151,7 +151,7 @@ void UpdateModuleFile(auto &client, const std::filesystem::path &path, const std } std::string DeleteModuleFileQuery(const std::filesystem::path &path) { - return fmt::format("CALL mg.delete_module_file({})", path); + return fmt::format("CALL mg.delete_module_file('{}')", path); } void DeleteModuleFile(auto &client, const std::filesystem::path &path) { diff 
--git a/tests/e2e/run.sh b/tests/e2e/run.sh index 88b70ae32..9ee4babe9 100755 --- a/tests/e2e/run.sh +++ b/tests/e2e/run.sh @@ -25,7 +25,7 @@ if [ "$#" -eq 0 ]; then # NOTE: If you want to run all tests under specific folder/section just # replace the dot (root directory below) with the folder name, e.g. # `--workloads-root-directory replication`. - python3 runner.py --workloads-root-directory "$SCRIPT_DIR/../../build" + python3 runner.py --workloads-root-directory "$SCRIPT_DIR/../../build/tests/e2e" elif [ "$#" -eq 1 ]; then if [ "$1" == "-h" ] || [ "$1" == "--help" ]; then print_help @@ -34,7 +34,7 @@ elif [ "$#" -eq 1 ]; then # NOTE: --workload-name comes from each individual folder/section # workloads.yaml file. E.g. `streams/workloads.yaml` has a list of # `workloads:` and each workload has it's `-name`. - python3 runner.py --workloads-root-directory "$SCRIPT_DIR/../../build" --workload-name "$1" + python3 runner.py --workloads-root-directory "$SCRIPT_DIR/../../build/tests/e2e" --workload-name "$1" else print_help fi diff --git a/tests/e2e/runner.py b/tests/e2e/runner.py index ae022d4d8..282629b20 100755 --- a/tests/e2e/runner.py +++ b/tests/e2e/runner.py @@ -40,6 +40,9 @@ def load_args(): def load_workloads(root_directory): workloads = [] for file in Path(root_directory).rglob("*.yaml"): + # 8.03.2024. - Skip streams e2e tests + if str(file).endswith("/streams/workloads.yaml"): + continue with open(file, "r") as f: workloads.extend(yaml.load(f, Loader=yaml.FullLoader)["workloads"]) return workloads diff --git a/tests/e2e/streams/conftest.py b/tests/e2e/streams/conftest.py index 1bf3544c2..2dadfcb28 100644 --- a/tests/e2e/streams/conftest.py +++ b/tests/e2e/streams/conftest.py @@ -9,6 +9,7 @@ # by the Apache License, Version 2.0, included in the file # licenses/APL.txt. 
+# import os import pulsar import pytest from common import NAME, PULSAR_SERVICE_URL, connect, execute_and_fetch_all @@ -20,6 +21,9 @@ import requests # To run these test locally a running Kafka sever is necessery. The test tries # to connect on localhost:9092. +# KAFKA_HOSTNAME=os.getenv("KAFKA_HOSTNAME", "localhost") +# PULSAR_HOSTNAME=os.getenv("PULSAR_HOSTNAME", "localhost") +# PULSAR_PORT="6652" if PULSAR_HOSTNAME == "localhost" else "8080" @pytest.fixture(autouse=True) def connection(): diff --git a/tests/e2e/streams/kafka_streams_tests.py b/tests/e2e/streams/kafka_streams_tests.py index b988a6c26..17decdc0f 100755 --- a/tests/e2e/streams/kafka_streams_tests.py +++ b/tests/e2e/streams/kafka_streams_tests.py @@ -11,6 +11,7 @@ # by the Apache License, Version 2.0, included in the file # licenses/APL.txt. +# import os import sys import time from multiprocessing import Process @@ -23,7 +24,7 @@ from mg_utils import mg_sleep_and_assert TRANSFORMATIONS_TO_CHECK_C = ["c_transformations.empty_transformation"] TRANSFORMATIONS_TO_CHECK_PY = ["kafka_transform.simple", "kafka_transform.with_parameters"] KAFKA_PRODUCER_SENDING_MSG_DEFAULT_TIMEOUT = 60 - +# KAFKA_HOSTNAME=os.getenv("KAFKA_HOSTNAME", "localhost") @pytest.mark.parametrize("transformation", TRANSFORMATIONS_TO_CHECK_PY) def test_simple(kafka_producer, kafka_topics, connection, transformation): @@ -162,7 +163,7 @@ def test_show_streams(kafka_topics, connection): complex_values_stream = "complex_values" common.create_stream( - cursor, default_values_stream, kafka_topics[0], "kafka_transform.simple", bootstrap_servers="'localhost:29092'" + cursor, default_values_stream, kafka_topics[0], "kafka_transform.simple", bootstrap_servers="'localhost:29092'" ) common.create_stream( cursor, diff --git a/tests/e2e/streams/pulsar_streams_tests.py b/tests/e2e/streams/pulsar_streams_tests.py index cf52416cb..49aa773e6 100755 --- a/tests/e2e/streams/pulsar_streams_tests.py +++ b/tests/e2e/streams/pulsar_streams_tests.py @@ 
-11,6 +11,7 @@ # by the Apache License, Version 2.0, included in the file # licenses/APL.txt. +# import os import sys import time from multiprocessing import Process, Value @@ -20,6 +21,7 @@ import mgclient import pytest TRANSFORMATIONS_TO_CHECK = ["pulsar_transform.simple", "pulsar_transform.with_parameters"] +# PULSAR_HOSTNAME=os.getenv("PULSAR_HOSTNAME", "127.0.0.1") def check_vertex_exists_with_topic_and_payload(cursor, topic, payload_byte): diff --git a/tests/e2e/text_search/CMakeLists.txt b/tests/e2e/text_search/CMakeLists.txt new file mode 100644 index 000000000..db2af7a11 --- /dev/null +++ b/tests/e2e/text_search/CMakeLists.txt @@ -0,0 +1,6 @@ +function(copy_text_search_e2e_python_files FILE_NAME) + copy_e2e_python_files(text_search ${FILE_NAME}) +endfunction() + +copy_text_search_e2e_python_files(common.py) +copy_text_search_e2e_python_files(test_text_search.py) diff --git a/tests/e2e/text_search/common.py b/tests/e2e/text_search/common.py new file mode 100644 index 000000000..0f28351d3 --- /dev/null +++ b/tests/e2e/text_search/common.py @@ -0,0 +1,87 @@ +# Copyright 2023 Memgraph Ltd. +# +# Use of this software is governed by the Business Source License +# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +# License, and you may not use this file except in compliance with the Business Source License. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0, included in the file +# licenses/APL.txt. 
+ +import typing + +import mgclient +import pytest +from gqlalchemy import Memgraph + + +def execute_and_fetch_all(cursor: mgclient.Cursor, query: str, params: dict = {}) -> typing.List[tuple]: + cursor.execute(query, params) + return cursor.fetchall() + + +@pytest.fixture +def connect(**kwargs) -> mgclient.Connection: + connection = mgclient.connect(host="localhost", port=7687, **kwargs) + connection.autocommit = True + cursor = connection.cursor() + execute_and_fetch_all(cursor, """USE DATABASE memgraph""") + try: + execute_and_fetch_all(cursor, """DROP DATABASE clean""") + except: + pass + execute_and_fetch_all(cursor, """MATCH (n) DETACH DELETE n""") + yield connection + + +@pytest.fixture +def memgraph(**kwargs) -> Memgraph: + memgraph = Memgraph() + + yield memgraph + + memgraph.drop_database() + memgraph.drop_indexes() + + +@pytest.fixture +def memgraph_with_text_indexed_data(**kwargs) -> Memgraph: + memgraph = Memgraph() + + memgraph.execute( + """CREATE (:Document {title: "Rules2024", version: 1, fulltext: "random works", date: date("2023-11-14")});""" + ) + memgraph.execute( + """CREATE (:Document {title: "Rules2023", version: 9, fulltext: "text Rules2024", date: date("2023-11-14")});""" + ) + memgraph.execute( + """CREATE (:Document:Revision {title: "Rules2024", version: 2, fulltext: "random words", date: date("2023-12-15")});""" + ) + memgraph.execute("""CREATE (:Revision {title: "OperationSchema", version: 3, date: date("2023-10-01")});""") + memgraph.execute("""CREATE TEXT INDEX complianceDocuments ON :Document;""") + + yield memgraph + + memgraph.execute("""DROP TEXT INDEX complianceDocuments;""") + memgraph.drop_database() + memgraph.drop_indexes() + + +@pytest.fixture +def memgraph_with_mixed_data(**kwargs) -> Memgraph: + memgraph = Memgraph() + + memgraph.execute( + """CREATE (:Document:Revision {title: "Rules2024", version: 1, date: date("2023-11-14"), contents: "Lorem ipsum dolor sit amet"});""" + ) + memgraph.execute( + """CREATE (:Revision 
{title: "Rules2024", version: 2, date: date("2023-12-15"), contents: "consectetur adipiscing elit"});""" + ) + memgraph.execute("""CREATE TEXT INDEX complianceDocuments ON :Document;""") + + yield memgraph + + memgraph.execute("""DROP TEXT INDEX complianceDocuments;""") + memgraph.drop_database() + memgraph.drop_indexes() diff --git a/tests/e2e/text_search/test_text_search.py b/tests/e2e/text_search/test_text_search.py new file mode 100644 index 000000000..8d538d464 --- /dev/null +++ b/tests/e2e/text_search/test_text_search.py @@ -0,0 +1,206 @@ +# Copyright 2024 Memgraph Ltd. +# +# Use of this software is governed by the Business Source License +# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +# License, and you may not use this file except in compliance with the Business Source License. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0, included in the file +# licenses/APL.txt. 
+ +import json +import re +import sys + +import gqlalchemy +import mgclient +import pytest +from common import memgraph, memgraph_with_mixed_data, memgraph_with_text_indexed_data + +GET_RULES_2024_DOCUMENT = """CALL libtext.search("complianceDocuments", "data.title:Rules2024") YIELD node + RETURN node.title AS title, node.version AS version + ORDER BY version ASC, title ASC;""" + + +def test_create_index(memgraph): + memgraph.execute("""CREATE TEXT INDEX exampleIndex ON :Document;""") + + index_info = memgraph.execute_and_fetch("""SHOW INDEX INFO""") + + assert list(index_info) == [ + {"index type": "text (name: exampleIndex)", "label": "Document", "property": None, "count": None} + ] + + +def test_drop_index(memgraph): + memgraph.execute("""DROP TEXT INDEX exampleIndex;""") + + index_info = memgraph.execute_and_fetch("""SHOW INDEX INFO""") + + assert list(index_info) == [] + + +def test_create_existing_index(memgraph): + memgraph.execute("""CREATE TEXT INDEX duplicatedIndex ON :Document;""") + with pytest.raises( + gqlalchemy.exceptions.GQLAlchemyDatabaseError, match='Text index "duplicatedIndex" already exists.' + ) as _: + memgraph.execute("""CREATE TEXT INDEX duplicatedIndex ON :Document;""") + memgraph.execute("""DROP TEXT INDEX duplicatedIndex;""") # cleanup + + +def test_drop_nonexistent_index(memgraph): + with pytest.raises( + gqlalchemy.exceptions.GQLAlchemyDatabaseError, match='Text index "noSuchIndex" doesn’t exist.' 
+ ) as _: + memgraph.execute("""DROP TEXT INDEX noSuchIndex;""") + + +def test_text_search_given_property(memgraph_with_text_indexed_data): + result = list(memgraph_with_text_indexed_data.execute_and_fetch(GET_RULES_2024_DOCUMENT)) + + assert len(result) == 2 and result == [{"title": "Rules2024", "version": 1}, {"title": "Rules2024", "version": 2}] + + +def test_text_search_all_properties(memgraph_with_text_indexed_data): + SEARCH_QUERY = "Rules2024" + + ALL_PROPERTIES_QUERY = f"""CALL libtext.search_all("complianceDocuments", "{SEARCH_QUERY}") YIELD node + RETURN node + ORDER BY node.version ASC, node.title ASC;""" + + result = list(memgraph_with_text_indexed_data.execute_and_fetch(ALL_PROPERTIES_QUERY)) + result_nodes = [record["node"] for record in result] + + assert len(result) == 3 and ( + result_nodes[0].title == SEARCH_QUERY + and result_nodes[1].title == SEARCH_QUERY + and SEARCH_QUERY in result_nodes[2].fulltext + ) + + +def test_regex_text_search(memgraph_with_text_indexed_data): + REGEX_QUERY = """CALL libtext.regex_search("complianceDocuments", "wor.*s") YIELD node + RETURN node + ORDER BY node.version ASC, node.title ASC;""" + + result = list(memgraph_with_text_indexed_data.execute_and_fetch(REGEX_QUERY)) + + assert ( + len(result) == 2 + and re.search("wor.*s", result[0]["node"].fulltext) + and re.search("wor.*s", result[1]["node"].fulltext) + # In this test, all values matching the regex string are found in the .node property only ^ + ) + + +def test_text_search_aggregate(memgraph_with_text_indexed_data): + input_aggregation = json.dumps({"count": {"value_count": {"field": "metadata.gid"}}}, separators=(",", ":")) + expected_aggregation = json.dumps({"count": {"value": 2.0}}, separators=(",", ":")) + + AGGREGATION_QUERY = f"""CALL libtext.aggregate("complianceDocuments", "data.title:Rules2024", '{input_aggregation}') + YIELD aggregation + RETURN aggregation;""" + + result = list(memgraph_with_text_indexed_data.execute_and_fetch(AGGREGATION_QUERY)) + 
+ assert len(result) == 1 and result[0]["aggregation"] == expected_aggregation + + +def test_text_search_query_boolean(memgraph_with_text_indexed_data): + BOOLEAN_QUERY = """CALL libtext.search("complianceDocuments", "(data.title:Rules2023 OR data.title:Rules2024) AND data.fulltext:words") YIELD node + RETURN node.title AS title, node.version AS version + ORDER BY version ASC, title ASC;""" + + result = list(memgraph_with_text_indexed_data.execute_and_fetch(BOOLEAN_QUERY)) + + assert len(result) == 1 and result == [{"title": "Rules2024", "version": 2}] + + +def test_create_indexed_node(memgraph_with_text_indexed_data): + memgraph_with_text_indexed_data.execute("""CREATE (:Document {title: "Rules2024", version: 3});""") + + result = list(memgraph_with_text_indexed_data.execute_and_fetch(GET_RULES_2024_DOCUMENT)) + + assert len(result) == 3 and result == [ + {"title": "Rules2024", "version": 1}, + {"title": "Rules2024", "version": 2}, + {"title": "Rules2024", "version": 3}, + ] + + +def test_delete_indexed_node(memgraph_with_text_indexed_data): + memgraph_with_text_indexed_data.execute("""MATCH (n:Document {title: "Rules2024", version: 2}) DETACH DELETE n;""") + + result = list(memgraph_with_text_indexed_data.execute_and_fetch(GET_RULES_2024_DOCUMENT)) + + assert len(result) == 1 and result == [{"title": "Rules2024", "version": 1}] + + +def test_add_indexed_label(memgraph_with_mixed_data): + memgraph_with_mixed_data.execute("""MATCH (n:Revision {version:2}) SET n:Document;""") + + result = list(memgraph_with_mixed_data.execute_and_fetch(GET_RULES_2024_DOCUMENT)) + + assert len(result) == 2 and result == [{"title": "Rules2024", "version": 1}, {"title": "Rules2024", "version": 2}] + + +def test_remove_indexed_label(memgraph_with_mixed_data): + memgraph_with_mixed_data.execute("""MATCH (n:Document {version: 1}) REMOVE n:Document;""") + + result = list(memgraph_with_mixed_data.execute_and_fetch(GET_RULES_2024_DOCUMENT)) + + assert len(result) == 0 + + +def 
test_update_text_property_of_indexed_node(memgraph_with_text_indexed_data): + memgraph_with_text_indexed_data.execute("""MATCH (n:Document {version:1}) SET n.title = "Rules2030";""") + + result = list( + memgraph_with_text_indexed_data.execute_and_fetch( + """CALL libtext.search("complianceDocuments", "data.title:Rules2030") YIELD node + RETURN node.title AS title, node.version AS version + ORDER BY version ASC, title ASC;""" + ) + ) + + assert len(result) == 1 and result == [{"title": "Rules2030", "version": 1}] + + +def test_add_unindexable_property_to_indexed_node(memgraph_with_text_indexed_data): + try: + memgraph_with_text_indexed_data.execute("""MATCH (n:Document {version:1}) SET n.randomList = [2, 3, 4, 5];""") + except Exception: + assert False + + +def test_remove_indexable_property_from_indexed_node(memgraph_with_text_indexed_data): + try: + memgraph_with_text_indexed_data.execute( + """MATCH (n:Document {version:1}) REMOVE n.title, n.version, n.fulltext, n.date;""" + ) + except Exception: + assert False + + +def test_remove_unindexable_property_from_indexed_node(memgraph_with_text_indexed_data): + try: + memgraph_with_text_indexed_data.execute_and_fetch( + """MATCH (n:Document {date: date("2023-12-15")}) REMOVE n.date;""" + ) + except Exception: + assert False + + +def test_text_search_nonexistent_index(memgraph_with_text_indexed_data): + NONEXISTENT_INDEX_QUERY = """CALL libtext.search("noSuchIndex", "data.fulltext:words") YIELD node + RETURN node.title AS title, node.version AS version + ORDER BY version ASC, title ASC;""" + + with pytest.raises(mgclient.DatabaseError, match='Text index "noSuchIndex" doesn’t exist.') as _: + list(memgraph_with_text_indexed_data.execute_and_fetch(NONEXISTENT_INDEX_QUERY)) + + +if __name__ == "__main__": + sys.exit(pytest.main([__file__, "-rA"])) diff --git a/tests/e2e/text_search/test_text_search_disabled.py b/tests/e2e/text_search/test_text_search_disabled.py new file mode 100644 index 000000000..064f7b409 --- 
/dev/null +++ b/tests/e2e/text_search/test_text_search_disabled.py @@ -0,0 +1,69 @@ +# Copyright 2024 Memgraph Ltd. +# +# Use of this software is governed by the Business Source License +# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +# License, and you may not use this file except in compliance with the Business Source License. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0, included in the file +# licenses/APL.txt. + +import json +import sys + +import gqlalchemy +import pytest +from common import memgraph + +TEXT_SEARCH_DISABLED_ERROR = ( + "To use text indices and text search, start Memgraph with the experimental text search feature enabled." +) + + +def test_create_index(memgraph): + with pytest.raises(gqlalchemy.exceptions.GQLAlchemyDatabaseError, match=TEXT_SEARCH_DISABLED_ERROR) as _: + memgraph.execute("""CREATE TEXT INDEX exampleIndex ON :Document;""") + + +def test_drop_index(memgraph): + with pytest.raises(gqlalchemy.exceptions.GQLAlchemyDatabaseError, match=TEXT_SEARCH_DISABLED_ERROR) as _: + memgraph.execute("""DROP TEXT INDEX exampleIndex;""") + + +def test_text_search_given_property(memgraph): + with pytest.raises(gqlalchemy.exceptions.GQLAlchemyDatabaseError, match=TEXT_SEARCH_DISABLED_ERROR) as _: + memgraph.execute( + """CALL libtext.search("complianceDocuments", "data.title:Rules2024") YIELD node + RETURN node;""" + ) + + +def test_text_search_all_properties(memgraph): + with pytest.raises(gqlalchemy.exceptions.GQLAlchemyDatabaseError, match=TEXT_SEARCH_DISABLED_ERROR) as _: + memgraph.execute( + """CALL libtext.search_all("complianceDocuments", "Rules2024") YIELD node + RETURN node;""" + ) + + +def test_regex_text_search(memgraph): + with pytest.raises(gqlalchemy.exceptions.GQLAlchemyDatabaseError, match=TEXT_SEARCH_DISABLED_ERROR) as _: + 
memgraph.execute( + """CALL libtext.regex_search("complianceDocuments", "wor.*s") YIELD node + RETURN node;""" + ) + + +def test_text_search_aggregate(memgraph): + with pytest.raises(gqlalchemy.exceptions.GQLAlchemyDatabaseError, match=TEXT_SEARCH_DISABLED_ERROR) as _: + input_aggregation = json.dumps({"count": {"value_count": {"field": "metadata.gid"}}}, separators=(",", ":")) + + memgraph.execute( + f"""CALL libtext.aggregate("complianceDocuments", "wor.*s", '{input_aggregation}') YIELD aggregation + RETURN aggregation;""" + ) + + +if __name__ == "__main__": + sys.exit(pytest.main([__file__, "-rA"])) diff --git a/tests/e2e/text_search/workloads.yaml b/tests/e2e/text_search/workloads.yaml new file mode 100644 index 000000000..5b1640715 --- /dev/null +++ b/tests/e2e/text_search/workloads.yaml @@ -0,0 +1,33 @@ +text_search_cluster: &text_search_cluster + cluster: + main: + args: + [ + "--bolt-port", + "7687", + "--log-level=TRACE", + "--experimental-enabled=text-search", + ] + log_file: "text_search.log" + setup_queries: [] + validation_queries: [] + +text_search_disabled_cluster: &text_search_disabled_cluster + cluster: + main: + args: ["--bolt-port", "7687", "--log-level=TRACE"] + log_file: "text_search.log" + setup_queries: [] + validation_queries: [] + +workloads: + - name: "Test behavior of text search in Memgraph" + binary: "tests/e2e/pytest_runner.sh" + proc: "tests/e2e/text_search/query_modules/" + args: ["text_search/test_text_search.py"] + <<: *text_search_cluster + - name: "Test behavior of text search in Memgraph when disabled" + binary: "tests/e2e/pytest_runner.sh" + proc: "tests/e2e/text_search/query_modules/" + args: ["text_search/test_text_search_disabled.py"] + <<: *text_search_disabled_cluster diff --git a/tests/jepsen/jepsen_0.3.0.patch b/tests/jepsen/jepsen_0.3.0.patch deleted file mode 100644 index be47cc8b4..000000000 --- a/tests/jepsen/jepsen_0.3.0.patch +++ /dev/null @@ -1,13 +0,0 @@ -diff --git a/docker/control/Dockerfile 
b/docker/control/Dockerfile -index 6b2d3c0e..195a7a60 100644 ---- a/docker/control/Dockerfile -+++ b/docker/control/Dockerfile -@@ -7,7 +7,7 @@ ENV LEIN_ROOT true - # Jepsen dependencies - # - RUN apt-get -y -q update && \ -- apt-get install -qy openjdk-17-jdk-headless \ -+ apt-get install -qy ca-certificates-java openjdk-17-jdk-headless \ - libjna-java \ - vim \ - emacs \ diff --git a/tests/jepsen/project.clj b/tests/jepsen/project.clj index 506bcff52..445e57b41 100644 --- a/tests/jepsen/project.clj +++ b/tests/jepsen/project.clj @@ -5,10 +5,10 @@ :url "https://github.com/memgraph/memgraph/blob/master/release/LICENSE_ENTERPRISE.md"} :main jepsen.memgraph.core :dependencies [[org.clojure/clojure "1.10.0"] - ;; 0.2.4-SNAPSHOT but 0.3.0, for more -> https://clojars.org/jepsen/versions - [jepsen "0.2.4-SNAPSHOT"] + ;; Details under https://clojars.org/jepsen/versions. + [jepsen "0.3.5-SNAPSHOT"] [gorillalabs/neo4j-clj "4.1.0"]] :profiles {:test {:dependencies [#_[org.neo4j.test/neo4j-harness "4.1.0"]]}} - ;; required to run 0.3.0 - ; :aot :all + ;; The below line is required to run after Jepsen 0.3.0. + :aot :all :repl-options {:init-ns jepsen.memgraph.core}) diff --git a/tests/jepsen/run.sh b/tests/jepsen/run.sh index a1587c8a1..040491c3f 100755 --- a/tests/jepsen/run.sh +++ b/tests/jepsen/run.sh @@ -2,12 +2,10 @@ set -Eeuo pipefail script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -MEMGRAPH_BINARY_PATH="../../build/memgraph" +MEMGRAPH_BUILD_PATH="$script_dir/../../build" +MEMGRAPH_BINARY_PATH="$MEMGRAPH_BUILD_PATH/memgraph" # NOTE: Jepsen Git tags are not consistent, there are: 0.2.4, v0.3.0, 0.3.2, ... -# NOTE: On Ubuntu 22.04 v0.3.2 uses non-existing docker compose --compatibility flag. -# NOTE: On Ubuntu 22.04 v0.3.0 and v0.3.1 seems to be runnable. 
-# TODO(gitbuda): Make sure Memgraph can be testes with Jepsen >= 0.3.0 -JEPSEN_VERSION="${JEPSEN_VERSION:-0.2.4}" +JEPSEN_VERSION="${JEPSEN_VERSION:-v0.3.5}" JEPSEN_ACTIVE_NODES_NO=5 CONTROL_LEIN_RUN_ARGS="test-all --node-configs resources/node-config.edn" CONTROL_LEIN_RUN_STDOUT_LOGS=1 @@ -24,7 +22,7 @@ PRINT_CONTEXT() { HELP_EXIT() { echo "" - echo "HELP: $0 help|cluster-up|cluster-refresh|cluster-cleanup|cluster-dealloc|mgbuild|test|test-all-individually [args]" + echo "HELP: $0 help|cluster-up|cluster-refresh|cluster-nodes-cleanup|cluster-dealloc|mgbuild|test|test-all-individually [args]" echo "" echo " test args --binary MEMGRAPH_BINARY_PATH" echo " --ignore-run-stdout-logs Ignore lein run stdout logs." @@ -43,24 +41,18 @@ INFO() { /bin/echo -e "\e[104m\e[97m[INFO]\e[49m\e[39m" "$@" } +if [[ "$#" -lt 1 || "$1" == "-h" || "$1" == "--help" ]]; then + HELP_EXIT +fi + if ! command -v docker > /dev/null 2>&1 || ! command -v docker-compose > /dev/null 2>&1; then ERROR "docker and docker-compose have to be installed." exit 1 fi if [ ! -d "$script_dir/jepsen" ]; then + # TODO(deda): install apt get docker-compose-plugin on all build machines. git clone https://github.com/jepsen-io/jepsen.git -b "$JEPSEN_VERSION" "$script_dir/jepsen" - if [ "$JEPSEN_VERSION" == "v0.3.0" ]; then - if [ -f "$script_dir/jepsen_0.3.0.patch" ]; then - cd "$script_dir/jepsen" - git apply "$script_dir/jepsen_0.3.0.patch" - cd "$script_dir" - fi - fi -fi - -if [ "$#" -lt 1 ]; then - HELP_EXIT fi PROCESS_ARGS() { @@ -199,7 +191,7 @@ CLUSTER_UP() { } CLUSTER_DEALLOC() { - ps=$(docker ps --filter name=jepsen* --filter status=running -q) + ps=$(docker ps -a --filter name=jepsen* -q) if [[ ! -z ${ps} ]]; then echo "Killing ${ps}" docker rm -f ${ps} @@ -213,6 +205,7 @@ CLUSTER_DEALLOC() { else echo "No Jepsen containers detected!" fi + echo "Cluster dealloc DONE" } # Initialize testing context by copying source/binary files. 
Inside CI, @@ -239,7 +232,7 @@ case $1 in CLUSTER_DEALLOC ;; - cluster-cleanup) + cluster-nodes-cleanup) jepsen_control_exec="docker exec jepsen-control bash -c" INFO "Deleting /jepsen/memgraph/store/* on jepsen-control" $jepsen_control_exec "rm -rf /jepsen/memgraph/store/*" @@ -252,9 +245,13 @@ case $1 in ;; mgbuild) + PROCESS_ARGS "$@" PRINT_CONTEXT + # docker cp -L mgbuild_debian-12:/memgraph/build/memgraph "${MEMGRAPH_BUILD_PATH}/" + # NOTE: mgconsole is interesting inside jepsen container to inspect Memgraph state. + # docker cp -L mgbuild_debian-12:/usr/local/bin/mgconsole "${MEMGRAPH_BUILD_PATH}/" echo "" - echo "TODO(gitbuda): Build memgraph for Debian 10 via memgraph/memgraph-builder" + echo "TODO(gitbuda): Build memgraph for Jepsen (on v0.3.5 for Debian 12) via memgraph/memgraph-builder" exit 1 ;; diff --git a/tests/jepsen/src/jepsen/memgraph/support.clj b/tests/jepsen/src/jepsen/memgraph/support.clj index b7846f8ce..aec2aadb1 100644 --- a/tests/jepsen/src/jepsen/memgraph/support.clj +++ b/tests/jepsen/src/jepsen/memgraph/support.clj @@ -25,6 +25,7 @@ :--storage-recover-on-startup :--storage-wal-enabled :--storage-snapshot-interval-sec 300 + :--replication-restore-state-on-startup :--storage-properties-on-edges)) (defn stop-node! 
diff --git a/tests/mgbench/runners.py b/tests/mgbench/runners.py index e1f52b696..155ceac06 100644 --- a/tests/mgbench/runners.py +++ b/tests/mgbench/runners.py @@ -416,6 +416,7 @@ class Memgraph(BaseRunner): def __init__(self, benchmark_context: BenchmarkContext): super().__init__(benchmark_context=benchmark_context) self._memgraph_binary = benchmark_context.vendor_binary + self._bolt_num_workers = benchmark_context.num_workers_for_benchmark self._performance_tracking = benchmark_context.performance_tracking self._directory = tempfile.TemporaryDirectory(dir=benchmark_context.temporary_directory) self._vendor_args = benchmark_context.vendor_args @@ -440,6 +441,7 @@ class Memgraph(BaseRunner): kwargs["bolt_port"] = self._bolt_port kwargs["data_directory"] = data_directory kwargs["storage_properties_on_edges"] = True + kwargs["bolt_num_workers"] = self._bolt_num_workers for key, value in self._vendor_args.items(): kwargs[key] = value return _convert_args_to_flags(self._memgraph_binary, **kwargs) diff --git a/tests/setup.sh b/tests/setup.sh index 7cab86db6..b91eea7c3 100755 --- a/tests/setup.sh +++ b/tests/setup.sh @@ -37,6 +37,7 @@ pip --timeout 1000 install "pulsar-client==3.1.0" for pkg in "${PIP_DEPS[@]}"; do pip --timeout 1000 install "$pkg" done +pip --timeout 1000 install "networkx==2.4" # Install mgclient from source becasue of full flexibility. 
pushd "$DIR/../libs/pymgclient" > /dev/null diff --git a/tests/unit/database_get_info.cpp b/tests/unit/database_get_info.cpp index be6885b37..2547378c6 100644 --- a/tests/unit/database_get_info.cpp +++ b/tests/unit/database_get_info.cpp @@ -197,8 +197,8 @@ TYPED_TEST(InfoTest, InfoCheck) { ASSERT_EQ(info.storage_info.vertex_count, 5); ASSERT_EQ(info.storage_info.edge_count, 2); ASSERT_EQ(info.storage_info.average_degree, 0.8); - ASSERT_GT(info.storage_info.memory_res, 10'000'000); // 250MB < > 10MB - ASSERT_LT(info.storage_info.memory_res, 250'000'000); + ASSERT_GT(info.storage_info.memory_res, 10'000'000); // 300MB < > 10MB + ASSERT_LT(info.storage_info.memory_res, 300'000'000); ASSERT_GT(info.storage_info.disk_usage, 100); // 1MB < > 100B ASSERT_LT(info.storage_info.disk_usage, 1000'000); ASSERT_EQ(info.storage_info.label_indices, 1); diff --git a/tests/unit/property_value_v2.cpp b/tests/unit/property_value_v2.cpp index aba322ce7..28937598e 100644 --- a/tests/unit/property_value_v2.cpp +++ b/tests/unit/property_value_v2.cpp @@ -1,4 +1,4 @@ -// Copyright 2022 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -570,7 +570,6 @@ TEST(PropertyValue, MoveConstructor) { for (auto &item : data) { memgraph::storage::PropertyValue copy(item); memgraph::storage::PropertyValue pv(std::move(item)); - ASSERT_EQ(item.type(), memgraph::storage::PropertyValue::Type::Null); ASSERT_EQ(pv.type(), copy.type()); switch (copy.type()) { case memgraph::storage::PropertyValue::Type::Null: @@ -668,7 +667,6 @@ TEST(PropertyValue, MoveAssignment) { memgraph::storage::PropertyValue copy(item); memgraph::storage::PropertyValue pv(123); pv = std::move(item); - ASSERT_EQ(item.type(), memgraph::storage::PropertyValue::Type::Null); ASSERT_EQ(pv.type(), copy.type()); switch (copy.type()) { case memgraph::storage::PropertyValue::Type::Null: diff --git a/tests/unit/query_dump.cpp b/tests/unit/query_dump.cpp index a2ca2864d..2dd1e7ac7 100644 --- a/tests/unit/query_dump.cpp +++ b/tests/unit/query_dump.cpp @@ -71,6 +71,11 @@ struct DatabaseState { std::string property; }; + struct TextItem { + std::string index_name; + std::string label; + }; + struct LabelPropertiesItem { std::string label; std::set<std::string, std::less<>> properties; @@ -80,6 +85,7 @@ struct DatabaseState { std::set<Edge> edges; std::set<LabelItem> label_indices; std::set<LabelPropertyItem> label_property_indices; + std::set<TextItem> text_indices; std::set<LabelPropertyItem> existence_constraints; std::set<LabelPropertiesItem> unique_constraints; }; @@ -106,6 +112,10 @@ bool operator<(const DatabaseState::LabelPropertyItem &first, const DatabaseStat return first.property < second.property; } +bool operator<(const DatabaseState::TextItem &first, const DatabaseState::TextItem &second) { + return first.index_name != second.index_name ? first.index_name < second.index_name : first.label < second.label; +} + bool operator<(const DatabaseState::LabelPropertiesItem &first, const
DatabaseState::LabelPropertiesItem &second) { if (first.label != second.label) return first.label < second.label; return first.properties < second.properties; @@ -128,6 +138,10 @@ bool operator==(const DatabaseState::LabelPropertyItem &first, const DatabaseSta return first.label == second.label && first.property == second.property; } +bool operator==(const DatabaseState::TextItem &first, const DatabaseState::TextItem &second) { + return first.index_name == second.index_name && first.label == second.label; +} + bool operator==(const DatabaseState::LabelPropertiesItem &first, const DatabaseState::LabelPropertiesItem &second) { return first.label == second.label && first.properties == second.properties; } @@ -185,6 +199,7 @@ DatabaseState GetState(memgraph::storage::Storage *db) { // Capture all indices std::set<DatabaseState::LabelItem> label_indices; std::set<DatabaseState::LabelPropertyItem> label_property_indices; + std::set<DatabaseState::TextItem> text_indices; { auto info = dba->ListAllIndices(); for (const auto &item : info.label) { @@ -193,6 +208,9 @@ DatabaseState GetState(memgraph::storage::Storage *db) { for (const auto &item : info.label_property) { label_property_indices.insert({dba->LabelToName(item.first), dba->PropertyToName(item.second)}); } + for (const auto &item : info.text_indices) { + text_indices.insert({item.first, dba->LabelToName(item.second)}); + } } // Capture all constraints @@ -212,7 +230,8 @@ DatabaseState GetState(memgraph::storage::Storage *db) { } } - return {vertices, edges, label_indices, label_property_indices, existence_constraints, unique_constraints}; + return {vertices, edges, label_indices, label_property_indices, text_indices, existence_constraints, + unique_constraints}; } auto Execute(memgraph::query::InterpreterContext *context, memgraph::dbms::DatabaseAccess db, diff --git a/tests/unit/storage_v2_decoder_encoder.cpp b/tests/unit/storage_v2_decoder_encoder.cpp index 15db49b1c..0264e2287 100644 --- 
a/tests/unit/storage_v2_decoder_encoder.cpp +++ b/tests/unit/storage_v2_decoder_encoder.cpp @@ -358,6 +358,8 @@ TEST_F(DecoderEncoderTest, PropertyValueInvalidMarker) { case memgraph::storage::durability::Marker::DELTA_LABEL_PROPERTY_INDEX_STATS_CLEAR: case memgraph::storage::durability::Marker::DELTA_EDGE_TYPE_INDEX_CREATE: case memgraph::storage::durability::Marker::DELTA_EDGE_TYPE_INDEX_DROP: + case memgraph::storage::durability::Marker::DELTA_TEXT_INDEX_CREATE: + case memgraph::storage::durability::Marker::DELTA_TEXT_INDEX_DROP: case memgraph::storage::durability::Marker::DELTA_EXISTENCE_CONSTRAINT_CREATE: case memgraph::storage::durability::Marker::DELTA_EXISTENCE_CONSTRAINT_DROP: case memgraph::storage::durability::Marker::DELTA_UNIQUE_CONSTRAINT_CREATE: diff --git a/tests/unit/storage_v2_get_info.cpp b/tests/unit/storage_v2_get_info.cpp index 71dbc1a8d..ee5c1bb62 100644 --- a/tests/unit/storage_v2_get_info.cpp +++ b/tests/unit/storage_v2_get_info.cpp @@ -146,6 +146,7 @@ TYPED_TEST(InfoTest, InfoCheck) { ASSERT_LT(info.disk_usage, 1000'000); ASSERT_EQ(info.label_indices, 1); ASSERT_EQ(info.label_property_indices, 1); + ASSERT_EQ(info.text_indices, 0); ASSERT_EQ(info.existence_constraints, 0); ASSERT_EQ(info.unique_constraints, 2); ASSERT_EQ(info.storage_mode, this->mode); diff --git a/tests/unit/storage_v2_wal_file.cpp b/tests/unit/storage_v2_wal_file.cpp index 4094090f5..a94b20590 100644 --- a/tests/unit/storage_v2_wal_file.cpp +++ b/tests/unit/storage_v2_wal_file.cpp @@ -53,6 +53,10 @@ memgraph::storage::durability::WalDeltaData::Type StorageMetadataOperationToWalD return memgraph::storage::durability::WalDeltaData::Type::LABEL_PROPERTY_INDEX_STATS_SET; case memgraph::storage::durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_STATS_CLEAR: return memgraph::storage::durability::WalDeltaData::Type::LABEL_PROPERTY_INDEX_STATS_CLEAR; + case memgraph::storage::durability::StorageMetadataOperation::TEXT_INDEX_CREATE: + return 
memgraph::storage::durability::WalDeltaData::Type::TEXT_INDEX_CREATE; + case memgraph::storage::durability::StorageMetadataOperation::TEXT_INDEX_DROP: + return memgraph::storage::durability::WalDeltaData::Type::TEXT_INDEX_DROP; case memgraph::storage::durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_CREATE: return memgraph::storage::durability::WalDeltaData::Type::EXISTENCE_CONSTRAINT_CREATE; case memgraph::storage::durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_DROP: @@ -252,7 +256,7 @@ class DeltaGenerator final { ASSERT_TRUE(false) << "Unexpected statistics operation!"; } } - wal_file_.AppendOperation(operation, label_id, property_ids, l_stats, lp_stats, timestamp_); + wal_file_.AppendOperation(operation, std::nullopt, label_id, property_ids, l_stats, lp_stats, timestamp_); if (valid_) { UpdateStats(timestamp_, 1); memgraph::storage::durability::WalDeltaData data; @@ -271,6 +275,8 @@ class DeltaGenerator final { break; case memgraph::storage::durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_CREATE: case memgraph::storage::durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_DROP: + case memgraph::storage::durability::StorageMetadataOperation::TEXT_INDEX_CREATE: + case memgraph::storage::durability::StorageMetadataOperation::TEXT_INDEX_DROP: case memgraph::storage::durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_CREATE: case memgraph::storage::durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_DROP: data.operation_label_property.label = label; @@ -313,6 +319,8 @@ class DeltaGenerator final { case memgraph::storage::durability::StorageMetadataOperation::LABEL_INDEX_STATS_SET: case memgraph::storage::durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_CREATE: case memgraph::storage::durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_DROP: + case memgraph::storage::durability::StorageMetadataOperation::TEXT_INDEX_CREATE: + case 
memgraph::storage::durability::StorageMetadataOperation::TEXT_INDEX_DROP: case memgraph::storage::durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_CREATE: case memgraph::storage::durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_DROP:; case memgraph::storage::durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_STATS_SET: diff --git a/tests/unit/utils_memory.cpp b/tests/unit/utils_memory.cpp index 5173a5f7b..e46c6c1f9 100644 --- a/tests/unit/utils_memory.cpp +++ b/tests/unit/utils_memory.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -195,134 +195,6 @@ TEST(MonotonicBufferResource, AllocationWithInitialBufferOnStack) { } } -// NOLINTNEXTLINE(hicpp-special-member-functions) -TEST(PoolResource, SingleSmallBlockAllocations) { - TestMemory test_mem; - const size_t max_blocks_per_chunk = 3U; - const size_t max_block_size = 64U; - memgraph::utils::PoolResource mem(max_blocks_per_chunk, max_block_size, &test_mem); - // Fill the first chunk. - CheckAllocation(&mem, 64U, 1U); - // May allocate more than once due to bookkeeping. - EXPECT_GE(test_mem.new_count_, 1U); - // Reset tracking and continue filling the first chunk. - test_mem.new_count_ = 0U; - CheckAllocation(&mem, 64U, 64U); - CheckAllocation(&mem, 64U); - EXPECT_EQ(test_mem.new_count_, 0U); - // Reset tracking and fill the second chunk - test_mem.new_count_ = 0U; - CheckAllocation(&mem, 64U, 32U); - auto *ptr1 = CheckAllocation(&mem, 32U, 64U); // this will become 64b block - auto *ptr2 = CheckAllocation(&mem, 64U, 32U); - // We expect one allocation for chunk and at most one for bookkeeping. 
- EXPECT_TRUE(test_mem.new_count_ >= 1U && test_mem.new_count_ <= 2U); - test_mem.delete_count_ = 0U; - mem.Deallocate(ptr1, 32U, 64U); - mem.Deallocate(ptr2, 64U, 32U); - EXPECT_EQ(test_mem.delete_count_, 0U); - mem.Release(); - EXPECT_GE(test_mem.delete_count_, 2U); - CheckAllocation(&mem, 64U, 1U); -} - -// NOLINTNEXTLINE(hicpp-special-member-functions) -TEST(PoolResource, MultipleSmallBlockAllocations) { - TestMemory test_mem; - const size_t max_blocks_per_chunk = 1U; - const size_t max_block_size = 64U; - memgraph::utils::PoolResource mem(max_blocks_per_chunk, max_block_size, &test_mem); - CheckAllocation(&mem, 64U); - CheckAllocation(&mem, 18U, 2U); - CheckAllocation(&mem, 24U, 8U); - // May allocate more than once per chunk due to bookkeeping. - EXPECT_GE(test_mem.new_count_, 3U); - // Reset tracking and fill the second chunk - test_mem.new_count_ = 0U; - CheckAllocation(&mem, 64U); - CheckAllocation(&mem, 18U, 2U); - CheckAllocation(&mem, 24U, 8U); - // We expect one allocation for chunk and at most one for bookkeeping. - EXPECT_TRUE(test_mem.new_count_ >= 3U && test_mem.new_count_ <= 6U); - mem.Release(); - EXPECT_GE(test_mem.delete_count_, 6U); - CheckAllocation(&mem, 64U); -} - -// NOLINTNEXTLINE(hicpp-special-member-functions) -TEST(PoolResource, BigBlockAllocations) { - TestMemory test_mem; - TestMemory test_mem_unpooled; - const size_t max_blocks_per_chunk = 3U; - const size_t max_block_size = 64U; - memgraph::utils::PoolResource mem(max_blocks_per_chunk, max_block_size, &test_mem, &test_mem_unpooled); - CheckAllocation(&mem, max_block_size + 1, 1U); - // May allocate more than once per block due to bookkeeping. 
- EXPECT_GE(test_mem_unpooled.new_count_, 1U); - CheckAllocation(&mem, max_block_size + 1, 1U); - EXPECT_GE(test_mem_unpooled.new_count_, 2U); - auto *ptr = CheckAllocation(&mem, max_block_size * 2, 1U); - EXPECT_GE(test_mem_unpooled.new_count_, 3U); - mem.Deallocate(ptr, max_block_size * 2, 1U); - EXPECT_GE(test_mem_unpooled.delete_count_, 1U); - mem.Release(); - EXPECT_GE(test_mem_unpooled.delete_count_, 3U); - CheckAllocation(&mem, max_block_size + 1, 1U); -} - -// NOLINTNEXTLINE(hicpp-special-member-functions) -TEST(PoolResource, BlockSizeIsNotMultipleOfAlignment) { - const size_t max_blocks_per_chunk = 3U; - const size_t max_block_size = 64U; - memgraph::utils::PoolResource mem(max_blocks_per_chunk, max_block_size); - EXPECT_THROW(mem.Allocate(64U, 24U), std::bad_alloc); - EXPECT_THROW(mem.Allocate(63U), std::bad_alloc); - EXPECT_THROW(mem.Allocate(max_block_size + 1, max_block_size), std::bad_alloc); -} - -// NOLINTNEXTLINE(hicpp-special-member-functions) -TEST(PoolResource, AllocationWithOverflow) { - { - const size_t max_blocks_per_chunk = 2U; - memgraph::utils::PoolResource mem(max_blocks_per_chunk, std::numeric_limits<size_t>::max()); - EXPECT_THROW(mem.Allocate(std::numeric_limits<size_t>::max(), 1U), std::bad_alloc); - // Throws because initial chunk block is aligned to - // memgraph::utils::Ceil2(block_size), which wraps in this case. - EXPECT_THROW(mem.Allocate((std::numeric_limits<size_t>::max() - 1U) / max_blocks_per_chunk, 1U), std::bad_alloc); - } - { - const size_t max_blocks_per_chunk = memgraph::utils::impl::Pool::MaxBlocksInChunk(); - memgraph::utils::PoolResource mem(max_blocks_per_chunk, std::numeric_limits<size_t>::max()); - EXPECT_THROW(mem.Allocate(std::numeric_limits<size_t>::max(), 1U), std::bad_alloc); - // Throws because initial chunk block is aligned to - // memgraph::utils::Ceil2(block_size), which wraps in this case. 
- EXPECT_THROW(mem.Allocate((std::numeric_limits<size_t>::max() - 1U) / max_blocks_per_chunk, 1U), std::bad_alloc); - } -} - -TEST(PoolResource, BlockDeallocation) { - TestMemory test_mem; - const size_t max_blocks_per_chunk = 2U; - const size_t max_block_size = 64U; - memgraph::utils::PoolResource mem(max_blocks_per_chunk, max_block_size, &test_mem); - auto *ptr = CheckAllocation(&mem, max_block_size); - test_mem.new_count_ = 0U; - // Do another allocation before deallocating `ptr`, so that we are sure that - // the chunk of 2 blocks is still alive and therefore `ptr` may be reused when - // it's deallocated. If we deallocate now, the implementation may choose to - // free the whole chunk, and we do not want that for the purposes of this - // test. - CheckAllocation(&mem, max_block_size); - EXPECT_EQ(test_mem.new_count_, 0U); - EXPECT_EQ(test_mem.delete_count_, 0U); - mem.Deallocate(ptr, max_block_size); - EXPECT_EQ(test_mem.delete_count_, 0U); - // CheckAllocation(&mem, max_block_size) will fail as PoolResource should - // reuse free blocks. 
- EXPECT_EQ(ptr, mem.Allocate(max_block_size)); - EXPECT_EQ(test_mem.new_count_, 0U); -} - class AllocationTrackingMemory final : public memgraph::utils::MemoryResource { public: std::vector<size_t> allocated_sizes_; diff --git a/tools/github/clang-tidy/clang-tidy-diff.py b/tools/github/clang-tidy/clang-tidy-diff.py index 1bdf1da25..609b2eedb 100755 --- a/tools/github/clang-tidy/clang-tidy-diff.py +++ b/tools/github/clang-tidy/clang-tidy-diff.py @@ -250,7 +250,6 @@ def main(): common_clang_tidy_args.append("-extra-arg=%s" % arg) for arg in args.extra_arg_before: common_clang_tidy_args.append("-extra-arg-before=%s" % arg) - for name in lines_by_file: line_filter_json = json.dumps([{"name": name, "lines": lines_by_file[name]}], separators=(",", ":")) @@ -266,7 +265,6 @@ def main(): command.extend(common_clang_tidy_args) command.append(name) command.extend(clang_tidy_args) - task_queue.put(command) # Wait for all threads to be done.