Compare commits

..

1 Commit

Author            SHA1        Message                  Date
Matej Ferencevic  d4315b3242  Prepare release v0.11.0  2018-07-04 23:01:56 +02:00
2568 changed files with 712890 additions and 316567 deletions

.arcconfig Normal file

@@ -0,0 +1,7 @@
{
"project_id" : "memgraph",
"conduit_uri" : "https://phabricator.memgraph.io",
"phabricator_uri" : "https://phabricator.memgraph.io",
"git.default-relative-commit": "origin/master",
"arc.land.onto.default": "master"
}
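With this .arcconfig in place, code review runs through Arcanist against the configured Phabricator host; a minimal sketch of the usual flow, assuming arc is installed locally:

# Create a review from the commits on the current branch,
# diffed against origin/master as configured above.
arc diff
# After approval, land the change onto master, the configured default target.
arc land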


@@ -1,11 +1,9 @@
---
BasedOnStyle: Google
---
Language: Cpp
Standard: "c++20"
BasedOnStyle: Google
Standard: "C++11"
UseTab: Never
DerivePointerAlignment: false
PointerAlignment: Right
ColumnLimit : 120
IncludeBlocks: Preserve
ColumnLimit : 80
...
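clang-format picks this configuration up automatically from the nearest .clang-format file; a small local sketch (the source path is only a placeholder):

# Reformat one file in place using the repository style.
clang-format -i --style=file src/some_file.cpp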


@@ -1,94 +0,0 @@
---
Checks: '*,
-abseil-string-find-str-contains,
-altera-id-dependent-backward-branch,
-altera-struct-pack-align,
-altera-unroll-loops,
-android-*,
-cert-err58-cpp,
-cppcoreguidelines-avoid-do-while,
-cppcoreguidelines-avoid-c-arrays,
-cppcoreguidelines-avoid-goto,
-cppcoreguidelines-avoid-magic-numbers,
-cppcoreguidelines-no-malloc,
-cppcoreguidelines-non-private-member-variables-in-classes,
-cppcoreguidelines-owning-memory,
-cppcoreguidelines-pro-bounds-array-to-pointer-decay,
-cppcoreguidelines-pro-bounds-constant-array-index,
-cppcoreguidelines-pro-bounds-pointer-arithmetic,
-cppcoreguidelines-pro-type-member-init,
-cppcoreguidelines-pro-type-reinterpret-cast,
-cppcoreguidelines-pro-type-static-cast-downcast,
-cppcoreguidelines-pro-type-union-access,
-cppcoreguidelines-pro-type-vararg,
-cppcoreguidelines-special-member-functions,
-fuchsia-default-arguments,
-fuchsia-default-arguments-calls,
-fuchsia-default-arguments-declarations,
-fuchsia-overloaded-operator,
-fuchsia-statically-constructed-objects,
-fuchsia-trailing-return,
-fuchsia-virtual-inheritance,
-google-explicit-constructor,
-google-readability-*,
-google-runtime-references,
-hicpp-avoid-c-arrays,
-hicpp-avoid-goto,
-hicpp-braces-around-statements,
-hicpp-member-init,
-hicpp-no-array-decay,
-hicpp-no-assembler,
-hicpp-no-malloc,
-hicpp-use-equals-default,
-hicpp-use-nullptr,
-hicpp-vararg,
-llvm-header-guard,
-llvm-include-order,
-llvmlibc-callee-namespace,
-llvmlibc-implementation-in-namespace,
-llvmlibc-restrict-system-libc-headers,
-misc-non-private-member-variables-in-classes,
-modernize-avoid-c-arrays,
-modernize-concat-nested-namespaces,
-modernize-pass-by-value,
-modernize-use-equals-default,
-modernize-use-nodiscard,
-modernize-use-trailing-return-type,
-performance-unnecessary-value-param,
-readability-braces-around-statements,
-readability-else-after-return,
-readability-function-cognitive-complexity,
-readability-implicit-bool-conversion,
-readability-magic-numbers,
-readability-named-parameter,
-readability-identifier-length,
-misc-no-recursion,
-concurrency-mt-unsafe,
-bugprone-easily-swappable-parameters,
-bugprone-unchecked-optional-access'
WarningsAsErrors: ''
HeaderFilterRegex: 'src/.*'
AnalyzeTemporaryDtors: false
FormatStyle: none
CheckOptions:
- key: google-readability-braces-around-statements.ShortStatementLines
value: '1'
- key: google-readability-function-size.StatementThreshold
value: '800'
- key: google-readability-namespace-comments.ShortNamespaceLines
value: '10'
- key: google-readability-namespace-comments.SpacesBeforeComments
value: '2'
- key: modernize-loop-convert.MaxCopySize
value: '16'
- key: modernize-loop-convert.MinConfidence
value: reasonable
- key: modernize-loop-convert.NamingStyle
value: CamelCase
- key: modernize-pass-by-value.IncludeStyle
value: llvm
- key: modernize-replace-auto-ptr.IncludeStyle
value: llvm
- key: modernize-use-nullptr.NullMacros
value: 'NULL'
...
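These are the checks clang-tidy loads from .clang-tidy; a minimal single-file invocation, assuming a build directory containing compile_commands.json (paths are placeholders):

# Analyze one translation unit against the compilation database in build/.
clang-tidy -p build src/some_file.cpp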


@@ -1,36 +0,0 @@
#!/bin/sh
project_folder=$(git rev-parse --show-toplevel)
if git rev-parse --verify HEAD >/dev/null 2>&1
then
against=HEAD
else
# Initial commit: diff against an empty tree object
against=$(git hash-object -t tree /dev/null)
fi
# Redirect output to stderr.
exec 1>&2
tmpdir=$(mktemp -d repo-XXXXXXXX)
trap "rm -rf $tmpdir" EXIT INT
modified_files=$(git diff --cached --name-only --diff-filter=AM $against | sed -nE "/.*\.(cpp|cc|cxx|c|h|hpp)$/p")
FAIL=0
for file in $modified_files; do
echo "Checking $file..."
cp $project_folder/.clang-format $project_folder/.clang-tidy $tmpdir
git checkout-index --prefix="$tmpdir/" -- $file
# Do not break header checker
echo "Running header checker..."
$project_folder/tools/header-checker.py $tmpdir/$file $file --amend-year
CODE=$?
if [ $CODE -ne 0 ]; then
FAIL=1
fi
done;
exit ${FAIL}
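The script above is a git pre-commit hook, so it only runs if it is wired into the local clone; one common way to do that, with the source path being an assumption since the original filename is not shown in this diff:

# Link the hook script into the clone's hooks directory (path is illustrative).
ln -s ../../tools/git-hooks/pre-commit .git/hooks/pre-commit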


@@ -1,41 +0,0 @@
---
name: Bug report
about: Create a report to help us improve
title: ""
labels: bug
---
**Memgraph version**
Which version did you use?
**Environment**
Some information about the environment you are using Memgraph on: operating
system, architecture (ARM, x86), how do you connect, with or without docker,
which driver etc.
**Describe the bug**
A clear and concise description of what the bug is.
**To Reproduce**
Steps to reproduce the behavior:
1. Run the following query '...'
2. Click on '....'
3. ... IDEALLY: link to the workload info (DATASET & QUERIES) ...
**Expected behavior**
A clear and concise description of what you expected to happen.
**Logs**
If applicable, add logs of Memgraph, CLI output or screenshots to help explain
your problem.
**Additional context**
Add any other context about the problem here.
**Verification Environment**
Once we fix it, what do you need to verify the fix?
Do you need:
* Plain memgraph package -> for which Linux?
* Plain memgraph Docker image?
* Which architecture do you use ARM | x86?
* Full Memgraph platform?


@@ -1,28 +0,0 @@
### Description
Please briefly explain the changes you made here.
Please delete either the [master < Epic] or [master < Task] part, depending on your needs.
[master < Epic] PR
- [ ] Write E2E tests
- [ ] Compare the [benchmarking results](https://bench-graph.memgraph.com/) between the master branch and the Epic branch
- [ ] Provide the full content or a guide for the final git message
- [FINAL GIT MESSAGE]
[master < Task] PR
- [ ] Provide the full content or a guide for the final git message
- **[FINAL GIT MESSAGE]**
### Documentation checklist
- [ ] Add the documentation label tag
- [ ] Add the bug / feature label tag
- [ ] Add the milestone for which this feature is intended
- If not known, set for a later milestone
- [ ] Write a release note, including added/changed clauses
- **[Release note text]**
- [ ] Link the documentation PR here
- **[Documentation PR link]**
- [ ] Tag someone from docs team in the comments


@@ -1,106 +0,0 @@
name: Daily Benchmark
on:
workflow_dispatch:
schedule:
- cron: "0 22 * * *"
jobs:
release_benchmarks:
name: "Release benchmarks"
runs-on: [self-hosted, Linux, X64, Diff, Gen7]
env:
THREADS: 24
MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }}
MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }}
steps:
- name: Set up repository
uses: actions/checkout@v4
with:
# Number of commits to fetch. `0` indicates all history for all
# branches and tags. (default: 1)
fetch-depth: 0
- name: Build release binaries
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Initialize dependencies.
./init
# Build only memgraph release binaries.
cd build
cmake -DCMAKE_BUILD_TYPE=release ..
make -j$THREADS
- name: Run macro benchmarks
run: |
cd tests/macro_benchmark
./harness QuerySuite MemgraphRunner \
--groups aggregation 1000_create unwind_create dense_expand match \
--no-strict
- name: Get branch name (merge)
if: github.event_name != 'pull_request'
shell: bash
run: echo "BRANCH_NAME=$(echo ${GITHUB_REF#refs/heads/} | tr / -)" >> $GITHUB_ENV
- name: Get branch name (pull request)
if: github.event_name == 'pull_request'
shell: bash
run: echo "BRANCH_NAME=$(echo ${GITHUB_HEAD_REF} | tr / -)" >> $GITHUB_ENV
- name: Upload macro benchmark results
run: |
cd tools/bench-graph-client
virtualenv -p python3 ve3
source ve3/bin/activate
pip install -r requirements.txt
./main.py --benchmark-name "macro_benchmark" \
--benchmark-results "../../tests/macro_benchmark/.harness_summary" \
--github-run-id "${{ github.run_id }}" \
--github-run-number "${{ github.run_number }}" \
--head-branch-name "${{ env.BRANCH_NAME }}"
- name: Run mgbench
run: |
cd tests/mgbench
./benchmark.py vendor-native --num-workers-for-benchmark 12 --export-results benchmark_pokec.json pokec/medium/*/*
./benchmark.py vendor-native --num-workers-for-benchmark 1 --export-results benchmark_supernode.json supernode
./benchmark.py vendor-native --num-workers-for-benchmark 1 --export-results benchmark_high_write_set_property.json high_write_set_property
./benchmark.py vendor-native --num-workers-for-benchmark 12 --export-results cartesian.json cartesian
- name: Upload mgbench results
run: |
cd tools/bench-graph-client
virtualenv -p python3 ve3
source ve3/bin/activate
pip install -r requirements.txt
./main.py --benchmark-name "mgbench" \
--benchmark-results "../../tests/mgbench/benchmark_pokec.json" \
--github-run-id "${{ github.run_id }}" \
--github-run-number "${{ github.run_number }}" \
--head-branch-name "${{ env.BRANCH_NAME }}"
./main.py --benchmark-name "supernode" \
--benchmark-results "../../tests/mgbench/benchmark_supernode.json" \
--github-run-id "${{ github.run_id }}" \
--github-run-number "${{ github.run_number }}" \
--head-branch-name "${{ env.BRANCH_NAME }}"
./main.py --benchmark-name "high_write_set_property" \
--benchmark-results "../../tests/mgbench/benchmark_high_write_set_property.json" \
--github-run-id "${{ github.run_id }}" \
--github-run-number "${{ github.run_number }}" \
--head-branch-name "${{ env.BRANCH_NAME }}"
./main.py --benchmark-name "cartesian" \
--benchmark-results "../../tests/mgbench/cartesian.json" \
--github-run-id "${{ github.run_id }}" \
--github-run-number "${{ github.run_number }}" \
--head-branch-name "${{ env.BRANCH_NAME }}"


@@ -1,660 +0,0 @@
name: Diff
concurrency:
group: ${{ github.head_ref || github.sha }}
cancel-in-progress: true
on:
push:
branches:
- master
workflow_dispatch:
pull_request:
paths-ignore:
- "docs/**"
- "**/*.md"
- ".clang-format"
- "CODEOWNERS"
- "licenses/*"
jobs:
community_build:
name: "Community build"
runs-on: [self-hosted, Linux, X64, DockerMgBuild]
timeout-minutes: 60
env:
THREADS: 24
MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }}
MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }}
OS: debian-11
TOOLCHAIN: v5
ARCH: amd
BUILD_TYPE: RelWithDebInfo
steps:
- name: Set up repository
uses: actions/checkout@v4
with:
# Number of commits to fetch. `0` indicates all history for all
# branches and tags. (default: 1)
fetch-depth: 0
- name: Spin up mgbuild container
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
run
- name: Build release binaries
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
--build-type $BUILD_TYPE \
--threads $THREADS \
build-memgraph --community
- name: Run unit tests
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
--threads $THREADS \
--enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \
--organization-name $MEMGRAPH_ORGANIZATION_NAME \
test-memgraph unit
- name: Stop mgbuild container
if: always()
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
stop --remove
code_analysis:
name: "Code analysis"
runs-on: [self-hosted, Linux, X64, DockerMgBuild]
timeout-minutes: 60
env:
THREADS: 24
MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }}
MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }}
OS: debian-11
TOOLCHAIN: v5
ARCH: amd
BUILD_TYPE: Debug
steps:
- name: Set up repository
uses: actions/checkout@v4
with:
# Number of commits to fetch. `0` indicates all history for all
# branches and tags. (default: 1)
fetch-depth: 0
- name: Spin up mgbuild container
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
run
# This is also needed if we want to do a comparison against other branches
# See https://github.community/t/checkout-code-fails-when-it-runs-lerna-run-test-since-master/17920
- name: Fetch all history for all tags and branches
run: git fetch
- name: Initialize deps
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
--build-type $BUILD_TYPE \
--threads $THREADS \
build-memgraph --init-only
- name: Set base branch
if: ${{ github.event_name == 'pull_request' }}
run: |
echo "BASE_BRANCH=origin/${{ github.base_ref }}" >> $GITHUB_ENV
- name: Set base branch # if we manually dispatch or push to master
if: ${{ github.event_name != 'pull_request' }}
run: |
echo "BASE_BRANCH=origin/master" >> $GITHUB_ENV
- name: Python code analysis
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
--enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \
--organization-name $MEMGRAPH_ORGANIZATION_NAME \
test-memgraph code-analysis --base-branch "${{ env.BASE_BRANCH }}"
- name: Build combined ASAN, UBSAN and coverage binaries
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
--build-type $BUILD_TYPE \
--threads $THREADS \
build-memgraph --coverage --asan --ubsan
- name: Run unit tests
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
--enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \
--organization-name $MEMGRAPH_ORGANIZATION_NAME \
test-memgraph unit-coverage
- name: Compute code coverage
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
--enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \
--organization-name $MEMGRAPH_ORGANIZATION_NAME \
test-memgraph code-coverage
- name: Save code coverage
uses: actions/upload-artifact@v4
with:
name: "Code coverage(Code analysis)"
path: tools/github/generated/code_coverage.tar.gz
- name: Run clang-tidy
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
--threads $THREADS \
--enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \
--organization-name $MEMGRAPH_ORGANIZATION_NAME \
test-memgraph clang-tidy --base-branch "${{ env.BASE_BRANCH }}"
- name: Stop mgbuild container
if: always()
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
stop --remove
debug_build:
name: "Debug build"
runs-on: [self-hosted, Linux, X64, DockerMgBuild]
timeout-minutes: 100
env:
THREADS: 24
MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }}
MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }}
OS: debian-11
TOOLCHAIN: v5
ARCH: amd
BUILD_TYPE: Debug
steps:
- name: Set up repository
uses: actions/checkout@v4
with:
# Number of commits to fetch. `0` indicates all history for all
# branches and tags. (default: 1)
fetch-depth: 0
- name: Spin up mgbuild container
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
run
- name: Build release binaries
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
--build-type $BUILD_TYPE \
--threads $THREADS \
build-memgraph
- name: Run leftover CTest tests
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
--threads $THREADS \
--enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \
--organization-name $MEMGRAPH_ORGANIZATION_NAME \
test-memgraph leftover-CTest
- name: Run drivers tests
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
--threads $THREADS \
--enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \
--organization-name $MEMGRAPH_ORGANIZATION_NAME \
test-memgraph drivers
- name: Run HA driver tests
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
--threads $THREADS \
--enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \
--organization-name $MEMGRAPH_ORGANIZATION_NAME \
test-memgraph drivers-high-availability
- name: Run integration tests
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
--threads $THREADS \
--enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \
--organization-name $MEMGRAPH_ORGANIZATION_NAME \
test-memgraph integration
- name: Run cppcheck and clang-format
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
--threads $THREADS \
--enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \
--organization-name $MEMGRAPH_ORGANIZATION_NAME \
test-memgraph cppcheck-and-clang-format
- name: Save cppcheck and clang-format errors
uses: actions/upload-artifact@v4
with:
name: "Code coverage(Debug build)"
path: tools/github/cppcheck_and_clang_format.txt
- name: Stop mgbuild container
if: always()
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
stop --remove
release_build:
name: "Release build"
runs-on: [self-hosted, Linux, X64, DockerMgBuild]
timeout-minutes: 100
env:
THREADS: 24
MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }}
MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }}
OS: debian-11
TOOLCHAIN: v5
ARCH: amd
BUILD_TYPE: Release
steps:
- name: Set up repository
uses: actions/checkout@v4
with:
# Number of commits to fetch. `0` indicates all history for all
# branches and tags. (default: 1)
fetch-depth: 0
- name: Spin up mgbuild container
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
run
- name: Build release binaries
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
--build-type $BUILD_TYPE \
--threads $THREADS \
build-memgraph
- name: Run GQL Behave tests
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
--enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \
--organization-name $MEMGRAPH_ORGANIZATION_NAME \
test-memgraph gql-behave
- name: Save quality assurance status
uses: actions/upload-artifact@v4
with:
name: "GQL Behave Status"
path: |
tests/gql_behave/gql_behave_status.csv
tests/gql_behave/gql_behave_status.html
- name: Run unit tests
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
--threads $THREADS \
--enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \
--organization-name $MEMGRAPH_ORGANIZATION_NAME \
test-memgraph unit
# This step will be skipped because the e2e stream tests have been disabled
# We need to fix this as soon as possible
- name: Ensure Kafka and Pulsar are up
if: false
run: |
cd tests/e2e/streams/kafka
docker-compose up -d
cd ../pulsar
docker-compose up -d
- name: Run e2e tests
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
--enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \
--organization-name $MEMGRAPH_ORGANIZATION_NAME \
test-memgraph e2e
# Same as two steps prior
- name: Ensure Kafka and Pulsar are down
if: false
run: |
cd tests/e2e/streams/kafka
docker-compose down
cd ../pulsar
docker-compose down
- name: Run stress test (plain)
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
--enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \
--organization-name $MEMGRAPH_ORGANIZATION_NAME \
test-memgraph stress-plain
- name: Run stress test (SSL)
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
--enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \
--organization-name $MEMGRAPH_ORGANIZATION_NAME \
test-memgraph stress-ssl
- name: Run durability test
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
--enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \
--organization-name $MEMGRAPH_ORGANIZATION_NAME \
test-memgraph durability
- name: Create enterprise DEB package
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
--enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \
--organization-name $MEMGRAPH_ORGANIZATION_NAME \
package-memgraph
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
copy --package
- name: Save enterprise DEB package
uses: actions/upload-artifact@v4
with:
name: "Enterprise DEB package"
path: build/output/${{ env.OS }}/memgraph*.deb
- name: Copy build logs
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
copy --build-logs
- name: Save test data
uses: actions/upload-artifact@v4
if: always()
with:
name: "Test data(Release build)"
path: build/logs
- name: Stop mgbuild container
if: always()
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
stop --remove
release_jepsen_test:
name: "Release Jepsen Test"
runs-on: [self-hosted, Linux, X64, DockerMgBuild]
timeout-minutes: 80
env:
THREADS: 24
MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }}
MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }}
OS: debian-12
TOOLCHAIN: v5
ARCH: amd
BUILD_TYPE: RelWithDebInfo
steps:
- name: Set up repository
uses: actions/checkout@v4
with:
# Number of commits to fetch. `0` indicates all history for all
# branches and tags. (default: 1)
fetch-depth: 0
- name: Spin up mgbuild container
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
run
- name: Build release binaries
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
--build-type $BUILD_TYPE \
--threads $THREADS \
build-memgraph
- name: Copy memgraph binary
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
copy --binary
- name: Refresh Jepsen Cluster
run: |
cd tests/jepsen
./run.sh cluster-refresh
- name: Run Jepsen tests
run: |
cd tests/jepsen
./run.sh test-all-individually --binary ../../build/memgraph --ignore-run-stdout-logs --ignore-run-stderr-logs
- name: Save Jepsen report
uses: actions/upload-artifact@v4
if: ${{ always() }}
with:
name: "Jepsen Report"
path: tests/jepsen/Jepsen.tar.gz
- name: Stop mgbuild container
if: always()
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
stop --remove
release_benchmarks:
name: "Release benchmarks"
runs-on: [self-hosted, Linux, X64, DockerMgBuild, Gen7]
timeout-minutes: 60
env:
THREADS: 24
MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }}
MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }}
OS: debian-11
TOOLCHAIN: v5
ARCH: amd
BUILD_TYPE: Release
steps:
- name: Set up repository
uses: actions/checkout@v4
with:
# Number of commits to fetch. `0` indicates all history for all
# branches and tags. (default: 1)
fetch-depth: 0
- name: Spin up mgbuild container
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
run
- name: Build release binaries
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
--build-type $BUILD_TYPE \
--threads $THREADS \
build-memgraph
- name: Run macro benchmarks
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
--enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \
--organization-name $MEMGRAPH_ORGANIZATION_NAME \
test-memgraph macro-benchmark
- name: Get branch name (merge)
if: github.event_name != 'pull_request'
shell: bash
run: echo "BRANCH_NAME=$(echo ${GITHUB_REF#refs/heads/} | tr / -)" >> $GITHUB_ENV
- name: Get branch name (pull request)
if: github.event_name == 'pull_request'
shell: bash
run: echo "BRANCH_NAME=$(echo ${GITHUB_HEAD_REF} | tr / -)" >> $GITHUB_ENV
- name: Upload macro benchmark results
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
--enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \
--organization-name $MEMGRAPH_ORGANIZATION_NAME \
test-memgraph upload-to-bench-graph \
--benchmark-name "macro_benchmark" \
--benchmark-results "../../tests/macro_benchmark/.harness_summary" \
--github-run-id ${{ github.run_id }} \
--github-run-number ${{ github.run_number }} \
--head-branch-name ${{ env.BRANCH_NAME }}
- name: Run mgbench
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
--enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \
--organization-name $MEMGRAPH_ORGANIZATION_NAME \
test-memgraph mgbench
- name: Upload mgbench results
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
--enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \
--organization-name $MEMGRAPH_ORGANIZATION_NAME \
test-memgraph upload-to-bench-graph \
--benchmark-name "mgbench" \
--benchmark-results "../../tests/mgbench/benchmark_result.json" \
--github-run-id "${{ github.run_id }}" \
--github-run-number "${{ github.run_number }}" \
--head-branch-name "${{ env.BRANCH_NAME }}"
- name: Stop mgbuild container
if: always()
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
stop --remove


@@ -1,46 +0,0 @@
name: Run clang-tidy on the full codebase
on:
workflow_dispatch:
jobs:
clang_tidy_check:
name: "Clang-tidy check"
runs-on: [self-hosted, Linux, X64, Ubuntu20.04]
env:
THREADS: 24
MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }}
MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }}
steps:
- name: Set up repository
uses: actions/checkout@v4
with:
# Number of commits to fetch. `0` indicates all history for all
# branches and tags. (default: 1)
fetch-depth: 0
- name: Build debug binaries
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Initialize dependencies.
./init
# Build debug binaries.
cd build
cmake ..
make -j$THREADS
- name: Run clang-tidy
run: |
source /opt/toolchain-v4/activate
# The results are also written to standard output in order to retain them in the logs
./tools/github/clang-tidy/run-clang-tidy.py -p build -j $THREADS -clang-tidy-binary=/opt/toolchain-v4/bin/clang-tidy "$PWD/src/*" |
tee ./build/full_clang_tidy_output.txt
- name: Summarize clang-tidy results
run: cat ./build/full_clang_tidy_output.txt | ./tools/github/clang-tidy/count_errors.sh


@@ -1,295 +0,0 @@
name: Package memgraph
# TODO(gitbuda): Cleanup docker container if GHA job was canceled.
on:
workflow_dispatch:
inputs:
memgraph_version:
description: "Memgraph version to upload as. Leave this field empty if you don't want to upload binaries to S3. Format: 'X.Y.Z'"
required: false
build_type:
type: choice
description: "Memgraph Build type. Default value is Release"
default: 'Release'
options:
- Release
- RelWithDebInfo
target_os:
type: choice
description: "Target OS for which memgraph will be packaged. Select 'all' if you want to package for every listed OS. Default is Ubuntu 22.04"
default: 'ubuntu-22_04'
options:
- all
- amzn-2
- centos-7
- centos-9
- debian-10
- debian-11
- debian-11-arm
- debian-11-platform
- docker
- fedora-36
- ubuntu-18_04
- ubuntu-20_04
- ubuntu-22_04
- ubuntu-22_04-arm
jobs:
amzn-2:
if: ${{ github.event.inputs.target_os == 'amzn-2' || github.event.inputs.target_os == 'all' }}
runs-on: [self-hosted, DockerMgBuild, X64]
timeout-minutes: 60
steps:
- name: "Set up repository"
uses: actions/checkout@v4
with:
fetch-depth: 0 # Required because of release/get_version.py
- name: "Build package"
run: |
./release/package/run.sh package amzn-2 ${{ github.event.inputs.build_type }}
- name: "Upload package"
uses: actions/upload-artifact@v4
with:
name: amzn-2
path: build/output/amzn-2/memgraph*.rpm
centos-7:
if: ${{ github.event.inputs.target_os == 'centos-7' || github.event.inputs.target_os == 'all' }}
runs-on: [self-hosted, DockerMgBuild, X64]
timeout-minutes: 60
steps:
- name: "Set up repository"
uses: actions/checkout@v4
with:
fetch-depth: 0 # Required because of release/get_version.py
- name: "Build package"
run: |
./release/package/run.sh package centos-7 ${{ github.event.inputs.build_type }}
- name: "Upload package"
uses: actions/upload-artifact@v4
with:
name: centos-7
path: build/output/centos-7/memgraph*.rpm
centos-9:
if: ${{ github.event.inputs.target_os == 'centos-9' || github.event.inputs.target_os == 'all' }}
runs-on: [self-hosted, DockerMgBuild, X64]
timeout-minutes: 60
steps:
- name: "Set up repository"
uses: actions/checkout@v4
with:
fetch-depth: 0 # Required because of release/get_version.py
- name: "Build package"
run: |
./release/package/run.sh package centos-9 ${{ github.event.inputs.build_type }}
- name: "Upload package"
uses: actions/upload-artifact@v4
with:
name: centos-9
path: build/output/centos-9/memgraph*.rpm
debian-10:
if: ${{ github.event.inputs.target_os == 'debian-10' || github.event.inputs.target_os == 'all' }}
runs-on: [self-hosted, DockerMgBuild, X64]
timeout-minutes: 60
steps:
- name: "Set up repository"
uses: actions/checkout@v4
with:
fetch-depth: 0 # Required because of release/get_version.py
- name: "Build package"
run: |
./release/package/run.sh package debian-10 ${{ github.event.inputs.build_type }}
- name: "Upload package"
uses: actions/upload-artifact@v4
with:
name: debian-10
path: build/output/debian-10/memgraph*.deb
debian-11:
if: ${{ github.event.inputs.target_os == 'debian-11' || github.event.inputs.target_os == 'all' }}
runs-on: [self-hosted, DockerMgBuild, X64]
timeout-minutes: 60
steps:
- name: "Set up repository"
uses: actions/checkout@v4
with:
fetch-depth: 0 # Required because of release/get_version.py
- name: "Build package"
run: |
./release/package/run.sh package debian-11 ${{ github.event.inputs.build_type }}
- name: "Upload package"
uses: actions/upload-artifact@v4
with:
name: debian-11
path: build/output/debian-11/memgraph*.deb
debian-11-arm:
if: ${{ github.event.inputs.target_os == 'debian-11-arm' || github.event.inputs.target_os == 'all' }}
runs-on: [self-hosted, DockerMgBuild, ARM64, strange]
timeout-minutes: 120
steps:
- name: "Set up repository"
uses: actions/checkout@v4
with:
fetch-depth: 0 # Required because of release/get_version.py
- name: "Build package"
run: |
./release/package/run.sh package debian-11-arm ${{ github.event.inputs.build_type }}
- name: "Upload package"
uses: actions/upload-artifact@v4
with:
name: debian-11-aarch64
path: build/output/debian-11-arm/memgraph*.deb
debian-11-platform:
if: ${{ github.event.inputs.target_os == 'debian-11-platform' || github.event.inputs.target_os == 'all' }}
runs-on: [self-hosted, DockerMgBuild, X64]
timeout-minutes: 60
steps:
- name: "Set up repository"
uses: actions/checkout@v4
with:
fetch-depth: 0 # Required because of release/get_version.py
- name: "Build package"
run: |
./release/package/run.sh package debian-11 ${{ github.event.inputs.build_type }} --for-platform
- name: "Upload package"
uses: actions/upload-artifact@v4
with:
name: debian-11-platform
path: build/output/debian-11/memgraph*.deb
docker:
if: ${{ github.event.inputs.target_os == 'docker' || github.event.inputs.target_os == 'all' }}
runs-on: [self-hosted, DockerMgBuild, X64]
timeout-minutes: 60
steps:
- name: "Set up repository"
uses: actions/checkout@v4
with:
fetch-depth: 0 # Required because of release/get_version.py
- name: "Build package"
run: |
cd release/package
./run.sh package debian-11 ${{ github.event.inputs.build_type }} --for-docker
./run.sh docker
- name: "Upload package"
uses: actions/upload-artifact@v4
with:
name: docker
path: build/output/docker/memgraph*.tar.gz
fedora-36:
if: ${{ github.event.inputs.target_os == 'fedora-36' || github.event.inputs.target_os == 'all' }}
runs-on: [self-hosted, DockerMgBuild, X64]
timeout-minutes: 60
steps:
- name: "Set up repository"
uses: actions/checkout@v4
with:
fetch-depth: 0 # Required because of release/get_version.py
- name: "Build package"
run: |
./release/package/run.sh package fedora-36 ${{ github.event.inputs.build_type }}
- name: "Upload package"
uses: actions/upload-artifact@v4
with:
name: fedora-36
path: build/output/fedora-36/memgraph*.rpm
ubuntu-18_04:
if: ${{ github.event.inputs.target_os == 'ubuntu-18_04' || github.event.inputs.target_os == 'all' }}
runs-on: [self-hosted, DockerMgBuild, X64]
timeout-minutes: 60
steps:
- name: "Set up repository"
uses: actions/checkout@v4
with:
fetch-depth: 0 # Required because of release/get_version.py
- name: "Build package"
run: |
./release/package/run.sh package ubuntu-18.04 ${{ github.event.inputs.build_type }}
- name: "Upload package"
uses: actions/upload-artifact@v4
with:
name: ubuntu-18.04
path: build/output/ubuntu-18.04/memgraph*.deb
ubuntu-20_04:
if: ${{ github.event.inputs.target_os == 'ubuntu-20_04' || github.event.inputs.target_os == 'all' }}
runs-on: [self-hosted, DockerMgBuild, X64]
timeout-minutes: 60
steps:
- name: "Set up repository"
uses: actions/checkout@v4
with:
fetch-depth: 0 # Required because of release/get_version.py
- name: "Build package"
run: |
./release/package/run.sh package ubuntu-20.04 ${{ github.event.inputs.build_type }}
- name: "Upload package"
uses: actions/upload-artifact@v4
with:
name: ubuntu-20.04
path: build/output/ubuntu-20.04/memgraph*.deb
ubuntu-22_04:
if: ${{ github.event.inputs.target_os == 'ubuntu-22_04' || github.event.inputs.target_os == 'all' }}
runs-on: [self-hosted, DockerMgBuild, X64]
timeout-minutes: 60
steps:
- name: "Set up repository"
uses: actions/checkout@v4
with:
fetch-depth: 0 # Required because of release/get_version.py
- name: "Build package"
run: |
./release/package/run.sh package ubuntu-22.04 ${{ github.event.inputs.build_type }}
- name: "Upload package"
uses: actions/upload-artifact@v4
with:
name: ubuntu-22.04
path: build/output/ubuntu-22.04/memgraph*.deb
ubuntu-22_04-arm:
if: ${{ github.event.inputs.target_os == 'ubuntu-22_04-arm' || github.event.inputs.target_os == 'all' }}
runs-on: [self-hosted, DockerMgBuild, ARM64, strange]
timeout-minutes: 120
steps:
- name: "Set up repository"
uses: actions/checkout@v4
with:
fetch-depth: 0 # Required because of release/get_version.py
- name: "Build package"
run: |
./release/package/run.sh package ubuntu-22.04-arm ${{ github.event.inputs.build_type }}
- name: "Upload package"
uses: actions/upload-artifact@v4
with:
name: ubuntu-22.04-aarch64
path: build/output/ubuntu-22.04-arm/memgraph*.deb
upload-to-s3:
# Only run the upload if a version was specified. Allows for runs without uploading.
if: "${{ github.event.inputs.memgraph_version != '' }}"
needs: [amzn-2, centos-7, centos-9, debian-10, debian-11, debian-11-arm, debian-11-platform, docker, fedora-36, ubuntu-18_04, ubuntu-20_04, ubuntu-22_04, ubuntu-22_04-arm]
runs-on: ubuntu-latest
steps:
- name: Download artifacts
uses: actions/download-artifact@v4
with:
# name: # if name input parameter is not provided, all artifacts are downloaded
# and put in directories named after each one.
path: build/output/release
- name: Upload to S3
uses: jakejarvis/s3-sync-action@v0.5.1
env:
AWS_S3_BUCKET: "download.memgraph.com"
AWS_ACCESS_KEY_ID: ${{ secrets.S3_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: "eu-west-1"
SOURCE_DIR: "build/output/release"
DEST_DIR: "memgraph/v${{ github.event.inputs.memgraph_version }}/"


@@ -1,85 +0,0 @@
name: Run performance benchmarks manually
on:
workflow_dispatch:
jobs:
performance_benchmarks:
name: "Performance benchmarks"
runs-on: [self-hosted, Linux, X64, Diff, Gen7]
env:
THREADS: 24
MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }}
MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }}
steps:
- name: Set up repository
uses: actions/checkout@v4
with:
# Number of commits to fetch. `0` indicates all history for all
# branches and tags. (default: 1)
fetch-depth: 0
- name: Build release binaries
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Initialize dependencies.
./init
# Build only memgraph release binaries.
cd build
cmake -DCMAKE_BUILD_TYPE=release ..
make -j$THREADS
- name: Get branch name (merge)
if: github.event_name != 'pull_request'
shell: bash
run: echo "BRANCH_NAME=$(echo ${GITHUB_REF#refs/heads/} | tr / -)" >> $GITHUB_ENV
- name: Get branch name (pull request)
if: github.event_name == 'pull_request'
shell: bash
run: echo "BRANCH_NAME=$(echo ${GITHUB_HEAD_REF} | tr / -)" >> $GITHUB_ENV
- name: Run benchmarks
run: |
cd tests/mgbench
./benchmark.py vendor-native --num-workers-for-benchmark 12 --export-results benchmark_result.json pokec/medium/*/*
./benchmark.py vendor-native --num-workers-for-benchmark 1 --export-results benchmark_supernode.json supernode
./benchmark.py vendor-native --num-workers-for-benchmark 1 --export-results benchmark_high_write_set_property.json high_write_set_property
./benchmark.py vendor-native --num-workers-for-benchmark 12 --export-results benchmark_cartesian.json cartesian
- name: Upload benchmark results
run: |
cd tools/bench-graph-client
virtualenv -p python3 ve3
source ve3/bin/activate
pip install -r requirements.txt
./main.py --benchmark-name "mgbench" \
--benchmark-results "../../tests/mgbench/benchmark_result.json" \
--github-run-id "${{ github.run_id }}" \
--github-run-number "${{ github.run_number }}" \
--head-branch-name "${{ env.BRANCH_NAME }}"
./main.py --benchmark-name "supernode" \
--benchmark-results "../../tests/mgbench/benchmark_supernode.json" \
--github-run-id "${{ github.run_id }}" \
--github-run-number "${{ github.run_number }}" \
--head-branch-name "${{ env.BRANCH_NAME }}"
./main.py --benchmark-name "high_write_set_property" \
--benchmark-results "../../tests/mgbench/benchmark_high_write_set_property.json" \
--github-run-id "${{ github.run_id }}" \
--github-run-number "${{ github.run_number }}" \
--head-branch-name "${{ env.BRANCH_NAME }}"
./main.py --benchmark-name "cartesian" \
--benchmark-results "../../tests/mgbench/cartesian.json" \
--github-run-id "${{ github.run_id }}" \
--github-run-number "${{ github.run_number }}" \
--head-branch-name "${{ env.BRANCH_NAME }}"


@@ -1,208 +0,0 @@
name: Release build test
concurrency:
group: ${{ github.workflow }}-${{ github.ref_name }}
cancel-in-progress: true
on:
workflow_dispatch:
inputs:
build_type:
type: choice
description: "Memgraph Build type. Default value is Release."
default: 'Release'
options:
- Release
- RelWithDebInfo
push:
branches:
- "release/**"
tags:
- "v*.*.*-rc*"
- "v*.*-rc*"
schedule:
# UTC
- cron: "0 22 * * *"
env:
THREADS: 24
MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }}
MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }}
BUILD_TYPE: ${{ github.event.inputs.build_type || 'Release' }}
jobs:
Debian10:
uses: ./.github/workflows/release_debian10.yaml
with:
build_type: ${{ github.event.inputs.build_type || 'Release' }}
secrets: inherit
Ubuntu20_04:
uses: ./.github/workflows/release_ubuntu2004.yaml
with:
build_type: ${{ github.event.inputs.build_type || 'Release' }}
secrets: inherit
PackageDebian10:
if: github.ref_type == 'tag'
needs: [Debian10]
runs-on: [self-hosted, DockerMgBuild, X64]
timeout-minutes: 60
steps:
- name: "Set up repository"
uses: actions/checkout@v4
with:
fetch-depth: 0 # Required because of release/get_version.py
- name: "Build package"
run: |
./release/package/run.sh package debian-10 $BUILD_TYPE
- name: Upload to S3
uses: jakejarvis/s3-sync-action@v0.5.1
env:
AWS_S3_BUCKET: "deps.memgraph.io"
AWS_ACCESS_KEY_ID: ${{ secrets.S3_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: "eu-west-1"
SOURCE_DIR: "build/output"
DEST_DIR: "memgraph-unofficial/${{ github.ref_name }}/"
- name: "Upload package"
uses: actions/upload-artifact@v4
with:
name: debian-10
path: build/output/debian-10/memgraph*.deb
PackageUbuntu20_04:
if: github.ref_type == 'tag'
needs: [Ubuntu20_04]
runs-on: [self-hosted, DockerMgBuild, X64]
timeout-minutes: 60
steps:
- name: "Set up repository"
uses: actions/checkout@v4
with:
fetch-depth: 0 # Required because of release/get_version.py
- name: "Build package"
run: |
./release/package/run.sh package ubuntu-22.04 $BUILD_TYPE
- name: Upload to S3
uses: jakejarvis/s3-sync-action@v0.5.1
env:
AWS_S3_BUCKET: "deps.memgraph.io"
AWS_ACCESS_KEY_ID: ${{ secrets.S3_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: "eu-west-1"
SOURCE_DIR: "build/output"
DEST_DIR: "memgraph-unofficial/${{ github.ref_name }}/"
- name: "Upload package"
uses: actions/upload-artifact@v4
with:
name: ubuntu-22.04
path: build/output/ubuntu-22.04/memgraph*.deb
PackageUbuntu20_04_ARM:
if: github.ref_type == 'tag'
needs: [Ubuntu20_04]
runs-on: [self-hosted, DockerMgBuild, ARM64]
# M1 Mac mini is sometimes slower
timeout-minutes: 150
steps:
- name: "Set up repository"
uses: actions/checkout@v4
with:
fetch-depth: 0 # Required because of release/get_version.py
- name: "Build package"
run: |
./release/package/run.sh package ubuntu-22.04-arm $BUILD_TYPE
- name: "Upload package"
uses: actions/upload-artifact@v4
with:
name: ubuntu-22.04-aarch64
path: build/output/ubuntu-22.04-arm/memgraph*.deb
PushToS3Ubuntu20_04_ARM:
if: github.ref_type == 'tag'
needs: [PackageUbuntu20_04_ARM]
runs-on: ubuntu-latest
steps:
- name: Download package
uses: actions/download-artifact@v4
with:
name: ubuntu-22.04-aarch64
path: build/output/release
- name: Upload to S3
uses: jakejarvis/s3-sync-action@v0.5.1
env:
AWS_S3_BUCKET: "deps.memgraph.io"
AWS_ACCESS_KEY_ID: ${{ secrets.S3_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: "eu-west-1"
SOURCE_DIR: "build/output/release"
DEST_DIR: "memgraph-unofficial/${{ github.ref_name }}/"
PackageDebian11:
if: github.ref_type == 'tag'
needs: [Debian10, Ubuntu20_04]
runs-on: [self-hosted, DockerMgBuild, X64]
timeout-minutes: 60
steps:
- name: "Set up repository"
uses: actions/checkout@v4
with:
fetch-depth: 0 # Required because of release/get_version.py
- name: "Build package"
run: |
./release/package/run.sh package debian-11 $BUILD_TYPE
- name: Upload to S3
uses: jakejarvis/s3-sync-action@v0.5.1
env:
AWS_S3_BUCKET: "deps.memgraph.io"
AWS_ACCESS_KEY_ID: ${{ secrets.S3_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: "eu-west-1"
SOURCE_DIR: "build/output"
DEST_DIR: "memgraph-unofficial/${{ github.ref_name }}/"
- name: "Upload package"
uses: actions/upload-artifact@v4
with:
name: debian-11
path: build/output/debian-11/memgraph*.deb
PackageDebian11_ARM:
if: github.ref_type == 'tag'
needs: [Debian10, Ubuntu20_04]
runs-on: [self-hosted, DockerMgBuild, ARM64]
# M1 Mac mini is sometimes slower
timeout-minutes: 150
steps:
- name: "Set up repository"
uses: actions/checkout@v4
with:
fetch-depth: 0 # Required because of release/get_version.py
- name: "Build package"
run: |
./release/package/run.sh package debian-11-arm $BUILD_TYPE
- name: "Upload package"
uses: actions/upload-artifact@v4
with:
name: debian-11-aarch64
path: build/output/debian-11-arm/memgraph*.deb
PushToS3Debian11_ARM:
if: github.ref_type == 'tag'
needs: [PackageDebian11_ARM]
runs-on: ubuntu-latest
steps:
- name: Download package
uses: actions/download-artifact@v4
with:
name: debian-11-aarch64
path: build/output/release
- name: Upload to S3
uses: jakejarvis/s3-sync-action@v0.5.1
env:
AWS_S3_BUCKET: "deps.memgraph.io"
AWS_ACCESS_KEY_ID: ${{ secrets.S3_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: "eu-west-1"
SOURCE_DIR: "build/output/release"
DEST_DIR: "memgraph-unofficial/${{ github.ref_name }}/"


@@ -1,463 +0,0 @@
name: Release Debian 10
on:
workflow_call:
inputs:
build_type:
type: string
description: "Memgraph Build type. Default value is Release."
default: 'Release'
workflow_dispatch:
inputs:
build_type:
type: choice
description: "Memgraph Build type. Default value is Release."
default: 'Release'
options:
- Release
- RelWithDebInfo
env:
OS: "Debian10"
THREADS: 24
MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }}
MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }}
BUILD_TYPE: ${{ github.event.inputs.build_type || 'Release' }}
jobs:
community_build:
name: "Community build"
runs-on: [self-hosted, Linux, X64, Debian10]
timeout-minutes: 60
steps:
- name: Set up repository
uses: actions/checkout@v4
with:
# Number of commits to fetch. `0` indicates all history for all
# branches and tags. (default: 1)
fetch-depth: 0
- name: Build community binaries
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Initialize dependencies.
./init
# Build community binaries.
cd build
cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DMG_ENTERPRISE=OFF ..
make -j$THREADS
- name: Run unit tests
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Run unit tests.
cd build
ctest -R memgraph__unit --output-on-failure
coverage_build:
name: "Coverage build"
runs-on: [self-hosted, Linux, X64, Debian10]
env:
THREADS: 24
MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }}
MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }}
timeout-minutes: 60
steps:
- name: Set up repository
uses: actions/checkout@v4
with:
# Number of commits to fetch. `0` indicates all history for all
# branches and tags. (default: 1)
fetch-depth: 0
- name: Build coverage binaries
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Initialize dependencies.
./init
# Build coverage binaries.
cd build
cmake -DTEST_COVERAGE=ON ..
make -j$THREADS memgraph__unit
- name: Run unit tests
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Run unit tests.
cd build
ctest -R memgraph__unit --output-on-failure
- name: Compute code coverage
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Compute code coverage.
cd tools/github
./coverage_convert
# Package code coverage.
cd generated
tar -czf code_coverage.tar.gz coverage.json html report.json summary.rmu
- name: Save code coverage
uses: actions/upload-artifact@v4
with:
name: "Code coverage(Coverage build)-${{ env.OS }}"
path: tools/github/generated/code_coverage.tar.gz
debug_build:
name: "Debug build"
runs-on: [self-hosted, Linux, X64, Debian10]
timeout-minutes: 60
steps:
- name: Set up repository
uses: actions/checkout@v4
with:
# Number of commits to fetch. `0` indicates all history for all
# branches and tags. (default: 1)
fetch-depth: 0
- name: Build debug binaries
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Initialize dependencies.
./init
# Build debug binaries.
cd build
cmake ..
make -j$THREADS
- name: Run leftover CTest tests
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Run leftover CTest tests (all except unit and benchmark tests).
cd build
ctest -E "(memgraph__unit|memgraph__benchmark)" --output-on-failure
- name: Run drivers tests
run: |
./tests/drivers/run.sh
- name: Run cppcheck and clang-format
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Run cppcheck and clang-format.
cd tools/github
./cppcheck_and_clang_format diff
- name: Save cppcheck and clang-format errors
uses: actions/upload-artifact@v4
with:
name: "Code coverage(Debug build)-${{ env.OS }}"
path: tools/github/cppcheck_and_clang_format.txt
debug_integration_test:
name: "Debug integration tests"
runs-on: [self-hosted, Linux, X64, Debian10]
timeout-minutes: 60
steps:
- name: Set up repository
uses: actions/checkout@v4
with:
# Number of commits to fetch. `0` indicates all history for all
# branches and tags. (default: 1)
fetch-depth: 0
- name: Build debug binaries
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Initialize dependencies.
./init
# Build debug binaries.
cd build
cmake ..
make -j$THREADS
- name: Run integration tests
run: |
tests/integration/run.sh
release_build:
name: "Release build"
runs-on: [self-hosted, Linux, X64, Debian10]
timeout-minutes: 60
steps:
- name: Set up repository
uses: actions/checkout@v4
with:
# Number of commits to fetch. `0` indicates all history for all
# branches and tags. (default: 1)
fetch-depth: 0
- name: Build release binaries
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Initialize dependencies.
./init
# Build release binaries.
cd build
cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE ..
make -j$THREADS
- name: Create enterprise DEB package
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
cd build
# Create mgconsole.
# We use -B to force the build.
make -j$THREADS -B mgconsole
# Create enterprise DEB package.
mkdir output && cd output
cpack -G DEB --config ../CPackConfig.cmake
- name: Save enterprise DEB package
uses: actions/upload-artifact@v4
with:
name: "Enterprise DEB package-${{ env.OS}}"
path: build/output/memgraph*.deb
- name: Run GQL Behave tests
run: |
cd tests
./setup.sh /opt/toolchain-v4/activate
cd gql_behave
./continuous_integration
- name: Save quality assurance status
uses: actions/upload-artifact@v4
with:
name: "GQL Behave Status-${{ env.OS }}"
path: |
tests/gql_behave/gql_behave_status.csv
tests/gql_behave/gql_behave_status.html
- name: Run unit tests
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Run unit tests.
cd build
ctest -R memgraph__unit --output-on-failure
release_benchmark_tests:
name: "Release Benchmark Tests"
runs-on: [self-hosted, Linux, X64, Debian10]
timeout-minutes: 60
steps:
- name: Set up repository
uses: actions/checkout@v4
with:
# Number of commits to fetch. `0` indicates all history for all
# branches and tags. (default: 1)
fetch-depth: 0
- name: Build release binaries
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Initialize dependencies.
./init
# Build release binaries
cd build
cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE ..
make -j$THREADS
- name: Run micro benchmark tests
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Run micro benchmark tests.
cd build
# The `eval` benchmark needs a large stack limit.
ulimit -s 262144
ctest -R memgraph__benchmark -V
- name: Run macro benchmark tests
run: |
cd tests/macro_benchmark
./harness QuerySuite MemgraphRunner \
--groups aggregation 1000_create unwind_create dense_expand match \
--no-strict
- name: Run parallel macro benchmark tests
run: |
cd tests/macro_benchmark
./harness QueryParallelSuite MemgraphRunner \
--groups aggregation_parallel create_parallel bfs_parallel \
--num-database-workers 9 --num-clients-workers 30 \
--no-strict
release_e2e_test:
name: "Release End-to-end Test"
runs-on: [self-hosted, Linux, X64, Debian10]
timeout-minutes: 60
steps:
- name: Set up repository
uses: actions/checkout@v4
with:
# Number of commits to fetch. `0` indicates all history for all
# branches and tags. (default: 1)
fetch-depth: 0
- name: Build release binaries
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Initialize dependencies.
./init
# Build release binaries
cd build
cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE ..
make -j$THREADS
- name: Ensure Kafka and Pulsar are up
run: |
cd tests/e2e/streams/kafka
docker-compose up -d
cd ../pulsar
docker-compose up -d
- name: Run e2e tests
run: |
cd tests
./setup.sh /opt/toolchain-v4/activate
source ve3/bin/activate_e2e
cd e2e
./run.sh
- name: Ensure Kafka and Pulsar are down
if: always()
run: |
cd tests/e2e/streams/kafka
docker-compose down
cd ../pulsar
docker-compose down
release_durability_stress_tests:
name: "Release durability and stress tests"
runs-on: [self-hosted, Linux, X64, Debian10]
timeout-minutes: 60
steps:
- name: Set up repository
uses: actions/checkout@v4
with:
# Number of commits to fetch. `0` indicates all history for all
# branches and tags. (default: 1)
fetch-depth: 0
- name: Build release binaries
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Initialize dependencies.
./init
# Build release binaries.
cd build
cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE ..
make -j$THREADS
- name: Run stress test (plain)
run: |
cd tests/stress
./continuous_integration
- name: Run stress test (SSL)
run: |
cd tests/stress
./continuous_integration --use-ssl
- name: Run durability test (plain)
run: |
cd tests/stress
source ve3/bin/activate
python3 durability --num-steps 5
- name: Run durability test (large)
run: |
cd tests/stress
source ve3/bin/activate
python3 durability --num-steps 20
release_jepsen_test:
name: "Release Jepsen Test"
runs-on: [self-hosted, Linux, X64, Debian10, JepsenControl]
timeout-minutes: 60
steps:
- name: Set up repository
uses: actions/checkout@v4
with:
# Number of commits to fetch. `0` indicates all history for all
# branches and tags. (default: 1)
fetch-depth: 0
- name: Build release binaries
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Initialize dependencies.
./init
# Build only memgraph release binary.
cd build
cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE ..
make -j$THREADS memgraph
- name: Refresh Jepsen Cluster
run: |
cd tests/jepsen
./run.sh cluster-refresh
- name: Run Jepsen tests
run: |
cd tests/jepsen
./run.sh test-all-individually --binary ../../build/memgraph --ignore-run-stdout-logs --ignore-run-stderr-logs
- name: Save Jepsen report
uses: actions/upload-artifact@v4
if: ${{ always() }}
with:
name: "Jepsen Report-${{ env.OS }}"
path: tests/jepsen/Jepsen.tar.gz


@@ -1,69 +0,0 @@
name: Publish Docker images
on:
workflow_dispatch:
inputs:
version:
description: "Memgraph binary version to publish on DockerHub."
required: true
force_release:
type: boolean
required: false
default: false
jobs:
docker_publish:
runs-on: ubuntu-latest
env:
DOCKER_ORGANIZATION_NAME: memgraph
DOCKER_REPOSITORY_NAME: memgraph
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v2
- name: Log in to Docker Hub
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Download memgraph binary
run: |
cd release/docker
curl -L https://download.memgraph.com/memgraph/v${{ github.event.inputs.version }}/debian-11/memgraph_${{ github.event.inputs.version }}-1_amd64.deb > memgraph-amd64.deb
curl -L https://download.memgraph.com/memgraph/v${{ github.event.inputs.version }}/debian-11-aarch64/memgraph_${{ github.event.inputs.version }}-1_arm64.deb > memgraph-arm64.deb
- name: Check if specified version is already pushed
run: |
EXISTS=$(docker manifest inspect $DOCKER_ORGANIZATION_NAME/$DOCKER_REPOSITORY_NAME:${{ github.event.inputs.version }} > /dev/null; echo $?)
echo $EXISTS
if [[ ${EXISTS} -eq 0 ]]; then
echo 'The specified version has already been released to DockerHub.'
if [[ ${{ github.event.inputs.force_release }} = true ]]; then
echo 'Forcing the release!'
else
echo 'Stopping the release!'
exit 1
fi
else
echo 'All good, the specified version has not been released to DockerHub.'
fi
- name: Build & push docker images
run: |
cd release/docker
docker buildx build \
--build-arg BINARY_NAME="memgraph-" \
--build-arg EXTENSION="deb" \
--platform linux/amd64,linux/arm64 \
--tag $DOCKER_ORGANIZATION_NAME/$DOCKER_REPOSITORY_NAME:${{ github.event.inputs.version }} \
--tag $DOCKER_ORGANIZATION_NAME/$DOCKER_REPOSITORY_NAME:latest \
--file memgraph_deb.dockerfile \
--push .
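Once the buildx push finishes, the multi-arch result can be spot-checked with the same manifest command used in the version check above (the tag is an example):

# Confirm that both linux/amd64 and linux/arm64 images exist under the pushed tag.
docker manifest inspect memgraph/memgraph:latest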


@@ -1,63 +0,0 @@
name: "Mgbench Bolt Client Publish Docker Image"
on:
workflow_dispatch:
inputs:
version:
description: "Mgbench bolt client version to publish on Dockerhub."
required: true
force_release:
type: boolean
required: false
default: false
jobs:
mgbench_docker_publish:
runs-on: ubuntu-latest
env:
DOCKER_ORGANIZATION_NAME: memgraph
DOCKER_REPOSITORY_NAME: mgbench-client
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v2
- name: Log in to Docker Hub
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Check if specified version is already pushed
run: |
EXISTS=$(docker manifest inspect $DOCKER_ORGANIZATION_NAME/$DOCKER_REPOSITORY_NAME:${{ github.event.inputs.version }} > /dev/null; echo $?)
echo $EXISTS
if [[ ${EXISTS} -eq 0 ]]; then
echo 'The specified version has already been released to DockerHub.'
if [[ ${{ github.event.inputs.force_release }} = true ]]; then
echo 'Forcing the release!'
else
echo 'Stopping the release!'
exit 1
fi
else
echo 'All good, the specified version has not been released to DockerHub.'
fi
- name: Build & push docker images
run: |
cd tests/mgbench
docker buildx build \
--build-arg TOOLCHAIN_VERSION=toolchain-v4 \
--platform linux/amd64,linux/arm64 \
--tag $DOCKER_ORGANIZATION_NAME/$DOCKER_REPOSITORY_NAME:${{ github.event.inputs.version }} \
--tag $DOCKER_ORGANIZATION_NAME/$DOCKER_REPOSITORY_NAME:latest \
--file Dockerfile.mgbench_client \
--push .


@@ -1,417 +0,0 @@
name: Release Ubuntu 20.04
on:
workflow_call:
inputs:
build_type:
type: string
description: "Memgraph Build type. Default value is Release."
default: 'Release'
workflow_dispatch:
inputs:
build_type:
type: choice
description: "Memgraph Build type. Default value is Release."
default: 'Release'
options:
- Release
- RelWithDebInfo
env:
OS: "Ubuntu 20.04"
THREADS: 24
MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }}
MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }}
BUILD_TYPE: ${{ github.event.inputs.build_type || 'Release' }}
jobs:
community_build:
name: "Community build"
runs-on: [self-hosted, Linux, X64, Ubuntu20.04]
timeout-minutes: 60
steps:
- name: Set up repository
uses: actions/checkout@v4
with:
# Number of commits to fetch. `0` indicates all history for all
# branches and tags. (default: 1)
fetch-depth: 0
- name: Build community binaries
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Initialize dependencies.
./init
# Build community binaries.
cd build
cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DMG_ENTERPRISE=OFF ..
make -j$THREADS
- name: Run unit tests
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Run unit tests.
cd build
ctest -R memgraph__unit --output-on-failure
coverage_build:
name: "Coverage build"
runs-on: [self-hosted, Linux, X64, Ubuntu20.04]
timeout-minutes: 60
steps:
- name: Set up repository
uses: actions/checkout@v4
with:
# Number of commits to fetch. `0` indicates all history for all
# branches and tags. (default: 1)
fetch-depth: 0
- name: Build coverage binaries
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Initialize dependencies.
./init
# Build coverage binaries.
cd build
cmake -DTEST_COVERAGE=ON ..
make -j$THREADS memgraph__unit
- name: Run unit tests
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Run unit tests.
cd build
ctest -R memgraph__unit --output-on-failure
- name: Compute code coverage
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Compute code coverage.
cd tools/github
./coverage_convert
# Package code coverage.
cd generated
tar -czf code_coverage.tar.gz coverage.json html report.json summary.rmu
- name: Save code coverage
uses: actions/upload-artifact@v4
with:
name: "Code coverage(Coverage build)-${{ env.OS }}"
path: tools/github/generated/code_coverage.tar.gz
debug_build:
name: "Debug build"
runs-on: [self-hosted, Linux, X64, Ubuntu20.04]
timeout-minutes: 60
steps:
- name: Set up repository
uses: actions/checkout@v4
with:
# Number of commits to fetch. `0` indicates all history for all
# branches and tags. (default: 1)
fetch-depth: 0
- name: Build debug binaries
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Initialize dependencies.
./init
# Build debug binaries.
cd build
cmake ..
make -j$THREADS
- name: Run leftover CTest tests
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Run leftover CTest tests (all except unit and benchmark tests).
cd build
ctest -E "(memgraph__unit|memgraph__benchmark)" --output-on-failure
- name: Run drivers tests
run: |
./tests/drivers/run.sh
- name: Run cppcheck and clang-format
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Run cppcheck and clang-format.
cd tools/github
./cppcheck_and_clang_format diff
- name: Save cppcheck and clang-format errors
uses: actions/upload-artifact@v4
with:
name: "Code coverage(Debug build)-${{ env.OS }}"
path: tools/github/cppcheck_and_clang_format.txt
debug_integration_test:
name: "Debug integration tests"
runs-on: [self-hosted, Linux, X64, Ubuntu20.04]
timeout-minutes: 60
steps:
- name: Set up repository
uses: actions/checkout@v4
with:
# Number of commits to fetch. `0` indicates all history for all
# branches and tags. (default: 1)
fetch-depth: 0
- name: Build debug binaries
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Initialize dependencies.
./init
# Build debug binaries.
cd build
cmake ..
make -j$THREADS
- name: Run integration tests
run: |
tests/integration/run.sh
release_build:
name: "Release build"
runs-on: [self-hosted, Linux, X64, Ubuntu20.04]
timeout-minutes: 60
steps:
- name: Set up repository
uses: actions/checkout@v4
with:
# Number of commits to fetch. `0` indicates all history for all
# branches and tags. (default: 1)
fetch-depth: 0
- name: Build release binaries
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Initialize dependencies.
./init
# Build release binaries.
cd build
cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE ..
make -j$THREADS
- name: Create enterprise DEB package
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
cd build
# Create mgconsole.
# We use the -B flag to force the build.
make -j$THREADS -B mgconsole
# Create enterprise DEB package.
mkdir output && cd output
cpack -G DEB --config ../CPackConfig.cmake
- name: Save enterprise DEB package
uses: actions/upload-artifact@v4
with:
name: "Enterprise DEB package-${{ env.OS }}"
path: build/output/memgraph*.deb
- name: Run GQL Behave tests
run: |
cd tests
./setup.sh /opt/toolchain-v4/activate
cd gql_behave
./continuous_integration
- name: Save quality assurance status
uses: actions/upload-artifact@v4
with:
name: "GQL Behave Status-${{ env.OS }}"
path: |
tests/gql_behave/gql_behave_status.csv
tests/gql_behave/gql_behave_status.html
- name: Run unit tests
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Run unit tests.
cd build
ctest -R memgraph__unit --output-on-failure
release_benchmark_tests:
name: "Release Benchmark Tests"
runs-on: [self-hosted, Linux, X64, Ubuntu20.04]
timeout-minutes: 60
steps:
- name: Set up repository
uses: actions/checkout@v4
with:
# Number of commits to fetch. `0` indicates all history for all
# branches and tags. (default: 1)
fetch-depth: 0
- name: Build release binaries
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Initialize dependencies.
./init
# Build release binaries
cd build
cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE ..
make -j$THREADS
- name: Run micro benchmark tests
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Run micro benchmark tests.
cd build
# The `eval` benchmark needs a large stack limit.
ulimit -s 262144
ctest -R memgraph__benchmark -V
- name: Run macro benchmark tests
run: |
cd tests/macro_benchmark
./harness QuerySuite MemgraphRunner \
--groups aggregation 1000_create unwind_create dense_expand match \
--no-strict
- name: Run parallel macro benchmark tests
run: |
cd tests/macro_benchmark
./harness QueryParallelSuite MemgraphRunner \
--groups aggregation_parallel create_parallel bfs_parallel \
--num-database-workers 9 --num-clients-workers 30 \
--no-strict
release_e2e_test:
name: "Release End-to-end Test"
runs-on: [self-hosted, Linux, X64, Ubuntu20.04]
timeout-minutes: 60
steps:
- name: Set up repository
uses: actions/checkout@v4
with:
# Number of commits to fetch. `0` indicates all history for all
# branches and tags. (default: 1)
fetch-depth: 0
- name: Build release binaries
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Initialize dependencies.
./init
# Build release binaries
cd build
cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE ..
make -j$THREADS
- name: Ensure Kafka and Pulsar are up
run: |
cd tests/e2e/streams/kafka
docker-compose up -d
cd ../pulsar
docker-compose up -d
- name: Run e2e tests
run: |
cd tests
./setup.sh /opt/toolchain-v4/activate
source ve3/bin/activate_e2e
cd e2e
./run.sh
- name: Ensure Kafka and Pulsar are down
if: always()
run: |
cd tests/e2e/streams/kafka
docker-compose down
cd ../pulsar
docker-compose down
release_durability_stress_tests:
name: "Release durability and stress tests"
runs-on: [self-hosted, Linux, X64, Ubuntu20.04]
timeout-minutes: 60
steps:
- name: Set up repository
uses: actions/checkout@v4
with:
# Number of commits to fetch. `0` indicates all history for all
# branches and tags. (default: 1)
fetch-depth: 0
- name: Build release binaries
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Initialize dependencies.
./init
# Build release binaries.
cd build
cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE ..
make -j$THREADS
- name: Run stress test (plain)
run: |
cd tests/stress
./continuous_integration
- name: Run stress test (SSL)
run: |
cd tests/stress
./continuous_integration --use-ssl
- name: Run durability test (plain)
run: |
cd tests/stress
source ve3/bin/activate
python3 durability --num-steps 5
- name: Run durability test (large)
run: |
cd tests/stress
source ve3/bin/activate
python3 durability --num-steps 20

View File

@ -1,68 +0,0 @@
name: Stress test large
concurrency:
group: ${{ github.workflow }}-${{ github.ref_name }}
cancel-in-progress: true
on:
workflow_dispatch:
inputs:
build_type:
type: choice
description: "Memgraph Build type. Default value is Release."
default: 'Release'
options:
- Release
- RelWithDebInfo
push:
tags:
- "v*.*.*-rc*"
- "v*.*-rc*"
schedule:
- cron: "0 22 * * *"
env:
THREADS: 24
MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }}
MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }}
BUILD_TYPE: ${{ github.event.inputs.build_type || 'Release' }}
jobs:
stress_test_large:
name: "Stress test large"
timeout-minutes: 720
strategy:
matrix:
os: [Debian10, Ubuntu20.04]
extra: [BigMemory, Gen8]
exclude:
- os: Debian10
extra: Gen8
- os: Ubuntu20.04
extra: BigMemory
runs-on: [self-hosted, Linux, X64, "${{ matrix.os }}", "${{ matrix.extra }}"]
steps:
- name: Set up repository
uses: actions/checkout@v4
with:
# Number of commits to fetch. `0` indicates all history for all
# branches and tags. (default: 1)
fetch-depth: 0
- name: Build release binaries
run: |
# Activate toolchain.
source /opt/toolchain-v4/activate
# Initialize dependencies.
./init
# Build release binaries.
cd build
cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE ..
make -j$THREADS
- name: Run stress test (large)
run: |
cd tests/stress
./continuous_integration --large-dataset

View File

@ -1,32 +0,0 @@
name: Upload Package All artifacts to S3
on:
workflow_dispatch:
inputs:
memgraph_version:
description: "Memgraph version to upload as. Format: 'X.Y.Z'"
required: true
run_number:
description: "# of the package_all workflow run to upload artifacts from. Format: '#XYZ'"
required: true
jobs:
upload-to-s3:
runs-on: ubuntu-latest
steps:
- name: Download artifacts
uses: dawidd6/action-download-artifact@v4
with:
workflow: package_all.yaml
workflow_conclusion: success
run_number: "${{ github.event.inputs.run_number }}"
path: build/output/release
- name: Upload to S3
uses: jakejarvis/s3-sync-action@v0.5.1
env:
AWS_S3_BUCKET: "download.memgraph.com"
AWS_ACCESS_KEY_ID: ${{ secrets.S3_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: "eu-west-1"
SOURCE_DIR: "build/output/release"
DEST_DIR: "memgraph/v${{ github.event.inputs.memgraph_version }}/"

50
.gitignore vendored
View File

@ -16,7 +16,8 @@
.ycm_extra_conf.pyc
.temp/
Testing/
/build*/
build
build/
release/examples/build
cmake-build-*
cmake/DownloadProject/
@ -33,32 +34,43 @@ TAGS
*.fas
*.fasl
src/database/distributed/serialization.hpp
src/database/single_node_ha/serialization.hpp
# Cap'n Proto generated files
*.capnp.c++
*.capnp.h
# LCP generated C++ & Cap'n Proto files
*.lcp.cpp
src/database/counters_rpc_messages.capnp
src/database/counters_rpc_messages.hpp
src/database/state_delta.capnp
src/database/state_delta.hpp
src/distributed/bfs_rpc_messages.capnp
src/distributed/bfs_rpc_messages.hpp
src/distributed/coordination_rpc_messages.capnp
src/distributed/coordination_rpc_messages.hpp
src/distributed/data_rpc_messages.capnp
src/distributed/data_rpc_messages.hpp
src/distributed/durability_rpc_messages.capnp
src/distributed/durability_rpc_messages.hpp
src/distributed/dynamic_worker_rpc_messages.hpp
src/distributed/index_rpc_messages.capnp
src/distributed/index_rpc_messages.hpp
src/distributed/plan_rpc_messages.capnp
src/distributed/plan_rpc_messages.hpp
src/distributed/pull_produce_rpc_messages.capnp
src/distributed/pull_produce_rpc_messages.hpp
src/distributed/storage_gc_rpc_messages.capnp
src/distributed/storage_gc_rpc_messages.hpp
src/distributed/token_sharing_rpc_messages.capnp
src/distributed/token_sharing_rpc_messages.hpp
src/distributed/transactional_cache_cleaner_rpc_messages.capnp
src/distributed/transactional_cache_cleaner_rpc_messages.hpp
src/distributed/updates_rpc_messages.capnp
src/distributed/updates_rpc_messages.hpp
src/durability/distributed/state_delta.hpp
src/durability/single_node/state_delta.hpp
src/durability/single_node_ha/state_delta.hpp
src/query/distributed/frontend/semantic/symbol_serialization.hpp
src/query/distributed/plan/ops.hpp
src/raft/log_entry.hpp
src/raft/raft_rpc_messages.hpp
src/raft/snapshot_metadata.hpp
src/raft/storage_info_rpc_messages.hpp
src/query/plan/operator.capnp
src/query/plan/operator.hpp
src/stats/stats_rpc_messages.capnp
src/stats/stats_rpc_messages.hpp
src/storage/distributed/rpc/concurrent_id_mapper_rpc_messages.hpp
src/transactions/distributed/engine_rpc_messages.hpp
/tests/manual/js/transaction_timeout/package-lock.json
/tests/manual/js/transaction_timeout/node_modules/
.vscode/
src/query/frontend/opencypher/grammar/.antlr/*
src/storage/concurrent_id_mapper_rpc_messages.capnp
src/storage/concurrent_id_mapper_rpc_messages.hpp
src/transactions/engine_rpc_messages.capnp
src/transactions/engine_rpc_messages.hpp

View File

@ -1,35 +0,0 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
hooks:
- id: check-yaml
args: [--allow-multiple-documents]
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/psf/black
rev: 23.1.0
hooks:
- id: black
- repo: https://github.com/pycqa/isort
rev: 5.12.0
hooks:
- id: isort
name: isort (python)
args: ["--profile", "black"]
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v13.0.0
hooks:
- id: clang-format
# - repo: local
# hooks:
# - id: clang-tidy
# name: clang-tidy
# description: Runs clang-tidy and checks for errors
# entry: python ./tools/pre-commit/clang-tidy.py
# language: python
# files: ^src/
# types: [c++, text]
# fail_fast: true
# require_serial: true
# args: [--compile_commands_path=build]
# pass_filenames: false

View File

@ -1,22 +0,0 @@
# Path to sources
sonar.sources = .
# sonar.exclusions=
sonar.inclusions=src,include,query_modules
# Path to tests
sonar.tests = tests/
# sonar.test.exclusions=
# sonar.test.inclusions=
# Source encoding
# sonar.sourceEncoding=
# Exclusions for copy-paste detection
# sonar.cpd.exclusions=
# Python version (for python projects only)
# sonar.python.version=
# C++ standard version (for C++ projects only)
# If not specified, it defaults to the latest supported standard
# sonar.cfamily.reportingCppStandardOverride=c++98|c++11|c++14|c++17|c++20

171
.ycm_extra_conf.py Normal file
View File

@ -0,0 +1,171 @@
import os
import os.path
import fnmatch
import logging
import ycm_core
BASE_FLAGS = [
'-Wall',
'-Wextra',
'-Werror',
'-Wno-long-long',
'-Wno-variadic-macros',
'-fexceptions',
'-ferror-limit=10000',
'-std=c++1z',
'-xc++',
'-I/usr/lib/',
'-I/usr/include/',
'-I./src',
'-I./include',
'-I./libs/fmt',
'-I./libs/yaml-cpp',
'-I./libs/glog/include',
'-I./libs/googletest/googletest/include',
'-I./libs/googletest/googlemock/include',
'-I./libs/benchmark/include',
'-I./libs/cereal/include',
# We include cppitertools headers directly from libs directory.
'-I./libs',
'-I./libs/rapidcheck/include',
'-I./libs/antlr4/runtime/Cpp/runtime/src',
'-I./libs/gflags/include',
'-I./experimental/distributed/src',
'-I./libs/postgresql/include',
'-I./libs/bzip2',
'-I./libs/zlib',
'-I./libs/rocksdb/include',
'-I./build/include'
]
SOURCE_EXTENSIONS = [
'.cpp',
'.cxx',
'.cc',
'.c',
'.m',
'.mm'
]
HEADER_EXTENSIONS = [
'.h',
'.hxx',
'.hpp',
'.hh'
]
# set the working directory of YCMD to be this file
os.chdir(os.path.dirname(os.path.realpath(__file__)))
def IsHeaderFile(filename):
extension = os.path.splitext(filename)[1]
return extension in HEADER_EXTENSIONS
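# For a header file, look up the compilation flags of a sibling source file,
# since headers themselves are usually not listed in the compilation database.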
def GetCompilationInfoForFile(database, filename):
if IsHeaderFile(filename):
basename = os.path.splitext(filename)[0]
for extension in SOURCE_EXTENSIONS:
replacement_file = basename + extension
if os.path.exists(replacement_file):
compilation_info = database.GetCompilationInfoForFile(replacement_file)
if compilation_info.compiler_flags_:
return compilation_info
return None
return database.GetCompilationInfoForFile(filename)
def FindNearest(path, target):
candidate = os.path.join(path, target)
if(os.path.isfile(candidate) or os.path.isdir(candidate)):
logging.info("Found nearest " + target + " at " + candidate)
return candidate;
else:
parent = os.path.dirname(os.path.abspath(path));
if(parent == path):
raise RuntimeError("Could not find " + target);
return FindNearest(parent, target)
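# Rewrite relative path flags (-I, -isystem, -iquote, --sysroot=) so that they
# are absolute with respect to the given working directory.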
def MakeRelativePathsInFlagsAbsolute(flags, working_directory):
if not working_directory:
return list(flags)
new_flags = []
make_next_absolute = False
path_flags = [ '-isystem', '-I', '-iquote', '--sysroot=' ]
for flag in flags:
new_flag = flag
if make_next_absolute:
make_next_absolute = False
if not flag.startswith('/'):
new_flag = os.path.join(working_directory, flag)
for path_flag in path_flags:
if flag == path_flag:
make_next_absolute = True
break
if flag.startswith(path_flag):
path = flag[ len(path_flag): ]
new_flag = path_flag + os.path.join(working_directory, path)
break
if new_flag:
new_flags.append(new_flag)
return new_flags
def FlagsForClangComplete(root):
try:
clang_complete_path = FindNearest(root, '.clang_complete')
clang_complete_flags = open(clang_complete_path, 'r').read().splitlines()
return clang_complete_flags
except:
return None
def FlagsForInclude(root):
try:
include_path = FindNearest(root, 'include')
flags = []
for dirroot, dirnames, filenames in os.walk(include_path):
for dir_path in dirnames:
real_path = os.path.join(dirroot, dir_path)
flags = flags + ["-I" + real_path]
return flags
except:
return None
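# Load the nearest compile_commands.json and return the flags recorded for the
# given file, with relative paths made absolute.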
def FlagsForCompilationDatabase(root, filename):
try:
compilation_db_path = FindNearest(root, 'compile_commands.json')
compilation_db_dir = os.path.dirname(compilation_db_path)
logging.info("Set compilation database directory to " + compilation_db_dir)
compilation_db = ycm_core.CompilationDatabase(compilation_db_dir)
if not compilation_db:
logging.info("Compilation database file found but unable to load")
return None
compilation_info = GetCompilationInfoForFile(compilation_db, filename)
if not compilation_info:
logging.info("No compilation info for " + filename + " in compilation database")
return None
return MakeRelativePathsInFlagsAbsolute(
compilation_info.compiler_flags_,
compilation_info.compiler_working_dir_)
except:
return None
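# Entry point called by YouCompleteMe: prefer flags from the compilation
# database, fall back to BASE_FLAGS, then append .clang_complete and include
# directory flags.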
def FlagsForFile(filename):
root = os.path.realpath(filename);
compilation_db_flags = FlagsForCompilationDatabase(root, filename)
if compilation_db_flags:
final_flags = compilation_db_flags
else:
final_flags = BASE_FLAGS
clang_flags = FlagsForClangComplete(root)
if clang_flags:
final_flags = final_flags + clang_flags
include_flags = FlagsForInclude(root)
if include_flags:
final_flags = final_flags + include_flags
return {
'flags': final_flags,
'do_cache': True
}

View File

@ -1,32 +0,0 @@
# Tantivy ADR
**Author**
Marko Budiselic (github.com/gitbuda)
**Status**
APPROVED
**Date**
January 5, 2024
**Problem**
For some Memgraph workloads, text search is a required feature. We don't
want to build a new text search engine because that's not Memgraph's core
value.
**Criteria**
- easy integration with our C++ codebase
- ability to operate in-memory and on-disk
- sufficient features (regex, full-text search, fuzzy search, aggregations over
text data)
- production-ready
**Decision**
None of the known C++ libraries are production-ready. Recent Rust libraries, in
particular [Tantivy](https://github.com/quickwit-oss/tantivy), provide
many more features and are production-ready. We will integrate Tantivy
into the current Memgraph codebase via
[cxx](https://github.com/dtolnay/cxx). **We select Tantivy.**

View File

@ -1,34 +0,0 @@
# NuRaft ADR
**Author**
Marko Budiselic (github.com/gitbuda)
**Status**
PROPOSED
**Date**
January 10, 2024
**Problem**
To enhance Memgraph with the High Availability features requested by
customers, we want reliable coordinators backed by the Raft consensus algorithm. Implementing
Raft correctly and performantly is a very challenging task. Skilled Memgraph
engineers have already tried three times and failed to deliver in a reasonable
timeframe each time (approximately 4 person-weeks of engineering work per attempt).
**Criteria**
- easy integration with our C++ codebase
- heavily tested in production environments
- implementation of performance optimizations on top of the canonical Raft
implementation
**Decision**
There are a few robust C++ implementations of Raft, but only as parts of other
projects or bigger libraries. **We select
[NuRaft](https://github.com/eBay/NuRaft)** because it focuses on delivering
Raft without bloatware, and it's used by
[Clickhouse](https://github.com/ClickHouse/ClickHouse) (a comparable peer to
Memgraph and a very well-established product).

View File

@ -1,38 +0,0 @@
# RocksDB ADR
**Author**
Marko Budiselic (github.com/gitbuda)
**Status**
ACCEPTED
**Date**
January 23, 2024
**Problem**
Interacting with data (reads and writes) on disk in a concurrent, safe, and
fast way is a challenging task. Implementing all the low-level primitives to
interact with various disk hardware efficiently consumes significant
engineering effort. Whenever Memgraph has to store data on disk (or any
other colder-than-RAM storage system), the problem is how to do that in the
least amount of development time while satisfying all functional requirements
(often performance).
**Criteria**
- working efficiently in a highly concurrent environment
- easy integration with Memgraph's C++ codebase
- providing low-level key-value API
- heavily tested in production environments
- providing abstractions for the storage hardware (even for cloud-based
storage like S3)
**Decision**
There are a few robust key-value stores, but finding one that is
production-ready and compatible with Memgraph's C++ codebase is challenging.
**We select [RocksDB](https://github.com/facebook/rocksdb)** because it
delivers a robust API to manage data on disk, it's battle-tested in many
production environments (many database systems embed RocksDB), and
it's the most compatible option.

View File

@ -1,67 +0,0 @@
# Architecture Decision Records
Also known as ADRs. This practice has become widespread in many
high-performing engineering teams. It is a technique for communicating
between software engineers. ADRs provide a clear and documented
history of architectural choices, ensuring that everyone on the
team is on the same page. This improves communication and reduces
misunderstandings. The act of recording decisions encourages
thoughtful consideration before making choices. This can lead to
more robust and better-informed architectural decisions.
Links must be created, pointing both to and from the Github Issues
and/or the Notion Program Management "Initiative" database.
ADRs are complementary to any tech specs that get written while
designing a solution. ADRs are very short and to the point, while
tech specs will include diagrams and can be quite verbose.
## HOWTO
Each ADR will be assigned a monotonically increasing unique numeric
identifier, which will be zero-padded to 3 digits. Each ADR will
be in a single markdown file containing no more than one page of
text, and the filename will start with that unique identifier,
followed by a snake case phrase summarizing the problem. For
example: `001_architecture_decision_records.md` or
`002_big_integration_cap_theorem.md`.
We want to use an ADR when:
1. Significant Impact: This includes choices that affect scalability, performance, or fundamental design principles.
1. Long-Term Ramifications: When a decision is expected to have long-term ramifications or is difficult to reverse.
1. Architectural Principles: ADRs are suitable for documenting decisions related to architectural principles, frameworks, or patterns that shape the system's structure.
1. Controversial Choices: When a decision is likely to be controversial or may require justification in the future.
The most senior engineer on a project will evaluate and decide
whether or not an ADR is needed.
## Do
1. Keep them brief and concise.
1. Explain the trade-offs.
1. Each ADR should be about one AD, not multiple ADs.
1. Don't alter existing information in an ADR. Instead, amend the ADR by adding new information, or supersede the ADR by creating a new ADR.
1. Explain your organization's situation and business priorities.
1. Include rationale and considerations based on social and skills makeups of your teams.
1. Include pros and cons that are relevant, and describe them in terms that align with your needs and goals.
1. Explain what follows from making the decision. This can include the effects, outcomes, outputs, follow ups, and more.
## Don't
1. Try to guess what the executive leader wants, and then attempt to please them. Be objective.
1. Try to solve everything all at once. A pretty good solution now is MUCH BETTER than a perfect solution later. Carpe diem!
1. Hide any doubts or unanswered questions.
1. Make it a sales pitch. Everything has upsides and downsides - be authentic and honest about them.
1. Perform merely a superficial investigation. If an ADR doesn't call for some deep thinking, then it probably shouldn't exist.
1. Ignore the long-term costs such as performance, tech debt or hardware and maintenance.
1. Get tunnel vision where creative or surprising approaches are not explored.
# Template - use the format below for each new ADR (a scaffolding sketch follows the list)
1. **Author** - who has written the ADR
1. **Status** - one of: PROPOSED, ACCEPTED, REJECTED, SUPERSEDED-BY or DEPRECATED
1. **Date** - when the status was most recently updated
1. **Problem** - a concise paragraph explaining the context
1. **Criteria** - a list of the two or three metrics by which the solution was evaluated, and their relative weights (importance)
1. **Decision** - what was chosen as the way forward, and what the consequences are of the decision
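
A minimal sketch of how a new ADR could be scaffolded following the naming scheme and template above; the snippet, target file name, and placeholders are illustrative only and not part of any repository tooling.

```bash
# Compute the next zero-padded identifier from the existing ADR files.
next=$(printf "%03d" $(( $(ls [0-9][0-9][0-9]_*.md 2>/dev/null | wc -l) + 1 )))

# Create a one-page markdown file pre-filled with the template fields.
cat > "${next}_short_problem_summary.md" <<'EOF'
# <Short problem summary> ADR

**Author**
<author>

**Status**
PROPOSED

**Date**
<date>

**Problem**
<one concise paragraph explaining the context>

**Criteria**
<two or three metrics and their relative weights>

**Decision**
<chosen way forward and its consequences>
EOF
```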

View File

@ -1,5 +1,167 @@
Change Log for all versions of Memgraph can be found on-line at
https://docs.memgraph.com/memgraph/changelog
# Change Log
All the updates to the Change Log can be made in the following repository:
https://github.com/memgraph/docs
## v0.11.0
### Major Features and Improvements
* [Enterprise Ed.] Improve Cartesian support in distributed queries.
* [Enterprise Ed.] Improve distributed execution of BFS.
* [Enterprise Ed.] Dynamic graph partitioner added.
* Static vertices/edges id generators exposed through the Id Cypher function.
* Properties on disk added.
* Telemetry added.
* SSL support added.
* Add `toString` function to openCypher.
### Bug Fixes and Other Changes
* Document issues with Docker on OS X.
* Add BFS and Dijkstra's algorithm examples to documentation.
## v0.10.0
### Breaking Changes
* Snapshot format changed (not backward compatible).
### Major Features and Improvements
* [Enterprise Ed.] Distributed storage and execution.
* `reduce` and `single` functions added to openCypher.
* `wShortest` edge expansion added to openCypher.
* Support packaging RPM on CentOS 7.
### Bug Fixes and Other Changes
* Report an error if updating a deleted element.
* Log an error if reading info on available memory fails.
* Fix a bug when `MATCH` would stop matching if a result was empty, but later
results still contain data to be matched. The simplest case of this was the
query: `UNWIND [1,2,3] AS x MATCH (n :Label {prop: x}) RETURN n`. If there
was no node `(:Label {prop: 1})`, then the `MATCH` wouldn't even try to match
`x` being 2 or 3.
* Report an error if trying to compare a property value with something that
cannot be stored in a property.
* Fix crashes in some obscure cases.
* Commit log automatically garbage collected.
* Add minor performance improvements.
## v0.9.0
### Breaking Changes
* Snapshot format changed (not backward compatible).
* Snapshot configuration flags changed, general durability flags added.
### Major Features and Improvements
* Write-ahead log added.
* `nodes` and `relationships` functions added.
* `UNION` and `UNION ALL` is implemented.
* Concurrent index creation is now enabled.
### Bug Fixes and Other Changes
## v0.8.0
### Major Features and Improvements
* CASE construct (without aggregations).
* Named path support added.
* Maps can now be stored as vertex/edge properties.
* Map indexing supported.
* `rand` function added.
* `assert` function added.
* `counter` and `counterSet` functions added.
* `indexInfo` function added.
* `collect` aggregation now supports Map collection.
* Changed the BFS syntax.
### Bug Fixes and Other Changes
* Use \u to specify a 4-digit codepoint and \U for an 8-digit codepoint.
* Keywords appearing in header (named expressions) keep original case.
* Our Bolt protocol implementation is now completely compatible with the protocol version 1 specification. (https://boltprotocol.org/v1/)
* Added a log warning when running out of memory, along with the `memory_warning_threshold` flag.
* Edges are no longer additionally filtered after expansion.
## v0.7.0
### Major Features and Improvements
* Variable length path `MATCH`.
* Explicitly started transactions (multi-query transactions).
* Map literal.
* Query parameters (except for parameters in place of property maps).
* `all` function in openCypher.
* `degree` function in openCypher.
* User specified transaction execution timeout.
### Bug Fixes and Other Changes
* Concurrent `BUILD INDEX` deadlock now returns an error to the client.
* Expansion inconsistencies when a `MATCH` is preceded by `OPTIONAL MATCH`.
* High concurrency Antlr parsing bug.
* Indexing improvements.
* Query stripping and caching speedups.
## v0.6.0
### Major Features and Improvements
* AST caching.
* Label + property index support.
* Different logging setup & format.
## v0.5.0
### Major Features and Improvements
* Use label indexes to speed up querying.
* Generate multiple query plans and use the cost estimator to select the best.
* Snapshots & Recovery.
* Abandon old yaml configuration and migrate to gflags.
* Query stripping & AST caching support.
### Bug Fixes and Other Changes
* Fixed race condition in MVCC. Hints exp+aborted race condition prevented.
* Fixed conceptual bug in MVCC GC. Evaluate old records w.r.t. the oldest
transaction's id AND snapshot.
* User-friendly error messages thrown from the query engine.
## Build 837
### Bug Fixes and Other Changes
* List indexing supported with preceding IN (for example in query `RETURN 1 IN [[1,2]][0]`).
## Build 825
### Major Features and Improvements
* RETURN *, count(*), OPTIONAL MATCH, UNWIND, DISTINCT (except DISTINCT in aggregate functions), list indexing and slicing, escaped labels, IN LIST operator, range function.
### Bug Fixes and Other Changes
* TCP_NODELAY -> import should be faster.
* Clear hint bits.
## Build 783
### Major Features and Improvements
* SKIP, LIMIT, ORDER BY.
* Math functions.
* Initial support for MERGE clause.
### Bug Fixes and Other Changes
* Unhandled Lock Timeout Exception.
## Build 755
### Major Features and Improvements
* MATCH, CREATE, WHERE, SET, REMOVE, DELETE.

View File

@ -1,7 +1,6 @@
# MemGraph CMake configuration
cmake_minimum_required(VERSION 3.12)
cmake_policy(SET CMP0076 NEW)
cmake_minimum_required(VERSION 3.1)
# !! IMPORTANT !! run ./project_root/init.sh before cmake command
# to download dependencies
@ -19,12 +18,10 @@ set_directory_properties(PROPERTIES CLEAN_NO_CUSTOM TRUE)
# during the code coverage process
find_program(CCACHE_FOUND ccache)
option(USE_CCACHE "ccache:" ON)
message(STATUS "CCache: ${USE_CCACHE}")
if(CCACHE_FOUND AND USE_CCACHE)
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache)
message(STATUS "CCache: Used")
else ()
message(STATUS "CCache: Not used")
endif(CCACHE_FOUND AND USE_CCACHE)
# choose a compiler
@ -34,143 +31,23 @@ find_program(CLANGXX_FOUND clang++)
if (CLANG_FOUND AND CLANGXX_FOUND)
set(CMAKE_C_COMPILER ${CLANG_FOUND})
set(CMAKE_CXX_COMPILER ${CLANGXX_FOUND})
else()
message(FATAL_ERROR "Couldn't find clang and/or clang++!")
endif()
# Get current commit hash.
execute_process(
OUTPUT_VARIABLE COMMIT_HASH
COMMAND git rev-parse --short HEAD
)
string(STRIP ${COMMIT_HASH} COMMIT_HASH)
# -----------------------------------------------------------------------------
project(memgraph LANGUAGES C CXX)
#TODO: upgrade to cmake 3.24 + CheckIPOSupported
#cmake_policy(SET CMP0138 NEW)
#include(CheckIPOSupported)
#check_ipo_supported()
#set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_Release TRUE)
#set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_RelWithDebInfo TRUE)
# Install licenses.
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/licenses/
DESTINATION share/doc/memgraph)
# For more information about how to release a new version of Memgraph, see
# `release/README.md`.
# Option that is used to specify which version of Memgraph should be built. The
# default is `ON` which causes the build system to build Memgraph Enterprise.
# Memgraph Community is built if explicitly set to `OFF`.
option(MG_ENTERPRISE "Build Memgraph Enterprise Edition" ON)
# Set the current version here to override the automatic version detection. The
# version must be specified as `X.Y.Z`. Primarily used when building new patch
# versions.
set(MEMGRAPH_OVERRIDE_VERSION "")
# Custom suffix that this version should have. The suffix can be any arbitrary
# string. Primarily used when building a version for a specific customer.
set(MEMGRAPH_OVERRIDE_VERSION_SUFFIX "")
# Variables used to generate the versions.
if (MG_ENTERPRISE)
set(get_version_offering "")
else()
set(get_version_offering "--open-source")
endif()
set(get_version_script "${CMAKE_CURRENT_SOURCE_DIR}/release/get_version.py")
# Get version that should be used in the binary.
execute_process(
OUTPUT_VARIABLE MEMGRAPH_VERSION
RESULT_VARIABLE MEMGRAPH_VERSION_RESULT
COMMAND "${get_version_script}" ${get_version_offering}
"${MEMGRAPH_OVERRIDE_VERSION}"
"${MEMGRAPH_OVERRIDE_VERSION_SUFFIX}"
"--memgraph-root-dir"
"${CMAKE_CURRENT_SOURCE_DIR}"
)
if(MEMGRAPH_VERSION_RESULT AND NOT MEMGRAPH_VERSION_RESULT EQUAL 0)
message(FATAL_ERROR "Unable to get Memgraph version.")
else()
MESSAGE(STATUS "Memgraph version: ${MEMGRAPH_VERSION}")
endif()
# Get version that should be used in the DEB package.
execute_process(
OUTPUT_VARIABLE MEMGRAPH_VERSION_DEB
RESULT_VARIABLE MEMGRAPH_VERSION_DEB_RESULT
COMMAND "${get_version_script}" ${get_version_offering}
--variant deb
"${MEMGRAPH_OVERRIDE_VERSION}"
"${MEMGRAPH_OVERRIDE_VERSION_SUFFIX}"
"--memgraph-root-dir"
"${CMAKE_CURRENT_SOURCE_DIR}"
)
if(MEMGRAPH_VERSION_DEB_RESULT AND NOT MEMGRAPH_VERSION_DEB_RESULT EQUAL 0)
message(FATAL_ERROR "Unable to get Memgraph DEB version.")
else()
MESSAGE(STATUS "Memgraph DEB version: ${MEMGRAPH_VERSION_DEB}")
endif()
# Get version that should be used in the RPM package.
execute_process(
OUTPUT_VARIABLE MEMGRAPH_VERSION_RPM
RESULT_VARIABLE MEMGRAPH_VERSION_RPM_RESULT
COMMAND "${get_version_script}" ${get_version_offering}
--variant rpm
"${MEMGRAPH_OVERRIDE_VERSION}"
"${MEMGRAPH_OVERRIDE_VERSION_SUFFIX}"
"--memgraph-root-dir"
"${CMAKE_CURRENT_SOURCE_DIR}"
)
if(MEMGRAPH_VERSION_RPM_RESULT AND NOT MEMGRAPH_VERSION_RPM_RESULT EQUAL 0)
message(FATAL_ERROR "Unable to get Memgraph RPM version.")
else()
MESSAGE(STATUS "Memgraph RPM version: ${MEMGRAPH_VERSION_RPM}")
endif()
# We want the above variables to be updated each time something is committed to
# the repository. That is why we include a dependency on the current git HEAD
# to trigger a new CMake run when the git repository state changes. This is a
# hack, as CMake doesn't have a mechanism to regenerate variables when
# something changes (only files can be regenerated).
# https://cmake.org/pipermail/cmake/2018-October/068389.html
#
# The hack in the above link is nearly correct but it has a fatal flaw. The
# `CMAKE_CONFIGURE_DEPENDS` isn't a `GLOBAL` property, it is instead a
# `DIRECTORY` property and as such must be set in the `DIRECTORY` scope.
# https://cmake.org/cmake/help/v3.14/manual/cmake-properties.7.html
#
# Unlike the above mentioned hack, we don't use the `.git/index` file. That
# file changes on every `git add` (even on `git status`) so it triggers
# unnecessary recalculations of the release version. The release version only
# changes on every `git commit` or `git checkout`. That is why we watch the
# following files for changes:
# - `.git/HEAD` -> changes each time a `git checkout` is issued
# - `.git/refs/heads/...` -> the value in `.git/HEAD` is a branch name (when
# you are on a branch) and you have to monitor the file of the specific
# branch to detect when a `git commit` was issued
# More details about the contents of the `.git` directory and the specific
# files used can be seen here:
# https://git-scm.com/book/en/v2/Git-Internals-Git-References
set(git_directory "${CMAKE_SOURCE_DIR}/.git")
# Check for directory because if the repo is cloned as a git submodule, .git is
# a file and below code doesn't work.
if (IS_DIRECTORY "${git_directory}")
set_property(DIRECTORY APPEND PROPERTY
CMAKE_CONFIGURE_DEPENDS "${git_directory}/HEAD")
file(STRINGS "${git_directory}/HEAD" git_head_data)
if (git_head_data MATCHES "^ref: ")
string(SUBSTRING "${git_head_data}" 5 -1 git_head_ref)
set_property(DIRECTORY APPEND PROPERTY
CMAKE_CONFIGURE_DEPENDS "${git_directory}/${git_head_ref}")
endif()
endif()
project(memgraph VERSION 0.11.0)
# -----------------------------------------------------------------------------
# setup CMake module path, defines path for include() and find_package()
# https://cmake.org/cmake/help/latest/variable/CMAKE_MODULE_PATH.html
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/cmake)
# custom function definitions
include(functions)
# -----------------------------------------------------------------------------
@ -185,45 +62,31 @@ add_custom_target(clean_all
# build flags -----------------------------------------------------------------
# Export the compile commands so that we can use clang-tidy. Additional benefit
# is easier debugging of compilation and linker flags.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# c99-designator is disabled because of required mixture of designated and
# non-designated initializers in Python Query Module code (`py_module.cpp`).
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall \
-Werror=switch -Werror=switch-bool -Werror=return-type \
-Werror=return-stack-address \
-Wno-c99-designator -Wmissing-field-initializers \
-DBOOST_ASIO_USE_TS_EXECUTOR_AS_DEFAULT")
# TODO: set here 17 once it will be available in the cmake version (3.8)
# set(CMAKE_CXX_STANDARD 17)
# set(CMAKE_CXX_STANDARD_REQUIRED ON)
# For now, explicitly set -std= flag for C++17.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++1z -Wall \
-Werror=switch -Werror=switch-bool -Werror=return-type")
# Don't omit frame pointer in RelWithDebInfo, for additional callchain debug.
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO
"${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-omit-frame-pointer")
# Statically link libgcc and libstdc++, the GCC allows this according to:
# https://gcc.gnu.org/onlinedocs/gcc-10.2.0/libstdc++/manual/manual/license.html
# https://www.gnu.org/licenses/gcc-exception-faq.html
# Last checked for gcc-10.2 which we are using on the build machines.
# Last checked for gcc-7.3, we are using gcc-6.3 on build machines (license is
# valid there also).
# ** If we change versions, recheck this! **
# ** Static linking is allowed only for executables! **
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++")
# Use lld linker to speedup build and use less memory.
add_link_options(-fuse-ld=lld)
# NOTE: Moving to latest Clang (probably starting from 15), lld stopped to work
# without explicit link_directories call.
string(REPLACE ":" " " LD_LIBS $ENV{LD_LIBRARY_PATH})
separate_arguments(LD_LIBS)
link_directories(${LD_LIBS})
# Use gold linker to speedup build
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=gold")
# release flags
set(CMAKE_CXX_FLAGS_RELEASE "-O2 -DNDEBUG")
SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -pthread")
#debug flags
set(PREFERRED_DEBUGGER "gdb" CACHE STRING
"Tunes the debug output for your preferred debugger (gdb or lldb).")
@ -239,7 +102,15 @@ else()
set(CMAKE_CXX_FLAGS_DEBUG "-g")
endif()
# ndebug
option(NDEBUG "No debug" OFF)
message(STATUS "NDEBUG: ${NDEBUG} (be careful CMAKE_BUILD_TYPE can also \
append this flag)")
if(NDEBUG)
add_definitions( -DNDEBUG )
endif()
# -----------------------------------------------------------------------------
# default build type is debug
if (NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Debug")
@ -247,22 +118,15 @@ endif()
message(STATUS "CMake build type: ${CMAKE_BUILD_TYPE}")
# -----------------------------------------------------------------------------
add_definitions( -DCMAKE_BUILD_TYPE_NAME="${CMAKE_BUILD_TYPE}")
if (NOT MG_ARCH)
set(MG_ARCH_DESCR "Host architecture to build Memgraph on. Supported values are x86_64, ARM64.")
if (${CMAKE_HOST_SYSTEM_PROCESSOR} MATCHES "aarch64")
set(MG_ARCH "ARM64" CACHE STRING ${MG_ARCH_DESCR})
else()
set(MG_ARCH "x86_64" CACHE STRING ${MG_ARCH_DESCR})
endif()
endif()
message(STATUS "MG_ARCH: ${MG_ARCH}")
# setup external dependencies -------------------------------------------------
# threading
find_package(Threads REQUIRED)
# optional Ltalloc
option(USE_LTALLOC "Use Ltalloc instead of default allocator (default OFF). \
Set this to ON to link with Ltalloc." OFF)
# optional readline
option(USE_READLINE "Use GNU Readline library if available (default ON). \
Set this to OFF to prevent linking with Readline even if it is available." ON)
@ -273,17 +137,63 @@ if (USE_READLINE)
endif()
endif()
# OpenSSL
find_package(OpenSSL REQUIRED)
set(libs_dir ${CMAKE_SOURCE_DIR}/libs)
add_subdirectory(libs EXCLUDE_FROM_ALL)
include_directories(SYSTEM ${GFLAGS_INCLUDE_DIR})
include_directories(SYSTEM ${GLOG_INCLUDE_DIR})
include_directories(SYSTEM ${FMT_INCLUDE_DIR})
include_directories(SYSTEM ${ANTLR4_INCLUDE_DIR})
include_directories(SYSTEM ${BZIP2_INCLUDE_DIR})
include_directories(SYSTEM ${ZLIB_INCLUDE_DIR})
include_directories(SYSTEM ${ROCKSDB_INCLUDE_DIR})
include_directories(SYSTEM ${CAPNP_INCLUDE_DIR})
# -----------------------------------------------------------------------------
# openCypher parser -----------------------------------------------------------
set(opencypher_frontend ${CMAKE_SOURCE_DIR}/src/query/frontend/opencypher)
set(opencypher_generated ${opencypher_frontend}/generated)
set(opencypher_grammar ${opencypher_frontend}/grammar/Cypher.g4)
# enumerate all files that are generated from antlr
set(antlr_opencypher_generated_src
${opencypher_generated}/CypherLexer.cpp
${opencypher_generated}/CypherParser.cpp
${opencypher_generated}/CypherBaseVisitor.cpp
${opencypher_generated}/CypherVisitor.cpp
)
# Provide a command to generate sources if missing. If this were a
# custom_target, it would always run and we don't want that.
add_custom_command(OUTPUT ${antlr_opencypher_generated_src}
COMMAND
${CMAKE_COMMAND} -E make_directory ${opencypher_generated}
COMMAND
java -jar ${CMAKE_SOURCE_DIR}/libs/antlr-4.6-complete.jar -Dlanguage=Cpp -visitor -o ${opencypher_generated} -package antlropencypher ${opencypher_grammar}
WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
DEPENDS ${opencypher_grammar})
# add custom target for generation
add_custom_target(generate_opencypher_parser
DEPENDS ${antlr_opencypher_generated_src})
add_library(antlr_opencypher_parser_lib STATIC ${antlr_opencypher_generated_src})
target_link_libraries(antlr_opencypher_parser_lib antlr4)
# -----------------------------------------------------------------------------
# Optional subproject configuration -------------------------------------------
option(POC "Build proof of concept binaries" OFF)
option(EXPERIMENTAL "Build experimental binaries" OFF)
option(CUSTOMERS "Build customer binaries" OFF)
option(TEST_COVERAGE "Generate coverage reports from running memgraph" OFF)
option(TOOLS "Build tools binaries" ON)
option(QUERY_MODULES "Build query modules containing custom procedures" ON)
option(ASAN "Build with Address Sanitizer. To get reasonable performance, this option should be used only in Release or RelWithDebInfo builds." OFF)
option(TSAN "Build with Thread Sanitizer. To get reasonable performance, this option should be used only in Release or RelWithDebInfo builds." OFF)
option(UBSAN "Build with Undefined Behaviour Sanitizer" OFF)
# Build feature flags
option(THIN_LTO "Build with link time optimization" OFF)
if (TEST_COVERAGE)
string(TOLOWER ${CMAKE_BUILD_TYPE} lower_build_type)
@ -294,29 +204,8 @@ if (TEST_COVERAGE)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fprofile-instr-generate -fcoverage-mapping")
endif()
if (MG_ENTERPRISE)
add_definitions(-DMG_ENTERPRISE)
endif()
option(ENABLE_JEMALLOC "Use jemalloc" ON)
option(MG_MEMORY_PROFILE "If build should be setup for memory profiling" OFF)
if (MG_MEMORY_PROFILE AND ENABLE_JEMALLOC)
message(STATUS "Jemalloc has been disabled because MG_MEMORY_PROFILE is enabled")
set(ENABLE_JEMALLOC OFF)
endif ()
if (MG_MEMORY_PROFILE AND ASAN)
message(STATUS "ASAN has been disabled because MG_MEMORY_PROFILE is enabled")
set(ASAN OFF)
endif ()
if (MG_MEMORY_PROFILE)
add_compile_definitions(MG_MEMORY_PROFILE)
endif ()
if (ASAN)
message(WARNING "Disabling jemalloc as it doesn't work well with ASAN")
set(ENABLE_JEMALLOC OFF)
# Enable Address sanitizer and get nicer stack traces in error messages.
# Enable Addres sanitizer and get nicer stack traces in error messages.
# NOTE: AddressSanitizer uses llvm-symbolizer binary from the Clang
# distribution to symbolize the stack traces (note that ideally the
# llvm-symbolizer version must match the version of ASan runtime library).
@ -337,8 +226,6 @@ if (ASAN)
endif()
if (TSAN)
message(WARNING "Disabling jemalloc as it doesn't work well with TSAN")
set(ENABLE_JEMALLOC OFF)
# ThreadSanitizer generally requires all code to be compiled with -fsanitize=thread.
# If some code (e.g. dynamic libraries) is not compiled with the flag, it can
# lead to false positive race reports, false negative race reports and/or
@ -354,7 +241,7 @@ if (TSAN)
# By default ThreadSanitizer uses addr2line utility to symbolize reports.
# llvm-symbolizer is faster, consumes less memory and produces much better
# reports. To use it set runtime flag:
# TSAN_OPTIONS="extern-symbolizer-path=~/llvm-symbolizer"
# TSAN_OPTIONS="extern-symbolizer-path=~/llvm-symbolizer"
# For more runtime flags see: https://github.com/google/sanitizers/wiki/ThreadSanitizerFlags
endif()
@ -364,37 +251,90 @@ if (UBSAN)
# runtime library and c++ standard libraries are present.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-omit-frame-pointer -fno-sanitize=vptr")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined -fno-sanitize=vptr")
# Run program with environment variable UBSAN_OPTIONS=print_stacktrace=1.
# Make sure llvm-symbolizer binary is in path.
# To make the program abort on undefined behavior, use UBSAN_OPTIONS=halt_on_error=1.
# Run program with environment variable UBSAN_OPTIONS=print_stacktrace=1
# Make sure llvm-symbolizer binary is in path
endif()
set(MG_PYTHON_VERSION "" CACHE STRING "Specify the exact Python version used by the query modules")
set(MG_PYTHON_PATH "" CACHE STRING "Specify the exact Python path used by the query modules")
if (THIN_LTO)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -flto=thin")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -flto=thin")
endif()
# Add subprojects
include_directories(src)
add_subdirectory(src)
# Release configuration
add_subdirectory(release)
option(MG_ENABLE_TESTING "Set this to OFF to disable building test binaries" ON)
message(STATUS "MG_ENABLE_TESTING: ${MG_ENABLE_TESTING}")
if (MG_ENABLE_TESTING)
enable_testing()
add_subdirectory(tests)
if(POC)
add_subdirectory(poc)
endif()
if(EXPERIMENTAL)
add_subdirectory(experimental)
endif()
if(CUSTOMERS)
add_subdirectory(customers)
endif()
enable_testing()
add_subdirectory(tests)
if(TOOLS)
add_subdirectory(tools)
endif()
if(QUERY_MODULES)
add_subdirectory(query_modules)
endif()
# -----------------------------------------------------------------------------
install(FILES ${CMAKE_BINARY_DIR}/bin/mgconsole
PERMISSIONS OWNER_EXECUTE OWNER_READ OWNER_WRITE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE
TYPE BIN)
# ---- Setup CPack --------
# General setup
set(CPACK_PACKAGE_NAME memgraph)
set(CPACK_PACKAGE_VENDOR "Memgraph Ltd.")
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY
"High performance, in-memory, transactional graph database")
set(CPACK_PACKAGE_VERSION_MAJOR ${memgraph_VERSION_MAJOR})
set(CPACK_PACKAGE_VERSION_MINOR ${memgraph_VERSION_MINOR})
set(CPACK_PACKAGE_VERSION_PATCH ${memgraph_VERSION_PATCH})
set(CPACK_PACKAGE_VERSION_TWEAK ${memgraph_VERSION_TWEAK})
set(CPACK_PACKAGE_FILE_NAME ${CPACK_PACKAGE_NAME}-${memgraph_VERSION}-${COMMIT_HASH}${CPACK_SYSTEM_NAME})
# DEB specific
# Instead of using "name <email>" format, we use "email (name)" to prevent
# errors due to full stop, '.' at the end of "Ltd". (See: RFC 822)
set(CPACK_DEBIAN_PACKAGE_MAINTAINER "tech@memgraph.com (Memgraph Ltd.)")
set(CPACK_DEBIAN_PACKAGE_SECTION non-free/database)
set(CPACK_DEBIAN_PACKAGE_HOMEPAGE https://memgraph.com)
set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA
"${CMAKE_SOURCE_DIR}/release/debian/conffiles;"
"${CMAKE_SOURCE_DIR}/release/debian/copyright;"
"${CMAKE_SOURCE_DIR}/release/debian/prerm;"
"${CMAKE_SOURCE_DIR}/release/debian/postrm;"
"${CMAKE_SOURCE_DIR}/release/debian/postinst;")
set(CPACK_DEBIAN_PACKAGE_SHLIBDEPS ON)
# Description formatting is important, summary must be followed with a newline and 1 space.
set(CPACK_DEBIAN_PACKAGE_DESCRIPTION "${CPACK_PACKAGE_DESCRIPTION_SUMMARY}
Contains Memgraph, the graph database. It aims to deliver developers the
speed, simplicity and scale required to build the next generation of
applications driven by real-time connected data.")
# Add `openssl` package to dependencies list. Used to generate SSL certificates.
set(CPACK_DEBIAN_PACKAGE_DEPENDS "openssl (>= 1.1.0)")
# RPM specific
set(CPACK_RPM_PACKAGE_URL https://memgraph.com)
set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION
/var /var/lib /var/log /etc/logrotate.d
/lib /lib/systemd /lib/systemd/system /lib/systemd/system/memgraph.service)
set(CPACK_RPM_PACKAGE_REQUIRES_PRE "shadow-utils")
# NOTE: the user specfile has a bug in cmake 3.7.2; this needs to be patched
# manually in ~/cmake/share/cmake-3.7/Modules/CPackRPM.cmake line 2273,
# or a newer cmake version must be used.
set(CPACK_RPM_USER_BINARY_SPECFILE "${CMAKE_SOURCE_DIR}/release/rpm/memgraph.spec.in")
# Description formatting is important, no line must be greater than 80 characters.
set(CPACK_RPM_PACKAGE_DESCRIPTION "Contains Memgraph, the graph database.
It aims to deliver developers the speed, simplicity and scale required to build
the next generation of applications driven by real-time connected data.")
# Add `openssl` package to dependencies list. Used to generate SSL certificates.
set(CPACK_RPM_PACKAGE_REQUIRES "openssl >= 1.0.0")
# All variables must be set before including.
include(CPack)
# ---- End Setup CPack ----

View File

@ -1,127 +0,0 @@
# Contributor Covenant Code of Conduct
## Our Pledge
We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, caste, color, religion, or sexual
identity and orientation.
We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.
## Our Standards
Examples of behavior that contributes to a positive environment for our
community include:
- Demonstrating empathy and kindness toward other people
- Being respectful of differing opinions, viewpoints, and experiences
- Giving and gracefully accepting constructive feedback
- Accepting responsibility and apologizing to those affected by our mistakes,
and learning from the experience
- Focusing on what is best not just for us as individuals, but for the overall
community
Examples of unacceptable behavior include:
- The use of sexualized language or imagery, and sexual attention or advances of
any kind
- Trolling, insulting or derogatory comments, and personal or political attacks
- Public or private harassment
- Publishing others' private information, such as a physical or email address,
without their explicit permission
- Other conduct which could reasonably be considered inappropriate in a
professional setting
## Enforcement Responsibilities
Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.
Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.
## Scope
This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at
[contact@memgraph.com](mailto:contact@memgraph.com). All complaints will be reviewed
and investigated promptly and fairly.
All community leaders are obligated to respect the privacy and security of the
reporter of any incident.
## Enforcement Guidelines
Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:
### 1. Correction
**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.
**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.
### 2. Warning
**Community Impact**: A violation through a single incident or series of
actions.
**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or permanent
ban.
### 3. Temporary Ban
**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.
**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.
### 4. Permanent Ban
**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.
**Consequence**: A permanent ban from any sort of public interaction within the
community.
## Attribution
This Code of Conduct is adapted from the Contributor Covenant, version 2.1,
available at
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html](https://www.contributor-covenant.org/version/2/1/code_of_conduct.html).
Community Impact Guidelines were inspired by [Mozilla's code of conduct
enforcement ladder][mozilla coc].
For answers to common questions about this code of conduct, see the FAQ at
[https://www.contributor-covenant.org/faq](https://www.contributor-covenant.org/faq).
Translations are available at
[https://www.contributor-covenant.org/translations](https://www.contributor-covenant.org/translations).

View File

@ -1,121 +0,0 @@
# How to contribute?
This is a general purpose guide for contributing to Memgraph. We're still
working out the kinks to make contributing to this project as easy and
transparent as possible, but we're not quite there yet. Hopefully, this document
makes the process for contributing clear and answers some questions that you may
have.
- [How to contribute?](#how-to-contribute)
- [Open development](#open-development)
- [Branch organization](#branch-organization)
- [Bugs & changes](#bugs--changes)
- [Where to find known issues?](#where-to-find-known-issues)
- [Proposing a change](#proposing-a-change)
- [Your first pull request](#your-first-pull-request)
- [Sending a pull request](#sending-a-pull-request)
- [Style guide](#style-guide)
- [How to get in touch?](#how-to-get-in-touch)
- [Code of Conduct](#code-of-conduct)
- [License](#license)
- [Attribution](#attribution)
## Open development
All work on Memgraph is done via [GitHub](https://github.com/memgraph/memgraph).
Both core team members and external contributors send pull requests which go
through the same review process.
## Branch organization
Most pull requests should target the [`master
branch`](https://github.com/memgraph/memgraph/tree/master). We only use separate
branches for developing new features and fixing bugs before they are merged into
`master`. We do our best to keep `master` in good shape, with all tests passing.
Code that lands in `master` must be compatible with the latest stable release.
It may contain additional features, but no breaking changes unless absolutely
necessary. We should be able to release a new minor version from the tip of
`master` at any time.
## Bugs & changes
### Where to find known issues?
We are using [GitHub Issues](https://github.com/memgraph/memgraph/issues) for
our public bugs. We keep a close eye on this and try to make it clear when we
have an internal fix in progress. Before filing a new issue, make sure the
problem hasn't already been reported.
### Proposing a change
If you intend to change the public API, or make any non-trivial changes to the
implementation, we recommend [filing an
issue](https://github.com/memgraph/memgraph/issues/new). This lets us reach an
agreement on your proposal before you put significant effort into it.
If you're only fixing a bug, it's fine to submit a pull request right away, but
we still recommend filing an issue detailing what you're fixing. This is
helpful in case we don't accept that specific fix but want to keep track of the
issue.
### Your first pull request
Working on your first Pull Request? You can learn how from this free video
series:
**[How to Contribute to an Open Source Project on
GitHub](https://app.egghead.io/courses/how-to-contribute-to-an-open-source-project-on-github)**
If you decide to fix an issue, please be sure to check the comment thread in
case somebody is already working on a fix. If nobody is working on it at the
moment, please leave a comment stating that you intend to work on it so other
people don't accidentally duplicate your effort.
If somebody claims an issue but doesn't follow up for more than two weeks, it's
fine to take it over but you should still leave a comment.
### Sending a pull request
The core team is monitoring for pull requests. We will review your pull request
and either merge it, request changes to it, or close it with an explanation.
**Before submitting a pull request,** please make sure the following is done:
1. Fork [the repository](https://github.com/memgraph/memgraph) and create your
branch from `master`.
2. If you've fixed a bug or added code that should be tested, add tests!
3. Use the formatter `clang-format` for C/C++ code and `flake8` for Python code.
`clang-format` will automatically detect the `.clang-format` file in the root
directory while `flake8` can be used with the default configuration.
### Style guide
Memgraph uses the [Google Style
Guide](https://google.github.io/styleguide/cppguide.html) for C++ in most of its
code. You should follow it whenever writing new code.
## How to get in touch?
Aside from communicating directly via Pull Requests and Issues, the Memgraph
Community [Discord Server](https://discord.gg/memgraph) is the best place for
conversing with project maintainers and other community members.
## [Code of Conduct](https://github.com/memgraph/memgraph/blob/master/CODE_OF_CONDUCT.md)
Memgraph has adopted the [Contributor
Covenant](https://www.contributor-covenant.org/) as its Code of Conduct, and we
expect project participants to adhere to it. Please read [the full
text](https://github.com/memgraph/memgraph/blob/master/CODE_OF_CONDUCT.md) so
that you can understand what actions will and will not be tolerated.
## License
By contributing to Memgraph, you agree that your contributions will be licensed
under the [Memgraph licensing
scheme](https://github.com/memgraph/memgraph/blob/master/LICENSE).
## Attribution
This Contributing guide is adapted from the **React.js Contributing guide**
available at
[https://reactjs.org/docs/how-to-contribute.html](https://reactjs.org/docs/how-to-contribute.html).

@ -51,7 +51,7 @@ PROJECT_BRIEF = "The World's Most Powerful Graph Database"
# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy
# the logo to the output directory.
PROJECT_LOGO = docs/doxygen/memgraph_logo.png
PROJECT_LOGO = Doxylogo.png
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path
# into which the generated documentation will be written. If a relative path is
@ -839,6 +839,7 @@ EXCLUDE_PATTERNS += */Testing/*
EXCLUDE_PATTERNS += */tests/*
EXCLUDE_PATTERNS += */dist/*
EXCLUDE_PATTERNS += */tools/*
EXCLUDE_PATTERNS += */customers/*
# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
# (namespaces, classes, functions, etc.) that should be excluded from the

(binary image file changed; 6.6 KiB before and after)
@ -1,5 +0,0 @@
Source code in this repository is variously licensed under the Business Source
License 1.1 (BSL) or the Memgraph Enterprise License (MEL). A copy of each license
can be found in the licenses directory. Source code in a given file is licensed
under the BSL and the copyright belongs to The Memgraph Authors unless
otherwise noted at the beginning of the file.

191
README.md
@ -1,177 +1,24 @@
<p align="center">
<img src="https://public-assets.memgraph.com/github-readme-images/github-memgraph-repo-banner.png">
</p>
# memgraph
---
Memgraph is an ACID-compliant, high-performance, transactional, distributed
in-memory graph database featuring runtime native query compiling, lock-free
data structures, multi-version concurrency control and asynchronous IO.
<p align="center">
<a href="https://github.com/memgraph/memgraph/blob/master/licenses/APL.txt">
<img src="https://img.shields.io/badge/license-APL-green" alt="license" title="license"/>
</a>
<a href="https://github.com/memgraph/memgraph/blob/master/licenses/BSL.txt">
<img src="https://img.shields.io/badge/license-BSL-yellowgreen" alt="license" title="license"/>
</a>
<a href="https://github.com/memgraph/memgraph/blob/master/licenses/MEL.txt" alt="Documentation">
<img src="https://img.shields.io/badge/license-MEL-yellow" alt="license" title="license"/>
</a>
</p>
## Dependencies
<p align="center">
<a href="https://github.com/memgraph/memgraph">
<img src="https://img.shields.io/github/actions/workflow/status/memgraph/memgraph/release_debian10.yaml?branch=master&label=build%20and%20test&logo=github"/>
</a>
<a href="https://memgraph.com/docs/" alt="Documentation">
<img src="https://img.shields.io/badge/documentation-Memgraph-orange" />
</a>
</p>
Memgraph can be compiled using any modern C++ compiler. It mostly relies on
the standard template library; however, some things do require external
libraries.
<p align="center">
<a href="https://memgr.ph/join-discord">
<img src="https://img.shields.io/badge/Discord-7289DA?style=for-the-badge&logo=discord&logoColor=white" alt="Discord"/>
</a>
</p>
Some code uses Linux-specific libraries, and the build is only supported on a
64-bit Linux kernel.
## :clipboard: Description
Memgraph is an open source graph database built for real-time streaming and
compatible with Neo4j. Whether you're a developer or a data scientist with
interconnected data, Memgraph will get you immediate, actionable insights
fast.
Memgraph directly connects to your streaming infrastructure. You can ingest data
from sources like Kafka, SQL, or plain CSV files. Memgraph provides a standard
interface to query your data with Cypher, a widely-used and declarative query
language that is easy to write, understand and optimize for performance. This is
achieved by using the property graph data model, which stores data in terms of
objects, their attributes, and the relationships that connect them. This is a
natural and effective way to model many real-world problems without relying on
complex SQL schemas.
Memgraph is implemented in C/C++ and leverages an in-memory-first architecture
to ensure that you're getting the [best possible
performance](http://memgraph.com/benchgraph) consistently and without surprises.
It's also ACID-compliant and highly available.
## :zap: Features
- Run Python, Rust, and C/C++ code natively, check out the
[MAGE](https://github.com/memgraph/mage) graph algorithm library
- Native support for machine learning
- Streaming support
- Replication
- Authentication and authorization
- ACID compliance
## :video_game: Memgraph Playground
You don't need to install anything to try out Memgraph. Check out
our **[Memgraph Playground](https://playground.memgraph.com/)** sandboxes in
your browser.
<p align="left">
<a href="https://playground.memgraph.com/">
<img width="450px" alt="Memgraph Playground" src="https://download.memgraph.com/asset/github/memgraph/memgraph-playground.png">
</a>
</p>
## :floppy_disk: Download & Install
### Windows
[![Windows](https://img.shields.io/badge/Windows-Docker-0078D6?style=for-the-badge&logo=windows&logoColor=white)](https://memgraph.com/docs/memgraph/install-memgraph-on-windows-docker)
[![Windows](https://img.shields.io/badge/Windows-WSL-0078D6?style=for-the-badge&logo=windows&logoColor=white)](https://memgraph.com/docs/memgraph/install-memgraph-on-windows-wsl)
### macOS
[![macOS](https://img.shields.io/badge/macOS-Docker-000000?style=for-the-badge&logo=macos&logoColor=F0F0F0)](https://memgraph.com/docs/memgraph/install-memgraph-on-macos-docker)
[![macOS](https://img.shields.io/badge/lima-AACF41?style=for-the-badge&logo=macos&logoColor=F0F0F0)](https://memgraph.com/docs/memgraph/install-memgraph-on-ubuntu)
### Linux
[![Linux](https://img.shields.io/badge/Linux-Docker-FCC624?style=for-the-badge&logo=linux&logoColor=black)](https://memgraph.com/docs/memgraph/install-memgraph-on-linux-docker)
[![Debian](https://img.shields.io/badge/Debian-D70A53?style=for-the-badge&logo=debian&logoColor=white)](https://memgraph.com/docs/memgraph/install-memgraph-on-debian)
[![Ubuntu](https://img.shields.io/badge/Ubuntu-E95420?style=for-the-badge&logo=ubuntu&logoColor=white)](https://memgraph.com/docs/memgraph/install-memgraph-on-ubuntu)
[![Cent OS](https://img.shields.io/badge/cent%20os-002260?style=for-the-badge&logo=centos&logoColor=F0F0F0)](https://memgraph.com/docs/memgraph/install-memgraph-from-rpm)
[![Fedora](https://img.shields.io/badge/fedora-0B57A4?style=for-the-badge&logo=fedora&logoColor=F0F0F0)](https://memgraph.com/docs/memgraph/install-memgraph-from-rpm)
[![RedHat](https://img.shields.io/badge/redhat-EE0000?style=for-the-badge&logo=redhat&logoColor=F0F0F0)](https://memgraph.com/docs/memgraph/install-memgraph-from-rpm)
You can find the binaries and Docker images on the [Download
Hub](https://memgraph.com/download) and the installation instructions in the
[official documentation](https://memgraph.com/docs/memgraph/installation).
## :cloud: Memgraph Cloud
Check out [Memgraph Cloud](https://memgraph.com/docs/memgraph-cloud) - a cloud service fully managed on AWS and available in 6 geographic regions around the world. Memgraph Cloud allows you to create projects with Enterprise instances of MemgraphDB from your browser.
<p align="left">
<a href="https://memgraph.com/docs/memgraph-cloud">
<img width="450px" alt="Memgraph Cloud" src="https://public-assets.memgraph.com/memgraph-gifs%2Fcloud.gif">
</a>
</p>
## :link: Connect to Memgraph
[Connect to the database](https://memgraph.com/docs/memgraph/connect-to-memgraph) using Memgraph Lab, mgconsole, various drivers (Python, C/C++ and others) and WebSocket.
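As a minimal sketch of a programmatic connection (assuming a local Memgraph instance listening on the default Bolt port 7687 and the Python `neo4j` driver installed; the labels and property names below are illustrative only):

```python
from neo4j import GraphDatabase

# Memgraph speaks the Bolt protocol, so the standard Neo4j Python driver can be reused.
driver = GraphDatabase.driver("bolt://localhost:7687", auth=("", ""))

with driver.session() as session:
    # Create a tiny example graph and read it back.
    session.run("CREATE (:Person {name: 'Alice'})-[:LIKES]->(:Database {name: 'Memgraph'})")
    result = session.run("MATCH (p:Person)-[:LIKES]->(d) RETURN p.name AS person, d.name AS db")
    for record in result:
        print(record["person"], "likes", record["db"])

driver.close()
```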
### :microscope: Memgraph Lab
Play with queries to understand your data. [Memgraph Lab](https://memgraph.com/docs/memgraph-lab) is a user interface that helps you explore and manipulate the data stored in Memgraph: visualize graphs, execute ad hoc queries, and optimize their performance.
<p align="left">
<a href="https://memgraph.com/docs/memgraph-lab">
<img width="450px" alt="Memgraph Cloud" src="https://public-assets.memgraph.com/memgraph-gifs%2Flab.gif">
</a>
</p>
## :file_folder: Import data
[Import data](https://memgraph.com/docs/memgraph/import-data) into Memgraph using Kafka, RedPanda or Pulsar streams, CSV and JSON files, or Cypher commands.
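For illustration only, a CSV file can also be loaded through any Bolt client; the sketch below assumes a hypothetical `/import/people.csv` with an `id,name` header, and the exact `LOAD CSV` clause should be checked against the documentation before use:

```python
from neo4j import GraphDatabase

driver = GraphDatabase.driver("bolt://localhost:7687", auth=("", ""))

# Hypothetical CSV layout: a header row followed by id,name pairs.
load_query = """
LOAD CSV FROM '/import/people.csv' WITH HEADER AS row
CREATE (:Person {id: row.id, name: row.name});
"""

with driver.session() as session:
    session.run(load_query)

driver.close()
```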
## :bookmark_tabs: Documentation
The Memgraph documentation is available at
[memgraph.com/docs](https://memgraph.com/docs).
## :question: Configuration
Command line options that Memgraph accepts are available in the [reference
guide](https://memgraph.com/docs/memgraph/reference-guide/configuration).
## :trophy: Contributing
Welcome to the heart of Memgraph development! We're on a mission to supercharge Memgraph, making it faster, more user-friendly, and even more powerful. We owe a big thanks to our fantastic community of contributors who help us fix bugs and bring incredible improvements to life. If you're passionate about databases and open source, here's your chance to make a difference!
### Explore Memgraph Internals
Interested in the nuts and bolts of Memgraph? Our [internals documentation](https://memgraph.notion.site/Memgraph-Internals-12b69132d67a417898972927d6870bd2) is where you can uncover the inner workings of Memgraph's architecture, learn how to build the project from scratch, and discover the secrets of effective contributions. Dive deep into the database!
### Dive into the Contributing Guide
Ready to jump into the action? Explore our [contributing guide](CONTRIBUTING.md) to get the inside scoop on how we develop Memgraph. It's your roadmap for suggesting bug fixes and enhancements. Contribute your skills and ideas!
### Code of Conduct
Our commitment to a respectful and professional community is unwavering. Every participant in Memgraph is expected to adhere to a stringent Code of Conduct. Please carefully review [the complete text](CODE_OF_CONDUCT.md) to gain a comprehensive understanding of the behaviors that are both expected and explicitly prohibited.
We maintain a zero-tolerance policy towards any violations. Our shared commitment to this Code of Conduct ensures that Memgraph remains a place where integrity and excellence are paramount.
### :scroll: License
Memgraph Community is available under the [BSL
license](./licenses/BSL.txt).<br/> Memgraph Enterprise is available under the
[MEL license](./licenses/MEL.txt).
## :busts_in_silhouette: Community
- :purple_heart: [**Discord**](https://discord.gg/memgraph)
- :ocean: [**Stack Overflow**](https://stackoverflow.com/questions/tagged/memgraphdb)
- :bird: [**Twitter**](https://twitter.com/memgraphdb)
- :movie_camera:
[**YouTube**](https://www.youtube.com/channel/UCZ3HOJvHGxtQ_JHxOselBYg)
<p align="center">
<a href="#">
<img src="https://img.shields.io/badge/⬆️ back_to_top_⬆-white" alt="Back to top" title="Back to top"/>
</a>
</p>
* Linux
* clang 3.8 (good C++11 support, especially lock-free atomics)
* antlr (compiler frontend)
* cppitertools
* fmt format
* google benchmark
* google test
* glog
* gflags

39
apollo_archives.py Executable file
@ -0,0 +1,39 @@
#!/usr/bin/env python3
import json
import os
import re
import subprocess
import sys
# paths
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
BUILD_OUTPUT_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "build_release", "output"))
# helpers
def run_cmd(cmd, cwd):
return subprocess.run(cmd, cwd=cwd, check=True,
stdout=subprocess.PIPE).stdout.decode("utf-8")
# check project
if re.search(r"release", os.environ.get("PROJECT", "")) is None:
print(json.dumps([]))
sys.exit(0)
# generate archive
deb_name = run_cmd(["find", ".", "-maxdepth", "1", "-type", "f",
"-name", "memgraph*.deb"], BUILD_OUTPUT_DIR).split("\n")[0][2:]
arch = run_cmd(["dpkg", "--print-architecture"], BUILD_OUTPUT_DIR).split("\n")[0]
version = deb_name.split("-")[1]
# generate Debian package file name as expected by Debian Policy
standard_deb_name = "memgraph_{}-1_{}.deb".format(version, arch)
archive_path = os.path.relpath(os.path.join(BUILD_OUTPUT_DIR,
deb_name), SCRIPT_DIR)
archives = [{
"name": "Release (deb package)",
"archive": archive_path,
"filename": standard_deb_name,
}]
print(json.dumps(archives, indent=4, sort_keys=True))

19
apollo_archives.yaml Normal file
@ -0,0 +1,19 @@
- name: Binaries
archive:
- build_debug/memgraph
- build_release/memgraph
- build_release/tools/src/mg_import_csv
- config
filename: binaries.tar.gz
- name: Doxygen documentation
cd: docs/doxygen/html
archive:
- .
filename: documentation.tar.gz
host: true
- name: Release (user docs)
project: ^NEVER$ # TODO (mferencevic): replace with 'release' once user documentation creation is fixed (couscous -> pandoc)
archive: docs/user_technical/docs.tar.gz
filename: release_user_docs.tar.gz

85
apollo_build.yaml Normal file
@ -0,0 +1,85 @@
- name: Diff build
project: ^mg-master-diff$
commands: |
# Copy untouched repository to parent folder.
cd ..
cp -r memgraph parent
cd memgraph
# Initialize and create documentation.
TIMEOUT=1200 ./init
doxygen Doxyfile
# Remove default build directory.
rm -r build
# Build debug binaries.
mkdir build_debug
cd build_debug
cmake ..
TIMEOUT=1200 make -j$THREADS
# Build coverage binaries.
cd ..
mkdir build_coverage
cd build_coverage
cmake -DTEST_COVERAGE=ON ..
TIMEOUT=1200 make -j$THREADS memgraph__unit
# Build release binaries.
cd ..
mkdir build_release
cd build_release
cmake -DCMAKE_BUILD_TYPE=release ..
TIMEOUT=1200 make -j$THREADS memgraph tools memgraph__macro_benchmark memgraph__stress memgraph__manual__card_fraud_generate_snapshot
# Checkout to parent commit and initialize.
cd ../../parent
git checkout HEAD~1
TIMEOUT=1200 ./init
# Build parent release binaries.
mkdir build_release
cd build_release
cmake -DCMAKE_BUILD_TYPE=release ..
TIMEOUT=1200 make -j$THREADS memgraph memgraph__macro_benchmark
# release build is the default one
- name: Release build
commands: |
TIMEOUT=1200 ./init
doxygen Doxyfile
# Remove default build directory.
rm -r build
# Build debug binaries.
mkdir build_debug
cd build_debug
cmake ..
TIMEOUT=1200 make -j$THREADS
# Build coverage binaries.
cd ..
mkdir build_coverage
cd build_coverage
cmake -DTEST_COVERAGE=ON ..
TIMEOUT=1200 make -j$THREADS memgraph__unit
# Build release binaries.
cd ..
mkdir build_release
cd build_release
cmake -DCMAKE_BUILD_TYPE=Release -DUSE_READLINE=OFF ..
TIMEOUT=1200 make -j$THREADS
# Create Debian package.
mkdir output
cd output
cpack -G DEB --config ../CPackConfig.cmake
# Create user technical documentation for community site.
cd ../../docs/user_technical
# TODO (mferencevic): uncomment this once couscous is replaced with pandoc
#./bundle_community

@ -1,90 +0,0 @@
#.rst:
# FindSeccomp
# -----------
#
# Try to locate the libseccomp library.
# If found, this will define the following variables:
#
# ``Seccomp_FOUND``
# True if the seccomp library is available
# ``Seccomp_INCLUDE_DIRS``
# The seccomp include directories
# ``Seccomp_LIBRARIES``
# The seccomp libraries for linking
#
# If ``Seccomp_FOUND`` is TRUE, it will also define the following
# imported target:
#
# ``Seccomp::Seccomp``
# The Seccomp library
#
# Since 5.44.0.
#=============================================================================
# Copyright (c) 2017 Martin Flöser <mgraesslin@kde.org>
# Copyright (c) 2017 David Kahles <david.kahles96@gmail.com>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#=============================================================================
find_package(PkgConfig QUIET)
pkg_check_modules(PKG_Libseccomp QUIET libseccomp)
find_path(Seccomp_INCLUDE_DIRS
NAMES
seccomp.h
HINTS
${PKG_Libseccomp_INCLUDE_DIRS}
)
find_library(Seccomp_LIBRARIES
NAMES
seccomp
HINTS
${PKG_Libseccomp_LIBRARY_DIRS}
)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Seccomp
FOUND_VAR
Seccomp_FOUND
REQUIRED_VARS
Seccomp_LIBRARIES
Seccomp_INCLUDE_DIRS
)
if (Seccomp_FOUND AND NOT TARGET Seccomp::Seccomp)
add_library(Seccomp::Seccomp UNKNOWN IMPORTED)
set_target_properties(Seccomp::Seccomp PROPERTIES
IMPORTED_LOCATION "${Seccomp_LIBRARIES}"
INTERFACE_INCLUDE_DIRECTORIES "${Seccomp_INCLUDE_DIRS}"
)
endif()
mark_as_advanced(Seccomp_LIBRARIES Seccomp_INCLUDE_DIRS)
include(FeatureSummary)
set_package_properties(Seccomp PROPERTIES
URL "https://github.com/seccomp/libseccomp"
DESCRIPTION "The enhanced seccomp library."
)

@ -1,67 +0,0 @@
# Try to find jemalloc library
#
# Use this module as:
# find_package(jemalloc)
#
# or:
# find_package(jemalloc REQUIRED)
#
# This will define the following variables:
#
# JEMALLOC_FOUND True if the system has the jemalloc library.
# Jemalloc_INCLUDE_DIRS Include directories needed to use jemalloc.
# Jemalloc_LIBRARIES Libraries needed to link to jemalloc.
#
# The following cache variables may also be set:
#
# Jemalloc_INCLUDE_DIR The directory containing jemalloc/jemalloc.h.
# Jemalloc_LIBRARY The path to the jemalloc static library.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(jemalloc
FOUND_VAR JEMALLOC_FOUND
REQUIRED_VARS
JEMALLOC_LIBRARY
JEMALLOC_INCLUDE_DIR
)
if(JEMALLOC_INCLUDE_DIR)
message(STATUS "Found jemalloc include dir: ${JEMALLOC_INCLUDE_DIR}")
else()
message(WARNING "jemalloc not found!")
endif()
if(JEMALLOC_LIBRARY)
message(STATUS "Found jemalloc library: ${JEMALLOC_LIBRARY}")
else()
message(WARNING "jemalloc library not found!")
endif()
if(JEMALLOC_FOUND)
set(Jemalloc_LIBRARIES ${JEMALLOC_LIBRARY})
set(Jemalloc_INCLUDE_DIRS ${JEMALLOC_INCLUDE_DIR})
else()
if(Jemalloc_FIND_REQUIRED)
message(FATAL_ERROR "Cannot find jemalloc!")
else()
message(WARNING "jemalloc is not found!")
endif()
endif()
if(JEMALLOC_FOUND AND NOT TARGET Jemalloc::Jemalloc)
message(STATUS "JEMALLOC NOT TARGET")
add_library(Jemalloc::Jemalloc UNKNOWN IMPORTED)
set_target_properties(Jemalloc::Jemalloc
PROPERTIES
IMPORTED_LOCATION "${JEMALLOC_LIBRARY}"
INTERFACE_INCLUDE_DIRECTORIES "${JEMALLOC_INCLUDE_DIR}"
)
endif()
mark_as_advanced(
JEMALLOC_INCLUDE_DIR
JEMALLOC_LIBRARY
)

19
config/benchmarking.conf Normal file
@ -0,0 +1,19 @@
# MEMGRAPH DEFAULT BENCHMARKING CONFIG
# NOTE: all paths are relative to the run folder
# (where the executable is run)
# no durability
--durability-enabled=false
--snapshot-on-exit=false
--db-recover-on-startup=false
# no GC
--gc-cycle-sec=-1
--skiplist_gc_interval=-1
# no query execution time limit
--query_execution_time_sec=-1
# number of workers
--num-workers=1

122
config/community.conf Normal file
@ -0,0 +1,122 @@
# Default Memgraph Configuration
#
# This is the default configuration for memgraph. Settings from this file will
# be overridden by a configuration file in '$HOME/.memgraph/config', so you can
# keep this file intact. Additional configuration can be specified in a file
# pointed to by 'MEMGRAPH_CONFIG' environment variable or by passing arguments
# on the command line.
#
# Each configuration setting is of the form: '--setting-name=value'.
## Database
# IP address the server should listen on.
--interface=0.0.0.0
# Port the server should listen on.
--port=7687
# Path to a SSL certificate file that should be used.
--cert-file=/etc/memgraph/ssl/cert.pem
# Path to a SSL key file that should be used.
--key-file=/etc/memgraph/ssl/key.pem
# Number of workers used by the Memgraph server. By default, this will be the
# number of processing units available on the machine.
# --num-workers=8
# Interval, in seconds, when the garbage collection (GC) should run. GC is used
# for releasing memory that is no longer needed. For example, deleted graph
# elements which cannot be seen by any running or new transactions. If set to
# -1 the GC will never run (use with caution, memory will never get released).
--gc-cycle-sec=30
# If Memgraph detects there is less available RAM than the given number in MB,
# it will log a warning.
--memory-warning-threshold=1024
# The telemetry collects data about the machine that is executing the database
# (CPU, Memory, OS and Kernel Information) and data about the database runtime
# (CPU usage, memory usage, vertex and edge counts). It is used to provide a
# better product, is easy to disable, and does not collect any sensitive data.
--telemetry-enabled=true
# Memgraph offers an option to store a certain amount of data on disk. More
# precisely, the user can pass a list of properties they wish to keep stored on
# disk. The property names have to be separated with a comma. An example
# would be --properties-on-disk=biography,summary.
#--properties-on-disk=
## Ids
# Memgraph can generate an identifier for each vertex or edge. The
# generated ids are returned with the id function.
# Memgraph can generate an identifier for each vertex.
--generate-vertex-ids=true
# Memgraph can generate an identifier for each edge.
--generate-edge-ids=true
## Query
#
# Various settings related to openCypher query execution.
# Maximum allowed query execution time, in seconds. Any queries exceeding this
# limit will be aborted. Setting to -1 removes the limit.
--query-execution-time-sec=30
# Cache generated query execution plans. This speeds up planning repeated
# queries which produce multiple complex execution plans. The downside is that
# some executions may use inferior plans if the database state changed. To
# disable caching, set to false.
#--query-plan-cache=false
# Time to live for cached query plans, in seconds. This tries to minimize the
# downside of caching by evicting old plans after the given time.
#--query-plan-cache-ttl=60
## Durability
#
# Memgraph can store database state to persistent storage. Two mechanisms
# are used: snapshots store the total current database state while write-ahead
# logs store small changes incrementally. They are used in tandem to provide
# fast and storage-efficient persistence. Some aspects of snapshot taking
# are configurable, while write-ahead logging is pre-configured for optimal
# performance.
--durability-enabled=true
# Path to the directory where snapshots and write-ahead log files will be stored.
--durability-directory=/var/lib/memgraph/durability
# Recover the database on startup.
--db-recover-on-startup=true
# Interval of taking snapshots, in seconds. If set to -1, the snapshot feature
# will be turned off.
--snapshot-cycle-sec=300
# Create a snapshot when closing Memgraph.
--snapshot-on-exit=true
# Maximum number of kept snapshots. Old snapshots will be deleted to make room
# for new ones. If set to -1, the number of kept snapshots is unlimited.
--snapshot-max-retained=3
## Logging
# Path to where the log should be stored.
--log-file=/var/log/memgraph/memgraph.log
# If true, log messages will go to stderr in addition to logfiles.
#--also-log-to-stderr=true
## Additional Configuration Inclusion
# Include additional configuration from this file. Settings with the same name
# will override previously read values. Note that reading of the configuration
# file that specified '--flag-file' continues after the inclusion. Therefore,
# settings placed after '--flag-file' may override the included ones.
#--flag-file=another.conf

@ -0,0 +1,15 @@
# MEMGRAPH DEFAULT DURABILITY STRESS TESTING CONFIG
# NOTE: all paths are relative to the run folder
# (where the executable is run)
# enable durability
--durability-enabled=true
--snapshot-cycle-sec=5
--snapshot-on-exit=false
--snapshot-max-retained=2
--db-recover-on-startup=true
# increase query timeout (10 min)
--query-execution-time-sec=600

@ -1,133 +0,0 @@
header: >-
Memgraph Configuration
This is the main configuration file for Memgraph. You can modify this file to
suit your specific needs. Additional configuration can be specified by
including another configuration file, in a file pointed to by the
'MEMGRAPH_CONFIG' environment variable or by passing arguments on the command
line.
Each configuration setting is in the form: '--setting-name=value'.
footer: >-
Additional Configuration Inclusion
You can include additional configuration files from this file. Additional
files are processed after this file. Settings that are set in the additional
files will override previously set values. Additional configuration files are
specified with the '--flag-file' flag.
Example:
--flag-file=another.conf
modifications:
# Each modification should consist of the following parameters:
# * name: the name of the flag that should be modified (with underscores)
# [string]
# * value: the value that should be set instead of the binary provided
# default value [string]
# * override: set to `true` to uncomment the config option by default
# [boolean]
- name: "data_directory"
value: "/var/lib/memgraph"
override: true
- name: "log_file"
value: "/var/log/memgraph/memgraph.log"
override: true
- name: "log_level"
value: "WARNING"
override: true
- name: "bolt_num_workers"
value: ""
override: false
- name: "bolt_cert_file"
value: "/etc/memgraph/ssl/cert.pem"
override: false
- name: "bolt_key_file"
value: "/etc/memgraph/ssl/key.pem"
override: false
- name: "storage_properties_on_edges"
value: "true"
override: true
- name: "storage_recover_on_startup"
value: "true"
override: true
- name: "storage_snapshot_interval_sec"
value: "300"
override: true
- name: "storage_snapshot_on_exit"
value: "true"
override: true
- name: "storage_snapshot_retention_count"
value: "3"
override: true
- name: "storage_wal_enabled"
value: "true"
override: true
- name: "telemetry_enabled"
value: "true"
override: true
- name: "query_modules_directory"
value: "/usr/lib/memgraph/query_modules"
override: true
- name: "auth_module_executable"
value: "/usr/lib/memgraph/auth_module/example.py"
override: false
- name: "memory_limit"
value: "0"
override: true
- name: "isolation_level"
value: "SNAPSHOT_ISOLATION"
override: true
- name: "storage_mode"
value: "IN_MEMORY_TRANSACTIONAL"
override: true
- name: "allow_load_csv"
value: "true"
override: false
- name: "storage_parallel_index_recovery"
value: "false"
override: true
- name: "storage_parallel_schema_recovery"
value: "false"
override: true
- name: "storage_enable_schema_metadata"
value: "false"
override: true
- name: "query_callable_mappings_path"
value: "/etc/memgraph/apoc_compatibility_mappings.json"
override: true
undocumented:
- "flag_file"
- "also_log_to_stderr"
- "help"
- "help_xml"
- "version"
- "organization_name"
- "license_key"

@ -1,124 +0,0 @@
#!/usr/bin/env python3
import argparse
import copy
import os
import subprocess
import sys
import textwrap
import xml.etree.ElementTree as ET
import yaml
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
CONFIG_FILE = os.path.join(SCRIPT_DIR, "flags.yaml")
WIDTH = 80
def wrap_text(s, initial_indent="# "):
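# Wrap every newline-separated paragraph of 's' at WIDTH columns, prefixing each
# line with a comment marker so the result can be written directly into the
# generated configuration file.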
return "\n#\n".join(
map(lambda x: textwrap.fill(x, WIDTH, initial_indent=initial_indent, subsequent_indent="# "), s.split("\n"))
)
def extract_flags(binary_path):
ret = {}
data = subprocess.run([binary_path, "--help-xml"], stdout=subprocess.PIPE).stdout.decode("utf-8")
# If something is printed out before the help output, it will break the
# XML parsing -> filter out lines that are not XML because something
# can be logged before the gflags output (e.g. during global object init).
# This gets called during memgraph build phase to generate default config
# file later installed under /etc/memgraph/memgraph.conf
# NOTE: Don't use \n in the gflags description strings.
# NOTE: Check here if gflags version changes because of the XML format.
data = "\n".join([line for line in data.split("\n") if line.startswith("<")])
root = ET.fromstring(data)
for child in root:
if child.tag == "usage" and child.text.lower().count("warning"):
raise Exception("You should set the usage message!")
if child.tag == "flag":
flag = {}
for elem in child:
flag[elem.tag] = elem.text if elem.text is not None else ""
flag["override"] = False
ret[flag["name"]] = flag
return ret
def apply_config_to_flags(config, flags):
flags = copy.deepcopy(flags)
for name in config["undocumented"]:
flags.pop(name)
for modification in config["modifications"]:
name = modification["name"]
if name not in flags:
print("WARNING: Flag '" + name + "' missing from binary!", file=sys.stderr)
continue
flags[name]["default"] = modification["value"]
flags[name]["override"] = modification["override"]
return flags
def extract_sections(flags):
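# Group flags into sections by the prefix before the first underscore in their
# name; prefixes with fewer than two flags end up in a catch-all 'other' section.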
sections = []
other = []
current_section = ""
current_flags = []
for name in sorted(flags.keys()):
section = name.split("_")[0]
if section == current_section:
current_flags.append(name)
else:
if len(current_flags) < 2:
other.extend(current_flags)
else:
sections.append((current_section, current_flags))
current_section = section
current_flags = [name]
if len(current_flags) < 2:
other.extend(current_flags)
else:
sections.append((current_section, current_flags))
sections.append(("other", other))
assert set(sum(map(lambda x: x[1], sections), [])) == set(
flags.keys()
), "The section extraction algorithm lost some flags!"
return sections
def generate_config_file(sections, flags):
ret = wrap_text(config["header"]) + "\n\n\n"
for section, section_flags in sections:
ret += wrap_text(section.capitalize(), initial_indent="## ") + "\n\n"
for name in section_flags:
flag = flags[name]
helpstr = flag["meaning"] + " [" + flag["type"] + "]"
ret += wrap_text(helpstr) + "\n"
prefix = "# " if not flag["override"] else ""
ret += prefix + "--" + flag["name"].replace("_", "-") + "=" + flag["default"] + "\n\n"
ret += "\n"
ret += wrap_text(config["footer"])
return ret.strip() + "\n"
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("memgraph_binary", help="path to Memgraph binary")
parser.add_argument("output_file", help="path where to store the generated Memgraph " "configuration file")
parser.add_argument("--config-file", default=CONFIG_FILE, help="path to generator configuration file")
args = parser.parse_args()
flags = extract_flags(args.memgraph_binary)
with open(args.config_file) as f:
config = yaml.safe_load(f)
flags = apply_config_to_flags(config, flags)
sections = extract_sections(flags)
data = generate_config_file(sections, flags)
dirname = os.path.dirname(args.output_file)
if dirname and not os.path.exists(dirname):
os.makedirs(dirname)
with open(args.output_file, "w") as f:
f.write(data)

@ -1,26 +0,0 @@
{
"dbms.components": "mgps.components",
"apoc.util.validate": "mgps.validate",
"db.schema.nodeTypeProperties": "schema.NodeTypeOroperties",
"db.schema.relTypeProperties": "schema.RelTypeProperties",
"apoc.coll.contains": "collections.contains",
"apoc.coll.partition": "collections.partition",
"apoc.coll.toSet": "collections.to_set",
"apoc.coll.unionAll": "collections.unionAll",
"apoc.coll.removeAll": "collections.remove_all",
"apoc.coll.union": "collections.union",
"apoc.coll.sum": "collections.sum",
"apoc.coll.pairs": "collections.pairs",
"apoc.map.fromLists": "map.from_lists",
"apoc.map.removeKeys": "map.remove_keys",
"apoc.map.merge": "map.merge",
"apoc.create.nodes": "create.nodes",
"apoc.create.removeProperties": "create.remove_properties",
"apoc.create.node": "create.node",
"apoc.create.removeLabel": "create.remove_label",
"apoc.refactor.invert": "refactor.invert",
"apoc.refactor.cloneNode": "refactor.clone_node",
"apoc.refactor.cloneSubgraph": "refactor.clone_subgraph",
"apoc.refactor.cloneSubgraphFromPath": "refactor.clone_subgraph_from_path",
"apoc.label.exists": "label.exists"
}

15
config/stress.conf Normal file
@ -0,0 +1,15 @@
# MEMGRAPH DEFAULT STRESS TESTING CONFIG
# NOTE: all paths are relative to the run folder
# (where the executable is run)
# enable durability
--durability-enabled=true
--snapshot-cycle-sec=600
--snapshot-on-exit=true
--snapshot-max-retained=1
--db-recover-on-startup=false
# increase query timeout (10 min)
--query-execution-time-sec=600

9
config/testing.conf Normal file
@ -0,0 +1,9 @@
# MEMGRAPH DEFAULT TESTING CONFIG
# NOTE: all paths are relative to the run folder
# (where the executable is run)
# no durability
--durability-enabled=false
--snapshot-on-exit=false
--db-recover-on-startup=false

2
customers/CMakeLists.txt Normal file
@ -0,0 +1,2 @@
project(mg_customers)
add_subdirectory(otto)

@ -0,0 +1,70 @@
DISCLAIMER: this is just an initial test; the graph might not resemble
the graph in the use case at all, and the data might be completely
irrelevant.
We tried generating a few sample graphs from the vague description
given in the use case doc. Then we tried writing queries that would
solve the problem of updating nodes when a leaf value changes,
assuming all the internal nodes compute only the sum function.
We start by creating an index on the `id` property to improve initial lookup
performance:
CREATE INDEX ON :Leaf(id)
Set the values of all leaves to 1:
MATCH (u:Leaf) SET u.value = 1
Now we initialize the values of all other nodes in the graph:
MATCH (u) WHERE NOT u:Leaf SET u.value = 0
MATCH (u) WITH u
ORDER BY u.topological_index DESC
MATCH (u)-->(v) SET u.value = u.value + v.value
Change the value of a leaf:
MATCH (u:Leaf {id: "18"}) SET u.value = 10
We have to reset all the updated nodes to a neutral element:
MATCH (u:Leaf {id: "18"})<-[* bfs]-(v)
WHERE NOT v:Leaf SET v.value = 0
Finally, we recalculate their values in topological order:
MATCH (u:Leaf {id: "18"})<-[* bfs]-(v)
WITH v ORDER BY v.topological_index DESC
MATCH (v)-->(w) SET v.value = v.value + w.value
There are a few assumptions worth pointing out.
* We are able to efficiently maintain topological order
of vertices in the graph.
* It is possible to accumulate the value of the function. Formally:
$$f(x_1, x_2, ..., x_n) = g(...(g(g(x_1, x_2), x_3), ...), x_n)$$
* There is a neutral element for the operation. However, this
assumption can be dropped by introducing an artificial neutral element.
The number of operations required is proportional to the sum of the degrees of
the affected nodes.
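To make the recomputation scheme concrete, the following is a small pure-Python sketch of the same idea on a toy DAG, using sum as the aggregate and 0 as its neutral element (the node names and structure are illustrative only, not taken from the actual use case):

```python
# Toy DAG: parent -> children; values live on leaves, internal nodes aggregate them.
children = {
    "root": ["g1", "g2"],
    "g1": ["leaf1", "leaf2"],
    "g2": ["leaf2", "leaf3"],
}
value = {"leaf1": 1, "leaf2": 1, "leaf3": 1}

# Topological order: every node appears before all of its descendants.
topo_order = ["root", "g1", "g2", "leaf1", "leaf2", "leaf3"]

def recompute(affected):
    """Reset affected internal nodes to the neutral element (0 for sum),
    then re-accumulate their children in reverse topological order."""
    for node in affected:
        value[node] = 0
    for node in reversed(topo_order):
        if node in affected:
            for child in children.get(node, []):
                value[node] += value[child]

# Initial computation over all internal nodes.
recompute({"root", "g1", "g2"})
assert value["root"] == 4  # leaf2 is counted through both g1 and g2

# A leaf changes; only its ancestors need to be reset and recomputed.
value["leaf2"] = 10
recompute({"root", "g1", "g2"})
print(value["root"])  # 22
```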
We generated a graph with $10^5$ nodes ($20\ 000$ nodes in each layer), varied the
degree distribution across node layers, and measured the time for the queries to execute:
| # | Root-Category-Group degree | Group-CustomGroup-Leaf degree | Time |
|:-:|:---------------------------:|:-----------------------------:|:---------:|
| 1 | [1, 10] | [20, 40] | ~1.1s |
| 2 | [1, 10] | [50, 100] | ~2.5s |
| 3 | [10, 50] | [50, 100] | ~3.3s |
Due to the structure of the graph, updating a leaf required updating almost
all the nodes in the graph, so we don't show the times for the initial graph
update and the update after a leaf change separately.
However, there is not enough information on the use case to make the test more
sophisticated.

@ -0,0 +1,71 @@
---
title: "Elliott Management"
subtitle: "Proof of Concept Report"
header-title: "Elliott Management POC"
date: 2017-10-28
copyright: "©2017 Memgraph Ltd. All rights reserved."
titlepage: true
titlepage-color: FFFFFF
titlepage-text-color: 101010
titlepage-rule-color: 101010
titlepage-rule-height: 1
...
# Introduction
We tried generating a few sample graphs from the description given at
the in-person meetings. Then, we tried writing queries that would solve
the problem of updating nodes when a leaf value changes, assuming all the
internal nodes compute only the sum function.
# Technical details
We started by creating an index on the `id` property to improve initial lookup
performance:
CREATE INDEX ON :Leaf(id)
After that, we set the values of all leaves to 1:
MATCH (u:Leaf) SET u.value = 1
We then initialized the values of all other nodes in the graph:
MATCH (u) WHERE NOT u:Leaf SET u.value = 0
MATCH (u) WITH u
ORDER BY u.topological_index DESC
MATCH (u)-->(v) SET u.value = u.value + v.value
A leaf value change and the update of the affected values in the graph can
be done using three queries. To change the value of a leaf:
MATCH (u:Leaf {id: "18"}) SET u.value = 10
Then we had to reset all the affected nodes to the neutral element:
MATCH (u:Leaf {id: "18"})<-[* bfs]-(v)
WHERE NOT v:Leaf SET v.value = 0
Finally, we recalculated their values in topological order:
MATCH (u:Leaf {id: "18"})<-[* bfs]-(v)
WITH v ORDER BY v.topological_index DESC
MATCH (v)-->(w) SET v.value = v.value + w.value
There are a few assumptions necessary for the approach above to work.
* We are able to maintain topological order of vertices during graph
structure changes.
* It is possible to accumulate the value of the function. Formally:
$$f(x_1, x_2, ..., x_n) = g(...(g(g(x_1, x_2), x_3), ...), x_n)$$
* There is a neutral element for the operation. However, this
assumption can be dropped by introducing an artificial neutral element.
The above assumptions could be changed, relaxed, or dropped, depending on the
specifics of the use case.
The number of operations required is proportional to the sum of the degrees of
the affected nodes.

@ -0,0 +1,12 @@
CREATE INDEX ON :Leaf(id);
MATCH (u:Leaf) SET u.value = 1;
MATCH (u) WHERE NOT u:Leaf SET u.value = 0;
MATCH (u) WITH u
ORDER BY u.topological_index DESC
MATCH (u)-->(v) SET u.value = u.value + v.value;
MATCH (u:Leaf {id: "85000"}) SET u.value = 10;
MATCH (u:Leaf {id: "85000"})<-[* bfs]-(v)
WHERE NOT v:Leaf SET v.value = 0;
MATCH (u:Leaf {id: "85000"})<-[* bfs]-(v)
WITH v ORDER BY v.topological_index DESC
MATCH (v)-->(w) SET v.value = v.value + w.value;

125
customers/elliott/generate_dag Executable file
@ -0,0 +1,125 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Generates a DAG from JSON spec in [config] and outputs nodes to
[filename]_nodes, and edges to [filename]_edges in format convertible
to Memgraph snapshot.
Here's an example JSON spec:
{
"layers": [
{
"name": "A",
"sublayers": 1,
"degree_lo": 1,
"degree_hi": 3,
"nodes": 4
},
{
"name": "B",
"sublayers": 3,
"degree_lo": 2,
"degree_hi": 3,
"nodes": 10
},
{
"name": "C",
"sublayers": 1,
"degree_lo": 1,
"degree_hi": 1,
"nodes": 5
}
]
}
Nodes from each layer will be randomly divided into sublayers. A node can
only have edges pointing to nodes in lower sublayers of the same layer, or
to nodes from the layer directly below it. Out-degree is chosen uniformly
random from [degree_lo, degree_hi] interval."""
import argparse
from itertools import accumulate
import json
import random
def _split_into_sum(n, k):
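# Split n into k positive integers that sum to n by sampling k-1 distinct cut
# points in (0, n) and taking the gaps between consecutive cuts.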
assert 1 <= n, "n should be at least 1"
assert k <= n, "k shouldn't be greater than n"
xs = [0] + sorted(random.sample(range(1, n), k-1)) + [n]
return [b - a for a, b in zip(xs, xs[1:])]
def generate_dag(graph_config, seed=None):
random.seed(seed)
nodes = []
edges = []
layer_lo = 1
for layer in graph_config:
sublayers = _split_into_sum(layer['nodes'], layer['sublayers'])
sub_range = accumulate([layer_lo] + sublayers)
layer['sublayer_range'] = list(sub_range)
nodes.extend([
(u, layer['name'])
for u in range(layer_lo, layer_lo + layer['nodes'])
])
layer_lo += layer['nodes']
edges = []
for layer, next_layer in zip(graph_config, graph_config[1:]):
degree_lo = layer['degree_lo']
degree_hi = layer['degree_hi']
sub_range = layer['sublayer_range']
sub_range_next = next_layer['sublayer_range']
layer_lo = sub_range[0]
next_layer_hi = sub_range_next[-1]
for sub_lo, sub_hi in zip(sub_range, sub_range[1:]):
for u in range(sub_lo, sub_hi):
num_edges = random.randint(degree_lo, degree_hi)
for _ in range(num_edges):
v = random.randint(sub_hi, next_layer_hi - 1)
edges.append((u, v))
for sub_lo, sub_hi in zip(sub_range_next, sub_range_next[1:]):
for u in range(sub_lo, sub_hi):
v = random.randint(layer_lo, sub_lo - 1)
edges.append((v, u))
return nodes, edges
if __name__ == '__main__':
parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,
description=__doc__)
parser.add_argument('config', type=str, help='graph config JSON file')
parser.add_argument('filename', type=str,
help='nodes will be stored to filename_nodes, '
'edges to filename_edges')
parser.add_argument('--seed', type=int,
help='seed for the random generator (default = '
'current system time)')
args = parser.parse_args()
with open(args.config, 'r') as f:
graph_config = json.loads(f.read())['layers']
nodes, edges = generate_dag(graph_config, seed=args.seed)
# print nodes into CSV file
with open('{}_nodes'.format(args.filename), 'w') as out:
out.write('nodeId:ID(Node),name,topological_index:Int,:LABEL\n')
for node_id, layer in nodes:
out.write('{0},{1}{0},{0},{1}\n'.format(node_id, layer))
# print edges into CSV file
with open('{}_edges'.format(args.filename), 'w') as out:
out.write(':START_ID(Node),:END_ID(Node),:TYPE\n')
for u, v in edges:
out.write('{},{},child\n'.format(u, v))

@ -0,0 +1,39 @@
{
"layers": [
{
"name": "Root",
"sublayers": 1,
"degree_lo": 1,
"degree_hi": 10,
"nodes": 20000
},
{
"name": "Category",
"sublayers": 5,
"degree_lo": 1,
"degree_hi": 10,
"nodes": 20000
},
{
"name": "Group",
"sublayers": 1,
"degree_lo": 20,
"degree_hi": 40,
"nodes": 20000
},
{
"name": "CustomGroup",
"sublayers": 15,
"degree_lo": 20,
"degree_hi": 40,
"nodes": 20000
},
{
"name": "Leaf",
"sublayers": 1,
"degree_lo": 1,
"degree_hi": 1,
"nodes": 20000
}
]
}

@ -0,0 +1,39 @@
{
"layers": [
{
"name": "Root",
"sublayers": 1,
"degree_lo": 1,
"degree_hi": 10,
"nodes": 20000
},
{
"name": "Category",
"sublayers": 5,
"degree_lo": 1,
"degree_hi": 10,
"nodes": 20000
},
{
"name": "Group",
"sublayers": 1,
"degree_lo": 50,
"degree_hi": 100,
"nodes": 20000
},
{
"name": "CustomGroup",
"sublayers": 15,
"degree_lo": 50,
"degree_hi": 100,
"nodes": 20000
},
{
"name": "Leaf",
"sublayers": 1,
"degree_lo": 50,
"degree_hi": 100,
"nodes": 20000
}
]
}

@ -0,0 +1,39 @@
{
"layers": [
{
"name": "Root",
"sublayers": 1,
"degree_lo": 10,
"degree_hi": 50,
"nodes": 20000
},
{
"name": "Category",
"sublayers": 5,
"degree_lo": 10,
"degree_hi": 50,
"nodes": 20000
},
{
"name": "Group",
"sublayers": 1,
"degree_lo": 50,
"degree_hi": 100,
"nodes": 20000
},
{
"name": "CustomGroup",
"sublayers": 15,
"degree_lo": 50,
"degree_hi": 100,
"nodes": 20000
},
{
"name": "Leaf",
"sublayers": 1,
"degree_lo": 50,
"degree_hi": 100,
"nodes": 20000
}
]
}

@ -0,0 +1,3 @@
set(exec_name customers_otto_parallel_connected_components)
add_executable(${exec_name} parallel_connected_components.cpp)
target_link_libraries(${exec_name} memgraph_lib)

@ -0,0 +1,118 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
This script attempts to evaluate the feasibility of using Memgraph for
Otto group's use case. The use case is finding connected components in
a large, very sparse graph (ca. 220M nodes, 250M edges), based on dynamic
inclusion / exclusion of edges (w.r.t. variable parameters and the source node
type).
This implementation defines a random graph with the given number of nodes
and edges and looks for connected components using breadth-first expansion.
Edges are included / excluded based on a simple expression, only demonstrating
possible usage.
"""
from argparse import ArgumentParser
import logging
from time import time
from collections import defaultdict
from math import log2
from random import randint
from neo4j.v1 import GraphDatabase
log = logging.getLogger(__name__)
def generate_graph(sess, node_count, edge_count):
# An index that will speed-up edge creation.
sess.run("CREATE INDEX ON :Node(id)").consume()
# Create the given number of nodes with a randomly selected type from:
# [0.5, 1.5, 2.5].
sess.run(("UNWIND range(0, {} - 1) AS id CREATE "
"(:Node {{id: id, type: 0.5 + tointeger(rand() * 3)}})").format(
node_count)).consume()
# Create the given number of edges, each with a 'value' property of
# a random [0, 3.0) float. Each edge connects two random nodes, so the
# expected node degree is (edge_count * 2 / node_count). Generate edges
# so the connectivity is non-uniform (to produce connected components of
# various sizes).
sess.run(("UNWIND range(0, {0} - 1) AS id WITH id "
"MATCH (from:Node {{id: tointeger(rand() * {1})}}), "
"(to:Node {{id: tointeger(rand() * {1} * id / {0})}}) "
"CREATE (from)-[:Edge {{value: 3 * rand()}}]->(to)").format(
edge_count, node_count)).consume()
def get_connected_ids(sess, node_id):
# Matches a node with the given ID and returns the IDs of all the nodes
# it is connected to. Note that within the BFS lambda expression there
# is an expression used to filter out edges expanded over.
return sess.run((
"MATCH (from:Node {{id: {}}})-"
"[*bfs (e, n | abs(from.type - e.value) < 0.80)]-(d) "
"RETURN count(*) AS c").format(node_id)).data()[0]['c']
def parse_args():
parser = ArgumentParser(description=__doc__)
parser.add_argument('--endpoint', type=str, default='localhost:7687',
help='Memgraph instance endpoint. ')
parser.add_argument('--node-count', type=int, default=1000,
help='The number of nodes in the graph')
parser.add_argument('--edge-count', type=int, default=1000,
help='The number of edges in the graph')
parser.add_argument('--sample-count', type=int, default=None,
help='The number of samples to take')
return parser.parse_args()
def main():
args = parse_args()
logging.basicConfig(level=logging.INFO)
log.info("Memgraph - Otto test database generator")
logging.getLogger("neo4j").setLevel(logging.WARNING)
driver = GraphDatabase.driver(
'bolt://' + args.endpoint,
auth=("ignored", "ignored"),
encrypted=False)
sess = driver.session()
sess.run("MATCH (n) DETACH DELETE n").consume()
log.info("Generating graph with %s nodes and %s edges...",
args.node_count, args.edge_count)
generate_graph(sess, args.node_count, args.edge_count)
# Track which vertices have been found as part of a component.
start_time = time()
max_query_time = 0
log.info("Looking for connected components...")
# Histogram of log2 sizes of connected components found.
histogram = defaultdict(int)
sample_count = args.sample_count if args.sample_count else args.node_count
for i in range(sample_count):
node_id = randint(0, args.node_count - 1)
query_start_time = time()
log2_size = int(log2(1 + get_connected_ids(sess, node_id)))
max_query_time = max(max_query_time, time() - query_start_time)
histogram[log2_size] += 1
elapsed = time() - start_time
log.info("Connected components found in %.2f sec (avg %.2fms, max %.2fms)",
elapsed, elapsed / sample_count * 1000, max_query_time * 1000)
log.info("Component size histogram (count | range)")
for log2_size, count in sorted(histogram.items()):
log.info("\t%5d | %d - %d", count, 2 ** log2_size,
2 ** (log2_size + 1) - 1)
sess.close()
driver.close()
if __name__ == '__main__':
main()

@ -0,0 +1,207 @@
#include <algorithm>
#include <limits>
#include <mutex>
#include <random>
#include <set>
#include <stack>
#include <thread>
#include "gflags/gflags.h"
#include "glog/logging.h"
#include "data_structures/union_find.hpp"
#include "database/graph_db.hpp"
#include "database/graph_db_accessor.hpp"
#include "storage/property_value.hpp"
#include "threading/sync/spinlock.hpp"
#include "utils/bound.hpp"
#include "utils/timer.hpp"
DEFINE_int32(thread_count, 1, "Number of threads");
DEFINE_int32(vertex_count, 1000, "Number of vertices");
DEFINE_int32(edge_count, 1000, "Number of edges");
DECLARE_int32(gc_cycle_sec);
static const std::string kLabel{"kLabel"};
static const std::string kProperty{"kProperty"};
void GenerateGraph(database::GraphDb &db) {
{
database::GraphDbAccessor dba{db};
dba.BuildIndex(dba.Label(kLabel), dba.Property(kProperty));
dba.Commit();
}
// Randomize the sequence of IDs of created vertices and edges to simulate
// real-world lack of locality.
auto make_id_vector = [](size_t size) {
gid::Generator generator{0};
std::vector<gid::Gid> ids(size);
for (size_t i = 0; i < size; ++i)
ids[i] = generator.Next(std::experimental::nullopt);
std::random_shuffle(ids.begin(), ids.end());
return ids;
};
std::vector<VertexAccessor> vertices;
vertices.reserve(FLAGS_vertex_count);
{
CHECK(FLAGS_vertex_count % FLAGS_thread_count == 0)
<< "Thread count must be a factor of vertex count";
LOG(INFO) << "Generating " << FLAGS_vertex_count << " vertices...";
utils::Timer timer;
auto vertex_ids = make_id_vector(FLAGS_vertex_count);
std::vector<std::thread> threads;
SpinLock vertices_lock;
for (int i = 0; i < FLAGS_thread_count; ++i) {
threads.emplace_back([&db, &vertex_ids, &vertices, &vertices_lock, i]() {
database::GraphDbAccessor dba{db};
auto label = dba.Label(kLabel);
auto property = dba.Property(kProperty);
auto batch_size = FLAGS_vertex_count / FLAGS_thread_count;
for (int j = i * batch_size; j < (i + 1) * batch_size; ++j) {
auto vertex = dba.InsertVertex(vertex_ids[j]);
vertex.add_label(label);
vertex.PropsSet(property, static_cast<int64_t>(vertex_ids[j]));
vertices_lock.lock();
vertices.emplace_back(vertex);
vertices_lock.unlock();
}
dba.Commit();
});
}
for (auto &t : threads) t.join();
LOG(INFO) << "Generated " << FLAGS_vertex_count << " vertices in "
<< timer.Elapsed().count() << " seconds.";
}
{
database::GraphDbAccessor dba{db};
for (int i = 0; i < FLAGS_vertex_count; ++i)
vertices[i] = *dba.Transfer(vertices[i]);
LOG(INFO) << "Generating " << FLAGS_edge_count << " edges...";
auto edge_ids = make_id_vector(FLAGS_edge_count);
std::mt19937 pseudo_rand_gen{std::random_device{}()};
std::uniform_int_distribution<> rand_dist{0, FLAGS_vertex_count - 1};
auto edge_type = dba.EdgeType("edge");
utils::Timer timer;
for (int i = 0; i < FLAGS_edge_count; ++i)
dba.InsertEdge(vertices[rand_dist(pseudo_rand_gen)],
vertices[rand_dist(pseudo_rand_gen)], edge_type,
edge_ids[i]);
dba.Commit();
LOG(INFO) << "Generated " << FLAGS_edge_count << " edges in "
<< timer.Elapsed().count() << " seconds.";
}
}
auto EdgeIteration(database::GraphDb &db) {
database::GraphDbAccessor dba{db};
int64_t sum{0};
for (auto edge : dba.Edges(false)) sum += edge.from().gid() + edge.to().gid();
return sum;
}
auto VertexIteration(database::GraphDb &db) {
database::GraphDbAccessor dba{db};
int64_t sum{0};
for (auto v : dba.Vertices(false))
for (auto e : v.out()) sum += e.gid() + e.to().gid();
return sum;
}
auto ConnectedComponentsEdges(database::GraphDb &db) {
UnionFind<int64_t> connectivity{FLAGS_vertex_count};
database::GraphDbAccessor dba{db};
for (auto edge : dba.Edges(false))
connectivity.Connect(edge.from().gid(), edge.to().gid());
return connectivity.Size();
}
auto ConnectedComponentsVertices(database::GraphDb &db) {
UnionFind<int64_t> connectivity{FLAGS_vertex_count};
database::GraphDbAccessor dba{db};
for (auto from : dba.Vertices(false)) {
for (auto out_edge : from.out())
connectivity.Connect(from.gid(), out_edge.to().gid());
}
return connectivity.Size();
}
auto ConnectedComponentsVerticesParallel(database::GraphDb &db) {
UnionFind<int64_t> connectivity{FLAGS_vertex_count};
SpinLock connectivity_lock;
// Define bounds of vertex IDs for each thread to use.
std::vector<PropertyValue> bounds;
for (int64_t i = 0; i < FLAGS_thread_count; ++i)
bounds.emplace_back(i * FLAGS_vertex_count / FLAGS_thread_count);
bounds.emplace_back(std::numeric_limits<int64_t>::max());
std::vector<std::thread> threads;
for (int i = 0; i < FLAGS_thread_count; ++i) {
threads.emplace_back(
[&connectivity, &connectivity_lock, &bounds, &db, i]() {
database::GraphDbAccessor dba{db};
for (auto from :
dba.Vertices(dba.Label(kLabel), dba.Property(kProperty),
utils::MakeBoundInclusive(bounds[i]),
utils::MakeBoundExclusive(bounds[i + 1]), false)) {
for (auto out_edge : from.out()) {
std::lock_guard<SpinLock> lock{connectivity_lock};
connectivity.Connect(from.gid(), out_edge.to().gid());
}
}
});
}
for (auto &t : threads) t.join();
return connectivity.Size();
}
auto Expansion(database::GraphDb &db) {
std::vector<int> component_ids(FLAGS_vertex_count, -1);
int next_component_id{0};
std::stack<VertexAccessor> expansion_stack;
database::GraphDbAccessor dba{db};
for (auto v : dba.Vertices(false)) {
if (component_ids[v.gid()] != -1) continue;
auto component_id = next_component_id++;
expansion_stack.push(v);
while (!expansion_stack.empty()) {
auto next_v = expansion_stack.top();
expansion_stack.pop();
if (component_ids[next_v.gid()] != -1) continue;
component_ids[next_v.gid()] = component_id;
for (auto e : next_v.out()) expansion_stack.push(e.to());
for (auto e : next_v.in()) expansion_stack.push(e.from());
}
}
return next_component_id;
}
int main(int argc, char **argv) {
gflags::ParseCommandLineFlags(&argc, &argv, true);
google::InitGoogleLogging(argv[0]);
FLAGS_gc_cycle_sec = -1;
database::SingleNode db;
GenerateGraph(db);
auto timed_call = [&db](auto callable, const std::string &descr) {
LOG(INFO) << "Running " << descr << "...";
utils::Timer timer;
auto result = callable(db);
LOG(INFO) << "\tDone in " << timer.Elapsed().count()
<< " seconds, result: " << result;
};
timed_call(EdgeIteration, "Edge iteration");
timed_call(VertexIteration, "Vertex iteration");
timed_call(ConnectedComponentsEdges, "Connected components - Edges");
timed_call(ConnectedComponentsVertices, "Connected components - Vertices");
timed_call(ConnectedComponentsVerticesParallel,
"Parallel connected components - Vertices");
timed_call(Expansion, "Expansion");
return 0;
}

View File

@ -0,0 +1,5 @@
WITH tointeger(rand() * 40000000) AS from_id
MATCH (from:Node {id : from_id}) WITH from
MATCH path = (from)-[*bfs..50 (e, n | degree(n) < 50)]->(to) WITH path LIMIT 10000 WHERE to.fraudulent
RETURN path, size(path)

View File

@ -0,0 +1,31 @@
{
"indexes":[
"Node.id"
],
"nodes":[
{
"count":40000000,
"labels":[
"Node"
],
"properties":{
"id":{
"type":"counter",
"param":"Node.id"
},
"fraudulent":{
"type":"bernoulli",
"param":0.0005
}
}
}
],
"edges":[
{
"count":80000000,
"from":"Node",
"to":"Node",
"type":"Edge"
}
]
}

View File

@ -0,0 +1,20 @@
{
"indexes" : ["Card.id", "Pos.id", "Transaction.fraud_reported"],
"nodes" : [
{
"count_per_worker" : 1250000,
"label" : "Card"
},
{
"count_per_worker" : 1250000,
"label" : "Pos"
},
{
"count_per_worker" : 2500000,
"label" : "Transaction"
}
],
"compromised_pos_probability" : 0.2,
"fraud_reported_probability" : 0.1,
"hop_probability" : 0.1
}

4
docs/.gitignore vendored
View File

@ -1,2 +1,6 @@
doxygen/html/
doxygen/latex/
user_technical/.bundle_community/
user_technical/.couscous/
user_technical/couscous.phar
user_technical/docs.tar.gz

View File

@ -1,230 +0,0 @@
# CSV Import Tool Documentation
CSV is a universal and very versatile data format used to store large quantities
of data. Each Memgraph database instance has a CSV import tool installed called
`mg_import_csv`. The CSV import tool should be used for initial bulk ingestion
of data into the database. Upon ingestion, the CSV importer creates a snapshot
that will be used by the database to recover its state on its next startup.
If you are already familiar with the Neo4j bulk import tool, then using the
`mg_import_csv` tool should be easy. The CSV import tool is fully compatible
with the [Neo4j CSV
format](https://neo4j.com/docs/operations-manual/current/tools/import/). If you
already have a pipeline set up for Neo4j, you only need to replace `neo4j-admin
import` with `mg_import_csv`.
## CSV File Format
Each row of a CSV file represents a single entry that should be imported into
the database. Both nodes and relationships can be imported into the database
using CSV files.
Each set of CSV files must have a header that describes the data that is stored
in the CSV files. Each field in the CSV header is in the format
`<name>[:<type>]` which identifies the name that should be used for that column
and the type that should be used for that column. The type is optional and
defaults to `string` (see the following chapter).
Fields in a CSV row must be separated using the delimiter, and each CSV field
can either be quoted or unquoted. When the field is quoted, the first and last
character in the field *must* be the quote character. If the field isn't quoted
and a quote character appears in it, it is treated as a regular character. If a
quote character appears inside a quoted string, then the quote character must be
doubled in order to escape it. Line feeds and carriage returns are ignored in
the CSV file. Also, the file can't contain a NULL character.
## Properties
Both nodes and relationships can have properties added to them. When importing
properties, the CSV importer uses the name specified in the header of the
corresponding CSV column for the name of the property. A property is designated
by specifying one of the following types in the header:
- `integer`, `int`, `long`, `byte`, `short`: creates an integer property
- `float`, `double`: creates a float property
- `boolean`, `bool`: creates a boolean property
- `string`, `char`: creates a string property
When importing a boolean value, the CSV field should contain exactly the text
`true` to import a `True` boolean value. All other text values are treated as a
boolean value `False`.
If you want to import an array of values, you can do so by appending `[]` to any
of the above types. The values of the array are then determined by splitting
the raw CSV value using the array delimiter character.
Assuming that the array delimiter is `;`, the following example:
```plaintext
first_name,last_name:string,number:integer,aliases:string[]
John,Doe,1,Johnny;Jo;J-man
Melissa,Doe,2,Mel
```
will yield these results:
```plaintext
CREATE ({first_name: "John", last_name: "Doe", number: 1, aliases: ["Johnny", "Jo", "J-man"]});
CREATE ({first_name: "Melissa", last_name: "Doe", number: 2, aliases: ["Mel"]});
```
### Nodes
When importing nodes, several more types can be specified in the header of the
CSV file (along with all property types):
- `ID`: id of the node that should be used as the node ID when importing
relationships
- `LABEL`: designates that the field contains additional labels for the node
- `IGNORE`: designates that the field should be ignored
The `ID` field type sets the internal ID that will be used for the node when
creating relationships. It is optional and nodes that don't have an ID value
specified will be imported, but can't be connected to any relationships. If you
want to save the ID value as a property in the database, just specify a name for
the ID (`user_id:ID`). If you just want to use the ID during the import, leave
out the name of the field (`:ID`). The `ID` field also supports creating
separate ID spaces. The ID space is specified with the ID space name appended
to the `ID` type in parentheses (`ID(user)`). That allows you to have the same
IDs (by value) for multiple different node files (for example, numbers from 1 to
N). The IDs in each ID space will be treated as an independent set of IDs that
don't interfere with IDs in another ID space.
The `LABEL` field type adds additional labels to the node. The value is treated
as an array type so that multiple additional labels can be specified for each
node. The value is split using the array delimiter (`--array-delimiter` flag).
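For illustration, a node CSV file combining these field types might look like
this (the header and values below are made up, assuming the default delimiters):
```plaintext
user_id:ID(user),name:string,age:integer,:LABEL
1,Alice,29,Person;Employee
2,Bob,34,Person
```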
### Relationships
In order to be able to import relationships, you must import the nodes in the
same invocation of `mg_import_csv` that is used to import the relationships.
When importing relationships, several more types can be specified in the header
of the CSV file (along with all property types):
- `START_ID`: id of the start node that should be connected with the
relationship
- `END_ID`: id of the end node that should be connected with the relationship
- `TYPE`: designates the type of the relationship
- `IGNORE`: designates that the field should be ignored
The `START_ID` field type sets the start node that should be connected with the
relationship to the end node. The field *must* be specified and the node ID
must be one of the node IDs that were specified in the node CSV files. The name
of this field is ignored. If the node ID is in an ID space, you can specify the
ID space for it in the same way as for the node ID (`START_ID(user)`).
The `END_ID` field type sets the end node that should be connected with the
relationship to the start node. The field *must* be specified and the node ID
must be one of the node IDs that were specified in the node CSV files. The name
of this field is ignored. If the node ID is in an ID space, you can specify the
ID space for it in the same way as for the node ID (`END_ID(user)`).
The `TYPE` field type sets the type of the relationship. Each relationship
*must* have a relationship type, but it doesn't necessarily need to be specified
in the CSV file, it can also be set externally for the whole CSV file. The name
of this field is ignored.
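For illustration, a relationship CSV file that connects nodes from the `user`
ID space might look like this (again, the values are made up):
```plaintext
:START_ID(user),:END_ID(user),:TYPE,since:integer
1,2,KNOWS,2010
2,1,KNOWS,2011
```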
## CSV Importer Flags
The importer has many command line options that allow you to customize the way
the importer loads your data.
The two main flags that are used to specify the input CSV files are `--nodes`
and `--relationships`. A basic description of these flags is provided in the
table below, and a more detailed explanation can be found further down.
| Flag | Description |
|-----------------------| -------------- |
|`--nodes` | Used to specify CSV files that contain the nodes to the importer. |
|`--relationships` | Used to specify CSV files that contain the relationships to the importer.|
|`--delimiter` | Sets the delimiter that should be used when splitting the CSV fields (default `,`)|
|`--quote` | Sets the quote character that should be used to quote a CSV field (default `"`)|
|`--array-delimiter` | Sets the delimiter that should be used when splitting array values (default `;`)|
|`--id-type` | Specifies which data type should be used to store the supplied <br /> node IDs when storing them as properties (if the field name is supplied). <br /> The supported values are either `STRING` or `INTEGER`. (default `STRING`)|
|`--ignore-empty-strings` | Instructs the importer to treat all empty strings as `Null` values <br /> instead of an empty string value (default `false`)|
|`--ignore-extra-columns` | Instructs the importer to ignore all columns (instead of raising an error) <br /> that aren't specified after the last specified column in the CSV header. (default `false`) |
| `--skip-bad-relationships`| Instructs the importer to ignore all relationships (instead of raising an error) <br /> that refer to nodes that don't exist in the node files. (default `false`) |
|`--skip-duplicate-nodes` | Instructs the importer to ignore all duplicate nodes (instead of raising an error). <br /> Duplicate nodes are nodes that have an ID that is the same as another node that was already imported. (default `false`) |
| `--trim-strings`| Instructs the importer to trim all of the loaded CSV field values before processing them further. <br /> Trimming the fields removes all leading and trailing whitespace from them. (default `false`) |
The `--nodes` and `--relationships` flags are used to specify CSV files that
contain the nodes and relationships to the importer. Multiple files can be
specified in each supplied `--nodes` or `--relationships` flag. Files that are
supplied in one `--nodes` or `--relationships` flag are treated by the CSV
parser as one big CSV file. Only the first line of the first file is parsed for
the CSV header, all other files (and rows) are treated as data. This is useful
when you have a very large CSV file and don't want to edit its first line just
to add a CSV header. Instead, you can specify the header in a separate file
(e.g. `users_header.csv` or `friendships_header.csv`) and have the data intact
in the large file (e.g. `users.csv` or `friendships.csv`). Also, you can supply
additional labels for each set of node files.
The format of `--nodes` flag is:
`[<label>[:<label>]...=]<file>[,<file>][,<file>]...`. Take note that only the
first `<file>` part is mandatory, all other parts of the flag value are
optional. Multiple `--nodes` flags can be supplied to describe multiple sets of
different node files. For the importer to work, at least one `--nodes` flag
*must* be supplied.
The format of `--relationships` flag is: `[<type>=]<file>[,<file>][,<file>]...`.
Take note that only the first `<file>` part is mandatory, all other parts of the
flag value are optional. Multiple `--relationships` flags can be supplied to
describe multiple sets of different relationship files. The `--relationships`
flag isn't mandatory.
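As a sketch of how the two flags compose with the header-file pattern described
above (the file names and relationship type here are hypothetical):
```plaintext
mg_import_csv --nodes users_header.csv,users.csv \
              --relationships FRIENDS_WITH=friendships_header.csv,friendships.csv
```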
## CSV Parser Logic
The CSV parser uses the same logic as the standard Python CSV parser. The data
is parsed in the same way as the following snippet:
```python
import csv
for row in csv.reader(stream, strict=True):
# process 'row'
```
Python uses 'excel' as the default dialect when parsing CSV files and the
default settings for the CSV parser are:
- delimiter: `','`
- doublequote: `True`
- escapechar: `None`
- lineterminator: `'\r\n'`
- quotechar: `'"'`
- skipinitialspace: `False`
The above snippet can be expanded to:
```python
import csv
for row in csv.reader(stream, delimiter=',', doublequote=True,
escapechar=None, lineterminator='\r\n',
quotechar='"', skipinitialspace=False,
strict=True):
# process 'row'
```
For more information about the meaning of the above values, see:
https://docs.python.org/3/library/csv.html#csv.Dialect
## Errors
1. [Skipping duplicate node with ID '{}'. For more details, visit:
memgr.ph/csv-import-tool.](#error-1)
2. [Skipping bad relationship with START_ID '{}'. For more details, visit:
memgr.ph/csv-import-tool.](#error-2)
3. [Skipping bad relationship with END_ID '{}'. For more details, visit:
memgr.ph/csv-import-tool.](#error-3)
## Skipping duplicate node with ID {} {#error-1}
Duplicate nodes are nodes that have an ID that is the same as another node that
was already imported. You can instruct the importer to ignore all duplicate
nodes (instead of raising an error) by using the `--skip-duplicate-nodes` flag.
## Skipping bad relationship with START_ID {} {#error-2}
A node with the id `START_ID` doesn't exist. You can instruct the importer to
ignore all bad relationships (instead of raising an error) that refer to nodes
that don't exist in the node files by using the `--skip-bad-relationships` flag.
## Skipping bad relationship with END_ID {} {#error-3}
A node with the id `END_ID` doesn't exist. You can instruct the importer to
ignore all bad relationships (instead of raising an error) that refer to nodes
that don't exist in the node files by using the `--skip-bad-relationships` flag.

View File

@ -0,0 +1,193 @@
# C++ Code Conventions
This chapter describes code conventions which should be followed when writing
C++ code.
## Code Style
Memgraph uses the Google Style Guide for C++ in most of its code. You should
follow them whenever writing new code. The style guide can be found
[here](https://google.github.io/styleguide/cppguide.html).
### Additional Style Conventions
Code style conventions which are left undefined by Google are specified here.
#### Template parameter naming
Template parameter names should start with capital letter 'T' followed by a
short descriptive name. For example:
```cpp
template <typename TKey, typename TValue>
class KeyValueStore
```
## Code Formatting
You should install `clang-format` and run it on code you change or add. The
root of Memgraph's project contains the `.clang-format` file, which specifies
how formatting should behave. Running `clang-format -style=file` in the
project's root will read the file and behave as expected. For ease of use, you
should integrate formatting with your favourite editor.
Code formatting isn't enforced, because manual formatting may sometimes produce
better results. However, running `clang-format` is strongly encouraged.
## Documentation
Besides following the comment guidelines from [Google Style
Guide](https://google.github.io/styleguide/cppguide.html#Comments), your
documentation of the public API should be
[Doxygen](https://github.com/doxygen/doxygen) compatible. For private parts of
the code or for comments accompanying the implementation, you are free to
break doxygen compatibility. In both cases, you should write your
documentation as full sentences, correctly written in English.
## Doxygen
To start a Doxygen compatible documentation string, you should open your
comment with either a JavaDoc style block comment (`/**`) or a line comment
containing 3 slashes (`///`). Take a look at the 2 examples below.
### Block Comment
```cpp
/**
* One sentence, brief description.
*
* Long form description.
*/
```
### Line Comment
```cpp
///
/// One sentence, brief description.
///
/// Long form description.
///
```
If you only have a brief description, you may collapse the documentation into
a single line.
### Block Comment
```cpp
/** Brief description. */
```
### Line Comment
```cpp
/// Brief description.
```
Whichever style you choose, keep it consistent across the whole file.
Doxygen supports various commands in comments, such as `@file` and `@param`.
These help Doxygen to render specified things differently or to track them for
cross referencing. If you want to learn more, take a look at these two links:
* http://www.stack.nl/~dimitri/doxygen/manual/docblocks.html
* http://www.stack.nl/~dimitri/doxygen/manual/commands.html
## Examples
Below are a few examples of documentation from the codebase.
### Function
```cpp
/**
* Removes whitespace characters from the start and from the end of a string.
*
* @param s String that is going to be trimmed.
*
* @return Trimmed string.
*/
inline std::string Trim(const std::string &s);
```
### Class
```cpp
/** Base class for logical operators.
*
* Each operator describes an operation, which is to be performed on the
* database. Operators are iterated over using a @c Cursor. Various operators
* can serve as inputs to others and thus a sequence of operations is formed.
*/
class LogicalOperator
: public ::utils::Visitable<HierarchicalLogicalOperatorVisitor> {
public:
/** Constructs a @c Cursor which is used to run this operator.
*
* @param GraphDbAccessor Used to perform operations on the database.
*/
virtual std::unique_ptr<Cursor> MakeCursor(GraphDbAccessor &db) const = 0;
/** Return @c Symbol vector where the results will be stored.
*
* Currently, outputs symbols are only generated in @c Produce operator.
* @c Skip, @c Limit and @c OrderBy propagate the symbols from @c Produce (if
* it exists as input operator). In the future, we may want this method to
* return the symbols that will be set in this operator.
*
* @param SymbolTable used to find symbols for expressions.
* @return std::vector<Symbol> used for results.
*/
virtual std::vector<Symbol> OutputSymbols(const SymbolTable &) const {
return std::vector<Symbol>();
}
virtual ~LogicalOperator() {}
};
```
### File Header
```cpp
/// @file visitor.hpp
///
/// This file contains the generic implementation of visitor pattern.
///
/// There are 2 approaches to the pattern:
///
/// * classic visitor pattern using @c Accept and @c Visit methods, and
/// * hierarchical visitor which also uses @c PreVisit and @c PostVisit
/// methods.
///
/// Classic Visitor
/// ===============
///
/// Explanation on the classic visitor pattern can be found from many
/// sources, but here is the link to hopefully most easily accessible
/// information: https://en.wikipedia.org/wiki/Visitor_pattern
///
/// The idea behind the generic implementation of classic visitor pattern is to
/// allow returning any type via @c Accept and @c Visit methods. Traversing the
/// class hierarchy is relegated to the visitor classes. Therefore, visitor
/// should call @c Accept on children when visiting their parents. To implement
/// such a visitor refer to @c Visitor and @c Visitable classes.
///
/// Hierarchical Visitor
/// ====================
///
/// Unlike the classic visitor, the intent of this design is to allow the
/// visited structure itself to control the traversal. This way the internal
/// children structure of classes can remain private. On the other hand,
/// visitors may want to differentiate visiting composite types from leaf types.
/// Composite types are those which contain visitable children, unlike the leaf
/// nodes. Differentiation is accomplished by providing @c PreVisit and @c
/// PostVisit methods, which should be called inside @c Accept of composite
/// types. Regular @c Visit is only called inside @c Accept of leaf types.
/// To implement such a visitor refer to @c CompositeVisitor, @c LeafVisitor and
/// @c Visitable classes.
///
/// Implementation of hierarchical visiting is modelled after:
/// http://wiki.c2.com/?HierarchicalVisitorPattern
```

View File

@ -0,0 +1,288 @@
// dot -Tpng dependencies.dot -o /path/to/output.png
// TODO (buda): Put PropertyValueStore to storage namespace
digraph {
// At the beginning of each block there is a default style for that block
label="Memgraph Dependencies Diagram"; fontname="Roboto Bold"; fontcolor=black;
fontsize=26; labelloc=top; labeljust=right;
compound=true; // If true, allow edges between clusters
rankdir=TB; // Alternatives: LR
node [shape=record fontname="Roboto", fontsize=12, fontcolor=white];
edge [color="#B5AFB7"];
// -- Legend --
// dir=both arrowtail=diamond arrowhead=vee -> group ownership
// dir=both arrowtail=none, arrowhead=vee -> ownership; stack or uptr
subgraph cluster_tcp_end_client_communication {
label="TCP End Client Communication"; fontsize=14;
node [style=filled, color="#DD2222" fillcolor="#DD2222"];
// Owned elements
"communication::Server";
"io::network::Socket";
// Intracluster connections
"communication::Server" -> "io::network::Socket"
[label="socket_" dir=both arrowtail=none arrowhead=vee];
}
subgraph cluster_bolt_server {
label="Bolt Server"; fontsize=14;
node [style=filled, color="#62A2CA" fillcolor="#62A2CA"];
// Owned elements
"communication::bolt::SessionData";
"communication::bolt::Session";
"communication::bolt::Encoder";
"communication::bolt::Decoder";
// Intracluster connections
"communication::bolt::Session" -> "communication::bolt::Encoder"
[label="encoder_", dir=both arrowtail=none, arrowhead=vee];
"communication::bolt::Session" -> "communication::bolt::Decoder"
[label="decoder_", dir=both arrowtail=none, arrowhead=vee];
}
subgraph cluster_opencypher_engine {
label="openCypher Engine"; fontsize=14;
node [style=filled, color="#68BDF6" fillcolor="#68BDF6"];
// Owned Elements
"query::Interpreter";
"query::AstTreeStorage";
"query::TypedValue"
"query::Path";
"query::Simbol";
"query::Context";
"query::ExpressionEvaluator";
"query::Frame";
"query::SymbolTable";
"query::plan::LogicalOperator";
"query::plan::Cursor";
"query::plan::CostEstimator";
// Intracluster connections
"query::Interpreter" -> "query::AstTreeStorage"
[label="ast_cache" dir=both arrowtail=diamond arrowhead=vee];
"query::TypedValue" -> "query::Path";
"query::plan::Cursor" -> "query::Frame";
"query::plan::Cursor" -> "query::Context";
"query::plan::LogicalOperator" -> "query::Symbol";
"query::plan::LogicalOperator" -> "query::SymbolTable";
"query::plan::LogicalOperator" -> "query::plan::Cursor";
}
subgraph cluster_storage {
label="Storage" fontsize=14;
node [style=filled, color="#FB6E00" fillcolor="#FB6E00"];
// Owned Elements
"database::GraphDb";
"database::GraphDbAccessor";
"storage::Record";
"storage::Vertex";
"storage::Edge";
"storage::RecordAccessor";
"storage::VertexAccessor";
"storage::EdgeAccessor";
"storage::Common";
"storage::Label";
"storage::EdgeType";
"storage::Property";
"storage::compression";
"storage::SingleNodeConcurrentIdMapper";
"storage::Location";
"storage::StorageTypesLocation";
"PropertyValueStore";
"storage::RecordLock";
"mvcc::Version";
"mvcc::Record";
"mvcc::VersionList";
// Intracluster connections
"storage::VertexAccessor" -> "storage::RecordAccessor"
[arrowhead=onormal];
"storage::EdgeAccessor" -> "storage::RecordAccessor"
[arrowhead=onormal];
"storage::RecordAccessor" -> "database::GraphDbAccessor"
[style=dashed arrowhead=vee];
"storage::Vertex" -> "mvcc::Record"
[arrowhead=onormal];
"storage::Edge" -> "mvcc::Record"
[arrowhead=onormal];
"storage::Edge" -> "PropertyValueStore"
[arrowhead=vee];
"storage::Vertex" -> "PropertyValueStore"
[arrowhead=vee];
"storage::Edge" -> "mvcc::VersionList"
[label="from,to" arrowhead=vee style=dashed];
"storage::VertexAccessor" -> "storage::Vertex"
[arrowhead=vee];
"storage::EdgeAccessor" -> "storage::Edge"
[arrowhead=vee];
"storage::SingleNodeConcurrentIdMapper" -> "storage::StorageTypesLocation"
[arrowhead=vee];
"storage::StorageTypesLocation" -> "storage::Location"
[arrowhead=vee];
"storage::Storage" -> "storage::StorageTypesLocation"
[arrowhead=vee];
"storage::Property" -> "storage::Common"
[arrowhead=onormal];
"storage::Label" -> "storage::Common"
[arrowhead=onormal];
"storage::EdgeType" -> "storage::Common"
[arrowhead=onormal];
"storage::Property" -> "storage::Location"
[arrowhead=vee];
"PropertyValueStore" -> "storage::Property"
[arrowhead=vee];
"PropertyValueStore" -> "storage::Location"
[arrowhead=vee];
"database::GraphDbAccessor" -> "database::GraphDb"
[arrowhead=vee];
"database::GraphDbAccessor" -> "tx::TransactionId"
[arrowhead=vee];
"mvcc::VersionList" -> "storge::RecordLock"
[label="lock" arrowhead=vee];
"mvcc::VersionList" -> "mvcc::Record"
[label="head" arrowhead=vee];
"mvcc::Record" -> "mvcc::Version"
[arrowhead=onormal];
// Explicit positioning
{rank=same;
"database::GraphDbAccessor";
"storage::VertexAccessor";
"storage::EdgeAccessor";}
{rank=same;
"storage::Common";
"storage::compression";}
}
subgraph cluster_properties_on_disk {
label="Properties on Disk" fontsize=14;
node [style=filled, color="#102647" fillcolor="#102647"];
// Owned Elements
"storage::KVStore";
"rocksdb";
// Intracluster connections
"storage::KVStore" -> "rocksdb";
}
subgraph cluster_distributed {
label="Distributed" fontsize=14;
node [style=filled, color="#FFC500" fillcolor="#FFC500"];
// Owned Elements
"distributed::DataManager";
"distributed::DataRpcClients";
// Intracluster connections
"distributed::DataManager" -> "distributed::DataRpcClients"
[arrowhead=vee];
"storage::RecordAccessor" -> "distributed::DataManager"
[style=dashed arrowhead=vee];
}
subgraph cluster_dynamic_partitioning {
label="Dynamic Partitioning" fontsize=14;
node [style=filled, color="#720096" fillcolor="#720096"];
// Owned Elements
"DynamicPartitioner";
}
subgraph cluster_security {
label="Security" fontsize=14;
node [style=filled, color="#857F87" fillcolor="#857F87"];
// Owned Elements
"Communication Encryption";
"Data Encryption";
"Access Control";
"Audit Logging";
}
subgraph cluster_web_dashboard {
label="Dashaboard" fontsize=14;
node [style=filled, color="#FF0092" fillcolor="#FF0092"];
// Owned Elements
"Memgraph Ops / Memgraph Cockpit";
}
subgraph cluster_rpc {
label="RPC" fontsize=14;
node [style=filled, color="#857F87" fillcolor="#857F87"];
// Owned Elements
"communication::rpc::Server";
"communication::rpc::Client";
}
subgraph cluster_ingestion {
label="Ingestion" fontsize=14;
node [style=filled, color="#0B6D88" fillcolor="#0B6D88"];
// Owned Elements
"Extract";
"Transform";
"Load";
"Amazon S3";
"Kafka";
// Intracluster connections
"Extract" -> "Amazon S3";
"Extract" -> "Kafka";
// Explicit positioning
{rank=same;"Extract";"Transform";"Load";}
}
// -- Intercluster connections --
// cluster_tcp_end_client_communication -- cluster_bolt_server
"communication::Server" -> "communication::bolt::SessionData" [color=black];
"communication::Server" -> "communication::bolt::Session" [color=black];
// cluster_bolt_server -> cluster_storage
"communication::bolt::SessionData" -> "database::GraphDb" [color=red];
"communication::bolt::Session" -> "database::GraphDbAccessor" [color=red];
// cluster_bolt_server -> cluster_opencypher_engine
"communication::bolt::SessionData" -> "query::Interpreter" [color=red];
// cluster_opencypher_engine -- cluster_storage
"query::Interpreter" -> "database::GraphDbAccessor" [color=black];
"query::Interpreter" -> "storage::VertexAccessor" [color=black];
"query::Interpreter" -> "storage::EdgeAccessor" [color=black];
"query::TypedValue" -> "storage::VertexAccessor" [color=black];
"query::TypedValue" -> "storage::EdgeAccessor" [color=black];
"query::Path" -> "storage::VertexAccessor"
[label="vertices" dir=both arrowtail=diamond arrowhead=vee color=black];
"query::Path" -> "storage::EdgeAccessor"
[label="edges" dir=both arrowtail=diamond arrowhead=vee color=black];
"query::plan::LogicalOperator" -> "database::GraphDbAccessor"
[color=black arrowhead=vee];
// cluster_distributed -- cluster_storage
"distributed::DataManager" -> "database::GraphDb"
[arrowhead=vee style=dashed color=red];
"distributed::DataManager" -> "tx::TransactionId"
[label="ves_caches_key" dir=both arrowhead=none arrowtail=diamond
color=red];
"distributed::DataManager" -> "storage::Vertex"
[label="vertices_caches" dir=both arrowhead=none arrowtail=diamond
color=red];
"distributed::DataManager" -> "storage::Edge"
[label="edges_caches" dir=both arrowhead=none arrowtail=diamond
color=red];
// cluster_storage -- cluster_properties_on_disk
"PropertyValueStore" -> "storage::KVStore"
[label="static" arrowhead=vee color=black];
// cluster_dynamic_partitioning -- cluster_storage
"database::GraphDb" -> "DynamicPartitioner"
[arrowhead=vee color=red];
"DynamicPartitioner" -> "database::GraphDbAccessor"
[arrowhead=vee color=black];
}

View File

@ -0,0 +1,43 @@
# Distributed addressing
In distributed Memgraph a single graph element must be owned by exactly
one worker. It is possible that multiple workers have cached copies of
a single graph element (which is inevitable), but there is only one
owner.
The owner of a graph element can change. This is not yet implemented,
but is intended. Graph partitioning is intended to be dynamic.
Graph elements refer to other graph elements that are possibly on some
other worker. Even though each graph element is identified with a unique
ID, that ID does not contain the information about where that element
currently resides (which worker is the owner).
Thus we introduce the concept of a global address. It indicates both
which graph element is referred to (its global ID) and where it
resides. Semantically it is a pair, but for efficiency it's stored in 64
bits.
The global address is efficient for usage in a cluster: it indicates
where something can be found. However, finding a graph element based on
its ID is still not a free operation (in the current implementation
it's a skiplist lookup). So, whenever possible, it's better to use local
addresses (pointers).
Succinctly, the requirements for addressing are:
- global addressing containing location info
- fast local addressing
- storage of both types in the same location efficiently
- translation between the two
The `storage::Address` class handles the enumerated storage
requirements. It stores either a local or global address in the size of
a local pointer (typically 8 bytes).
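A minimal sketch of the idea follows. The bit layout is invented for
illustration and is not the actual `storage::Address` implementation:
```cpp
#include <cstdint>

// Sketch only: a 64-bit value that is either a tagged local pointer or a
// packed (gid, worker id) pair. The layout below is an assumption.
template <typename TRecord>
class Address {
  static constexpr uint64_t kLocalFlag = 0x1;  // pointers are aligned, bit 0 is free

 public:
  explicit Address(TRecord *local)
      : storage_(reinterpret_cast<uint64_t>(local) | kLocalFlag) {}
  Address(uint64_t gid, int worker_id)
      : storage_((gid << 16) | (static_cast<uint64_t>(worker_id) << 1)) {}

  bool is_local() const { return storage_ & kLocalFlag; }
  TRecord *local() const {
    return reinterpret_cast<TRecord *>(storage_ & ~kLocalFlag);
  }
  uint64_t gid() const { return storage_ >> 16; }
  int worker_id() const { return static_cast<int>((storage_ >> 1) & 0x7FFF); }

 private:
  uint64_t storage_;
};
```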
Conversion between the two is done in multiple places. The general
approach is to use local addresses (when possible) only for local
in-memory handling. All communication and persistence uses global
addresses. Also, when receiving an address from another worker, it is
localized as soon as possible, so that as little code as possible has to
worry about the potential inefficiency of using a global address for a
local graph element.

View File

@ -0,0 +1,50 @@
# Distributed durability
Durability in distributed Memgraph is slightly different than in
single-node because the state is shared between multiple workers and the
per-worker states are not independent.
Note that recovering from persistent storage must result in a stable
database state. This means that across the cluster the state
modification of every transaction that was running is either recovered
fully or not at all. Also, if transaction A committed before transaction B,
then if B is recovered so must A.
## Snapshots
It is possibly avoidable but highly desirable that the database can be
recovered from snapshot only, without relying on WAL files. For this to
be possible in distributed, it must be ensured that the same
transactions are recovered on all the workers (including master) in the
cluster. Since the snapshot does not contain information about which
state change happened in which transaction, the only way to achieve this
is to have synchronized snapshots. This means that the process of
creating a snapshot, which is in itself transactional (it happens within
a transaction and thus observes some consistent database state), must
happen in the same transaction on all workers. This is achieved by the
master starting
a snapshot generating transaction and triggering the process on all
workers in the cluster.
## WAL
Unlike the snapshot, write-ahead logs contain the information on which
transaction made which state change. This makes it possible to include
or exclude transactions during the recovery process. What is necessary
however is a global consensus on which of the transactions should be
recovered and which not, to ensure recovery into a consistent state.
It would be possible to achieve this with some kind of synchronized
recovery process, but it would impose constraints on cluster startup and
would not be trivial.
A simpler alternative is that the consensus is achieved beforehand,
while the database (to be recovered) is still operational. What is
necessary is to keep track of which transactions are guaranteed to
have been flushed to the WAL files on all the workers in the cluster. It
makes sense to keep this record on the master, so a mechanism is
introduced which periodically pings all the workers, telling them to
flush their WALs, and writes some sort of a log indicating that this has
been confirmed. The downside of this is that a periodic broadcast must
be done, and that potentially slightly less data can be recovered in the
case of a crash than if using a post-crash consensus. It is however much
simpler to implement.

View File

@ -0,0 +1,48 @@
## Dynamic Graph Partitioning
Memgraph supports dynamic graph partitioning similar to the Spinner algorithm
described in this paper: https://arxiv.org/pdf/1404.3861.pdf.
DGP is useful because it tries to group `local` data on the same worker, i.e.
it tries to keep closely connected data on one worker and thus avoid jumps
across workers when querying/traversing the distributed graph.
### Our implementation
It works independently on each worker, but the migration is running on only
one worker at a time. It achieves that by sharing a token between workers;
the token ownership is transferred to the next worker when the current worker
finishes its migration step.
The reason we want workers to work in disjoint time slots is to avoid
serialization errors caused by creating/removing edges of vertices during
migrations, which might cause the same vertex to be updated from two or more
different transactions.
### Migrations
For each vertex and worker id (label in the context of the DGP algorithm) we
define a score function. The score function takes into account the labels of
the endpoints of the vertex's edges (in/out) and the capacity of the worker
with said label. The score function loosely looks like this:
```
locality(v, l) =
count endpoints of edges of vertex `v` with label `l` / degree of `v`
capacity(l) =
number of vertices on worker `l` divided by the worker capacity
(usually equal to the average number of vertices per worker)
score(v, l) = locality(v, l) - capacity(l)
```
We also define two flags alongside `dynamic_graph_partitioner_enabled`:
`dgp_improvement_threshold` and `dgp_max_batch_size`.
These two flags are used during the migration phase.
When deciding if we need to migrate some vertex `v` from worker `l1` to worker
`l2`, we examine the difference in scores, i.e. if
`score(v, l1) - dgp_improvement_threshold / 100 < score(v, l2)`, then we
migrate the vertex, as sketched below.
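A self-contained sketch of this scoring and migration rule; the graph
representation below is invented for illustration and is not Memgraph's
internal one:
```cpp
#include <cstdint>
#include <vector>

// Invented, minimal graph representation used only to illustrate the formulas.
struct Graph {
  std::vector<std::vector<int64_t>> adjacency;  // neighbours of each vertex
  std::vector<int> worker_of;                   // current worker of each vertex
  std::vector<int64_t> worker_size;             // number of vertices per worker
  double worker_capacity;                       // average vertices per worker
};

double Locality(const Graph &g, int64_t v, int worker) {
  if (g.adjacency[v].empty()) return 0.0;
  int64_t on_worker = 0;
  for (auto neighbour : g.adjacency[v])
    if (g.worker_of[neighbour] == worker) ++on_worker;
  return static_cast<double>(on_worker) / g.adjacency[v].size();
}

double Capacity(const Graph &g, int worker) {
  return g.worker_size[worker] / g.worker_capacity;
}

double Score(const Graph &g, int64_t v, int worker) {
  return Locality(g, v, worker) - Capacity(g, worker);
}

// Migrate v from worker l1 to l2 if the score improves by more than the
// improvement threshold (given in percent, as dgp_improvement_threshold).
bool ShouldMigrate(const Graph &g, int64_t v, int l1, int l2,
                   double improvement_threshold) {
  return Score(g, v, l1) - improvement_threshold / 100.0 < Score(g, v, l2);
}
```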
The max batch size flag limits the number of vertices we can transfer in one
batch (one migration step).
Setting this value too high will probably cause a lot of interference with
client queries, while setting it too low will slow down the convergence of the
algorithm.

View File

@ -0,0 +1,54 @@
# Memgraph distributed
This chapter describes some of the concepts used in distributed
Memgraph. By "distributed" here we mean the sharding of a single graph
onto multiple processing units (servers).
## Conceptual organization
There is a single master and multiple workers. The master contains all
the global sources of truth (transaction engine,
[label|edge-type|property] to name mappings). Also, in the current
organization it is the only one that contains a Bolt server (for
communication with the end client) and an interpretation engine. Workers
contain the data and means of subquery interpretation (query plans
received from the master) and means of communication with the master and
other workers.
In many query plans the load on the master is much larger than the load
on the workers. For that reason it might be beneficial to make the
master contain less data (or none at all), and/or to have multiple
interpretation masters.
## Logic organization
Both the distributed and the single node Memgraph use the same codebase.
In cases where the behavior in single-node differs from that in
distributed, some kind of dynamic behavior change is implemented (either
through inheritance or conditional logic).
### GraphDb
The `database::GraphDb` is an "umbrella" object for parts of the
database such as storage, garbage collection, transaction engine etc.
There is a class hierarchy of `GraphDb` implementations, as well as a
base interface object. There are subclasses for single-node, master and
worker deployments. Which implementation is used depends on the
configuration processed in the `main` entry point of Memgraph.
The `GraphDb` interface exposes getters to base classes of
other similar hierarchies (for example to `tx::Engine`). In that way
much of the code that uses those objects (for example query plan
interpretation) is agnostic to the type of deployment.
### RecordAccessors
The functionality of `RecordAccessors` and its subclasses is already
documented. It's important to note that the same implementation of
accessors is used in all deployments, with internal changes of behavior
depending on the locality of the graph element (vertex or edge) the
accessor represents. For example, if the graph element is local, an
update operation on an accessor will make the necessary MVCC ops, update
local data, indexes, the write-ahead log etc. However, if the accessor
represents a remote graph element, an update will trigger an RPC message
to the owner about the update and a change in the local cache.

View File

@ -0,0 +1,103 @@
# Distributed updates
Operations that modify the graph state are somewhat more complex in the
distributed system, as opposed to a single-node Memgraph deployment. The
complexity arises from two factors.
First, the data being modified is not necessarily owned by the worker
performing the modification. This situation is completely valid: workers
execute parts of the query plan, and parts must be executed by the
master.
Second, there are fewer guarantees regarding multi-threaded access. In
single-node Memgraph it was guaranteed that only one thread would be
performing database work within a single transaction. This implied that
per-version storage could be thread-unsafe. In distributed Memgraph it
is possible that multiple threads could be performing work in the same
transaction as a consequence of the query being executed at the same
time on multiple workers and those executions interacting with the
globally partitioned database state.
## Deferred state modification
Making the per-version data storage thread-safe would most likely have a
performance impact very undesirable in a transactional database intended
for high throughput.
An alternative is that state modification over unsafe structures is not
performed immediately when requested, but postponed until it is safe to
do so (when there is a guarantee of no concurrent access).
Since local query plan execution is done the same way on local data as
it is in single-node Memgraph, it is not possible to defer that part of
the modification story. What can be deferred are modifications requested
by other workers. Since local query plan execution is still
single-threaded, this approach is safe.
At the same time those workers requesting the remote update can update
local copies (caches) of the not-owned data since that cache is only
being used by the single, local-execution thread.
### Visibility
Since updates are deferred the question arises: when do the updates
become visible? The above described process offers the following
visibility guarantees:
- updates done on the local state are visible to the owner
- updates done on the local state are NOT visible to anyone else during
the same (transaction + command)
- updates done on remote state are deferred on the owner and not
visible to the owner until applied
- updates done on the remote state are applied immediately to the local
caches and thus visible locally
This implies an inconsistent view of the database state. In a concurrent
execution of a single query this can hardly be avoided and is accepted
as such. It does not change the Cypher query execution semantic in any
of the well-defined scenarios. It possibly changes some of the behaviors
in which the semantic is not well defined even in single-node execution.
### Synchronization, update application
In many queries it is mandatory to observe the latest global graph state
(typically when returning it to the client). That means that before that
happens all the deferred updates need to be applied, and all the caches
to remote data invalidated. Exactly this happens when executing queries
that modify the graph state. At some point a global synchronization
point is reached. First it is waited that all workers finish the
execution of query plan parts performing state modifications. After that
all the workers are told to apply the deferred updates they received to
their graph state. Since there is no concurrent query plan execution,
this is safe. Once that is done all the local caches are cleared and the
requested data can be returned to the client.
### Command advancement
In complex queries where a read part follows a state modification part
the synchronization process after the state modification part is
followed by command advancement, like in single-node execution.
## Creation
Graph element creation is not deferred. This is practical because the
response to a creation is the global ID of the newly created element. At
the same time it is safe because no other worker (including the owner)
will be using the newly added graph element.
## Updating
Updating is deferred, as described. Note that this also means that
record locking conflicts are deferred and serialization errors
(including lock timeouts) are postponed until the deferred update
application phase. In certain scenarios it might be beneficial to force
these errors to happen earlier, when the deferred update request is
processed.
## Deletion
Deletion is also deferred. Deleting an edge implies a modification of
its endpoint vertices, which must be deferred as those data structures
are not thread-safe. Deleting a vertex is done either with detaching, in
which case an arbitrary number of updates are implied in the vertex's
neighborhood, or without detaching, which relies on checking the current
state of the graph and is generally impossible in distributed execution.

View File

@ -0,0 +1,22 @@
# Snapshots
A "snapshot" is a record of the current database state stored in permanent
storage. Note that the term "snapshot" is also used in the context of
the transaction engine to denote a set of running transactions.
A snapshot is written to the file by Memgraph periodically if so
configured. The snapshot creation process is done within a transaction created
specifically for that purpose. The transaction is needed to ensure that
the stored state is internally consistent.
The database state can be recovered from the snapshot during startup, if
so configured. This recovery works in conjunction with write-ahead log
recovery.
A single snapshot contains all the data needed to recover a database. In
that sense snapshots are independent of each other and old snapshots can
be deleted once the new ones are safely stored, if it is not necessary
to revert the database to some older state.
The exact format of the snapshot file is defined inline in the snapshot
creation code.

View File

@ -0,0 +1,47 @@
# Write-ahead logging
Typically WAL denotes the process of writing a "log" of database
operations (state changes) to persistent storage before committing the
transaction, thus ensuring that the state can be recovered (in the case
of a crash) for all the transactions which the database committed.
The WAL is a fine-grained durability format. Its purpose is to store
database changes fast. Its primary purpose is not to provide
space-efficient storage, nor to support fast recovery. For that reason
it's often used in combination with a different persistence mechanism
(in Memgraph's case the "snapshot") that has complementary
characteristics.
### Guarantees
Ensuring that the log is written before the transaction is committed can
slow down the database. For that reason this guarantee is most often
configurable in databases. In Memgraph it is at the moment not
guaranteed, nor configurable. The WAL is flushed to the disk
periodically and transactions do not wait for this to complete.
### Format
The WAL file contains a series of DB state changes called `StateDelta`s.
Each of them describes what the state change is and in which transaction
it happened. Also, some kinds of meta-information needed to ensure proper
state recovery are recorded (transaction beginnings and commits/aborts).
The following is guaranteed w.r.t. `StateDelta` ordering in
a single WAL file:
- For two ops in the same transaction, if op A happened before B in the
database, that ordering is preserved in the log.
- Transaction begin/commit/abort messages also appear in exactly the
same order as they were executed in the transactional engine.
### Recovery
The database can recover from the WAL on startup. This works in
conjunction with snapshot recovery. The database attempts to recover from
the latest snapshot and then apply as much as possible from the WAL
files. Only those transactions that were not recovered from the snapshot
are recovered from the WAL, for speed efficiency. It is possible (but
inefficient) to recover the database from WAL only, provided all the WAL
files created from DB start are available. It is not possible to recover
partial database state (i.e. from some suffix of WAL files, without the
preceding snapshot).

View File

@ -0,0 +1,110 @@
# DatabaseAccessor
A `DatabaseAccessor` actually wraps a transactional access to database
data, for a single transaction. In that sense the naming is bad. It
encapsulates references to the database and the transaction object.
It contains logic for working with database content (graph element
data) in the context of a single transaction. All CRUD operations are
performed within a single transaction (as Memgraph is a transactional
database), and therefore iteration over data, finding a specific graph
element, etc. are all functionalities of a `GraphDbAccessor`.
In single-node Memgraph the database accessor also defined the lifetime
of a transaction. Even though a `Transaction` object was owned by the
transactional engine, it was the `GraphDbAccessor`'s lifetime that the
object was bound to (the transaction was implicitly aborted in
`GraphDbAccessor`'s destructor, if it was not explicitly ended before
that).
# RecordAccessor
It is important to understand data organization and access in the
storage layer. This discussion pertains to vertices and edges as graph
elements that the end client works with.
Memgraph uses MVCC (documented on its own page). This means that for
each graph element there could be different versions visible to
different currently executing transactions. When we talk about a
`Vertex` or `Edge` as a data structure we typically mean one of those
versions. In code this semantic is implemented so that both those classes
inherit `mvcc::Record`, which in turn inherits `mvcc::Version`.
Handling MVCC and visibility is not in itself trivial. Next to that,
there is other book-keeping to be performed when working with data. For
that reason, Memgraph uses "accessors" to define an API of working with
data in a safe way. Most of the code in Memgraph (for example the
interpretation code) should work with accessors. There is a
`RecordAccessor` as a base class for `VertexAccessor` and
`EdgeAccessor`. Following is an enumeration of their purpose.
### Data access
The client interacts with Memgraph using the Cypher query language. That
language has certain semantics which imply that multiple versions of the
data need to be visible during the execution of a single query. For
example: expansion over the graph is always done over the graph state as
it was at the beginning of the transaction.
The `RecordAccessor` exposes functions to switch between the old and the new
versions of the same graph element (intelligently named `SwitchOld` and
`SwitchNew`) within a single transaction. In that way the client code
(mostly the interpreter) can avoid dealing with the underlying MVCC
version concepts.
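As an illustrative sketch (not actual interpreter code; the property name and
value used here are made up, only `SwitchOld`/`SwitchNew` and the accessor
calls seen elsewhere in the codebase are assumed):
```cpp
// Sketch: update a property on each vertex, then look at both MVCC versions
// of the same record within the one transaction. SwitchOld/SwitchNew change
// which version subsequent reads observe.
void TouchAllVertices(database::GraphDb &db) {
  database::GraphDbAccessor dba{db};
  auto visited = dba.Property("visited");  // made-up property name
  for (auto vertex : dba.Vertices(false)) {
    vertex.PropsSet(visited, static_cast<int64_t>(1));
    vertex.SwitchOld();  // reads now see the state at transaction start
    // ... inspect the old version here ...
    vertex.SwitchNew();  // reads now see this transaction's update
  }
  dba.Commit();
}
```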
### Updates
Data updates are also done through accessors. Meaning: there are methods
on the accessors that modify data, the client code should almost never
interact directly with `Vertex` or `Edge` objects.
The accessor layer takes care of creating versions in the MVCC layer and
performing updates on the appropriate versions.
Next, for many kinds of updates it is necessary to update the relevant
indexes. There are implicit indexes for vertex labels, as
well as user-created indexes for (label, property) pairs. The accessor
layer takes care of updating the indexes when these values are changed.
Each update also triggers a log statement in the write-ahead log. This
is also handled by the accessor layer.
### Distributed
In distributed Memgraph accessors also contain a lot of the remote graph
element handling logic. More info on that is available in the
documentation for distributed.
### Deferred MVCC data lookup for Edges
Vertices and edges are versioned using MVCC. This means that for each
transaction an MVCC lookup needs to be done to determine which version
is visible to that transaction. This tends to slow things down due to
cache invalidations (version lists and versions are stored in arbitrary
locations on the heap).
However, for edges, only the properties are mutable. The edge endpoints
and type are fixed once the edge is created. For that reason both edge
endpoints and type are available in vertex data, so that when expanding
it is not mandatory to do MVCC lookups of versioned, mutable data. This
logic is implemented in `RecordAccessor` and `EdgeAccessor`.
### Exposure
The original idea and implementation of graph element accessors was that
they'd prevent client code from ever interacting with raw `Vertex` or
`Edge` data. This however turned out to be impractical when implementing
distributed Memgraph and the raw data members have since been exposed
(through getters to old and new version pointers). However, refrain from
working with that data directly whenever possible! Always consider the
accessors to be the first go-to for interacting with data, especially
when in the context of a transaction.
# Skiplist accessor
The term "accessor" is also used in the context of a skiplist. Every
operation on a skiplist must be performed within on an
accessor. The skiplist ensures that there will be no physical deletions
of an object during the lifetime of an accessor. This mechanism is used
to ensure deletion correctness in a highly concurrent container.
We only mention that here to avoid confusion regarding terminology.

View File

@ -0,0 +1,131 @@
# Property storage
Although the reader is probably familiar with properties in *Memgraph*, let's
briefly recap.
Both vertices and edges can store an arbitrary number of properties. Properties
are, in essence, ordered pairs of property names and property values. Each
property name within a single graph element (edge/node) can store a single
property value. Property names are represented as strings, while property values
must be one of the following types:
Type | Description
-----------|------------
`Null` | Denotes that the property has no value. This is the same as if the property does not exist.
`String` | A character string, i.e. text.
`Boolean` | A boolean value, either `true` or `false`.
`Integer` | An integer number.
`Float` | A floating-point number, i.e. a real number.
`List` | A list containing any number of property values of any supported type. It can be used to store multiple values under a single property name.
`Map` | A mapping of string keys to values of any supported type.
Property values are modeled in a class conveniently called `PropertyValue`.
## Mapping between property names and property keys
Although users think of property names in terms of descriptive strings
(e.g. "location" or "department"), *Memgraph* internally converts those names
into property keys which are, essentially, unsigned 16-bit integers.
Property keys are modelled by a not-so-conveniently named class called
`Property` which can be found in `storage/types.hpp`. The actual conversion
between property names and property keys is done within the `ConcurrentIdMapper`
but the internals of that implementation are out of scope for understanding
property storage.
## PropertyValueStore
Both `Edge` and `Vertex` objects contain an instance of `PropertyValueStore`
object which is responsible for storing properties of a corresponding graph
element.
An interface of `PropertyValueStore` is as follows:
Method | Description
-----------|------------
`at` | Returns the `PropertyValue` for a given `Property` (key).
`set` | Stores a given `PropertyValue` under a given `Property` (key).
`erase` | Deletes a given `Property` (key) alongside its corresponding `PropertyValue`.
`clear` | Clears the storage.
`iterator`| Provides an extension of `std::input_iterator` that iterates over storage.
## Storage location
By default, *Memgraph* is an in-memory database and all properties are therefore
stored in working memory unless specified otherwise by the user. The user has
the option to specify via the command line which properties they wish to be
stored on disk.
Storage location of each property is encapsulated within a `Property` object
which is ensured by the `ConcurrentIdMapper`. More precisely, the unsigned 16-bit
property key has the following format:
```
|---location--|------id------|
|-Memory|Disk-|-----2^15-----|
```
In other words, the most significant bit determines the location where the
property will be stored.
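A sketch of decoding that layout follows. Which bit value maps to which
location is an assumption here, and these helpers are not part of the actual
`storage::Property` API:
```cpp
#include <cstdint>

enum class Location { Memory, Disk };

// Assumption for illustration: a set most significant bit means "Disk".
inline Location LocationOf(uint16_t property_key) {
  return (property_key & 0x8000) ? Location::Disk : Location::Memory;
}

// The remaining 15 bits are the property id.
inline uint16_t IdOf(uint16_t property_key) { return property_key & 0x7FFF; }
```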
### In-memory storage
The underlying implementation of in-memory storage for the time being is
`std::vector<std::pair<Property, PropertyValue>>`. Implementations of `at`, `set`
and `erase` are linear in time. This implementation is arguably more efficient
than `std::map` or `std::unordered_map` when the average number of properties of
a record is relatively small (up to 10), which seems to be the case.
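A minimal, self-contained sketch of that vector-based layout, with simplified
stand-in types (the real store uses `Property` keys and `PropertyValue` values):
```cpp
#include <cstdint>
#include <utility>
#include <vector>

// Sketch only: uint16_t stands in for Property and int for PropertyValue.
// As in the real store, at/set/erase are linear scans over a small vector.
class SmallPropertyStore {
 public:
  int at(uint16_t key) const {
    for (const auto &kv : props_)
      if (kv.first == key) return kv.second;
    return 0;  // stand-in for "Null" / property not present
  }

  void set(uint16_t key, int value) {
    for (auto &kv : props_) {
      if (kv.first == key) {
        kv.second = value;
        return;
      }
    }
    props_.emplace_back(key, value);
  }

  void erase(uint16_t key) {
    for (auto it = props_.begin(); it != props_.end(); ++it) {
      if (it->first == key) {
        props_.erase(it);
        return;
      }
    }
  }

 private:
  std::vector<std::pair<uint16_t, int>> props_;
};
```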
### On-disk storage
#### KVStore
Disk storage is modeled by an abstraction of key-value storage as implemented in
`storage/kvstore.hpp`. An interface of this abstraction is as follows:
Method | Description
----------------|------------
`Put` | Stores the given value under the given key.
`Get` | Obtains the given value stored under the given key.
`Delete` | Deletes a given (key, value) pair from storage.
`DeletePrefix` | Deletes all (key, value) pairs where key begins with a given prefix.
`Size` | Returns the size of the storage or, optionally, the number of stored pairs that begin with a given prefix.
`iterator` | Provides an extension of `std::input_iterator` that iterates over storage.
Keys and values in this context are of type `std::string`.
The actual underlying implementation of this abstraction uses
[RocksDB](https://rocksdb.org) &mdash; a persistent key-value store for fast
storage.
It is worth noting that the custom iterator implementation allows the user
to iterate over a given prefix. Otherwise, the implementation follows familiar
C++ constructs and can be used as follows:
```
KVStore storage = ...;
for (auto it = storage.begin(); it != storage.end(); ++it) {}
for (auto kv : storage) {}
for (auto it = storage.begin("prefix"); it != storage.end("prefix"); ++it) {}
```
Note that it is not possible to scan over multiple prefixes. For instance, one
might assume that you can scan over all keys that fall in a certain
lexicographical range. Unfortunately, that is not the case and running the
following code will result in an infinite loop with a touch of undefined
behavior.
```
KVStore storage = ...;
for (auto it = storage.begin("alpha"); it != storage.end("omega"); ++it) {}
```
#### Data organization on disk
Each `PropertyValueStore` instance can access a static `KVStore` object that can
store `(key, value)` pairs on disk. The key of each property on disk consists of
two parts &mdash; a unique identifier (unsigned 64-bit integer) of the current
record version (see the MVCC documentation for further clarification) and a
property key as described above. The actual value of the property is serialized
into a bytestring using the Bolt `BaseEncoder`. Similarly, deserialization is
performed by the Bolt `Decoder`.
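As a rough sketch of the key layout described above (an illustration only; byte order and the exact serialization of the value via the Bolt encoder are handled by the real storage code):

```cpp
#include <cstdint>
#include <cstring>
#include <string>

// Sketch: build the on-disk key from a 64-bit record-version id followed by
// the 16-bit property key.
std::string MakeDiskKey(uint64_t version_id, uint16_t property_key) {
  std::string key(sizeof version_id + sizeof property_key, '\0');
  std::memcpy(&key[0], &version_id, sizeof version_id);
  std::memcpy(&key[sizeof version_id], &property_key, sizeof property_key);
  return key;
}
```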
docs/dev/quick-start.md Normal file
@ -0,0 +1,78 @@
# Quick Start
A short chapter on downloading the Memgraph source, compiling and running.
## Obtaining the Source Code
Memgraph uses `git` for source version control. You will need to install `git`
on your machine before you can download the source code.
On Debian systems, you can do it inside a terminal with the following
command:
sudo apt-get install git
On ArchLinux or Gentoo, you probably already know what to do.
After installing `git`, you are now ready to fetch your own copy of Memgraph
source code. Run the following command:
git clone https://phabricator.memgraph.io/diffusion/MG/memgraph.git
The above will create a `memgraph` directory and put all source code there.
## Compiling Memgraph
With the source code, you are now ready to compile Memgraph. Well... Not
quite. You'll need to download Memgraph's dependencies first.
In your terminal, position yourself in the obtained memgraph directory.
cd memgraph
### Installing Dependencies
On Debian systems, all of the dependencies should be set up by running the
`init` script:
./init -s
Currently, other systems aren't supported by the `init` script, but you can
perform the needed steps manually. First, run the `init` script.
./init
The script will output the required packages, which you should be able to
install via your favorite package manager. For example, `pacman` on ArchLinux.
After installing the packages, issue the following commands:
mkdir -p build
./libs/setups.sh
### Compiling
With all of the dependencies installed, you need to configure the build
system. To do that, execute the following:
cd build
cmake ..
If everything went OK, you can now, finally, compile Memgraph.
make -j$(nproc)
### Running
After the compilation, verify that Memgraph works:
./memgraph --version
To make extra sure, run the unit tests:
ctest -R unit -j$(nproc)
## Problems
If you have any trouble running the above commands, contact your nearest
developer who successfully built Memgraph. Ask for help and insist on getting
this document updated with correct steps!
@ -0,0 +1,152 @@
# Bootstrapping Compilation Toolchain for Memgraph
Requirements:
* libstdc++ shipped with gcc-6.3 or gcc-6.4
* cmake >= 3.1, Debian Stretch uses cmake-3.7.2
* clang-3.9
## Installing gcc-6.4
gcc-6.3 has a bug, so use the 6.4 version which is just a bugfix release.
Requirements on CentOS 7:
* wget
* make
* gcc (bootstrap)
* gcc-c++ (bootstrap)
* gmp-devel (bootstrap)
* mpfr-devel (bootstrap)
* libmpc-devel (bootstrap)
* zip
* perl
* dejagnu (testing)
* expect (testing)
* tcl (testing)
```
wget ftp://ftp.mpi-sb.mpg.de/pub/gnu/mirror/gcc.gnu.org/pub/gcc/releases/gcc-6.4.0/gcc-6.4.0.tar.gz
tar xf gcc-6.4.0.tar.gz
cd gcc-6.4.0
mkdir build
cd build
../configure --disable-multilib --prefix=<install-dst>
make
# Testing
make -k check
make install
```
*Do not put gcc + libs on PATH* (unless you know what you are doing).
## Installing cmake-3.7.2
Requirements on CentOS 7:
* wget
* make
* gcc
* gcc-c++
* ncurses-devel (optional, for ccmake)
```
wget https://cmake.org/files/v3.7/cmake-3.7.2.tar.gz
tar xf cmake-3.7.2.tar.gz
cd cmake-3.7.2
./bootstrap --prefix=<install-dst>
make
make install
```
Put cmake on PATH (if appropriate)
**Fix the bug in CpackRPM**
`"<path-to-cmake>/share/cmake-3.7/Modules/CPackRPM.cmake" line 2273 of 2442`
The line
```
set(RPMBUILD_FLAGS "-bb")
```
needs to be before
```
if(CPACK_RPM_GENERATE_USER_BINARY_SPECFILE_TEMPLATE OR NOT CPACK_RPM_USER_BINARY_SPECFILE)
```
It was probably accidentally placed after, and is fixed in later cmake
releases.
## Installing clang-3.9
Requirements on CentOS 7:
* wget
* make
* cmake
```
wget http://releases.llvm.org/3.9.1/llvm-3.9.1.src.tar.xz
tar xf llvm-3.9.1.src.tar.xz
mv llvm-3.9.1.src llvm
wget http://releases.llvm.org/3.9.1/cfe-3.9.1.src.tar.xz
tar xf cfe-3.9.1.src.tar.xz
mv cfe-3.9.1.src llvm/tools/clang
cd llvm
mkdir build
cd build
cmake -DCMAKE_BUILD_TYPE="Release" -DGCC_INSTALL_PREFIX=<gcc-dir> \
-DCMAKE_C_COMPILER=<gcc> -DCMAKE_CXX_COMPILER=<g++> \
-DCMAKE_CXX_LINK_FLAGS="-L<gcc-dir>/lib64 -Wl,-rpath,<gcc-dir>/lib64" \
-DCMAKE_INSTALL_PREFIX=<install-dst> ..
make
# Testing
make check-clang
make install
```
Put clang on PATH (if appropriate)
## Memgraph
Requirements on CentOS 7:
* libuuid-devel (antlr4)
* java-1.8.0-openjdk (antlr4)
* boost-static (too low version --- compile manually)
* rpm-build (RPM)
* python3 (tests, ...)
* which (required for rocksdb)
* sbcl (lisp C++ preprocessing)
### Boost 1.62
```
wget https://netix.dl.sourceforge.net/project/boost/boost/1.62.0/boost_1_62_0.tar.gz
tar xf boost_1_62_0.tar.gz
cd boost_1_62_0
./bootstrap.sh --with-toolset=clang --with-libraries=iostreams,serialization --prefix=<install-dst>
./b2
# Default installs to /usr/local/
./b2 install
```
### Building Memgraph
clang is *required* to be findable by cmake, i.e. it should be on PATH.
cmake isn't required to be on PATH since you run it manually, so you can use
the full path to the executable. Obviously, it is also convenient to put cmake
on PATH.
Building is done as explained in [Quick Start](quick-start.md), but each
`make` invocation needs to be prepended with:
`LD_RUN_PATH=<gcc-dir>/lib64 make ...`
### RPM
Name format: `memgraph-<version>-<pkg-version>.<arch>.rpm`
docs/dev/workflow.md Normal file
@ -0,0 +1,177 @@
# Memgraph Workflow
This chapter describes the usual workflow for working on Memgraph.
## Git
Memgraph uses [git](https://git-scm.com/) for source version control. If you
obtained the source, you probably already have it installed. Before you can
track new changes, you need to set up some basic information.
First, tell git your name:
git config --global user.name "FirstName LastName"
Then, set your Memgraph email:
git config --global user.email "my.email@memgraph.com"
Finally, make git aware of your favourite editor:
git config --global core.editor "vim"
## Phabricator
All of the code in Memgraph needs to go through code review before it can be
accepted in the codebase. This is done through
[Phabricator](https://phacility.com/phabricator/). The command line tool for
interfacing with Phabricator is
[arcanist](https://phacility.com/phabricator/arcanist/). You should already
have it installed if you followed the steps in [Quick Start](quick-start.md).
The only required setup is to go in the root of Memgraph's project and run:
arc install-certificate
## Working on Your Feature Branch
Git has a concept of source code *branches*. The `master` branch contains all
of the changes which were reviewed and accepted in Memgraph's code base. The
`master` branch is selected by default.
### Creating a Branch
When working on a new feature or fixing a bug, you should create a new branch
out of the `master` branch. For example, let's say you are adding static type
checking to the query language compiler. You would create a branch called
`mg_query_static_typing` with the following command:
git branch mg_query_static_typing
To switch to that branch, type:
git checkout mg_query_static_typing
Since doing these two steps will happen often, you can use a shortcut command:
git checkout -b mg_query_static_typing
Note that a branch is created from the currently selected branch. So, if you
wish to create another branch from `master` you need to switch to `master`
first.
The usual convention for naming your branches is `mg_<feature_name>`; you may
swap underscores ('\_') for hyphens ('-').
Do take care not to mix the case of your branch names! Certain operating
systems (like Windows) don't distinguish the casing in git branches. This may
cause hard-to-track-down issues when trying to switch branches. Therefore, you
should always name your branches with lowercase letters.
### Making and Committing Changes
When you have a branch for your new addition, you can now actually start
implementing it. After some amount of time, you may have created new files,
modified others and maybe even deleted unused files. You need to tell git to
track those changes. This is accomplished with `git add` and `git rm`
commands.
git add path-to-new-file path-to-modified-file
git rm path-to-deleted-file
To check that everything is correctly tracked, you may use the `git status`
command. It will also print the name of the currently selected branch.
If everything seems OK, you should commit these changes to git.
git commit
You will be presented with an editor where you need to type the commit
message. Writing a good commit message is an art in itself. You should take a
look at the links below. We try to follow these conventions as much as
possible.
* [How to Write a Git Commit Message](http://chris.beams.io/posts/git-commit/)
* [A Note About Git Commit Messages](http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html)
* [stopwritingramblingcommitmessages](http://stopwritingramblingcommitmessages.com/)
### Sending Changes on a Review
After finishing your work on your feature branch, you will want to send it on
code review. This is done through Arcanist. To do that, run the following
command:
arc diff
You will, once again, be presented with an editor where you need to describe
your whole work. `arc` will by default fill that description with your commit
messages. The title and summary of your work should also follow the
conventions of git messages as described above. If you followed the
guidelines, the message filled by `arc` should be fine.
In addition to the message, you need to fill the `Reviewers:` line with
usernames of people who should do the code review.
Your changes will be visible on Phabricator as a so-called "diff". You can find
the default view of active diffs
[here](https://phabricator.memgraph.io/differential/).
### Updating Changes Based on Review
When you get comments in the code review, you will want to make additional
modifications to your work. The same workflow as before applies: [Making and
Committing Changes](#making-and-committing-changes)
After making those changes, send them back on code review:
arc diff
### Updating From New Master
Let's say that, while you were working, someone else added some new features
to the codebase that you would like to use in your current work. To obtain
those changes you should update your `master` branch:
git checkout master
git pull origin master
Now, these changes are on `master`, but you want them in your local branch. To
do that, use `git rebase`:
git checkout mg_query_static_typing
git rebase master
During `git rebase`, you may get reports that some files have conflicting
changes. If you need help resolving them, don't be afraid to ask around! After
you've resolved them, mark them as done with `git add` command. You may
then continue with `git rebase --continue`.
After the `git rebase` is done, you will now have new changes from `master` on
your feature branch as if you just created and started working on that branch.
You may continue with the usual workflow of [Making and Committing
Changes](#making-and-committing-changes) and [Sending Changes on a
Review](#sending-changes-on-a-review).
### Sending Your Changes on Master Branch
When your changes pass the code review, you are ready to integrate them in the
`master` branch. To do that, run the following command:
arc land
Arcanist will take care of obtaining the latest changes from `master` and
merging your changes on top. If the `land` was successful, Arcanist will
delete your local branch and you will be back on `master`. Continuing from the
examples above, the deleted branch would be `mg_query_static_typing`.
This marks the completion of your changes, and you are ready to work on
something else.
### Note For People Familiar With Git
Since Arcanist takes care of merging your git commits and pushing them on
`master`, you should *never* have to call `git merge` and `git push`. If you
find yourself typing those commands, check that you are doing the right thing.
The most common mistake is to use `git merge` instead of `git rebase` for the
case described in [Updating From New Master](#updating-from-new-master).
@ -1,6 +1,7 @@
# Memgraph Code Documentation
IMPORTANT: Auto-generated (run doxygen Doxyfile in the project root).
IMPORTANT: auto-generated (run doxygen Doxyfile in the project root)
* HTML - Open docs/doxygen/html/index.html.
* Latex - Run make inside docs/doxygen/latex.
* HTML - just open docs/doxygen/html/index.html
* Latex - run make inside docs/doxygen/latex
@ -0,0 +1,20 @@
# Kafka - data extractor
The data extractor is responsible for loading data from Kafka. In order to do
so, it needs to know the URI of the Kafka leader broker. Once the extractor
connects to Kafka, it starts importing data.
Data extractor depends on [cppkafka](https://github.com/mfontanini/cppkafka)
which makes message consumption just a few API calls, as seen
[here](https://github.com/mfontanini/cppkafka/wiki/Consuming-messages).
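For illustration, a minimal consumption loop modeled on the linked cppkafka example might look like the sketch below; the broker address, group id and topic name are placeholders, and this is not Memgraph's actual extractor code.

```cpp
#include <iostream>
#include <cppkafka/cppkafka.h>

int main() {
  // Placeholder broker address and consumer group.
  cppkafka::Configuration config = {
      {"metadata.broker.list", "127.0.0.1:9092"},
      {"group.id", "memgraph-extractor"}};
  cppkafka::Consumer consumer(config);
  consumer.subscribe({"some-topic"});
  while (true) {
    cppkafka::Message message = consumer.poll();
    if (!message) continue;  // no message within the poll timeout
    if (message.get_error()) {
      if (!message.is_eof()) std::cerr << message.get_error() << std::endl;
      continue;
    }
    // In the real extractor the raw payload would be handed to the transform
    // script; here we just print it.
    std::cout << message.get_payload() << std::endl;
  }
}
```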
There is also other metadata that can be passed to the data extractor; it is
defined with our [extension](opencypher.md) of openCypher.
A full list of configurable metadata can be found
[here](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md).
Memgraph supports customizing the following:
* `metadata.broker.list` which is a required parameter, set by `KAFKA 'URI'`
* `queue.buffering.max.ms` set by `BATCH INTERVAL`
@ -0,0 +1,52 @@
# Kafka - openCypher clause
One must be able to specify the following when importing data from Kafka:
* Kafka URI
* Transform [script](transform.md) URI
The Kafka endpoint is the URI of the leader broker and it is required by the
data [extractor](extractor.md).
The minimum required syntax looks like:
```opencypher
CREATE STREAM kafka_stream AS LOAD DATA KAFKA '127.0.0.1/topic' WITH TRANSFORM
'127.0.0.1/transform.py';
```
The `CREATE STREAM` clause happens in a transaction.
The full openCypher clause for creating a stream is:
```opencypher
CREATE STREAM stream_name AS
LOAD DATA KAFKA 'URI'
WITH TRANSFORM 'URI'
[BATCH INTERVAL milliseconds]
```
The `WITH TRANSFORM` parameter should contain a URI of the transform script.
The `BATCH INTERVAL` parameter defines the time interval, in milliseconds,
between two successive stream importing operations.
The `DROP` clause deletes a stream:
```opencypher
DROP STREAM stream_name;
```
The `SHOW` clause enables you to see all configured streams:
```opencypher
SHOW STREAMS;
```
You can also start/stop streams with the `START` and `STOP` clauses:
```opencypher
START STREAM stream_name [LIMIT count BATCHES];
STOP STREAM stream_name;
```
There are also convenience clauses to start and stop all streams:
```opencypher
START ALL STREAMS;
STOP ALL STREAMS;
```
@ -0,0 +1,52 @@
# Kafka - data transform
The transform script is a user defined script written in Python. The script
should be aware of the data format in the Kafka message.
Each Kafka message is byte length encoded, which means that the first eight
bytes of each message contain the length of the message.
More on the message format can be seen
[here](https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-Messagesets).
The script will be embedded in our C++ codebase using Python's
[embedding](https://docs.python.org/3.5/extending/embedding.html) feature.
A sample code for a streaming transform script could look like this:
```python
import struct
import sys
def get_records():
    while True:
        # The first eight bytes of each message contain the message length,
        # so read them as raw bytes.
        message_len = sys.stdin.buffer.read(8)
        if len(message_len) == 8:
            # Native unsigned long; assumes a 64-bit platform where it is 8 bytes.
            message_len = struct.unpack("L", message_len)[0]
            record = sys.stdin.buffer.read(message_len).decode("utf-8")
            yield record
        else:
            assert len(message_len) == 0, message_len
            return


def create_vertex(fields):
    return "CREATE (n:Node {{id: {}}})".format(fields[1])


def create_edge(fields):
    return "MATCH (n:Node {{id: {}}}) "\
           "MATCH ((m:Node {{id : {}}})) "\
           "CREATE (n)-[e:Edge{{value: {}}}]->(m)"\
           .format(fields[1], fields[2], fields[3])


# Output an openCypher query string for every record read from the stream.
for record in get_records():
    fields = record.split("\t")
    if fields[0] == "v":
        print(create_vertex(fields))
    else:
        print(create_edge(fields))
```
The script should output openCypher query strings based on the type of the
records.
@ -0,0 +1,23 @@
# Technical Documentation
## About Memgraph
Memgraph is an ACID compliant high performance transactional in-memory graph
database management system featuring highly concurrent
data structures, multi-version concurrency control and asynchronous IO.
[//]: # (When adding a new documentation file, please add it to the list)
## Contents
* [About Memgraph](#about-memgraph)
* [Quick Start](quick-start.md)
* [Examples](examples.md)
* [Drivers](drivers.md)
* [Data Storage](storage.md)
* [openCypher Query Language](open-cypher.md)
* [Import Tools](import-tools.md)
* [Concepts](concepts.md)
* [Upcoming Features](upcoming-features.md)
[//]: # (Nothing should go below the contents section)
@ -0,0 +1,58 @@
#!/bin/bash
working_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
build_dir=.bundle_community
# curl and php are required
if ! which php >/dev/null; then
echo "php is required!"
exit 1
fi
cd ${working_dir}
generator="couscous.phar"
# download the generator if it's not already downloaded
if [ ! -f ${generator} ]; then
curl -OS http://couscous.io/${generator}
fi
# create build directory
if [ -d $build_dir ]; then
rm -rf $build_dir
fi
mkdir $build_dir
# copy all files to build directory
cp couscous.yml *.md $generator $build_dir
# go to build directory
cd $build_dir
# modify config file
cat >> couscous.yml <<EOF
template:
    directory: .
EOF
# create template file
echo "{{ content|raw }}" > default.twig
# generate the site
php ${generator} generate
# create contents file
cd .couscous/generated
tail -n +$( grep -n "Contents" index.html | cut -d ':' -f 1 ) index.html | \
grep href | grep .html | \
sed -r 's/^[^"]+"//' | sed -r 's/.html">/ /' | sed -r 's/<.+//' > .contents
rm index.html
# create archive
target=docs.tar.gz
if [ -f $target ]; then
rm $target
fi
tar -czf $working_dir/$target .contents *.html
echo "Created archive: $working_dir/$target"
@ -0,0 +1,72 @@
## Concepts
### Weighted Shortest Path
The weighted shortest path problem is the problem of finding a path between two
nodes in a graph such that the sum of the weights of edges connecting nodes on
the path is minimized.
More about the *weighted shortest path* problem can be found
[here](https://en.wikipedia.org/wiki/Shortest_path_problem).
### Implementation
Our implementation of the *weighted shortest path* algorithm uses a modified
version of Dijkstra's algorithm that can handle a length restriction. The length
restriction parameter is optional; leaving it out can increase the complexity of
the algorithm.
A sample query that finds a shortest path between two nodes can look like this:
```opencypher
MATCH (a {id: 723})-[edge_list *wShortest 10 (e, n | e.weight) total_weight]-(b {id: 882}) RETURN *
```
This query has an upper bound length restriction set to `10`. This means that no
path that traverses more than `10` edges will be considered a valid result.
#### Upper Bound Implications
Since the upper bound parameter is optional, we can get different results
depending on whether it is set.
Let's take a look at the following graph and queries.
```
       5       5
    /-----[1]-----\
   /               \
  /                 \      2
[0]                 [4]---------[5]
  \                 /
   \               /
    \--[2]---[3]--/
      3    3    3
```
```opencypher
MATCH (a {id: 0})-[edge_list *wShortest 3 (e, n | e.weight) total_weight]-(b {id: 5}) RETURN *
```
```opencypher
MATCH (a {id: 0})-[edge_list *wShortest (e, n | e.weight) total_weight]-(b {id: 5}) RETURN *
```
The first query will try to find the weighted shortest path between nodes `0`
and `5` with the restriction on the path length set to `3`, and the second query
will try to find the weighted shortest path with no restriction on the path
length.
The expected result for the first query is `0 -> 1 -> 4 -> 5` with total cost of
`12`, while the expected result for the second query is `0 -> 2 -> 3 -> 4 -> 5`
with total cost of `11`. Obviously, the second query can find the true shortest
path because it has no restrictions on the length.
To handle cases when the length restriction is set, the *weighted shortest path*
algorithm uses both vertex and distance as the state. This causes the search
space to increase by the factor of the given upper bound. On the other hand, if
the upper bound parameter is not set, the search space might contain the whole
graph.
Because of this, one should always try to narrow down the upper bound limit to
be as precise as possible in order to have a more performant query.
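To make the (vertex, distance) state idea concrete, below is a self-contained sketch of a Dijkstra variant that enforces an upper bound on the number of traversed edges; it illustrates the technique only and is not Memgraph's implementation.

```cpp
#include <functional>
#include <limits>
#include <queue>
#include <tuple>
#include <vector>

// Dijkstra over (vertex, edges-used) states so an upper bound on the path
// length can be enforced. Passing a bound equal to the number of vertices
// effectively disables the restriction.
struct Edge {
  int to;
  double weight;
};

double WeightedShortestPath(const std::vector<std::vector<Edge>> &adj, int src,
                            int dst, int max_edges) {
  const double kInf = std::numeric_limits<double>::infinity();
  // dist[v][k] = cheapest cost found so far to reach v using exactly k edges.
  std::vector<std::vector<double>> dist(
      adj.size(), std::vector<double>(max_edges + 1, kInf));
  using State = std::tuple<double, int, int>;  // (cost, vertex, edges used)
  std::priority_queue<State, std::vector<State>, std::greater<State>> pq;
  dist[src][0] = 0.0;
  pq.emplace(0.0, src, 0);
  while (!pq.empty()) {
    double cost;
    int vertex, depth;
    std::tie(cost, vertex, depth) = pq.top();
    pq.pop();
    if (cost > dist[vertex][depth]) continue;  // stale queue entry
    if (vertex == dst) return cost;            // first pop of the target is optimal
    if (depth == max_edges) continue;          // length restriction reached
    for (const Edge &e : adj[vertex]) {
      double next = cost + e.weight;
      if (next < dist[e.to][depth + 1]) {
        dist[e.to][depth + 1] = next;
        pq.emplace(next, e.to, depth + 1);
      }
    }
  }
  return kInf;  // destination unreachable within the length restriction
}
```

Each vertex can be relaxed once per possible depth, which is exactly the factor-of-the-upper-bound blow-up of the search space mentioned above.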
@ -0,0 +1,2 @@
title: Memgraph
subTitle: Technical Documentation
@ -0,0 +1,4 @@
## Dynamic Graph Partitioner
Memgraph supports dynamic graph partitioning, which dynamically improves
performance on badly partitioned datasets distributed over multiple workers.
To enable it, use the `--dynamic_graph_partitioner_enabled` flag.
@ -0,0 +1,191 @@
## Bolt Drivers
### Python Driver Example
Neo4j officially supports Python for interacting with an openCypher and Bolt
compliant database. For details consult the
[official documentation](http://neo4j.com/docs/api/python-driver) and the
[GitHub project](https://github.com/neo4j/neo4j-python-driver). Following is
a basic usage example:
```python
from neo4j.v1 import GraphDatabase, basic_auth
# Initialize and configure the driver.
# * provide the correct URL where Memgraph is reachable;
# * use an empty user name and password.
driver = GraphDatabase.driver("bolt://localhost:7687",
                              auth=basic_auth("", ""))
# Start a session in which queries are executed.
session = driver.session()
# Execute openCypher queries.
# After each query, call either `consume()` or `data()`
session.run('CREATE (alice:Person {name: "Alice", age: 22})').consume()
# Get all the vertices from the database (potentially multiple rows).
vertices = session.run('MATCH (n) RETURN n').data()
# Assuming we started with an empty database, we should have Alice
# as the only row in the results.
only_row = vertices.pop()
alice = only_row["n"]
# Print out what we retrieved.
print("Found a vertex with labels '{}', name '{}' and age {}".format(
alice['name'], alice.labels, alice['age'])
# Remove all the data from the database.
session.run('MATCH (n) DETACH DELETE n').consume()
# Close the session and the driver.
session.close()
driver.close()
```
### Java Driver Example
The details about Java driver can be found
[on GitHub](https://github.com/neo4j/neo4j-java-driver).
The example below is equivalent to the Python example. The major difference is
that a `Config` object has to be created before constructing the driver.
```java
import org.neo4j.driver.v1.*;
import org.neo4j.driver.v1.types.*;
import static org.neo4j.driver.v1.Values.parameters;
import java.util.*;
public class JavaQuickStart {
    public static void main(String[] args) {
        // Initialize driver.
        Config config = Config.build().toConfig();
        Driver driver = GraphDatabase.driver("bolt://localhost:7687",
                                             AuthTokens.basic("", ""),
                                             config);
        // Execute basic queries.
        try (Session session = driver.session()) {
            StatementResult rs1 = session.run("MATCH (n) DETACH DELETE n");
            StatementResult rs2 = session.run(
                "CREATE (alice: Person {name: 'Alice', age: 22})");
            StatementResult rs3 = session.run("MATCH (n) RETURN n");
            List<Record> records = rs3.list();
            Record record = records.get(0);
            Node node = record.get("n").asNode();
            System.out.println(node.get("name").asString());
        } catch (Exception e) {
            System.out.println(e);
            System.exit(1);
        }
        // Cleanup.
        driver.close();
    }
}
```
### Javascript Driver Example
The details about the JavaScript driver can be found
[on GitHub](https://github.com/neo4j/neo4j-javascript-driver).
The JavaScript example below is equivalent to the Python and Java examples.
Here is an example for `Node.js`. Memgraph doesn't have integrated
support for `WebSocket`, which is required when executing in a web
browser. If you want to run `openCypher` queries from a web browser,
[websockify](https://github.com/novnc/websockify) has to be up and running.
Requests from web browsers are wrapped into `WebSocket` messages, and a proxy
is needed to handle the overhead. The proxy has to be configured to point
to Memgraph's Bolt port, and the web browser driver has to send requests to the
proxy port.
```javascript
var neo4j = require('neo4j-driver').v1;
var driver = neo4j.driver("bolt://localhost:7687",
                          neo4j.auth.basic("neo4j", "1234"));
var session = driver.session();

function die() {
  session.close();
  driver.close();
}

function run_query(query, callback) {
  var run = session.run(query, {});
  run.then(callback).catch(function (error) {
    console.log(error);
    die();
  });
}

run_query("MATCH (n) DETACH DELETE n", function (result) {
  console.log("Database cleared.");
  run_query("CREATE (alice: Person {name: 'Alice', age: 22})", function (result) {
    console.log("Record created.");
    run_query("MATCH (n) RETURN n", function (result) {
      console.log("Record matched.");
      var alice = result.records[0].get("n");
      console.log(alice.labels[0]);
      console.log(alice.properties["name"]);
      session.close();
      driver.close();
    });
  });
});
```
### C# Driver Example
The C# driver is hosted
[on GitHub](https://github.com/neo4j/neo4j-dotnet-driver). The example below
performs the same work as all of the previous examples.
```csharp
using System;
using System.Linq;
using Neo4j.Driver.V1;

public class Basic {
    public static void Main(string[] args) {
        // Initialize the driver.
        var config = Config.DefaultConfig;
        using (var driver = GraphDatabase.Driver("bolt://localhost:7687", AuthTokens.None, config))
        using (var session = driver.Session())
        {
            // Run basic queries.
            session.Run("MATCH (n) DETACH DELETE n").Consume();
            session.Run("CREATE (alice:Person {name: \"Alice\", age: 22})").Consume();
            var result = session.Run("MATCH (n) RETURN n").First();
            var alice = (INode) result["n"];
            Console.WriteLine(alice["name"]);
            Console.WriteLine(string.Join(", ", alice.Labels));
            Console.WriteLine(alice["age"]);
        }
        Console.WriteLine("All ok!");
    }
}
```
### Secure Sockets Layer (SSL)
Secure connections are supported and enabled by default. The server initially
ships with a self-signed testing certificate. The certificate can be replaced
by editing the following parameters in `/etc/memgraph/memgraph.conf`:
```
--cert-file=/path/to/ssl/certificate.pem
--key-file=/path/to/ssl/privatekey.pem
```
To disable SSL support and use insecure connections to the database you should
set both parameters (`--cert-file` and `--key-file`) to empty values.
### Limitations
Memgraph is currently in an early stage and has a number of limitations that we
plan to remove in future versions.
#### Multiple Users & Authorization
Memgraph is currently single-user only. There is no way to control user
privileges. The default user has read and write privileges over the whole
database.
@ -0,0 +1,519 @@
## Examples
This chapter shows you how to use Memgraph on real-world data and how to get
interesting and useful information out of it.
### TED Talks Example
[TED](https://www.ted.com/) is a nonprofit organization devoted to spreading
ideas, usually in the form of short, powerful talks.
Today, TED talks are influential videos from expert speakers on almost all
topics &mdash; from science to business to global issues.
Here we present a small dataset which consists of 97 talks. We'll show you how
to model this data as a graph and demonstrate a few example queries.
#### Data Model
Each TED talk has a main speaker, so we
identify two types of nodes &mdash; `Talk` and `Speaker`. Also, we will add
an edge of type `Gave` pointing to a `Talk` from its main `Speaker`.
Each speaker has a name so we can add property `name` to `Speaker` node.
Likewise, we'll add properties `name`, `title` and `description` to node
`Talk`. Furthermore, each talk is given in a specific TED event, so we can
create node `Event` with property `name` and relationship `InEvent` between
talk and event.
Talks are tagged with keywords to facilitate searching, hence we
add node `Tag` with property `name` and relationship `HasTag` between talk and
tag. Moreover, users give ratings to each talk by selecting up to three
predefined string values. Therefore we add node `Rating` with these values as
property `name` and relationship `HasRating` with property `user_count` between
talk and rating nodes.
#### Example Queries
We have prepared a database snapshot for this example, so you can easily import
it when starting Memgraph using the `--durability-directory` option.
```bash
/usr/lib/memgraph/memgraph --durability-directory /usr/share/memgraph/examples/TEDTalk \
--durability-enabled=false --snapshot-on-exit=false
```
When using Memgraph installed from DEB or RPM package, you may need to stop
the currently running Memgraph server before you can import the example. Use
the following command:
```bash
systemctl stop memgraph
```
When using Docker, you can import the example with the following command:
```bash
docker run -p 7687:7687 \
-v mg_lib:/var/lib/memgraph -v mg_log:/var/log/memgraph -v mg_etc:/etc/memgraph \
memgraph --durability-directory /usr/share/memgraph/examples/TEDTalk \
--durability-enabled=false --snapshot-on-exit=false
```
Now you're ready to try out some of the following queries.
NOTE: If you modify the dataset, the changes will stay only during this run of
Memgraph.
1) Find all talks given by a specific speaker:
```opencypher
MATCH (n:Speaker {name: "Hans Rosling"})-[:Gave]->(m:Talk)
RETURN m.title;
```
2) Find the top 20 speakers with the most talks given:
```opencypher
MATCH (n:Speaker)-[:Gave]->(m)
RETURN n.name, COUNT(m) as TalksGiven
ORDER BY TalksGiven DESC LIMIT 20;
```
3) Find talks related by tag to a specific talk and count them:
```opencypher
MATCH (n:Talk {name: "Michael Green: Why we should build wooden skyscrapers"})
-[:HasTag]->(t:Tag)<-[:HasTag]-(m:Talk)
WITH * ORDER BY m.name
RETURN t.name, COLLECT(m.name), COUNT(m) AS TalksCount
ORDER BY TalksCount DESC;
```
4) Find 20 most frequently used tags:
```opencypher
MATCH (t:Tag)<-[:HasTag]-(n:Talk)
RETURN t.name as Tag, COUNT(n) AS TalksCount
ORDER BY TalksCount DESC, Tag LIMIT 20;
```
5) Find 20 talks most rated as "Funny". If you want to query by other ratings,
possible values are: Obnoxious, Jaw-dropping, OK, Persuasive, Beautiful,
Confusing, Longwinded, Unconvincing, Fascinating, Ingenious, Courageous, Funny,
Informative and Inspiring.
```opencypher
MATCH (r:Rating{name:"Funny"})<-[e:HasRating]-(m:Talk)
RETURN m.name, e.user_count ORDER BY e.user_count DESC LIMIT 20;
```
6) Find inspiring talks and their speakers from the field of technology:
```opencypher
MATCH (n:Talk)-[:HasTag]->(m:Tag {name: "technology"})
MATCH (n)-[r:HasRating]->(p:Rating {name: "Inspiring"})
MATCH (n)<-[:Gave]-(s:Speaker)
WHERE r.user_count > 1000
RETURN n.title, s.name, r.user_count ORDER BY r.user_count DESC;
```
7) Now let's see one real-world example &mdash; how to make a real-time
recommendation. If you've just watched a talk from a certain
speaker (e.g. Hans Rosling) you might be interested in finding more talks from
the same speaker on a similar topic:
```opencypher
MATCH (n:Speaker {name: "Hans Rosling"})-[:Gave]->(m:Talk)
MATCH (t:Talk {title: "New insights on poverty"})-[:HasTag]->(tag:Tag)<-[:HasTag]-(m)
WITH * ORDER BY tag.name
RETURN m.title as Title, COLLECT(tag.name), COUNT(tag) as TagCount
ORDER BY TagCount DESC, Title;
```
The following few queries are focused on extracting information about
TED events.
8) Find how many talks were given per event:
```opencypher
MATCH (n:Event)<-[:InEvent]-(t:Talk)
RETURN n.name as Event, COUNT(t) AS TalksCount
ORDER BY TalksCount DESC, Event
LIMIT 20;
```
9) Find the most popular tags in a specific event:
```opencypher
MATCH (n:Event {name:"TED2006"})<-[:InEvent]-(t:Talk)-[:HasTag]->(tag:Tag)
RETURN tag.name as Tag, COUNT(t) AS TalksCount
ORDER BY TalksCount DESC, Tag
LIMIT 20;
```
10) Discover which speakers participated in more than 2 events:
```opencypher
MATCH (n:Speaker)-[:Gave]->(t:Talk)-[:InEvent]->(e:Event)
WITH n, COUNT(e) AS EventsCount WHERE EventsCount > 2
RETURN n.name as Speaker, EventsCount
ORDER BY EventsCount DESC, Speaker;
```
11) For each speaker, search for other speakers that participated in the same
events:
```opencypher
MATCH (n:Speaker)-[:Gave]->()-[:InEvent]->(e:Event)<-[:InEvent]-()<-[:Gave]-(m:Speaker)
WHERE n.name != m.name
WITH DISTINCT n, m ORDER BY m.name
RETURN n.name AS Speaker, COLLECT(m.name) AS Others
ORDER BY Speaker;
```
### Football Example
[Football](https://en.wikipedia.org/wiki/Association_football)
(soccer for the heathens) is a team sport played between two teams of eleven
players with a spherical ball. The game is played on a rectangular pitch with
a goal at each end. The object of the game is to score by moving the ball
beyond the goal line into the opposing goal. The game is played by more than
250 million players in over 200 countries, making it the world's most
popular sport.
In this example, we will present a graph model of a reasonably sized dataset
of football matches across the world's most popular leagues.
#### Data Model
In essence, we are trying to model a set of football matches. All information
about a single match is going to be contained in three nodes and two edges.
Two of the nodes will represent the teams that have played the match, while the
third node will represent the game itself. Both edges are directed from the
team nodes to the game node and are labeled as `:Played`.
Let us consider a real life example of this model&mdash;Arsene Wenger's 1000th
game in charge of Arsenal. This was a regular fixture of the 2013/2014
English Premier League season, yet it was written in the stars that this historic
moment would be a big London derby against Chelsea at Stamford Bridge. The
sketch below shows how this game is modeled in our database.
```
+---------------+                                            +-----------------------------+
|n: Team        |                                            |w: Game                      |
|               |-[:Played {side: "home", outcome: "won"}]-->|                             |
|name: "Chelsea"|                                            |HT_home_score: 4             |
+---------------+                                            |HT_away_score: 0             |
                                                             |HT_result: "H"               |
                                                             |FT_home_score: 6             |
                                                             |FT_away_score: 0             |
                                                             |FT_result: "H"               |
+---------------+                                            |date: "2014-03-22"           |
|m: Team        |                                            |league: "ENG-Premier League" |
|               |-[:Played {side: "away", outcome: "lost"}]->|season: 2013                 |
|name: "Arsenal"|                                            |referee: "Andre Marriner"    |
+---------------+                                            +-----------------------------+
```
#### Example Queries
We have prepared a database snapshot for this example, so you can easily import
it when starting Memgraph using the `--durability-directory` option.
```bash
/usr/lib/memgraph/memgraph --durability-directory /usr/share/memgraph/examples/football \
--durability-enabled=false --snapshot-on-exit=false
```
When using Docker, you can import the example with the following command:
```bash
docker run -p 7687:7687 \
-v mg_lib:/var/lib/memgraph -v mg_log:/var/log/memgraph -v mg_etc:/etc/memgraph \
memgraph --durability-directory /usr/share/memgraph/examples/football \
--durability-enabled=false --snapshot-on-exit=false
```
Now you're ready to try out some of the following queries.
NOTE: If you modify the dataset, the changes will stay only during this run of
Memgraph.
1) You might wonder, what leagues are supported?
```opencypher
MATCH (n:Game)
RETURN DISTINCT n.league AS League
ORDER BY League;
```
2) We have stored a certain number of seasons for each league. What is the
oldest/newest season we have included?
```opencypher
MATCH (n:Game)
RETURN DISTINCT n.league AS League, MIN(n.season) AS Oldest, MAX(n.season) AS Newest
ORDER BY League;
```
3) You have already seen one game between Chelsea and Arsenal; let's list all of
them in chronological order.
```opencypher
MATCH (n:Team {name: "Chelsea"})-[e:Played]->(w:Game)<-[f:Played]-(m:Team {name: "Arsenal"})
RETURN w.date AS Date, e.side AS Chelsea, f.side AS Arsenal,
w.FT_home_score AS home_score, w.FT_away_score AS away_score
ORDER BY Date;
```
4) How about filtering games in which Chelsea won?
```opencypher
MATCH (n:Team {name: "Chelsea"})-[e:Played {outcome: "won"}]->
(w:Game)<-[f:Played]-(m:Team {name: "Arsenal"})
RETURN w.date AS Date, e.side AS Chelsea, f.side AS Arsenal,
w.FT_home_score AS home_score, w.FT_away_score AS away_score
ORDER BY Date;
```
5) Home field advantage is a thing in football. Let's list the number of home
defeats for each Premier League team in the 2016/2017 season.
```opencypher
MATCH (n:Team)-[:Played {side: "home", outcome: "lost"}]->
(w:Game {league: "ENG-Premier League", season: 2016})
RETURN n.name AS Team, count(w) AS home_defeats
ORDER BY home_defeats, Team;
```
6) At the end of the season the team with the most points wins the league. For
each victory, a team is awarded 3 points and for each draw it is awarded
1 point. Let's find out how many points the reigning champions (Chelsea) had
at the end of the 2016/2017 season.
```opencypher
MATCH (n:Team {name: "Chelsea"})-[:Played {outcome: "drew"}]->(w:Game {season: 2016})
WITH n, COUNT(w) AS draw_points
MATCH (n)-[:Played {outcome: "won"}]->(w:Game {season: 2016})
RETURN draw_points + 3 * COUNT(w) AS total_points;
```
7) In fact, why not retrieve the whole table?
```opencypher
MATCH (n)-[:Played {outcome: "drew"}]->(w:Game {league: "ENG-Premier League", season: 2016})
WITH n, COUNT(w) AS draw_points
MATCH (n)-[:Played {outcome: "won"}]->(w:Game {league: "ENG-Premier League", season: 2016})
RETURN n.name AS Team, draw_points + 3 * COUNT(w) AS total_points
ORDER BY total_points DESC;
```
8) People have always debated which of the major leagues is the most exciting.
One basic metric is the average number of goals per game. Let's see the results
at the end of the 2016/2017 season. WARNING: This might shock you.
```opencypher
MATCH (w:Game {season: 2016})
RETURN w.league, AVG(w.FT_home_score) + AVG(w.FT_away_score) AS avg_goals_per_game
ORDER BY avg_goals_per_game DESC;
```
9) Another metric might be the number of comebacks&mdash;games where one side
was winning at half time but was overturned by the other side by the end
of the match. Let's count such occurrences during all supported seasons across
all supported leagues.
```opencypher
MATCH (g:Game) WHERE
(g.HT_result = "H" AND g.FT_result = "A") OR
(g.HT_result = "A" AND g.FT_result = "H")
RETURN g.league AS League, count(g) AS Comebacks
ORDER BY Comebacks DESC;
```
10) Exciting leagues also tend to be very unpredictable. On that note, let's list
all triplets of teams where, during the course of one season, team A won against
team B, team B won against team C and team C won against team A.
```opencypher
MATCH (a)-[:Played {outcome: "won"}]->(p:Game {league: "ENG-Premier League", season: 2016})<--
(b)-[:Played {outcome: "won"}]->(q:Game {league: "ENG-Premier League", season: 2016})<--
(c)-[:Played {outcome: "won"}]->(r:Game {league: "ENG-Premier League", season: 2016})<--(a)
WHERE p.date < q.date AND q.date < r.date
RETURN a.name AS Team1, b.name AS Team2, c.name AS Team3;
```
### European road network example
In this section we will show how to use some of Memgraph's built-in graph
algorithms. More specifically, we will show how to use breadth-first search
graph traversal algorithm, and Dijkstra's algorithm for finding weighted
shortest paths between nodes in the graph.
#### Data model
One of the most common applications of graph traversal algorithms is driving
route computation, so we will use the European road network graph as an example.
The graph consists of 999 major European cities from 39 countries in total.
Each city is connected to the country it belongs to via an edge of type `:In_`.
There are edges of type `:Road` connecting cities less than 500 kilometers
apart. Distance between cities is specified in the `length` property of the
edge.
#### Example queries
We have prepared a database snapshot for this example, so you can easily import
it when starting Memgraph using the `--durability-directory` option.
```bash
/usr/lib/memgraph/memgraph --durability-directory /usr/share/memgraph/examples/Europe \
--durability-enabled=false --snapshot-on-exit=false
```
When using Docker, you can import the example with the following command:
```bash
docker run -p 7687:7687 \
-v mg_lib:/var/lib/memgraph -v mg_log:/var/log/memgraph -v mg_etc:/etc/memgraph \
memgraph --durability-directory /usr/share/memgraph/examples/Europe \
--durability-enabled=false --snapshot-on-exit=false
```
Now you're ready to try out some of the following queries.
NOTE: If you modify the dataset, the changes will stay only during this run of
Memgraph.
Let's start off with a few simple queries.
1) Let's list all of the countries in our road network.
```opencypher
MATCH (c:Country) RETURN c.name ORDER BY c.name;
```
2) Which Croatian cities are in our road network?
```opencypher
MATCH (c:City)-[:In_]->(:Country {name: "Croatia"})
RETURN c.name ORDER BY c.name;
```
3) Which cities in our road network are less than 200 km away from Zagreb?
```opencypher
MATCH (:City {name: "Zagreb"})-[r:Road]->(c:City)
WHERE r.length < 200
RETURN c.name ORDER BY c.name;
```
Now let's try some queries using Memgraph's graph traversal capabilities.
4) Say you want to drive from Zagreb to Paris. You might wonder, what is the
least number of cities you have to visit if you don't want to drive more than
500 kilometers between stops. Since the edges in our road network don't connect
cities that are more than 500 km apart, this is a great use case for the
breadth-first search (BFS) algorithm.
```opencypher
MATCH p = (:City {name: "Zagreb"})
-[:Road * bfs]->
(:City {name: "Paris"})
RETURN nodes(p);
```
5) What if we want to bike to Paris instead of driving? It is unreasonable (and
dangerous!) to bike 500 km per day. Let's limit ourselves to biking no more
than 200 km in one go.
```opencypher
MATCH p = (:City {name: "Zagreb"})
-[:Road * bfs (e, v | e.length <= 200)]->
(:City {name: "Paris"})
RETURN nodes(p);
```
"What is this special syntax?", you might wonder.
`(e, v | e.length <= 200)` is called a *filter lambda*. It's a function that
takes an edge symbol `e` and a vertex symbol `v` and decides whether this edge
and vertex pair should be considered valid in breadth-first expansion by
returning true or false (or nil). In the above example, lambda is returning
true if edge length is not greater than 200, because we don't want to bike more
than 200 km in one go.
6) Let's say we also don't want to visit Vienna on our way to Paris, because we
have a lot of friends there and visiting all of them would take up a lot of our
time. We just have to update our filter lambda.
```opencypher
MATCH p = (:City {name: "Zagreb"})
-[:Road * bfs (e, v | e.length <= 200 AND v.name != "Vienna")]->
(:City {name: "Paris"})
RETURN nodes(p);
```
As you can see, without the additional restriction we could visit 11 cities. If
we want to avoid Vienna, we must visit at least 12 cities.
7) Instead of counting the cities visited, we might want to find the shortest
paths in terms of distance travelled. This is a textbook application of
Dijkstra's algorithm. The following query will return the list of cities on the
shortest path from Zagreb to Paris along with the total length of the path.
```opencypher
MATCH p = (:City {name: "Zagreb"})
-[:Road * wShortest (e, v | e.length) total_weight]->
(:City {name: "Paris"})
RETURN nodes(p) as cities, total_weight;
```
As you can see, the syntax is quite similar to breadth-first search syntax.
Instead of a filter lambda, we need to provide a *weight lambda* and the *total
weight symbol*. Given an edge and vertex pair, weight lambda must return the
cost of expanding to the given vertex using the given edge. The path returned
will have the smallest possible sum of costs and it will be stored in the total
weight symbol. A limitation of Dijkstra's algorithm is that the cost must be
non-negative.
8) We can also combine weight and filter lambdas in the shortest-path query.
Let's say we're interested in the shortest path that doesn't require travelling
more than 200 km in one go for our bike route.
```opencypher
MATCH p = (:City {name: "Zagreb"})
-[:Road * wShortest (e, v | e.length) total_weight (e, v | e.length <= 200)]->
(:City {name: "Paris"})
RETURN nodes(p) as cities, total_weight;
```
9) Let's try and find 10 cities that are furthest away from Zagreb.
```opencypher
MATCH (:City {name: "Zagreb"})
-[:Road * wShortest (e, v | e.length) total_weight]->
(c:City)
RETURN c, total_weight
ORDER BY total_weight DESC LIMIT 10;
```
It is not surprising to see that they are all in Siberia.
To learn more about these algorithms, we suggest you check out their Wikipedia
pages:
* [Breadth-first search](https://en.wikipedia.org/wiki/Breadth-first_search)
* [Dijkstra's algorithm](https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm)
Now you're ready to explore the world of graph databases with Memgraph
by yourself and try it on many more examples and datasets.
### Graph Gists Examples
A nice looking set of small graph examples can be found
[here](https://neo4j.com/graphgists/). You can take any use-case and try to
execute the queries against Memgraph. To clear the database between trying out
examples, execute the query:
```opencypher
MATCH (n) DETACH DELETE n;
```
@ -0,0 +1,118 @@
## Import Tools
Memgraph comes with tools for importing data into the database. Currently,
only import of CSV formatted data is supported. We plan to support more formats in
the future.
### CSV Import Tool
CSV data should be in Neo4j CSV compatible format. Detailed format
specification can be found
[here](https://neo4j.com/docs/operations-manual/current/tools/import/file-header-format/).
The import tool is run from the console, using the `mg_import_csv` command.
If you installed Memgraph using Docker, you will need to run the importer
using the following command:
```bash
docker run -v mg_lib:/var/lib/memgraph -v mg_etc:/etc/memgraph -v mg_import:/import-data \
--entrypoint=mg_import_csv memgraph
```
You can pass CSV files containing node data using the `--nodes` option.
Multiple files can be specified by repeating the `--nodes` option. At least
one node file should be specified. Similarly, graph edges (also known as
relationships) are passed via the `--relationships` option. Multiple
relationship files are imported by repeating the option. Unlike nodes,
relationships are not required.
After reading the CSV files, the tool will by default search for the installed
Memgraph configuration. If the configuration is found, the data will be
written in the configured durability directory. If the configuration isn't
found, you will need to use the `--out` option to specify the output file. You
can use the same option to override the default behaviour.
Memgraph will recover the imported data on the next startup by looking in the
durability directory.
For information on other options, run:
```bash
mg_import_csv --help
```
When using Docker, this translates to:
```bash
docker run --entrypoint=mg_import_csv memgraph --help
```
#### Example
Let's import a simple dataset.
Store the following in `comment_nodes.csv`.
```
id:ID(COMMENT_ID),country:string,browser:string,content:string,:LABEL
0,Croatia,Chrome,yes,Message;Comment
1,United Kingdom,Chrome,thanks,Message;Comment
2,Germany,,LOL,Message;Comment
3,France,Firefox,I see,Message;Comment
4,Italy,Internet Explorer,fine,Message;Comment
```
Now, let's add `forum_nodes.csv`.
```
id:ID(FORUM_ID),title:string,:LABEL
0,General,Forum
1,Support,Forum
2,Music,Forum
3,Film,Forum
4,Programming,Forum
```
And finally, set relationships between comments and forums in
`relationships.csv`.
```
:START_ID(COMMENT_ID),:END_ID(FORUM_ID),:TYPE
0,0,POSTED_ON
1,1,POSTED_ON
2,2,POSTED_ON
3,3,POSTED_ON
4,4,POSTED_ON
```
Now, you can import the dataset in Memgraph.
WARNING: Your existing recovery data will be considered obsolete, and Memgraph
will load the new dataset.
Use the following command:
```bash
mg_import_csv --overwrite --nodes=comment_nodes.csv --nodes=forum_nodes.csv --relationships=relationships.csv
```
If using Docker, things are a bit more complicated. First, you need to copy the
CSV files to a place where the Docker container can see them:
```bash
mkdir -p /var/lib/docker/volumes/mg_import/_data
cp comment_nodes.csv forum_nodes.csv relationships.csv /var/lib/docker/volumes/mg_import/_data
```
Then, run the importer with the following:
```bash
docker run -v mg_lib:/var/lib/memgraph -v mg_etc:/etc/memgraph -v mg_import:/import-data \
--entrypoint=mg_import_csv memgraph \
--overwrite \
--nodes=/import-data/comment_nodes.csv --nodes=/import-data/forum_nodes.csv \
--relationships=/import-data/relationships.csv
```
Next time you run Memgraph, the dataset will be loaded.
@ -0,0 +1,857 @@
## openCypher Query Language
[*openCypher*](http://www.opencypher.org/) is a query language for querying
graph databases. It aims to be intuitive and easy to learn, while
providing a powerful interface for working with graph based data.
*Memgraph* supports most of the commonly used constructs of the language. This
chapter contains the details of implemented features. Additionally,
not yet supported features of the language are listed.
* [Reading Existing Data](#reading-existing-data)
* [Writing New Data](#writing-new-data)
* [Reading & Writing](#reading-amp-writing)
* [Indexing](#indexing)
* [Other Features](#other-features)
### Reading Existing Data
The simplest usage of the language is to find data stored in the
database. For that purpose, the following clauses are offered:
* `MATCH`, which searches for patterns;
* `WHERE`, for filtering the matched data and
* `RETURN`, for defining what will be presented to the user in the result
set.
* `UNION` and `UNION ALL` for combining results from multiple queries.
#### MATCH
This clause is used to obtain data from Memgraph by matching it to a given
pattern. For example, to find each node in the database, you can use the
following query.
```opencypher
MATCH (node) RETURN node
```
Finding connected nodes can be achieved by using the query:
```opencypher
MATCH (node1)-[connection]-(node2) RETURN node1, connection, node2
```
In addition to general pattern matching, you can narrow the search down by
specifying node labels and properties. Similarly, edge types and properties
can also be specified. For example, finding each node labeled as `Person` and
with property `age` being 42, is done with the following query.
```opencypher
MATCH (n :Person {age: 42}) RETURN n
```
While their friends can be found with the following.
```opencypher
MATCH (n :Person {age: 42})-[:FriendOf]-(friend) RETURN friend
```
There are cases when a user needs to find data which is connected by
traversing a path of connections, but the user doesn't know how many
connections need to be traversed. openCypher allows for designating patterns
with *variable path lengths*. Matching such a path is achieved by using the
`*` (*asterisk*) symbol inside the edge element of a pattern. For example,
traversing from `node1` to `node2` by following any number of connections in a
single direction can be achieved with:
```opencypher
MATCH (node1)-[r*]->(node2) RETURN node1, r, node2
```
If paths are very long, finding them could take a long time. To prevent that,
a user can provide the minimum and maximum length of the path. For example,
paths of length between 2 and 4 can be obtained with a query like:
```opencypher
MATCH (node1)-[r*2..4]->(node2) RETURN node1, r, node2
```
It is possible to name patterns in the query and return the resulting paths.
This is especially useful when matching variable length paths:
```opencypher
MATCH path = ()-[r*2..4]->() RETURN path
```
More details on how `MATCH` works can be found
[here](https://neo4j.com/docs/developer-manual/current/cypher/clauses/match/).
The `MATCH` clause can be modified by prepending the `OPTIONAL` keyword.
`OPTIONAL MATCH` clause behaves the same as a regular `MATCH`, but when it
fails to find the pattern, missing parts of the pattern will be filled with
`null` values. Examples can be found
[here](https://neo4j.com/docs/developer-manual/current/cypher/clauses/optional-match/).
#### WHERE
You have already seen that simple filtering can be achieved by using labels
and properties in `MATCH` patterns. When more complex filtering is desired,
you can use `WHERE` paired with `MATCH` or `OPTIONAL MATCH`. For example,
finding each person older than 20 is done with this query.
```opencypher
MATCH (n :Person) WHERE n.age > 20 RETURN n
```
Additional examples can be found
[here](https://neo4j.com/docs/developer-manual/current/cypher/clauses/where/).
#### RETURN
The `RETURN` clause defines which data should be included in the resulting
set. Basic usage was already shown in the examples for `MATCH` and `WHERE`
clauses. Another feature of `RETURN` is renaming the results using the `AS`
keyword.
Example.
```opencypher
MATCH (n :Person) RETURN n AS people
```
That query would display all nodes under the header named `people` instead of
`n`.
When you want to get everything that was matched, you can use the `*`
(*asterisk*) symbol.
This query:
```opencypher
MATCH (node1)-[connection]-(node2) RETURN *
```
is equivalent to:
```opencypher
MATCH (node1)-[connection]-(node2) RETURN node1, connection, node2
```
`RETURN` can be followed by the `DISTINCT` operator, which will remove
duplicate results. For example, getting unique names of people can be achieved
with:
```opencypher
MATCH (n :Person) RETURN DISTINCT n.name
```
Besides choosing what will be the result and how it will be named, the
`RETURN` clause can also be used to:
* limit results with `LIMIT` sub-clause;
* skip results with `SKIP` sub-clause;
* order results with `ORDER BY` sub-clause and
* perform aggregations (such as `count`).
More details on `RETURN` can be found
[here](https://neo4j.com/docs/developer-manual/current/cypher/clauses/return/).
##### SKIP & LIMIT
These sub-clauses take a number of how many results to skip or limit.
For example, to get the first 3 results you can use this query.
```opencypher
MATCH (n :Person) RETURN n LIMIT 3
```
If you want to get all the results after the first 3, you can use the
following.
```opencypher
MATCH (n :Person) RETURN n SKIP 3
```
The `SKIP` and `LIMIT` can be combined. So for example, to get the 2nd result,
you can do:
```opencypher
MATCH (n :Person) RETURN n SKIP 1 LIMIT 1
```
##### ORDER BY
Since the patterns which are matched can come in any order, it is very useful
to be able to enforce some ordering among the results. In such cases, you can
use the `ORDER BY` sub-clause.
For example, the following query will get all `:Person` nodes and order them
by their names.
```opencypher
MATCH (n :Person) RETURN n ORDER BY n.name
```
By default, ordering will be in the ascending order. To change the order to be
descending, you should append `DESC`.
For example, to order people by their name descending, you can use this query.
```opencypher
MATCH (n :Person) RETURN n ORDER BY n.name DESC
```
You can also order by multiple variables. The results will be sorted by the
first variable listed. If the values are equal, the results are sorted by the
second variable, and so on.
Example. Ordering by first name descending and last name ascending.
```opencypher
MATCH (n :Person) RETURN n ORDER BY n.name DESC, n.lastName
```
Note that `ORDER BY` sees only the variable names as carried over by `RETURN`.
This means that the following will result in an error.
```opencypher
MATCH (old :Person) RETURN old AS new ORDER BY old.name
```
Instead, the `new` variable must be used:
```opencypher
MATCH (old :Person) RETURN old AS new ORDER BY new.name
```
The `ORDER BY` sub-clause may come in handy with `SKIP` and/or `LIMIT`
sub-clauses. For example, to get the oldest person you can use the following.
```opencypher
MATCH (n :Person) RETURN n ORDER BY n.age DESC LIMIT 1
```
##### Aggregating
openCypher has functions for aggregating data. Memgraph currently supports
the following aggregating functions.
* `avg`, for calculating the average.
* `collect`, for collecting multiple values into a single list or map. If given a single expression, values are collected into a list. If given two expressions, values are collected into a map where the first expression denotes map keys (must be string values) and the second expression denotes map values.
* `count`, for counting the resulting values.
* `max`, for calculating the maximum result.
* `min`, for calculating the minimum result.
* `sum`, for getting the sum of numeric results.
Example, calculating the average age:
```opencypher
MATCH (n :Person) RETURN avg(n.age) AS averageAge
```
Collecting items into a list:
```opencypher
MATCH (n :Person) RETURN collect(n.name) AS list_of_names
```
Collecting items into a map:
```opencypher
MATCH (n :Person) RETURN collect(n.name, n.age) AS map_name_to_age
```
Click
[here](https://neo4j.com/docs/developer-manual/current/cypher/functions/aggregating/)
for additional details on how aggregations work.
#### UNION and UNION ALL
openCypher supports combining results from multiple queries into a single result
set. That result will contain rows that belong to queries in the union
respecting the union type.
Using `UNION` will contain only distinct rows while `UNION ALL` will keep all
rows from all given queries.
Restrictions when using `UNION` or `UNION ALL`:
* The number and the names of columns returned by queries must be the same
for all of them.
* There can be only one union type between single queries, i.e. a query can't
contain both `UNION` and `UNION ALL`.
Example, get distinct names that are shared between persons and movies:
```opencypher
MATCH (n :Person) RETURN n.name AS name UNION MATCH (n :Movie) RETURN n.name AS name
```
Example, get all names that are shared between persons and movies (including duplicates):
```opencypher
MATCH (n :Person) RETURN n.name AS name UNION ALL MATCH (n :Movie) RETURN n.name AS name
```
### Writing New Data
For adding new data, you can use the following clauses.
* `CREATE`, for creating new nodes and edges.
* `SET`, for adding new or updating existing labels and properties.
* `DELETE`, for deleting nodes and edges.
* `REMOVE`, for removing labels and properties.
You can still use the `RETURN` clause to produce results after writing, but it
is not mandatory.
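For example (a minimal sketch, assuming a hypothetical `:Person` label), a write
followed by an optional `RETURN` looks like this:
```opencypher
CREATE (n :Person {name: "Alice"}) RETURN n
```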
Details on which kind of data can be stored in *Memgraph* can be found in
**Data Storage** chapter.
#### CREATE
This clause is used to add new nodes and edges to the database. The creation
is done by providing a pattern, similarly to the `MATCH` clause.
For example, to create 2 new nodes connected with a new edge, use this query.
```opencypher
CREATE (node1)-[:edge_type]->(node2)
```
Labels and properties can be set during creation using the same syntax as in
[MATCH](#match) patterns. For example, creating a node with a label and a
property:
```opencypher
CREATE (node :Label {property: "my property value"})
```
Additional information on `CREATE` is
[here](https://neo4j.com/docs/developer-manual/current/cypher/clauses/create/).
#### SET
The `SET` clause is used to update labels and properties of already existing
data.
Example. Incrementing everyone's age by 1.
```opencypher
MATCH (n :Person) SET n.age = n.age + 1
```
Click
[here](https://neo4j.com/docs/developer-manual/current/cypher/clauses/set/)
for a more detailed explanation on what can be done with `SET`.
#### DELETE
This clause is used to delete nodes and edges from the database.
Example. Removing all edges of a single type.
```opencypher
MATCH ()-[edge :type]-() DELETE edge
```
When testing the database, you often want to have a clean start by deleting
every node and edge in the database. It is reasonable that deleting each node
should delete all edges coming into or out of that node.
```opencypher
MATCH (node) DELETE node
```
But, openCypher prevents accidental deletion of edges. Therefore, the above
query will report an error. Instead, you need to use the `DETACH` keyword,
which will remove edges from a node you are deleting. The following should
work and *delete everything* in the database.
```opencypher
MATCH (node) DETACH DELETE node
```
More examples are
[here](https://neo4j.com/docs/developer-manual/current/cypher/clauses/delete/).
#### REMOVE
The `REMOVE` clause is used to remove labels and properties from nodes and
edges.
Example.
```opencypher
MATCH (n :WrongLabel) REMOVE n :WrongLabel, n.property
```
### Reading & Writing
OpenCypher supports combining multiple reads and writes using the
`WITH` clause. In addition to combining, the `MERGE` clause is provided which
may create patterns if they do not exist.
#### WITH
The write part of the query cannot simply be followed by another read part. In
order to combine them, the `WITH` clause must be used. The names this clause
establishes are transferred from one part to another.
For example, creating a node and finding all nodes with the same property.
```opencypher
CREATE (node {property: 42}) WITH node.property AS propValue
MATCH (n {property: propValue}) RETURN n
```
Note that the `node` is not visible after `WITH`, since only `node.property`
was carried over.
This clause behaves very much like `RETURN`, so you should refer to features
of `RETURN`.
#### MERGE
The `MERGE` clause is used to ensure that a pattern you are looking for exists
in the database. This means that if the pattern is not found, it will be
created. In a way, this clause is like a combination of `MATCH` and `CREATE`.
Example. Ensure that a person has at least one friend.
```opencypher
MATCH (n :Person) MERGE (n)-[:FriendOf]->(m)
```
The clause also provides additional features for updating the values depending
on whether the pattern was created or matched. This is achieved with the `ON
CREATE` and `ON MATCH` sub-clauses.
Example. Set different properties depending on what `MERGE` did.
```opencypher
MATCH (n :Person) MERGE (n)-[:FriendOf]->(m)
ON CREATE SET m.prop = "created" ON MATCH SET m.prop = "existed"
```
For more details, click [this
link](https://neo4j.com/docs/developer-manual/current/cypher/clauses/merge/).
### Indexing
An index stores additional information on certain types of data, so that
retrieving said data becomes more efficient. Downsides of indexing are:
* requiring extra storage for each index and
* slowing down writes to the database.
Carefully choosing which data to index can tremendously improve data retrieval
efficiency, and thus make index downsides negligible.
Memgraph automatically indexes labeled data. This improves queries
which fetch nodes by label:
```opencypher
MATCH (n :Label) ... RETURN n
```
Indexing can also be applied to data with a specific combination of label and
property. These are not automatically created; instead, a user needs to create
them explicitly. Creation is done using a special
`CREATE INDEX ON :Label(property)` language construct.
For example, to index nodes which are labeled as `:Person` and have a property
named `age`:
```opencypher
CREATE INDEX ON :Person(age)
```
After the index is created, retrieving those nodes will become more efficient.
For example, the following query will use the index to find matching nodes,
instead of fetching each `:Person` node and checking whether the `age` property
exists and equals `42`.
```opencypher
MATCH (n :Person {age: 42}) RETURN n
```
Using index based retrieval also works when filtering labels and properties
with `WHERE`. For example, the same effect as in the previous example can be
done with:
```opencypher
MATCH (n) WHERE n:Person AND n.age = 42 RETURN n
```
Since the filter inside `WHERE` can contain any kind of expression, the
expression can be complicated enough so that the index does not get used. We
are continuously improving the recognition of index usage opportunities from a
`WHERE` expression. If there is any suspicion that an index may not be used,
we recommend putting properties and labels inside the `MATCH` pattern.
Currently, once an index is created it cannot be deleted. This feature will be
implemented very soon. The expected syntax for removing an index will be `DROP
INDEX ON :Label(property)`.
### Other Features
The following sections describe some of the other supported features.
#### Filtering Variable Length Paths
OpenCypher supports only simple filtering when matching variable length paths.
For example:
```opencypher
MATCH (n)-[edge_list:Type * {x: 42}]-(m)
```
This will produce only those paths whose edges have the required `Type` and `x`
property value. Edges that compose the produced paths are stored in a symbol
named `edge_list`. Naturally, the user could have specified any other symbol
name.
Memgraph extends openCypher with a syntax for arbitrary filter expressions
during path matching. The next example filters edges which have property `x`
between `0` and `10`.
```opencypher
MATCH (n)-[edge_list * (edge, node | 0 < edge.x < 10)]-(m)
```
Here we introduce a lambda function with parentheses, where the first two
arguments, `edge` and `node`, correspond to each edge and node during path
matching. `node` is the destination node we are moving to across the current
`edge`. The last `node` value will be the same value as `m`. Following the
pipe (`|`) character is an arbitrary expression which must produce a boolean
value. If `True`, matching continues, otherwise the path is discarded.
The previous example can be written using the `all` function:
```opencypher
MATCH (n)-[edge_list *]-(m) WHERE all(edge IN edge_list WHERE 0 < edge.x < 10)
```
However, filtering using a lambda function is more efficient because paths
may be discarded earlier in the traversal. Furthermore, it provides more
flexibility for deciding what kind of paths are matched due to more expressive
filtering capabilities. Therefore, filtering through lambda functions should
be preferred whenever possible.
#### Breadth First Search
A typical graph use-case is searching for the shortest path between nodes.
The openCypher standard does not define this feature, so Memgraph provides
a custom implementation, based on the edge expansion syntax.
Finding the shortest path between nodes can be done using breadth-first
expansion:
```opencypher
MATCH (a {id: 723})-[edge_list:Type *bfs..10]-(b {id: 882}) RETURN *
```
The above query will find all paths of length up to 10 between nodes `a` and `b`.
The edge type and maximum path length are used in the same way as in variable
length expansion.
To find only the shortest path, simply append `LIMIT 1` to the `RETURN` clause.
```opencypher
MATCH (a {id: 723})-[edge_list:Type *bfs..10]-(b {id: 882}) RETURN * LIMIT 1
```
Breadth-first expansion allows an arbitrary expression filter that determines
if an expansion is allowed. Following is an example in which expansion is
allowed only over edges whose `x` property is greater than `12` and over nodes
whose `y` property is less than `3`:
```opencypher
MATCH (a {id: 723})-[*bfs..10 (e, n | e.x > 12 and n.y < 3)]-() RETURN *
```
The filter is defined as a lambda function over `e` and `n`, which denote the edge
and node being expanded over in the breadth first search. Note that if the user
omits the edge list symbol (`edge_list` in previous examples) it will not be included
in the result.
There are a few benefits of the breadth-first expansion approach, as opposed to
a specialized `shortestPath` function. For one, it is possible to inject
expressions that filter on nodes and edges along the path itself, not just the final
destination node. Furthermore, it's possible to find multiple paths to multiple destination
nodes regardless of their length. Also, it is possible to simply go through a node's
neighbourhood in breadth-first manner.
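For instance, a sketch of exploring a node's neighbourhood up to 2 expansions
away (assuming a hypothetical `id` property, as in the examples above) could
look like:
```opencypher
MATCH (start {id: 723})-[*bfs..2]-(n) RETURN DISTINCT n
```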
Currently, it isn't possible to get all shortest paths to a single node using
Memgraph's breadth-first expansion.
#### Weighted Shortest Path
Another standard use-case in a graph is searching for the weighted shortest
path between nodes. The openCypher standard does not define this feature, so
Memgraph provides a custom implementation, based on the edge expansion syntax.
Finding the weighted shortest path between nodes is done using the weighted
shortest path expansion:
```opencypher
MATCH (a {id: 723})-[
edge_list *wShortest 10 (e, n | e.weight) total_weight
]-(b {id: 882})
RETURN *
```
The above query will find the shortest path of length up to 10 nodes between
nodes `a` and `b`. The length restriction parameter is optional.
Weighted Shortest Path expansion allows an arbitrary expression that determines
the weight for the current expansion. Total weight of a path is calculated as
the sum of all weights on the path between two nodes. Following is an example in
which the weight between nodes is defined as the product of edge weights
(instead of the sum), assuming all weights are greater than `1`:
```opencypher
MATCH (a {id: 723})-[
edge_list *wShortest 10 (e, n | log(e.weight)) total_weight
]-(b {id: 882})
RETURN exp(total_weight)
```
Weighted Shortest Path expansion also allows an arbitrary expression filter
that determines if an expansion is allowed. Following is an example in which
expansion is allowed only over edges whose `x` property is greater than `12`
and over nodes whose `y` property is less than `3`:
```opencypher
MATCH (a {id: 723})-[
edge_list *wShortest 10 (e, n | e.weight) total_weight (e, n | e.x > 12 and n.y < 3)
]-(b {id: 882})
RETURN exp(total_weight)
```
Both weight and filter expression are defined as lambda functions over `e` and
`n`, which denote the edge and the node being expanded over in the weighted
shortest path search.
#### UNWIND
The `UNWIND` clause is used to unwind a list of values as individual rows.
Example. Produce rows out of a single list.
```opencypher
UNWIND [1,2,3] AS listElement RETURN listElement
```
More examples are
[here](https://neo4j.com/docs/developer-manual/current/cypher/clauses/unwind/).
#### Functions
You have already been introduced to one type of functions, [aggregating
functions](#aggregating). This section contains the list of other supported
functions.
Name | Description
-----------------|------------
`coalesce` | Returns the first non null argument.
`startNode` | Returns the starting node of an edge.
`endNode` | Returns the destination node of an edge.
`degree` | Returns the number of edges (both incoming and outgoing) of a node.
`head` | Returns the first element of a list.
`last` | Returns the last element of a list.
`properties` | Returns the properties of a node or an edge.
`size` | Returns the number of elements in a list or a map. When given a string it returns the number of characters. When given a path it returns the number of expansions (edges) in that path.
`toBoolean` | Converts the argument to a boolean.
`toFloat` | Converts the argument to a floating point number.
`toInteger` | Converts the argument to an integer.
`type` | Returns the type of an edge as a character string.
`keys` | Returns a list of property keys from an edge or a node. Each key is represented as a string of characters.
`labels` | Returns a list of labels from a node. Each label is represented as a character string.
`nodes` | Returns a list of nodes from a path.
`relationships` | Returns a list of relationships from a path.
`range` | Constructs a list of values in the given range.
`tail` | Returns all elements after the first of a given list.
`abs` | Returns the absolute value of a number.
`ceil` | Returns the smallest integer greater than or equal to given number.
`floor` | Returns the largest integer smaller than or equal to given number.
`round` | Returns the number, rounded to the nearest integer. Tie-breaking is done using the *commercial rounding*, where -1.5 produces -2 and 1.5 produces 2.
`exp` | Calculates `e^n` where `e` is the base of the natural logarithm, and `n` is the given number.
`log` | Calculates the natural logarithm of a given number.
`log10` | Calculates the logarithm (base 10) of a given number.
`sqrt` | Calculates the square root of a given number.
`acos` | Calculates the arccosine of a given number.
`asin` | Calculates the arcsine of a given number.
`atan` | Calculates the arctangent of a given number.
`atan2` | Calculates the arctangent2 of a given number.
`cos` | Calculates the cosine of a given number.
`sin` | Calculates the sine of a given number.
`tan` | Calculates the tangent of a given number.
`sign` | Applies the signum function to a given number and returns the result. The signum of a positive number is 1, of a negative number -1, and of 0 is 0.
`e` | Returns the base of the natural logarithm.
`pi` | Returns the constant *pi*.
`rand` | Returns a random floating point number between 0 (inclusive) and 1 (exclusive).
`startsWith` | Check if the first argument starts with the second.
`endsWith` | Check if the first argument ends with the second.
`contains` | Check if the first argument has an element which is equal to the second argument.
`all` | Check if all elements of a list satisfy a predicate.<br/>The syntax is: `all(variable IN list WHERE predicate)`.<br/> NOTE: Whenever possible, use Memgraph's lambda functions when [matching](#filtering-variable-length-paths) instead.
`single` | Check if only one element of a list satisfies a predicate.<br/>The syntax is: `single(variable IN list WHERE predicate)`.
`reduce` | Accumulate list elements into a single result by applying an expression. The syntax is:<br/>`reduce(accumulator = initial_value, variable IN list | expression)`.
`assert` | Raises an exception reported to the client if the given argument is not `true`.
`counter` | Generates integers that are guaranteed to be unique on the database level, for the given counter name.
`counterSet` | Sets the counter with the given name to the given value.
`indexInfo` | Returns a list of all the indexes available in the database. The list includes indexes that are not yet ready for use (they are concurrently being built by another transaction).
`id` | Returns identifier for a given node or edge. To enable automatic generation of the identifiers, `--generate-vertex-ids` and `--generate-edge-ids` parameters have to be set on `true` (enabled in the configuration by default).
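As an illustrative sketch of several of the listed functions (assuming a
hypothetical `:Person` label with `name` and optional `nickname` properties):
```opencypher
MATCH (n :Person)
RETURN labels(n) AS nodeLabels,
       keys(n) AS propertyKeys,
       coalesce(n.nickname, n.name) AS displayName,
       size(n.name) AS nameLength,
       reduce(total = 0, x IN range(1, 5) | total + x) AS sumOneToFive
```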
#### String Operators
Apart from comparison and concatenation operators, openCypher provides special
string operators for easier matching of substrings:
Operator | Description
-------------------|------------
`a STARTS WITH b` | Returns true if prefix of string a is equal to string b.
`a ENDS WITH b` | Returns true if suffix of string a is equal to string b.
`a CONTAINS b` | Returns true if some substring of string a is equal to string b.
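For example (assuming a hypothetical `:Person` label with a `name` property),
these operators can be combined in a `WHERE` filter:
```opencypher
MATCH (n :Person)
WHERE n.name STARTS WITH "Al" AND n.name ENDS WITH "ce" AND n.name CONTAINS "li"
RETURN n.name
```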
#### Parameters
When automating the queries for Memgraph, it comes in handy to change only
some parts of the query. Usually, these parts are values which are used for
filtering results or similar, while the rest of the query remains the same.
Parameters allow reusing the same query, but with different parameter values.
The syntax uses the `$` symbol to designate a parameter name. We don't allow
the old Cypher parameter syntax using curly braces. For example, you can parameterize
filtering a node property:
```opencypher
MATCH (node1 {property: $propertyValue}) RETURN node1
```
You can use parameters instead of any literal in the query, but not instead of
property maps, even though that is allowed in standard openCypher. The following
example is illegal in Memgraph:
```opencypher
MATCH (node1 $propertyValue) RETURN node1
```
To use parameters with the Python driver, use the following syntax:
```python
session.run('CREATE (alice:Person {name: $name, age: $ageValue})',
            name='Alice', ageValue=22).consume()
```
To use parameters whose names are integers, you will need to wrap the parameters
in a dictionary and convert their keys to strings before running the query:
```python
session.run('CREATE (alice:Person {name: $0, age: $1})',
            {'0': "Alice", '1': 22}).consume()
```
To use parameters with some other driver, please consult the appropriate
documentation.
#### CASE
Conditional expressions can be expressed in the openCypher language using the
simple and generic forms of the `CASE` expression. The simple form is used to
compare an expression against multiple predicates. For the first matched
predicate, the result of the expression provided after the `THEN` keyword is
returned. If no predicate matches, the value following `ELSE` is returned if it
is provided, or `null` if `ELSE` is not used:
```opencypher
MATCH (n)
RETURN CASE n.currency WHEN "DOLLAR" THEN "$" WHEN "EURO" THEN "€" ELSE "UNKNOWN" END
```
In the generic form, you don't provide an expression whose value is compared
against predicates; instead, you list multiple predicates and the first one
that evaluates to true is matched:
```opencypher
MATCH (n)
RETURN CASE WHEN n.height < 30 THEN "short" WHEN n.height > 300 THEN "tall" END
```
### Differences
Although we try to implement the openCypher query language as closely to the
language reference as possible, we had to make some changes to enhance the
user experience.
#### Symbolic Names
We don't allow symbolic names (variables, label names...) to be openCypher
keywords (WHERE, MATCH, COUNT, SUM...).
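For example, the first query below should be rejected because the variable name
is a keyword, while the second, with an ordinary name, is allowed (a sketch of
the restriction, not an exhaustive list):
```opencypher
// Not allowed: the variable name clashes with the MATCH keyword.
// MATCH (match) RETURN match
// Allowed: a non-keyword variable name.
MATCH (node) RETURN node
```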
#### Unicode Codepoints in String Literal
Use `\u` followed by 4 hex digits in string literal for UTF-16 codepoint and
`\U` with 8 hex digits for UTF-32 codepoint in Memgraph.
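For example, both literals below should denote the same character, `Ā`
(U+0100); a small sketch:
```opencypher
RETURN "\u0100" AS utf16Escape, "\U00000100" AS utf32Escape
```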
### Difference from Neo4j's Cypher Implementation
The openCypher initiative stems from Neo4j's Cypher query language. Following is a list
of the most important differences between Neo4j's Cypher and Memgraph's openCypher implementation,
for users who are already familiar with Neo4j. There might be other differences not documented
here (especially subtle semantic ones).
#### Unsupported Constructs
* Data importing. Memgraph doesn't support Cypher's CSV importing capabilities.
* The `FOREACH` language construct for performing an operation on every list element.
* The `CALL` construct for a standalone function call. This can be expressed using
`RETURN functioncall()`. For example, with Memgraph you can get information about
the indexes present in the database using the `RETURN indexInfo()` openCypher query.
* Stored procedures.
* Regular expressions for string matching.
* `shortestPath` and `allShortestPaths` functions. `shortestPath` can be expressed using
Memgraph's breadth-first expansion syntax already described in this document.
* Patterns in expressions. For example, Memgraph doesn't support `size((n)-->())`. Most of the time
the same functionalities can be expressed differently in Memgraph using `OPTIONAL` expansions,
function calls etc.
* Map projections such as `MATCH (n) RETURN n {.property1, .property2}`.
#### Unsupported Functions
General purpose functions:
* `exists(n.property)` - This can be expressed using `n.property IS NOT NULL`.
* `length()` is named `size()` in Memgraph.
Path functions:
* `extract()`
Aggregation functions:
* `count(DISTINCT variable)` - This can be expressed using `WITH DISTINCT variable RETURN count(variable)` (see the sketch after this list).
Mathematical functions:
* `percentileDisc()`
* `stDev()`
* `point()`
* `distance()`
* `degrees()`
String functions:
* `replace()`
* `substring()`
* `left()`
* `trim()`
* `toupper()`
* `tolower()`
* `split()`
* `reverse()`
List functions:
* `any()`
* `none()`
* `single()`
* `head()`
* `last()`
* `tail()`
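As a sketch of the `exists()` and `count(DISTINCT ...)` workarounds listed above
(assuming a hypothetical `:Person` label with a `name` property):
```opencypher
// Instead of exists(n.name):
MATCH (n :Person) WHERE n.name IS NOT NULL RETURN n
// Instead of count(DISTINCT n.name):
MATCH (n :Person) WITH DISTINCT n.name AS name RETURN count(name)
```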

@ -0,0 +1,281 @@
## Quick Start
This chapter outlines installing and running Memgraph, as well as executing
basic queries against the database.
### Installation
The Memgraph binary is offered as:
* Debian package for Debian 9 (Stretch);
* RPM package for CentOS 7 and
* Docker image.
After downloading the binary, proceed to the corresponding section below.
NOTE: Currently, newer versions of Memgraph are not backward compatible with
older versions. This is mainly noticeable by unsupported loading of storage
snapshots between different versions.
#### Docker Installation
Before proceeding with the installation, please install the Docker engine on
the system. Instructions on how to install Docker can be found on the
[official Docker website](https://docs.docker.com/engine/installation).
The Memgraph Docker image was built with Docker version `1.12` and should be
compatible with all later versions.
After installing and running Docker, download the Memgraph Docker image and
import it with the following command.
```bash
docker load -i /path/to/memgraph-<version>-docker.tar.gz
```
Memgraph is then started with another docker command.
```bash
docker run -p 7687:7687 \
-v mg_lib:/var/lib/memgraph -v mg_log:/var/log/memgraph -v mg_etc:/etc/memgraph \
memgraph
```
On success, expect to see output similar to the following.
```bash
Starting 8 workers
Server is fully armed and operational
Listening on 0.0.0.0 at 7687
```
Memgraph is now ready to process queries; you may now proceed to
[querying](#querying). To stop Memgraph, press `Ctrl-c`.
Memgraph configuration is available in Docker's named volume `mg_etc`. On
Linux systems it should be in
`/var/lib/docker/volumes/mg_etc/_data/memgraph.conf`. After changing the
configuration, Memgraph needs to be restarted.
##### Note about named volumes
In case named volumes are reused between different versions of Memgraph, the user
has to be careful because Docker will overwrite folders within the container
with existing data from the host machine. If a new file was introduced, or the
two versions of Memgraph are not compatible, the new features won't work or
Memgraph won't be able to work correctly. The easiest way to solve the issue is
to use another named volume or to remove the existing named volume from the host
with the following command.
```bash
docker volume rm <volume_name>
```
Named Docker volumes used in this documentation are: `mg_etc`, `mg_log` and
`mg_lib`. E.g. to avoid any configuration issues between different Memgraph
versions, `docker volume rm mg_etc` can be executed before running a new
container.
Another valid option is to try to migrate your existing volume to a
newer version of Memgraph. In case of any issues, send an email to
`tech@memgraph.com`.
##### Note for OS X/macOS Users
Although unlikely, some OS X/macOS users might experience minor difficulties
after following the Docker installation instructions. Instead of running on
`localhost`, a Docker container for Memgraph might be running on a custom IP
address. Fortunately, that IP address can be found using the following
algorithm:
1) Find out the container ID of the Memgraph container
By issuing the command `docker ps` the user should get an output similar to the
following:
```bash
CONTAINER ID IMAGE COMMAND CREATED ...
9397623cd87e memgraph "/usr/lib/memgraph/m…" 2 seconds ago ...
```
At this point, it is important to remember the container ID of the Memgraph
image. In our case, that is `9397623cd87e`.
2) Use the container ID to retrieve an IP of the container
```bash
docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' 9397623cd87e
```
The command above should yield the sought IP. If that IP does not correspond to
`localhost`, it should be used instead of `localhost` when firing up the
`neo4j-client` in the [querying](#querying) section.
#### Debian Package Installation
After downloading Memgraph as a Debian package, install it by running the
following.
```bash
dpkg -i /path/to/memgraph_<version>.deb
```
If the installation was successful, Memgraph should already be running. To
make sure that is true, start it explicitly with the command:
```bash
systemctl start memgraph
```
To verify that Memgraph is running, run the following command.
```bash
journalctl --unit memgraph
```
It is expected to see something like the following output.
```bash
Nov 23 13:40:13 hostname memgraph[14654]: Starting 8 BoltS workers
Nov 23 13:40:13 hostname memgraph[14654]: BoltS server is fully armed and operational
Nov 23 13:40:13 hostname memgraph[14654]: BoltS listening on 0.0.0.0 at 7687
```
Memgraph is now ready to process queries; you may now proceed to
[querying](#querying). To shut down the Memgraph server, issue the following
command.
```bash
systemctl stop memgraph
```
Memgraph configuration is available in `/etc/memgraph/memgraph.conf`. After
changing the configuration, Memgraph needs to be restarted.
#### RPM Package Installation
If you downloaded the RPM package of Memgraph, you can install it by running
the following command.
```bash
rpm -U /path/to/memgraph-<version>.rpm
```
After the successful installation, Memgraph can be started as a service. To do
so, type the following command.
```bash
systemctl start memgraph
```
To verify that Memgraph is running, run the following command.
```bash
journalctl --unit memgraph
```
It is expected to see something like the following output.
```bash
Nov 23 13:40:13 hostname memgraph[14654]: Starting 8 BoltS workers
Nov 23 13:40:13 hostname memgraph[14654]: BoltS server is fully armed and operational
Nov 23 13:40:13 hostname memgraph[14654]: BoltS listening on 0.0.0.0 at 7687
```
Memgraph is now ready to process queries; you may now proceed to
[querying](#querying). To shut down the Memgraph server, issue the following
command.
```bash
systemctl stop memgraph
```
Memgraph configuration is available in `/etc/memgraph/memgraph.conf`. After
changing the configuration, Memgraph needs to be restarted.
### Querying
Memgraph supports the openCypher query language which has been developed by
[Neo4j](http://neo4j.com). The language is currently going through a
vendor-independent standardization process. It's a declarative language
developed specifically for interaction with graph databases.
The easiest way to execute openCypher queries against Memgraph is using
Neo4j's command-line tool. The command-line tool `neo4j-client` can be installed as
described [on the official website](https://neo4j-client.net).
After installing `neo4j-client`, connect to the running Memgraph instance by
issuing the following shell command.
```bash
neo4j-client -u "" -p "" localhost 7687
```
After the client has started it should present a command prompt similar to:
```bash
neo4j-client 2.1.3
Enter `:help` for usage hints.
Connected to 'neo4j://@localhost:7687'
neo4j>
```
At this point it is possible to execute openCypher queries on Memgraph. Each
query needs to end with the `;` (*semicolon*) character. For example:
```opencypher
CREATE (u:User {name: "Alice"})-[:Likes]->(m:Software {name: "Memgraph"});
```
The above will create 2 nodes in the database, one labeled "User" with name
"Alice" and the other labeled "Software" with name "Memgraph". It will also
create a relationship that "Alice" *likes* "Memgraph".
To find created nodes and relationships, execute the following query:
```opencypher
MATCH (u:User)-[r]->(x) RETURN u, r, x;
```
#### Supported Languages
If users wish to query Memgraph programmatically, they can do so using the
[Bolt protocol](https://boltprotocol.org). Bolt was designed for efficient
communication with graph databases and Memgraph supports
[Version 1](https://boltprotocol.org/v1) of the protocol. Bolt protocol drivers
for some popular programming languages are listed below:
* [Java](https://github.com/neo4j/neo4j-java-driver)
* [Python](https://github.com/neo4j/neo4j-python-driver)
* [JavaScript](https://github.com/neo4j/neo4j-javascript-driver)
* [C#](https://github.com/neo4j/neo4j-dotnet-driver)
* [Ruby](https://github.com/neo4jrb/neo4j)
* [Haskell](https://github.com/zmactep/hasbolt)
* [PHP](https://github.com/graphaware/neo4j-bolt-php)
We have included some basic usage examples for some of the supported languages
in the **Drivers** section.
### Telemetry
Telemetry is an automated process by which some useful data is collected at
a remote point. At Memgraph, we use telemetry for the sole purpose of improving
our product, and to that end we collect some data about the machine that executes the
database (CPU, memory, OS and kernel information) as well as some data about the
database runtime (CPU usage, memory usage, vertices and edges count).
Here at Memgraph, we deeply care about the privacy of our users and do not
collect any sensitive information. If users wish to disable Memgraph's telemetry
features, they can easily do so by either altering the line in
`/etc/memgraph/memgraph.conf` that enables telemetry (`--telemetry-enabled=true`)
into `--telemetry-enabled=false`, or by including the `--telemetry-enabled=false`
as a command-line argument when running the executable.
### Where to Next
To learn more about the openCypher language, visit **openCypher Query
Language** chapter in this document. For real-world examples of how to use
Memgraph visit **Examples** chapter. Details on what can be stored in Memgraph
are in **Data Storage** chapter.
We *welcome and encourage* your feedback!

docs/user_technical/run Executable file
@ -0,0 +1,20 @@
#!/bin/bash
working_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# curl and php are required
if [ "$( which curl )" == "" ] || [ "$( which php )" == "" ]; then
echo "curl and php are required!"
exit 1
fi
cd ${working_dir}
generator="couscous.phar"
# download the generator if it's not already downloaded
if [ ! -f ${generator} ]; then
curl -OS http://couscous.io/${generator}
fi
# run the preview
php ${generator} preview

@ -0,0 +1,105 @@
## Durability and Data Recovery
*Memgraph* uses two mechanisms to ensure the durability of the stored data:
* write-ahead logging (WAL) and
* taking periodic snapshots.
Write-ahead logging works by logging all database modifications to a file.
This ensures that all operations are done atomically and provides a trace of
steps needed to reconstruct the database state.
Snapshots are taken periodically during the entire runtime of *Memgraph*. When
a snapshot is triggered, the whole data storage is written to disk. The
snapshot file provides a quicker way to restore the database state.
Database recovery is done on startup from the most recently found snapshot
file. Since the snapshot may be older than the most recent update logged in
the WAL file, the recovery process will apply the remaining state changes
found in the said WAL file.
NOTE: Snapshot and WAL files are not (currently) compatible between *Memgraph*
versions.
Behaviour of the above mechanisms can be tweaked in the configuration file,
usually found in `/etc/memgraph/memgraph.conf`.
In addition to the above mentioned data durability and recovery, a
snapshot file may be generated using *Memgraph's* import tools. For more
information, take a look at **Import Tools** chapter.
## Storable Data Types
Since *Memgraph* is a *graph* database management system, data is stored in
the form of graph elements: nodes and edges. Each graph element can also
contain various types of data. This chapter describes which data types are
supported in *Memgraph*.
### Node Labels & Edge Types
Each node can have any number of labels. A label is a text value, which can be
used to *label* or group nodes according to users' desires. A user can change
labels at any time. Similarly to labels, each edge can have a type,
represented as text. Unlike nodes, which can have multiple labels or none at
all, edges *must* have exactly one edge type. Another difference from labels is
that the edge types are set upon creation and never modified again.
### Properties
Nodes and edges can store various properties. These are like mappings or
tables containing property names and their accompanying values. Property names
are represented as text, while values can be of different types. Each property
name can store a single value; it is not possible to have multiple properties
with the same name on a single graph element. Naturally, the same property
names can be found across multiple graph elements. Also, there are no
restrictions on the number of properties that can be stored in a single graph
element. The only restriction is that the values must be of the supported
types. Following is a table of supported data types.
Type | Description
-----------|------------
`Null` | Denotes that the property has no value. This is the same as if the property does not exist.
`String` | A character string, i.e. text.
`Boolean` | A boolean value, either `true` or `false`.
`Integer` | An integer number.
`Float` | A floating-point number, i.e. a real number.
`List` | A list containing any number of property values of any supported type. It can be used to store multiple values under a single property name.
`Map` | A mapping of string keys to values of any supported type.
Note that even though it's possible to store `List` and `Map` property values, it is not possible to modify them. It is however possible to replace them completely. So, the following queries are legal:
```opencypher
CREATE (:Node {property: [1, 2, 3]})
CREATE (:Node {property: {key: "value"}})
```
However, these queries are not:
```opencypher
MATCH (n:Node) SET n.property[0] = 0
MATCH (n:Node) SET n.property.key = "other value"
```
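To change such a value, the whole property can be overwritten instead; a
minimal sketch:
```opencypher
MATCH (n:Node) SET n.property = [0, 2, 3]
MATCH (n:Node) SET n.property = {key: "other value"}
```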
### Cold Data on Disk
Although *Memgraph* is an in-memory database by default, it offers an option
to store a certain amount of data on disk. More precisely, the user can pass
a list of properties they wish to keep stored on disk via the command line.
In certain cases, this might result in a significant performance boost due to
reduced memory usage. It is recommended to use this feature on large,
cold properties, i.e. properties that are rarely accessed.
For example, a user of a library database might identify author biographies
and book summaries as cold properties. In that case, the user should run
*Memgraph* as follows:
```bash
/usr/lib/memgraph/memgraph --properties-on-disk biography,summary
```
Note that the usage of *Memgraph* has not changed, i.e. durability and
data recovery mechanisms are still in place and the query language remains
the same. It is also important to note that the user cannot change the storage
location of a property while *Memgraph* is running. Naturally, the user can
reload their database from snapshot, provide a different list of properties on
disk and rest assured that only those properties will be stored on disk.

File diff suppressed because it is too large.
@ -0,0 +1,61 @@
## Upcoming Features
This chapter describes some of the planned features that we at Memgraph are
working on.
### Performance Improvements
Excellent database performance is one of Memgraph's long-standing goals. We
will be continually working on improving the performance. This includes:
* query compilation;
* query execution;
* core engine performance;
* algorithmic improvements (i.e. bidirectional breadth-first search);
* memory usage and
* other improvements.
### Label-Property Index Usage Improvements
Currently, indexes over combinations of labels and properties can be created, but
cannot be deleted. We plan to add a new query language construct which will
allow deletion of created indices.
### Improving openCypher Support
Although we have implemented the most common features of the openCypher query
language, there are other useful features we are still working on.
#### Functions
Memgraph's openCypher implementation supports the most useful functions, but
there are more which openCypher provides. Some are related to not yet
implemented features like paths, while some may use the features Memgraph
already supports. Out of the remaining functions, some are more useful than
others and as such they will be supported sooner.
#### List Comprehensions
List comprehensions are similar to the supported `collect` function, which
generates a list out of multiple values. But unlike `collect`, list
comprehensions offer a powerful mechanism for filtering or otherwise
manipulating values which are collected into a list.
For example, getting numbers between 0 and 10 and squaring them:
```opencypher
RETURN [x IN range(0, 10) | x^2] AS squares
```
As another example, collecting `:Person` nodes with `age` less than 42 can be
achieved without list comprehensions with:
```opencypher
MATCH (n :Person) WHERE n.age < 42 RETURN collect(n)
```
Using list comprehensions, the same can be done with the query:
```opencypher
MATCH (n :Person) RETURN [n IN collect(n) WHERE n.age < 42]
```

@ -1,15 +0,0 @@
# Memgraph Operating Environments
## Issues related to build toolchain
* GCC 11.2 (toolchain-v4) doesn't compile on Fedora 38, multiple definitions of enum issue
* spdlog 1.10/11 doesn't work with fmt 10.0.0
## os
Under the `os` directory, you can find scripts to install all required system
dependencies on operating systems where Memgraph natively builds. The testing
script helps to see how to install all packages (in the case of a new package),
or make any adjustments in the overall system setup. Also, the testing script
helps check if Memgraph runs on a freshly installed operating system (with no
packages installed).

@ -1,6 +0,0 @@
*.deb
*.deb.*
*.rpm
*.rpm.*
*.tar.gz
*.tar.gz.*

@ -1,190 +0,0 @@
#!/bin/bash
set -Eeuo pipefail
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
source "$DIR/../util.sh"
check_operating_system "amzn-2"
check_architecture "x86_64"
TOOLCHAIN_BUILD_DEPS=(
git gcc gcc-c++ make # generic build tools
wget # used for archive download
gnupg2 # used for archive signature verification
tar gzip bzip2 xz unzip # used for archive unpacking
zlib-devel # zlib library used for all builds
expat-devel xz-devel python3-devel texinfo
curl libcurl-devel # for cmake
readline-devel # for cmake and llvm
libffi-devel libxml2-devel # for llvm
libedit-devel pcre-devel pcre2-devel automake bison # for swig
file
openssl-devel
gmp-devel
gperf
diffutils
patch
libipt libipt-devel # intel
perl # for openssl
)
TOOLCHAIN_RUN_DEPS=(
make # generic build tools
tar gzip bzip2 xz # used for archive unpacking
zlib # zlib library used for all builds
expat xz-libs python3 # for gdb
readline # for cmake and llvm
libffi libxml2 # for llvm
openssl-devel
)
MEMGRAPH_BUILD_DEPS=(
git # source code control
make cmake # build system
wget # for downloading libs
libuuid-devel java-11-openjdk # required by antlr
readline-devel # for memgraph console
python3-devel # for query modules
openssl-devel
openssl
libseccomp-devel
python3 python3-pip nmap-ncat # for tests
#
# IMPORTANT: python3-yaml does NOT exist on CentOS
# Install it using `pip3 install PyYAML`
#
PyYAML # Package name here does not correspond to the yum package!
libcurl-devel # mg-requests
rpm-build rpmlint # for RPM package building
doxygen graphviz # source documentation generators
which nodejs golang custom-golang1.18.9 zip unzip java-11-openjdk-devel jdk-17 custom-maven3.9.3 # for driver tests
autoconf # for jemalloc code generation
libtool # for protobuf code generation
cyrus-sasl-devel
)
MEMGRAPH_TEST_DEPS="${MEMGRAPH_BUILD_DEPS[*]}"
MEMGRAPH_RUN_DEPS=(
logrotate openssl python3 libseccomp
)
NEW_DEPS=(
wget curl tar gzip
)
list() {
echo "$1"
}
check() {
local missing=""
# On Fedora yum/dnf and python10 use newer glibc which is not compatible
# with ours, so we need to momentarily disable env
local OLD_LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-""}
LD_LIBRARY_PATH=""
for pkg in $1; do
if [ "$pkg" == custom-maven3.9.3 ]; then
if [ ! -f "/opt/apache-maven-3.9.3/bin/mvn" ]; then
missing="$pkg $missing"
fi
continue
fi
if [ "$pkg" == custom-golang1.18.9 ]; then
if [ ! -f "/opt/go1.18.9/go/bin/go" ]; then
missing="$pkg $missing"
fi
continue
fi
if [ "$pkg" == "PyYAML" ]; then
if ! python3 -c "import yaml" >/dev/null 2>/dev/null; then
missing="$pkg $missing"
fi
continue
fi
if ! yum list installed "$pkg" >/dev/null 2>/dev/null; then
missing="$pkg $missing"
fi
done
if [ "$missing" != "" ]; then
echo "MISSING PACKAGES: $missing"
exit 1
fi
LD_LIBRARY_PATH=${OLD_LD_LIBRARY_PATH}
}
install() {
cd "$DIR"
if [ "$EUID" -ne 0 ]; then
echo "Please run as root."
exit 1
fi
# If GitHub Actions runner is installed, append LANG to the environment.
# Python related tests don't work without the LANG export.
if [ -d "/home/gh/actions-runner" ]; then
echo "LANG=en_US.utf8" >> /home/gh/actions-runner/.env
else
echo "NOTE: export LANG=en_US.utf8"
fi
yum update -y
for pkg in $1; do
if [ "$pkg" == custom-maven3.9.3 ]; then
install_custom_maven "3.9.3"
continue
fi
if [ "$pkg" == custom-golang1.18.9 ]; then
install_custom_golang "1.18.9"
continue
fi
if [ "$pkg" == jdk-17 ]; then
if ! yum list installed jdk-17 >/dev/null 2>/dev/null; then
wget --no-check-certificate -c --header "Cookie: oraclelicense=accept-securebackup-cookie" https://download.oracle.com/java/17/latest/jdk-17_linux-x64_bin.rpm
rpm -Uvh jdk-17_linux-x64_bin.rpm
# NOTE: Set Java 11 as default.
update-alternatives --set java java-11-openjdk.x86_64
update-alternatives --set javac java-11-openjdk.x86_64
fi
continue
fi
if [ "$pkg" == libipt ]; then
if ! yum list installed libipt >/dev/null 2>/dev/null; then
yum install -y http://repo.okay.com.mx/centos/8/x86_64/release/libipt-1.6.1-8.el8.x86_64.rpm
fi
continue
fi
if [ "$pkg" == libipt-devel ]; then
if ! yum list installed libipt-devel >/dev/null 2>/dev/null; then
yum install -y http://repo.okay.com.mx/centos/8/x86_64/release/libipt-devel-1.6.1-8.el8.x86_64.rpm
fi
continue
fi
if [ "$pkg" == nodejs ]; then
if ! yum list installed nodejs >/dev/null 2>/dev/null; then
yum install https://rpm.nodesource.com/pub_16.x/nodistro/repo/nodesource-release-nodistro-1.noarch.rpm -y
yum install nodejs -y --setopt=nodesource-nodejs.module_hotfixes=1
fi
continue
fi
if [ "$pkg" == PyYAML ]; then
if [ -z ${SUDO_USER+x} ]; then # Running as root (e.g. Docker).
pip3 install --user PyYAML
else # Running using sudo.
sudo -H -u "$SUDO_USER" bash -c "pip3 install --user PyYAML"
fi
continue
fi
if [ "$pkg" == java-11-openjdk ]; then
amazon-linux-extras install -y java-openjdk11
continue
fi
if [ "$pkg" == java-11-openjdk-devel ]; then
amazon-linux-extras install -y java-openjdk11
yum install -y java-11-openjdk-devel
continue
fi
yum install -y "$pkg"
done
}
deps=$2"[*]"
"$1" "${!deps}"

@ -1,189 +0,0 @@
#!/bin/bash
set -Eeuo pipefail
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
source "$DIR/../util.sh"
check_operating_system "centos-7"
check_architecture "x86_64"
TOOLCHAIN_BUILD_DEPS=(
coreutils gcc gcc-c++ make # generic build tools
wget # used for archive download
gnupg2 # used for archive signature verification
tar gzip bzip2 xz unzip # used for archive unpacking
zlib-devel # zlib library used for all builds
expat-devel libipt libipt-devel libbabeltrace-devel xz-devel python3-devel # gdb
texinfo # gdb
libcurl-devel # cmake
curl # snappy
readline-devel # cmake and llvm
libffi-devel libxml2-devel perl-Digest-MD5 # llvm
libedit-devel pcre-devel pcre2-devel automake bison # swig
file
openssl-devel
gmp-devel
gperf
patch
)
TOOLCHAIN_RUN_DEPS=(
make # generic build tools
tar gzip bzip2 xz # used for archive unpacking
zlib # zlib library used for all builds
expat libipt libbabeltrace xz-libs python3 # for gdb
readline # for cmake and llvm
libffi libxml2 # for llvm
openssl-devel
)
MEMGRAPH_BUILD_DEPS=(
make cmake pkgconfig # build system
curl wget # for downloading libs
libuuid-devel java-11-openjdk # required by antlr
readline-devel # for memgraph console
python3-devel # for query modules
openssl-devel
openssl
libseccomp-devel
python3 python-virtualenv python3-pip nmap-ncat # for qa, macro_benchmark and stress tests
#
# IMPORTANT: python3-yaml does NOT exist on CentOS
# Install it using `pip3 install PyYAML`
#
PyYAML # Package name here does not correspond to the yum package!
libcurl-devel # mg-requests
sbcl # for custom Lisp C++ preprocessing
rpm-build rpmlint # for RPM package building
doxygen graphviz # source documentation generators
which mono-complete dotnet-sdk-3.1 golang custom-golang1.18.9 # for driver tests
nodejs zip unzip java-11-openjdk-devel jdk-17 custom-maven3.9.3 # for driver tests
autoconf # for jemalloc code generation
libtool # for protobuf code generation
cyrus-sasl-devel
)
MEMGRAPH_TEST_DEPS="${MEMGRAPH_BUILD_DEPS[*]}"
MEMGRAPH_RUN_DEPS=(
logrotate openssl python3 libseccomp
)
NEW_DEPS=(
wget curl tar gzip
)
list() {
echo "$1"
}
check() {
local missing=""
for pkg in $1; do
if [ "$pkg" == custom-maven3.9.3 ]; then
if [ ! -f "/opt/apache-maven-3.9.3/bin/mvn" ]; then
missing="$pkg $missing"
fi
continue
fi
if [ "$pkg" == custom-golang1.18.9 ]; then
if [ ! -f "/opt/go1.18.9/go/bin/go" ]; then
missing="$pkg $missing"
fi
continue
fi
if [ "$pkg" == git ]; then
if ! which "git" >/dev/null; then
missing="git $missing"
fi
continue
fi
if [ "$pkg" == "PyYAML" ]; then
if ! python3 -c "import yaml" >/dev/null 2>/dev/null; then
missing="$pkg $missing"
fi
continue
fi
if ! yum list installed "$pkg" >/dev/null 2>/dev/null; then
missing="$pkg $missing"
fi
done
if [ "$missing" != "" ]; then
echo "MISSING PACKAGES: $missing"
exit 1
fi
}
install() {
cd "$DIR"
if [ "$EUID" -ne 0 ]; then
echo "Please run as root."
exit 1
fi
# If GitHub Actions runner is installed, append LANG to the environment.
# Python related tests don't work without the LANG export.
if [ -d "/home/gh/actions-runner" ]; then
echo "LANG=en_US.utf8" >> /home/gh/actions-runner/.env
else
echo "NOTE: export LANG=en_US.utf8"
fi
yum install -y epel-release
yum remove -y ius-release
yum install -y \
https://repo.ius.io/ius-release-el7.rpm
yum update -y
yum install -y wget python3 python3-pip
yum install -y git
for pkg in $1; do
if [ "$pkg" == custom-maven3.9.3 ]; then
install_custom_maven "3.9.3"
continue
fi
if [ "$pkg" == custom-golang1.18.9 ]; then
install_custom_golang "1.18.9"
continue
fi
if [ "$pkg" == jdk-17 ]; then
if ! yum list installed jdk-17 >/dev/null 2>/dev/null; then
wget https://download.oracle.com/java/17/latest/jdk-17_linux-x64_bin.rpm
rpm -ivh jdk-17_linux-x64_bin.rpm
update-alternatives --set java java-11-openjdk.x86_64
update-alternatives --set javac java-11-openjdk.x86_64
fi
continue
fi
if [ "$pkg" == libipt ]; then
if ! yum list installed libipt >/dev/null 2>/dev/null; then
yum install -y http://repo.okay.com.mx/centos/8/x86_64/release/libipt-1.6.1-8.el8.x86_64.rpm
fi
continue
fi
if [ "$pkg" == libipt-devel ]; then
if ! yum list installed libipt-devel >/dev/null 2>/dev/null; then
yum install -y http://repo.okay.com.mx/centos/8/x86_64/release/libipt-devel-1.6.1-8.el8.x86_64.rpm
fi
continue
fi
if [ "$pkg" == dotnet-sdk-3.1 ]; then
if ! yum list installed dotnet-sdk-3.1 >/dev/null 2>/dev/null; then
wget -nv https://packages.microsoft.com/config/centos/7/packages-microsoft-prod.rpm -O packages-microsoft-prod.rpm
rpm -Uvh https://packages.microsoft.com/config/centos/7/packages-microsoft-prod.rpm
yum update -y
yum install -y dotnet-sdk-3.1
fi
continue
fi
if [ "$pkg" == PyYAML ]; then
if [ -z ${SUDO_USER+x} ]; then # Running as root (e.g. Docker).
pip3 install --user PyYAML
else # Running using sudo.
sudo -H -u "$SUDO_USER" bash -c "pip3 install --user PyYAML"
fi
continue
fi
yum install -y "$pkg"
done
}
deps=$2"[*]"
"$1" "${!deps}"

@ -1,199 +0,0 @@
#!/bin/bash
set -Eeuo pipefail
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
source "$DIR/../util.sh"
check_operating_system "centos-9"
check_architecture "x86_64"
TOOLCHAIN_BUILD_DEPS=(
wget # used for archive download
coreutils-common gcc gcc-c++ make # generic build tools
# NOTE: Pure libcurl conflicts with libcurl-minimal
libcurl-devel # cmake build requires it
gnupg2 # used for archive signature verification
tar gzip bzip2 xz unzip # used for archive unpacking
zlib-devel # zlib library used for all builds
expat-devel xz-devel python3-devel texinfo libbabeltrace-devel # for gdb
readline-devel # for cmake and llvm
libffi-devel libxml2-devel # for llvm
libedit-devel pcre-devel pcre2-devel automake bison # for swig
file
openssl-devel
gmp-devel
gperf
diffutils
libipt libipt-devel # intel
patch
)
TOOLCHAIN_RUN_DEPS=(
make # generic build tools
tar gzip bzip2 xz # used for archive unpacking
zlib # zlib library used for all builds
expat xz-libs python3 # for gdb
readline # for cmake and llvm
libffi libxml2 # for llvm
openssl-devel
perl # for openssl
)
MEMGRAPH_BUILD_DEPS=(
git # source code control
make cmake pkgconf-pkg-config # build system
wget # for downloading libs
libuuid-devel java-11-openjdk # required by antlr
readline-devel # for memgraph console
python3-devel # for query modules
openssl-devel
libseccomp-devel
python3 python3-pip python3-virtualenv nmap-ncat # for qa, macro_benchmark and stress tests
#
# IMPORTANT: python3-yaml does NOT exist on CentOS
# Install it manually using `pip3 install PyYAML`
#
PyYAML # Package name here does not correspond to the yum package!
libcurl-devel # mg-requests
rpm-build rpmlint # for RPM package building
doxygen graphviz # source documentation generators
which nodejs golang custom-golang1.18.9 # for driver tests
zip unzip java-11-openjdk-devel java-17-openjdk java-17-openjdk-devel custom-maven3.9.3 # for driver tests
sbcl # for custom Lisp C++ preprocessing
autoconf # for jemalloc code generation
libtool # for protobuf code generation
cyrus-sasl-devel
)
MEMGRAPH_TEST_DEPS="${MEMGRAPH_BUILD_DEPS[*]}"
MEMGRAPH_RUN_DEPS=(
logrotate openssl python3 libseccomp
)
NEW_DEPS=(
wget curl tar gzip
)
list() {
echo "$1"
}
check() {
local missing=""
for pkg in $1; do
if [ "$pkg" == custom-maven3.9.3 ]; then
if [ ! -f "/opt/apache-maven-3.9.3/bin/mvn" ]; then
missing="$pkg $missing"
fi
continue
fi
if [ "$pkg" == custom-golang1.18.9 ]; then
if [ ! -f "/opt/go1.18.9/go/bin/go" ]; then
missing="$pkg $missing"
fi
continue
fi
if [ "$pkg" == "PyYAML" ]; then
if ! python3 -c "import yaml" >/dev/null 2>/dev/null; then
missing="$pkg $missing"
fi
continue
fi
if [ "$pkg" == "python3-virtualenv" ]; then
continue
fi
if ! yum list installed "$pkg" >/dev/null 2>/dev/null; then
missing="$pkg $missing"
fi
done
if [ "$missing" != "" ]; then
echo "MISSING PACKAGES: $missing"
exit 1
fi
}
install() {
cd "$DIR"
if [ "$EUID" -ne 0 ]; then
echo "Please run as root."
exit 1
fi
# If GitHub Actions runner is installed, append LANG to the environment.
# Python related tests don't work without the LANG export.
if [ -d "/home/gh/actions-runner" ]; then
echo "LANG=en_US.utf8" >> /home/gh/actions-runner/.env
else
echo "NOTE: export LANG=en_US.utf8"
fi
# --nobest is used because of libipt because we install custom versions
# because libipt-devel is not available on CentOS 9 Stream
yum update -y --nobest
yum install -y wget git python3 python3-pip
for pkg in $1; do
if [ "$pkg" == custom-maven3.9.3 ]; then
install_custom_maven "3.9.3"
continue
fi
if [ "$pkg" == custom-golang1.18.9 ]; then
install_custom_golang "1.18.9"
continue
fi
# Since there is no support for libipt-devel for CentOS 9 we install
# Fedoras version of same libs, they are the same version but released
# for different OS
# TODO Update when libipt-devel releases for CentOS 9
if [ "$pkg" == libipt ]; then
if ! dnf list installed libipt >/dev/null 2>/dev/null; then
dnf install -y http://repo.okay.com.mx/centos/8/x86_64/release/libipt-1.6.1-8.el8.x86_64.rpm
fi
continue
fi
if [ "$pkg" == libipt-devel ]; then
if ! dnf list installed libipt-devel >/dev/null 2>/dev/null; then
dnf install -y http://repo.okay.com.mx/centos/8/x86_64/release/libipt-devel-1.6.1-8.el8.x86_64.rpm
fi
continue
fi
if [ "$pkg" == libbabeltrace-devel ]; then
if ! dnf list installed libbabeltrace-devel >/dev/null 2>/dev/null; then
dnf install -y http://mirror.stream.centos.org/9-stream/CRB/x86_64/os/Packages/libbabeltrace-devel-1.5.8-10.el9.x86_64.rpm
fi
continue
fi
if [ "$pkg" == sbcl ]; then
if ! dnf list installed cl-asdf >/dev/null 2>/dev/null; then
dnf install -y https://pkgs.dyn.su/el8/base/x86_64/cl-asdf-20101028-18.el8.noarch.rpm
fi
if ! dnf list installed common-lisp-controller >/dev/null 2>/dev/null; then
dnf install -y https://pkgs.dyn.su/el8/base/x86_64/common-lisp-controller-7.4-20.el8.noarch.rpm
fi
if ! dnf list installed sbcl >/dev/null 2>/dev/null; then
dnf install -y https://pkgs.dyn.su/el8/base/x86_64/sbcl-2.0.1-4.el8.x86_64.rpm
fi
continue
fi
if [ "$pkg" == PyYAML ]; then
if [ -z ${SUDO_USER+x} ]; then # Running as root (e.g. Docker).
pip3 install --user PyYAML
else # Running using sudo.
sudo -H -u "$SUDO_USER" bash -c "pip3 install --user PyYAML"
fi
continue
fi
if [ "$pkg" == python3-virtualenv ]; then
if [ -z ${SUDO_USER+x} ]; then # Running as root (e.g. Docker).
pip3 install virtualenv
pip3 install virtualenvwrapper
else # Running using sudo.
sudo -H -u "$SUDO_USER" bash -c "pip3 install virtualenv"
sudo -H -u "$SUDO_USER" bash -c "pip3 install virtualenvwrapper"
fi
continue
fi
yum install -y "$pkg"
done
}
deps=$2"[*]"
"$1" "${!deps}"

Some files were not shown because too many files have changed in this diff.