2024-02-29 09:47:18 +01:00

1604 lines
60 KiB

# Copyright 2022 Memgraph Ltd.
# Use of this software is governed by the Business Source License
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
# License, and you may not use this file except in compliance with the Business Source License.
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0, included in the file
# licenses/APL.txt.
import os
import shutil
import sys
import tempfile
import interactive_mg_runner
import pytest
from common import connect, execute_and_fetch_all, safe_execute
from mg_utils import mg_sleep_and_assert, mg_sleep_and_assert_collection
interactive_mg_runner.SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
interactive_mg_runner.PROJECT_DIR = os.path.normpath(
os.path.join(interactive_mg_runner.SCRIPT_DIR, "..", "..", "..", "..")
interactive_mg_runner.BUILD_DIR = os.path.normpath(os.path.join(interactive_mg_runner.PROJECT_DIR, "build"))
interactive_mg_runner.MEMGRAPH_BINARY = os.path.normpath(os.path.join(interactive_mg_runner.BUILD_DIR, "memgraph"))
TEMP_DIR = tempfile.TemporaryDirectory().name
"instance_1": {
"args": [
"log_file": "instance_1.log",
"data_directory": f"{TEMP_DIR}/instance_1",
"setup_queries": [],
"instance_2": {
"args": [
"log_file": "instance_2.log",
"data_directory": f"{TEMP_DIR}/instance_2",
"setup_queries": [],
"instance_3": {
"args": [
"log_file": "instance_3.log",
"data_directory": f"{TEMP_DIR}/instance_3",
"setup_queries": [],
"coordinator": {
"args": [
"log_file": "coordinator.log",
"setup_queries": [
"REGISTER INSTANCE instance_1 ON '' WITH '';",
"REGISTER INSTANCE instance_2 ON '' WITH '';",
"REGISTER INSTANCE instance_3 ON '' WITH '';",
"SET INSTANCE instance_3 TO MAIN",
@pytest.mark.parametrize("data_recovery", ["false", "true"])
def test_replication_works_on_failover_replica_1_epoch_2_commits_away(data_recovery):
# Goal of this test is to check the replication works after failover command.
# 1. We start all replicas, main and coordinator manually
# 2. We check that main has correct state
# 3. Create initial data on MAIN
# 4. Expect data to be copied on all replicas
# 5. Kill instance_1 (replica 1)
# 6. Create data on MAIN and expect to be copied to only one replica (instance_2)
# 7. Kill main
# 8. Instance_2 new MAIN
# 9. Create vertex on instance 2
# 10. Start instance_1 (it should have one commit on old epoch and new epoch with new commit shouldn't be replicated)
# 11. Expect data to be copied on instance_1
# 12. Start old MAIN (instance_3)
# 13. Expect data to be copied to instance_3
temp_dir = tempfile.TemporaryDirectory().name
"instance_1": {
"args": [
"log_file": "instance_1.log",
"data_directory": f"{temp_dir}/instance_1",
"setup_queries": [],
"instance_2": {
"args": [
"log_file": "instance_2.log",
"data_directory": f"{temp_dir}/instance_2",
"setup_queries": [],
"instance_3": {
"args": [
"log_file": "instance_3.log",
"data_directory": f"{temp_dir}/instance_3",
"setup_queries": [],
"coordinator": {
"args": [
"log_file": "coordinator.log",
"setup_queries": [
"REGISTER INSTANCE instance_1 ON '' WITH '';",
"REGISTER INSTANCE instance_2 ON '' WITH '';",
"REGISTER INSTANCE instance_3 ON '' WITH '';",
"SET INSTANCE instance_3 TO MAIN",
# 1
# 2
main_cursor = connect(host="localhost", port=7687).cursor()
expected_data_on_main = [
("instance_1", "", "sync", 0, 0, "ready"),
("instance_2", "", "sync", 0, 0, "ready"),
actual_data_on_main = sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;")))
assert actual_data_on_main == expected_data_on_main
# 3
execute_and_fetch_all(main_cursor, "CREATE (:EpochVertex1 {prop:1});")
# 4
instance_1_cursor = connect(host="localhost", port=7688).cursor()
instance_2_cursor = connect(host="localhost", port=7689).cursor()
assert execute_and_fetch_all(instance_1_cursor, "MATCH (n) RETURN count(n);")[0][0] == 1
assert execute_and_fetch_all(instance_2_cursor, "MATCH (n) RETURN count(n);")[0][0] == 1
# 5
interactive_mg_runner.kill(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_1")
# 6.
with pytest.raises(Exception) as e:
execute_and_fetch_all(main_cursor, "CREATE (:EpochVertex1 {prop:2});")
assert "At least one SYNC replica has not confirmed committing last transaction." in str(e.value)
assert execute_and_fetch_all(instance_2_cursor, "MATCH (n) RETURN count(n);")[0][0] == 2
# 7. Kill main
interactive_mg_runner.kill(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_3")
# 8.
coord_cursor = connect(host="localhost", port=7690).cursor()
def retrieve_data_show_instances():
    """Return the rows of ``SHOW INSTANCES;`` read through ``coord_cursor``, sorted ascending."""
    # sorted() accepts any iterable and always returns a new list, so no list() wrapper is needed.
    return sorted(execute_and_fetch_all(coord_cursor, "SHOW INSTANCES;"))
expected_data_on_coord = [
("coordinator_1", "", "", True, "coordinator"),
("instance_1", "", "", False, "unknown"),
("instance_2", "", "", True, "main"),
("instance_3", "", "", False, "unknown"),
mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_instances)
# 9. Create vertex on instance 2
with pytest.raises(Exception) as e:
execute_and_fetch_all(instance_2_cursor, "CREATE (:Epoch3 {prop:3});")
assert "At least one SYNC replica has not confirmed committing last transaction." in str(e.value)
# 10. Start instance_1 (it should have one commit on old epoch and new epoch with new commit shouldn't be replicated)
interactive_mg_runner.start(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_1")
new_expected_data_on_coord = [
("coordinator_1", "", "", True, "coordinator"),
("instance_1", "", "", True, "replica"),
("instance_2", "", "", True, "main"),
("instance_3", "", "", False, "unknown"),
mg_sleep_and_assert(new_expected_data_on_coord, retrieve_data_show_instances)
# 11. Expect data to be copied on instance_1
instance_1_cursor = connect(host="localhost", port=7688).cursor()
def get_vertex_count():
    """Return element [0][0] of the count query result fetched through ``instance_1_cursor``."""
    rows = execute_and_fetch_all(instance_1_cursor, "MATCH (n) RETURN count(n)")
    first_row = rows[0]
    return first_row[0]
mg_sleep_and_assert(3, get_vertex_count)
# 12. Start old MAIN (instance_3)
interactive_mg_runner.start(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_3")
new_expected_data_on_coord = [
("coordinator_1", "", "", True, "coordinator"),
("instance_1", "", "", True, "replica"),
("instance_2", "", "", True, "main"),
("instance_3", "", "", True, "replica"),
mg_sleep_and_assert(new_expected_data_on_coord, retrieve_data_show_instances)
# 13. Expect data to be copied to instance_3
instance_3_cursor = connect(host="localhost", port=7687).cursor()
def get_vertex_count():
    """Return element [0][0] of the count query result fetched through ``instance_3_cursor``."""
    rows = execute_and_fetch_all(instance_3_cursor, "MATCH (n) RETURN count(n)")
    first_row = rows[0]
    return first_row[0]
mg_sleep_and_assert(3, get_vertex_count)
@pytest.mark.parametrize("data_recovery", ["false", "true"])
def test_replication_works_on_failover_replica_2_epochs_more_commits_away(data_recovery):
# Goal of this test is to check the replication works after failover command if one
# instance missed couple of epochs but data is still available on one of the instances
# 1. We start all replicas, main and coordinator manually
# 2. Main does commit
# 3. instance_2 down
# 4. Main commits more
# 5. Main down
# 6. Instance_1 new main
# 7. Instance 1 commits
# 8. Instance 4 gets data
# 9. Instance 1 dies
# 10. Instance 4 new main
# 11. Instance 4 commits
# 12. Instance 2 wakes up
# 13. Instance 2 gets data from old epochs
# 14. All other instances wake up
# 15. Everything is replicated
temp_dir = tempfile.TemporaryDirectory().name
"instance_1": {
"args": [
"log_file": "instance_1.log",
"data_directory": f"{temp_dir}/instance_1",
"setup_queries": [],
"instance_2": {
"args": [
"log_file": "instance_2.log",
"data_directory": f"{temp_dir}/instance_2",
"setup_queries": [],
"instance_3": {
"args": [
"log_file": "instance_3.log",
"data_directory": f"{temp_dir}/instance_3",
"setup_queries": [],
"instance_4": {
"args": [
"log_file": "instance_4.log",
"data_directory": f"{temp_dir}/instance_4",
"setup_queries": [],
"coordinator": {
"args": [
"log_file": "coordinator.log",
"setup_queries": [
"REGISTER INSTANCE instance_1 ON '' WITH '';",
"REGISTER INSTANCE instance_2 ON '' WITH '';",
"REGISTER INSTANCE instance_3 ON '' WITH '';",
"REGISTER INSTANCE instance_4 ON '' WITH '';",
"SET INSTANCE instance_3 TO MAIN",
# 1. We start all replicas, main and coordinator manually
main_cursor = connect(host="localhost", port=7687).cursor()
expected_data_on_main = [
("instance_1", "", "sync", 0, 0, "ready"),
("instance_2", "", "sync", 0, 0, "ready"),
("instance_4", "", "sync", 0, 0, "ready"),
actual_data_on_main = sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;")))
assert actual_data_on_main == expected_data_on_main
# 2. Main does commit
execute_and_fetch_all(main_cursor, "CREATE (:EpochVertex1 {prop:1});")
execute_and_fetch_all(main_cursor, "CREATE (:EpochVertex1 {prop:2});")
instance_1_cursor = connect(host="localhost", port=7688).cursor()
instance_2_cursor = connect(host="localhost", port=7689).cursor()
instance_4_cursor = connect(host="localhost", port=7691).cursor()
assert execute_and_fetch_all(instance_1_cursor, "MATCH (n) RETURN count(n);")[0][0] == 2
assert execute_and_fetch_all(instance_2_cursor, "MATCH (n) RETURN count(n);")[0][0] == 2
assert execute_and_fetch_all(instance_4_cursor, "MATCH (n) RETURN count(n);")[0][0] == 2
# 3. instance_2 down
interactive_mg_runner.kill(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_2")
coord_cursor = connect(host="localhost", port=7690).cursor()
def retrieve_data_show_instances():
    """Return the rows of ``SHOW INSTANCES;`` read through ``coord_cursor``, sorted ascending."""
    # sorted() accepts any iterable and always returns a new list, so no list() wrapper is needed.
    return sorted(execute_and_fetch_all(coord_cursor, "SHOW INSTANCES;"))
expected_data_on_coord = [
("coordinator_1", "", "", True, "coordinator"),
("instance_1", "", "", True, "replica"),
("instance_2", "", "", False, "unknown"),
("instance_3", "", "", True, "main"),
("instance_4", "", "", True, "replica"),
mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_instances)
# 4. Main commits more
with pytest.raises(Exception) as e:
execute_and_fetch_all(main_cursor, "CREATE (:EpochVertex1 {prop:1});")
assert "At least one SYNC replica has not confirmed committing last transaction." in str(e.value)
assert execute_and_fetch_all(instance_1_cursor, "MATCH (n) RETURN count(n);")[0][0] == 3
assert execute_and_fetch_all(instance_4_cursor, "MATCH (n) RETURN count(n);")[0][0] == 3
# 5. Main down
interactive_mg_runner.kill(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_3")
# 6. Instance_1 new main
expected_data_on_coord = [
("coordinator_1", "", "", True, "coordinator"),
("instance_1", "", "", True, "main"),
("instance_2", "", "", False, "unknown"),
("instance_3", "", "", False, "unknown"),
("instance_4", "", "", True, "replica"),
mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_instances)
# 7. Instance 1 commits
with pytest.raises(Exception) as e:
execute_and_fetch_all(instance_1_cursor, "CREATE (:Epoch2Vertex {prop:1});")
assert "At least one SYNC replica has not confirmed committing last transaction." in str(e.value)
# 8. Instance 4 gets data
assert execute_and_fetch_all(instance_4_cursor, "MATCH (n) RETURN count(n);")[0][0] == 4
# 9. Instance 1 dies
interactive_mg_runner.kill(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_1")
# 10. Instance 4 new main
expected_data_on_coord = [
("coordinator_1", "", "", True, "coordinator"),
("instance_1", "", "", False, "unknown"),
("instance_2", "", "", False, "unknown"),
("instance_3", "", "", False, "unknown"),
("instance_4", "", "", True, "main"),
mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_instances)
# 11. Instance 4 commits
with pytest.raises(Exception) as e:
execute_and_fetch_all(instance_4_cursor, "CREATE (:Epoch3Vertex {prop:1});")
assert "At least one SYNC replica has not confirmed committing last transaction." in str(e.value)
# 12. Instance 2 wakes up
interactive_mg_runner.start(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_2")
expected_data_on_coord = [
("coordinator_1", "", "", True, "coordinator"),
("instance_1", "", "", False, "unknown"),
("instance_2", "", "", True, "replica"),
("instance_3", "", "", False, "unknown"),
("instance_4", "", "", True, "main"),
mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_instances)
# 13. Instance 2 gets data from old epochs
instance_2_cursor = connect(host="localhost", port=7689).cursor()
def get_vertex_count():
    """Return element [0][0] of the count query result fetched through ``instance_2_cursor``."""
    rows = execute_and_fetch_all(instance_2_cursor, "MATCH (n) RETURN count(n)")
    first_row = rows[0]
    return first_row[0]
mg_sleep_and_assert(5, get_vertex_count)
# 14. All other instances wake up
interactive_mg_runner.start(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_1")
interactive_mg_runner.start(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_3")
expected_data_on_coord = [
("coordinator_1", "", "", True, "coordinator"),
("instance_1", "", "", True, "replica"),
("instance_2", "", "", True, "replica"),
("instance_3", "", "", True, "replica"),
("instance_4", "", "", True, "main"),
mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_instances)
# 15. Everything is replicated
instance_1_cursor = connect(host="localhost", port=7688).cursor()
instance_4_cursor = connect(host="localhost", port=7691).cursor()
def get_vertex_count():
    """Return element [0][0] of the count query result fetched through ``instance_1_cursor``."""
    rows = execute_and_fetch_all(instance_1_cursor, "MATCH (n) RETURN count(n)")
    first_row = rows[0]
    return first_row[0]
mg_sleep_and_assert(5, get_vertex_count)
def get_vertex_count():
    """Return element [0][0] of the count query result fetched through ``instance_4_cursor``."""
    rows = execute_and_fetch_all(instance_4_cursor, "MATCH (n) RETURN count(n)")
    first_row = rows[0]
    return first_row[0]
mg_sleep_and_assert(5, get_vertex_count)
@pytest.mark.parametrize("data_recovery", ["false"])
def test_replication_forcefully_works_on_failover_replica_misses_epoch(data_recovery):
# TODO(antoniofilipovic) Test should pass when logic is added
# Goal of this test is to check the replication works forcefully if replica misses epoch
# 1. We start all replicas, main and coordinator manually
# 2. We check that main has correct state
# 3. Create initial data on MAIN
# 4. Expect data to be copied on all replicas
# 5. Kill instance_1 ( this one will miss complete epoch)
# 6. Kill main (instance_3)
# 7. Instance_2 or instance_4 new main
# 8. New main commits
# 9. Instance_2 down (not main)
# 10. instance_4 down
# 11. Instance 1 up (missed epoch)
# 12 Instance 1 new main
# 13 instance 2 up
# 14 Force data from instance 1 to instance 2
temp_dir = tempfile.TemporaryDirectory().name
@pytest.mark.parametrize("data_recovery", ["false"])
def test_replication_works_with_snapshot(data_recovery):
# TODO(antoniofilipovic) Test should pass
# Goal of this test is to check the replication works only recovering with snapshot
# 1. We start all replicas, main and coordinator manually
# 2. We check that main has correct state
# 3. Create initial data on MAIN
# 4. Expect data to be copied on all replicas
# 5. Kill instance_1
# 6. Create more data on MAIN
# 7. Create snapshot
# 8. Run show replicas to get last commit timestamp
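# 9. Delete WAL files from folder of instance_3
#    NOTE(review): step 9 in the body iterates over "{temp_dir}/instance_2/wal", not instance_3 - confirm which is intended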
# 10. Start instance 1
# 11. Check data is replicated
# 12. Call SHOW REPLICAS on MAIN to get correct state
temp_dir = tempfile.TemporaryDirectory().name
"instance_1": {
"args": [
"log_file": "instance_1.log",
"data_directory": f"{temp_dir}/instance_1",
"setup_queries": [],
"instance_2": {
"args": [
"log_file": "instance_2.log",
"data_directory": f"{temp_dir}/instance_2",
"setup_queries": [],
"instance_3": {
"args": [
"log_file": "instance_3.log",
"data_directory": f"{temp_dir}/instance_3",
"setup_queries": [],
"coordinator": {
"args": [
"log_file": "coordinator.log",
"setup_queries": [
"REGISTER INSTANCE instance_1 ON '' WITH '';",
"REGISTER INSTANCE instance_2 ON '' WITH '';",
"REGISTER INSTANCE instance_3 ON '' WITH '';",
"SET INSTANCE instance_3 TO MAIN",
# 1
# 2
main_cursor = connect(host="localhost", port=7687).cursor()
expected_data_on_main = [
("instance_1", "", "sync", 0, 0, "ready"),
("instance_2", "", "sync", 0, 0, "ready"),
actual_data_on_main = sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;")))
assert actual_data_on_main == expected_data_on_main
# 3
execute_and_fetch_all(main_cursor, "CREATE (:Epoch1Vertex {prop:1});")
execute_and_fetch_all(main_cursor, "CREATE (:Epoch1Vertex {prop:2});")
# 4
instance_1_cursor = connect(host="localhost", port=7688).cursor()
instance_2_cursor = connect(host="localhost", port=7689).cursor()
assert execute_and_fetch_all(instance_1_cursor, "MATCH (n) RETURN count(n);")[0][0] == 2
assert execute_and_fetch_all(instance_2_cursor, "MATCH (n) RETURN count(n);")[0][0] == 2
# 5
interactive_mg_runner.kill(CONFIGURATION, "instance_1")
coord_cursor = connect(host="localhost", port=7690).cursor()
def retrieve_data_show_instances():
    """Return the rows of ``SHOW INSTANCES;`` read through ``coord_cursor``, sorted ascending."""
    # sorted() accepts any iterable and always returns a new list, so no list() wrapper is needed.
    return sorted(execute_and_fetch_all(coord_cursor, "SHOW INSTANCES;"))
expected_data_on_coord = [
("coordinator_1", "", "", True, "coordinator"),
("instance_1", "", "", False, "unknown"),
("instance_2", "", "", True, "replica"),
("instance_3", "", "", True, "main"),
mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_instances)
# 6
num_vertices = 100
with pytest.raises(Exception) as e:
execute_and_fetch_all(main_cursor, f"FOREACH (i in range(1, {num_vertices}) | CREATE (:Vertex));")
assert "At least one SYNC replica has not confirmed committing last transaction." in str(e.value)
assert execute_and_fetch_all(instance_2_cursor, "MATCH (n) RETURN count(n);")[0][0] == num_vertices + 2
# 7
execute_and_fetch_all(main_cursor, "CREATE SNAPSHOT;")
# 8
main_cursor = connect(host="localhost", port=7687).cursor()
expected_data_on_main = [
("instance_1", "", "sync", 0, 0, "invalid"),
("instance_2", "", "sync", 6, 0, "ready"),
actual_data_on_main = sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;")))
assert actual_data_on_main == expected_data_on_main
# 9
folder = f"{temp_dir}/instance_2/wal"
for filename in os.listdir(folder):
file_path = os.path.join(folder, filename)
if os.path.isfile(file_path) or os.path.islink(file_path):
elif os.path.isdir(file_path):
print("removing file")
except Exception as e:
print("Failed to delete %s. Reason: %s" % (file_path, e))
# 10
interactive_mg_runner.start(CONFIGURATION, "instance_1")
expected_data_on_coord = [
("coordinator_1", "", "", True, "coordinator"),
("instance_1", "", "", True, "replica"),
("instance_2", "", "", True, "replica"),
("instance_3", "", "", True, "main"),
mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_instances)
# 11
instance_1_cursor = connect(host="localhost", port=7688).cursor()
def get_vertex_count():
    """Return element [0][0] of the count query result fetched through ``instance_1_cursor``."""
    rows = execute_and_fetch_all(instance_1_cursor, "MATCH (n) RETURN count(n)")
    first_row = rows[0]
    return first_row[0]
mg_sleep_and_assert(num_vertices + 2, get_vertex_count)
# 12
main_cursor = connect(host="localhost", port=7687).cursor()
expected_data_on_main = [
("instance_1", "", "sync", 6, 0, "ready"),
("instance_2", "", "sync", 6, 0, "ready"),
actual_data_on_main = sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;")))
assert actual_data_on_main == expected_data_on_main
@pytest.mark.parametrize("data_recovery", ["false", "true"])
def test_replication_correct_replica_chosen_up_to_date_data(data_recovery):
# TODO(antoniofilipovic): Test should pass when base branch is updated
# Goal of this test is to check that correct replica instance as new MAIN is chosen
# 1. We start all replicas, main and coordinator manually
# 2. We check that main has correct state
# 3. Create initial data on MAIN
# 4. Expect data to be copied on all replicas
# 5. Kill instance_1 ( this one will miss complete epoch)
# 6. Kill main (instance_3)
# 7. Instance_2 new MAIN
# 8. Instance_2 commits and replicates data
# 9. Instance_4 down (not main)
# 10. instance_2 down (MAIN), instance 1 up (missed epoch),
# instance 4 up (In this case we should always choose instance_4 because it has up-to-date data)
# 11 Instance 4 new main
# 12 instance_1 gets up-to-date data, instance_4 has all data
temp_dir = tempfile.TemporaryDirectory().name
"instance_1": {
"args": [
"log_file": "instance_1.log",
"data_directory": f"{temp_dir}/instance_1",
"setup_queries": [],
"instance_2": {
"args": [
"log_file": "instance_2.log",
"data_directory": f"{temp_dir}/instance_2",
"setup_queries": [],
"instance_3": {
"args": [
"log_file": "instance_3.log",
"data_directory": f"{temp_dir}/instance_3",
"setup_queries": [],
"instance_4": {
"args": [
"log_file": "instance_4.log",
"data_directory": f"{temp_dir}/instance_4",
"setup_queries": [],
"coordinator": {
"args": [
"log_file": "coordinator.log",
"setup_queries": [
"REGISTER INSTANCE instance_1 ON '' WITH '';",
"REGISTER INSTANCE instance_2 ON '' WITH '';",
"REGISTER INSTANCE instance_3 ON '' WITH '';",
"REGISTER INSTANCE instance_4 ON '' WITH '';",
"SET INSTANCE instance_3 TO MAIN",
# 1
# 2
main_cursor = connect(host="localhost", port=7687).cursor()
expected_data_on_main = [
("instance_1", "", "sync", 0, 0, "ready"),
("instance_2", "", "sync", 0, 0, "ready"),
("instance_4", "", "sync", 0, 0, "ready"),
actual_data_on_main = sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;")))
assert actual_data_on_main == expected_data_on_main
# 3
execute_and_fetch_all(main_cursor, "CREATE (:Epoch1Vertex {prop:1});")
execute_and_fetch_all(main_cursor, "CREATE (:Epoch1Vertex {prop:2});")
# 4
instance_1_cursor = connect(host="localhost", port=7688).cursor()
instance_2_cursor = connect(host="localhost", port=7689).cursor()
instance_4_cursor = connect(host="localhost", port=7691).cursor()
assert execute_and_fetch_all(instance_1_cursor, "MATCH (n) RETURN count(n);")[0][0] == 2
assert execute_and_fetch_all(instance_2_cursor, "MATCH (n) RETURN count(n);")[0][0] == 2
assert execute_and_fetch_all(instance_4_cursor, "MATCH (n) RETURN count(n);")[0][0] == 2
# 5
interactive_mg_runner.kill(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_1")
# 6
interactive_mg_runner.kill(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_3")
# 7
coord_cursor = connect(host="localhost", port=7690).cursor()
def retrieve_data_show_instances():
    """Return the rows of ``SHOW INSTANCES;`` read through ``coord_cursor``, sorted ascending."""
    # sorted() accepts any iterable and always returns a new list, so no list() wrapper is needed.
    return sorted(execute_and_fetch_all(coord_cursor, "SHOW INSTANCES;"))
expected_data_on_coord = [
("coordinator_1", "", "", True, "coordinator"),
("instance_1", "", "", False, "unknown"),
("instance_2", "", "", True, "main"),
("instance_3", "", "", False, "unknown"),
("instance_4", "", "", True, "replica"),
mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_instances)
# 8
with pytest.raises(Exception) as e:
execute_and_fetch_all(instance_2_cursor, "CREATE (:Epoch2Vertex {prop:1});")
assert "At least one SYNC replica has not confirmed committing last transaction." in str(e.value)
def get_vertex_count():
    """Return element [0][0] of the count query result fetched through ``instance_4_cursor``."""
    rows = execute_and_fetch_all(instance_4_cursor, "MATCH (n) RETURN count(n)")
    first_row = rows[0]
    return first_row[0]
mg_sleep_and_assert(3, get_vertex_count)
assert execute_and_fetch_all(instance_4_cursor, "MATCH (n) RETURN count(n);")[0][0] == 3
# 9
interactive_mg_runner.kill(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_4")
expected_data_on_coord = [
("coordinator_1", "", "", True, "coordinator"),
("instance_1", "", "", False, "unknown"),
("instance_2", "", "", True, "main"),
("instance_3", "", "", False, "unknown"),
("instance_4", "", "", False, "unknown"),
mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_instances)
# 10
interactive_mg_runner.kill(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_2")
interactive_mg_runner.start(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_1")
interactive_mg_runner.start(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_4")
# 11
expected_data_on_coord = [
("coordinator_1", "", "", True, "coordinator"),
("instance_1", "", "", True, "replica"),
("instance_2", "", "", False, "unknown"),
("instance_3", "", "", False, "unknown"),
("instance_4", "", "", True, "main"),
mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_instances)
# 12
instance_1_cursor = connect(host="localhost", port=7688).cursor()
instance_4_cursor = connect(host="localhost", port=7691).cursor()
def get_vertex_count():
    """Return element [0][0] of the count query result fetched through ``instance_1_cursor``."""
    rows = execute_and_fetch_all(instance_1_cursor, "MATCH (n) RETURN count(n)")
    first_row = rows[0]
    return first_row[0]
mg_sleep_and_assert(3, get_vertex_count)
def get_vertex_count():
    """Return element [0][0] of the count query result fetched through ``instance_4_cursor``."""
    rows = execute_and_fetch_all(instance_4_cursor, "MATCH (n) RETURN count(n)")
    first_row = rows[0]
    return first_row[0]
mg_sleep_and_assert(3, get_vertex_count)
def test_replication_works_on_failover_simple():
# Goal of this test is to check the replication works after failover command.
# 1. We start all replicas, main and coordinator manually
# 2. We check that main has correct state
# 3. We kill main
# 4. We check that coordinator and new main have correct state
# 5. We insert one vertex on new main
# 6. We check that vertex appears on new replica
# 7. We bring back main up
# 8. Expect data to be copied to main
safe_execute(shutil.rmtree, TEMP_DIR)
# 1
# 2
main_cursor = connect(host="localhost", port=7687).cursor()
expected_data_on_main = [
{"ts": 0, "behind": None, "status": "ready"},
{"memgraph": {"ts": 0, "behind": 0, "status": "ready"}},
{"ts": 0, "behind": None, "status": "ready"},
{"memgraph": {"ts": 0, "behind": 0, "status": "ready"}},
actual_data_on_main = sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;")))
assert actual_data_on_main == expected_data_on_main
# 3
interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3")
# 4
coord_cursor = connect(host="localhost", port=7690).cursor()
def retrieve_data_show_repl_cluster():
    """Return the rows of ``SHOW INSTANCES;`` read through ``coord_cursor``, sorted ascending."""
    # sorted() accepts any iterable and always returns a new list, so no list() wrapper is needed.
    return sorted(execute_and_fetch_all(coord_cursor, "SHOW INSTANCES;"))
expected_data_on_coord = [
("coordinator_1", "", "", True, "coordinator"),
("instance_1", "", "", True, "main"),
("instance_2", "", "", True, "replica"),
("instance_3", "", "", False, "unknown"),
mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_repl_cluster)
new_main_cursor = connect(host="localhost", port=7688).cursor()
def retrieve_data_show_replicas():
    """Return the rows of ``SHOW REPLICAS;`` read through ``new_main_cursor``, sorted ascending."""
    # sorted() accepts any iterable and always returns a new list, so no list() wrapper is needed.
    return sorted(execute_and_fetch_all(new_main_cursor, "SHOW REPLICAS;"))
expected_data_on_new_main = [
{"ts": 0, "behind": None, "status": "ready"},
{"memgraph": {"ts": 0, "behind": 0, "status": "ready"}},
{"ts": 0, "behind": None, "status": "invalid"},
{"memgraph": {"ts": 0, "behind": 0, "status": "invalid"}},
mg_sleep_and_assert_collection(expected_data_on_new_main, retrieve_data_show_replicas)
# 5
with pytest.raises(Exception) as e:
execute_and_fetch_all(new_main_cursor, "CREATE ();")
assert "At least one SYNC replica has not confirmed committing last transaction." in str(e.value)
# 6
alive_replica_cursor = connect(host="localhost", port=7689).cursor()
res = execute_and_fetch_all(alive_replica_cursor, "MATCH (n) RETURN count(n) as count;")[0][0]
assert res == 1, "Vertex should be replicated"
# 7
interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3")
def retrieve_data_show_replicas():
    """Return the rows of ``SHOW REPLICAS;`` read through ``new_main_cursor``, sorted ascending."""
    # sorted() accepts any iterable and always returns a new list, so no list() wrapper is needed.
    return sorted(execute_and_fetch_all(new_main_cursor, "SHOW REPLICAS;"))
new_main_cursor = connect(host="localhost", port=7688).cursor()
expected_data_on_new_main = [
{"ts": 0, "behind": None, "status": "ready"},
{"memgraph": {"ts": 0, "behind": 0, "status": "ready"}},
{"ts": 0, "behind": None, "status": "ready"},
{"memgraph": {"ts": 0, "behind": 0, "status": "ready"}},
mg_sleep_and_assert(expected_data_on_new_main, retrieve_data_show_replicas)
# 8
alive_main = connect(host="localhost", port=7687).cursor()
def retrieve_vertices_count():
    """Return element [0][0] of the count query result fetched through ``alive_main``."""
    rows = execute_and_fetch_all(alive_main, "MATCH (n) RETURN count(n) as count;")
    first_row = rows[0]
    return first_row[0]
mg_sleep_and_assert(1, retrieve_vertices_count)
def test_replication_works_on_replica_instance_restart():
# Goal of this test is to check the replication works after replica goes down and restarts
# 1. We start all replicas, main and coordinator manually: we want to be able to kill them ourselves without relying on external tooling to kill processes.
# 2. We check that main has correct state
# 3. We kill replica
# 4. We check that main cannot replicate to replica
# 5. We bring replica back up
# 6. We check that replica gets data
safe_execute(shutil.rmtree, TEMP_DIR)
# 1
# 2
main_cursor = connect(host="localhost", port=7687).cursor()
expected_data_on_main = [
{"ts": 0, "behind": None, "status": "ready"},
{"memgraph": {"ts": 0, "behind": 0, "status": "ready"}},
{"ts": 0, "behind": None, "status": "ready"},
{"memgraph": {"ts": 0, "behind": 0, "status": "ready"}},
actual_data_on_main = sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;")))
assert actual_data_on_main == expected_data_on_main
# 3
coord_cursor = connect(host="localhost", port=7690).cursor()
interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_2")
def retrieve_data_show_repl_cluster():
    """Return the rows of ``SHOW INSTANCES;`` read through ``coord_cursor``, sorted ascending."""
    # sorted() accepts any iterable and always returns a new list, so no list() wrapper is needed.
    return sorted(execute_and_fetch_all(coord_cursor, "SHOW INSTANCES;"))
expected_data_on_coord = [
("coordinator_1", "", "", True, "coordinator"),
("instance_1", "", "", True, "replica"),
("instance_2", "", "", False, "unknown"),
("instance_3", "", "", True, "main"),
mg_sleep_and_assert_collection(expected_data_on_coord, retrieve_data_show_repl_cluster)
def retrieve_data_show_replicas():
    """Return the rows of ``SHOW REPLICAS;`` read through ``main_cursor``, sorted ascending."""
    # sorted() accepts any iterable and always returns a new list, so no list() wrapper is needed.
    return sorted(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;"))
expected_data_on_main = [
{"ts": 0, "behind": None, "status": "ready"},
{"memgraph": {"ts": 0, "behind": 0, "status": "ready"}},
{"ts": 0, "behind": None, "status": "ready"},
{"memgraph": {"ts": 0, "behind": 0, "status": "invalid"}},
mg_sleep_and_assert_collection(expected_data_on_main, retrieve_data_show_replicas)
# 4
instance_1_cursor = connect(host="localhost", port=7688).cursor()
with pytest.raises(Exception) as e:
execute_and_fetch_all(main_cursor, "CREATE ();")
assert "At least one SYNC replica has not confirmed committing last transaction." in str(e.value)
res_instance_1 = execute_and_fetch_all(instance_1_cursor, "MATCH (n) RETURN count(n)")[0][0]
assert res_instance_1 == 1
def retrieve_data_show_replicas():
    """Return the rows of ``SHOW REPLICAS;`` read through ``main_cursor``, sorted ascending."""
    # sorted() accepts any iterable and always returns a new list, so no list() wrapper is needed.
    return sorted(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;"))
expected_data_on_main = [
{"ts": 0, "behind": None, "status": "ready"},
{"memgraph": {"ts": 2, "behind": 0, "status": "ready"}},
{"ts": 0, "behind": None, "status": "ready"},
{"memgraph": {"ts": 0, "behind": 0, "status": "invalid"}},
mg_sleep_and_assert_collection(expected_data_on_main, retrieve_data_show_replicas)
# 5.
interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_2")
def retrieve_data_show_repl_cluster():
    """Return the rows of ``SHOW INSTANCES;`` read through ``coord_cursor``, sorted ascending."""
    # sorted() accepts any iterable and always returns a new list, so no list() wrapper is needed.
    return sorted(execute_and_fetch_all(coord_cursor, "SHOW INSTANCES;"))
expected_data_on_coord = [
("coordinator_1", "", "", True, "coordinator"),
("instance_1", "", "", True, "replica"),
("instance_2", "", "", True, "replica"),
("instance_3", "", "", True, "main"),
mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_repl_cluster)
def retrieve_data_show_replicas():
    """Return the rows of ``SHOW REPLICAS;`` read through ``main_cursor``, sorted ascending."""
    # sorted() accepts any iterable and always returns a new list, so no list() wrapper is needed.
    return sorted(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;"))
expected_data_on_main = [
{"ts": 0, "behind": None, "status": "ready"},
{"memgraph": {"ts": 2, "behind": 0, "status": "ready"}},
{"ts": 0, "behind": None, "status": "ready"},
{"memgraph": {"ts": 2, "behind": 0, "status": "ready"}},
mg_sleep_and_assert_collection(expected_data_on_main, retrieve_data_show_replicas)
# 6.
instance_2_cursor = connect(port=7689, host="localhost").cursor()
execute_and_fetch_all(main_cursor, "CREATE ();")
res_instance_2 = execute_and_fetch_all(instance_2_cursor, "MATCH (n) RETURN count(n)")[0][0]
assert res_instance_2 == 2
def test_show_instances():
safe_execute(shutil.rmtree, TEMP_DIR)
instance1_cursor = connect(host="localhost", port=7688).cursor()
instance2_cursor = connect(host="localhost", port=7689).cursor()
instance3_cursor = connect(host="localhost", port=7687).cursor()
coord_cursor = connect(host="localhost", port=7690).cursor()
def show_repl_cluster():
    """Return the rows of ``SHOW INSTANCES;`` read through ``coord_cursor``, sorted ascending."""
    # sorted() accepts any iterable and always returns a new list, so no list() wrapper is needed.
    return sorted(execute_and_fetch_all(coord_cursor, "SHOW INSTANCES;"))
expected_data = [
("coordinator_1", "", "", True, "coordinator"),
("instance_1", "", "", True, "replica"),
("instance_2", "", "", True, "replica"),
("instance_3", "", "", True, "main"),
mg_sleep_and_assert(expected_data, show_repl_cluster)
def retrieve_data_show_repl_role_instance1():
    """Return the rows of ``SHOW REPLICATION ROLE;`` read through ``instance1_cursor``, sorted ascending."""
    # sorted() accepts any iterable and always returns a new list, so no list() wrapper is needed.
    return sorted(execute_and_fetch_all(instance1_cursor, "SHOW REPLICATION ROLE;"))
def retrieve_data_show_repl_role_instance2():
    """Return the rows of ``SHOW REPLICATION ROLE;`` read through ``instance2_cursor``, sorted ascending."""
    # sorted() accepts any iterable and always returns a new list, so no list() wrapper is needed.
    return sorted(execute_and_fetch_all(instance2_cursor, "SHOW REPLICATION ROLE;"))
def retrieve_data_show_repl_role_instance3():
return sorted(list(execute_and_fetch_all(instance3_cursor, "SHOW REPLICATION ROLE;")))
mg_sleep_and_assert([("replica",)], retrieve_data_show_repl_role_instance1)
mg_sleep_and_assert([("replica",)], retrieve_data_show_repl_role_instance2)
mg_sleep_and_assert([("main",)], retrieve_data_show_repl_role_instance3)
interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_1")
expected_data = [
("coordinator_1", "", "", True, "coordinator"),
("instance_1", "", "", False, "unknown"),
("instance_2", "", "", True, "replica"),
("instance_3", "", "", True, "main"),
mg_sleep_and_assert(expected_data, show_repl_cluster)
interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_2")
expected_data = [
("coordinator_1", "", "", True, "coordinator"),
("instance_1", "", "", False, "unknown"),
("instance_2", "", "", False, "unknown"),
("instance_3", "", "", True, "main"),
mg_sleep_and_assert(expected_data, show_repl_cluster)
# Failover scenario: the current main (instance_3) is killed, the coordinator
# is expected to report instance_1 as the new main, and after instance_3 is
# started again every replica entry on the new main is expected to be "ready".
#
# NOTE(review): the SHOW REPLICAS expectations below are bare dicts. Each row
# presumably also carries identifying fields (name, endpoint, mode) wrapped in
# a tuple - confirm against the query output before relying on these literals;
# as written, sorted() over plain dicts cannot compare them.
def test_simple_automatic_failover():
safe_execute(shutil.rmtree, TEMP_DIR)
# Port 7687 is the instance that starts out as main.
main_cursor = connect(host="localhost", port=7687).cursor()
expected_data_on_main = [
{"ts": 0, "behind": None, "status": "ready"},
{"memgraph": {"ts": 0, "behind": 0, "status": "ready"}},
{"ts": 0, "behind": None, "status": "ready"},
{"memgraph": {"ts": 0, "behind": 0, "status": "ready"}},
# One-shot comparison: unlike the later checks, this one does not go through
# an mg_sleep_and_assert* helper.
actual_data_on_main = sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;")))
assert actual_data_on_main == sorted(expected_data_on_main)
# Take the main down.
interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3")
coord_cursor = connect(host="localhost", port=7690).cursor()
def retrieve_data_show_repl_cluster():
return sorted(list(execute_and_fetch_all(coord_cursor, "SHOW INSTANCES;")))
# Expected coordinator view after failover: instance_1 is main, instance_3 is
# listed with False / "unknown".
expected_data_on_coord = [
("coordinator_1", "", "", True, "coordinator"),
("instance_1", "", "", True, "main"),
("instance_2", "", "", True, "replica"),
("instance_3", "", "", False, "unknown"),
mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_repl_cluster)
# Port 7688 is instance_1, the newly promoted main.
new_main_cursor = connect(host="localhost", port=7688).cursor()
def retrieve_data_show_replicas():
return sorted(list(execute_and_fetch_all(new_main_cursor, "SHOW REPLICAS;")))
# While the old main is still down, one replica entry is expected to be
# "invalid" (presumably the killed instance_3 - TODO confirm).
expected_data_on_new_main = [
{"ts": 0, "behind": None, "status": "ready"},
{"memgraph": {"ts": 0, "behind": 0, "status": "ready"}},
{"ts": 0, "behind": None, "status": "invalid"},
{"memgraph": {"ts": 0, "behind": 0, "status": "invalid"}},
mg_sleep_and_assert_collection(expected_data_on_new_main, retrieve_data_show_replicas)
# Bring the old main back; both replica entries are then expected to be "ready".
interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3")
expected_data_on_new_main_old_alive = [
{"ts": 0, "behind": None, "status": "ready"},
{"memgraph": {"ts": 0, "behind": 0, "status": "ready"}},
{"ts": 0, "behind": None, "status": "ready"},
{"memgraph": {"ts": 0, "behind": 0, "status": "ready"}},
mg_sleep_and_assert_collection(expected_data_on_new_main_old_alive, retrieve_data_show_replicas)
def test_registering_replica_fails_name_exists():
    """Registering an instance under an already-registered name must fail.

    ``instance_1`` is already registered by the coordinator's setup queries,
    so sending another ``REGISTER INSTANCE instance_1 ...`` to the coordinator
    (port 7690) is expected to raise with the exact message asserted below.
    """
    safe_execute(shutil.rmtree, TEMP_DIR)

    coord_cursor = connect(host="localhost", port=7690).cursor()

    # The query must actually be sent through the cursor: a bare string
    # literal inside the ``with`` block raises nothing, so pytest.raises
    # would fail for the wrong reason.
    with pytest.raises(Exception) as e:
        execute_and_fetch_all(
            coord_cursor,
            "REGISTER INSTANCE instance_1 ON '' WITH '';",
        )
    assert str(e.value) == "Couldn't register replica instance since instance with such name already exists!"
def test_registering_replica_fails_endpoint_exists():
    """Registering a new name on an already-used endpoint must fail.

    Sends ``REGISTER INSTANCE instance_5 ...`` to the coordinator (port 7690)
    and checks the exact error message that is raised.
    """
    safe_execute(shutil.rmtree, TEMP_DIR)

    coord_cursor = connect(host="localhost", port=7690).cursor()

    # NOTE(review): both endpoint literals in the query are empty - confirm
    # the socket addresses that are supposed to collide with an existing
    # registration.
    # The query must be sent through the cursor; a bare string literal inside
    # the ``with`` block would never reach the coordinator.
    with pytest.raises(Exception) as e:
        execute_and_fetch_all(
            coord_cursor,
            "REGISTER INSTANCE instance_5 ON '' WITH '';",
        )
    assert str(e.value) == "Couldn't register replica instance since instance with such endpoint already exists!"
def test_replica_instance_restarts():
    """Check that a killed replica is reported down and comes back as a replica.

    The coordinator (port 7690) must first list every instance as up, then
    list instance_1 with ``False`` / ``"unknown"`` after it is killed, and
    finally return to the initial listing once instance_1 is started again.
    The restarted instance itself (port 7688) must report the replica role.
    """
    safe_execute(shutil.rmtree, TEMP_DIR)
    # NOTE(review): nothing visible here launches the cluster before the
    # coordinator connection is opened - confirm where the instances are started.

    cursor = connect(host="localhost", port=7690).cursor()

    def show_repl_cluster():
        # Sorted so the comparison does not depend on the row order.
        return sorted(execute_and_fetch_all(cursor, "SHOW INSTANCES;"))

    expected_data_up = [
        ("coordinator_1", "", "", True, "coordinator"),
        ("instance_1", "", "", True, "replica"),
        ("instance_2", "", "", True, "replica"),
        ("instance_3", "", "", True, "main"),
    ]
    mg_sleep_and_assert(expected_data_up, show_repl_cluster)

    interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_1")

    expected_data_down = [
        ("coordinator_1", "", "", True, "coordinator"),
        ("instance_1", "", "", False, "unknown"),
        ("instance_2", "", "", True, "replica"),
        ("instance_3", "", "", True, "main"),
    ]
    mg_sleep_and_assert(expected_data_down, show_repl_cluster)

    interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_1")

    # After the restart the coordinator view must match the initial one again.
    mg_sleep_and_assert(expected_data_up, show_repl_cluster)

    instance1_cursor = connect(host="localhost", port=7688).cursor()

    def retrieve_data_show_repl_role_instance1():
        return sorted(execute_and_fetch_all(instance1_cursor, "SHOW REPLICATION ROLE;"))

    expected_data_replica = [("replica",)]
    mg_sleep_and_assert(expected_data_replica, retrieve_data_show_repl_role_instance1)
def test_automatic_failover_main_back_as_replica():
    """Check that a failed main rejoins the cluster as a replica.

    instance_3 (the initial main) is killed; the coordinator (port 7690) is
    expected to report instance_1 as main. When instance_3 is started again,
    the coordinator must list it as a replica and the instance itself
    (port 7687) must report the replica role.
    """
    safe_execute(shutil.rmtree, TEMP_DIR)
    # NOTE(review): nothing visible here launches the cluster before
    # instance_3 is killed - confirm where the instances are started.

    interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3")

    coord_cursor = connect(host="localhost", port=7690).cursor()

    def retrieve_data_show_repl_cluster():
        # Sorted so the comparison does not depend on the row order.
        return sorted(execute_and_fetch_all(coord_cursor, "SHOW INSTANCES;"))

    expected_data_after_failover = [
        ("coordinator_1", "", "", True, "coordinator"),
        ("instance_1", "", "", True, "main"),
        ("instance_2", "", "", True, "replica"),
        ("instance_3", "", "", False, "unknown"),
    ]
    mg_sleep_and_assert(expected_data_after_failover, retrieve_data_show_repl_cluster)

    expected_data_after_main_coming_back = [
        ("coordinator_1", "", "", True, "coordinator"),
        ("instance_1", "", "", True, "main"),
        ("instance_2", "", "", True, "replica"),
        ("instance_3", "", "", True, "replica"),
    ]

    interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3")
    mg_sleep_and_assert(expected_data_after_main_coming_back, retrieve_data_show_repl_cluster)

    instance3_cursor = connect(host="localhost", port=7687).cursor()

    def retrieve_data_show_repl_role_instance3():
        return sorted(execute_and_fetch_all(instance3_cursor, "SHOW REPLICATION ROLE;"))

    mg_sleep_and_assert([("replica",)], retrieve_data_show_repl_role_instance3)
def test_automatic_failover_main_back_as_main():
    """Check that the main keeps its role when every data instance goes down.

    All three data instances are killed, so the coordinator (port 7690) has
    no instance left to report as up. When instance_3 is started first it is
    expected to be listed as main again, and once instance_1 and instance_2
    are started they are expected to be listed as replicas. Each instance's
    own ``SHOW REPLICATION ROLE`` is checked as well.
    """
    safe_execute(shutil.rmtree, TEMP_DIR)
    # NOTE(review): nothing visible here launches the cluster before the
    # instances are killed - confirm where they are started.

    interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_1")
    interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_2")
    interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3")

    coord_cursor = connect(host="localhost", port=7690).cursor()

    def retrieve_data_show_repl_cluster():
        # Sorted so the comparison does not depend on the row order.
        return sorted(execute_and_fetch_all(coord_cursor, "SHOW INSTANCES;"))

    expected_data_all_down = [
        ("coordinator_1", "", "", True, "coordinator"),
        ("instance_1", "", "", False, "unknown"),
        ("instance_2", "", "", False, "unknown"),
        ("instance_3", "", "", False, "unknown"),
    ]
    mg_sleep_and_assert(expected_data_all_down, retrieve_data_show_repl_cluster)

    # The former main comes back first, while both replicas are still down.
    interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3")

    expected_data_main_back = [
        ("coordinator_1", "", "", True, "coordinator"),
        ("instance_1", "", "", False, "unknown"),
        ("instance_2", "", "", False, "unknown"),
        ("instance_3", "", "", True, "main"),
    ]
    mg_sleep_and_assert(expected_data_main_back, retrieve_data_show_repl_cluster)

    instance3_cursor = connect(host="localhost", port=7687).cursor()

    def retrieve_data_show_repl_role_instance3():
        return sorted(execute_and_fetch_all(instance3_cursor, "SHOW REPLICATION ROLE;"))

    mg_sleep_and_assert([("main",)], retrieve_data_show_repl_role_instance3)

    interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_1")
    interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_2")

    expected_data_replicas_back = [
        ("coordinator_1", "", "", True, "coordinator"),
        ("instance_1", "", "", True, "replica"),
        ("instance_2", "", "", True, "replica"),
        ("instance_3", "", "", True, "main"),
    ]
    mg_sleep_and_assert(expected_data_replicas_back, retrieve_data_show_repl_cluster)

    instance1_cursor = connect(host="localhost", port=7688).cursor()
    instance2_cursor = connect(host="localhost", port=7689).cursor()

    def retrieve_data_show_repl_role_instance1():
        return sorted(execute_and_fetch_all(instance1_cursor, "SHOW REPLICATION ROLE;"))

    def retrieve_data_show_repl_role_instance2():
        return sorted(execute_and_fetch_all(instance2_cursor, "SHOW REPLICATION ROLE;"))

    mg_sleep_and_assert([("replica",)], retrieve_data_show_repl_role_instance1)
    mg_sleep_and_assert([("replica",)], retrieve_data_show_repl_role_instance2)
    # Re-check the main after the replicas rejoined: its role must not change.
    mg_sleep_and_assert([("main",)], retrieve_data_show_repl_role_instance3)
def test_disable_multiple_mains():
    """Promoting a second instance to main must be rejected.

    The coordinator's setup queries already set instance_3 to main, so asking
    the coordinator (port 7690) to set instance_1 to main is expected to raise
    with the exact message asserted below.
    """
    safe_execute(shutil.rmtree, TEMP_DIR)

    coord_cursor = connect(host="localhost", port=7690).cursor()

    # pytest.raises (as in the registration tests of this module) makes the
    # test fail when no error is raised at all; a try/except that only asserts
    # inside the handler would pass silently in that case.
    with pytest.raises(Exception) as e:
        execute_and_fetch_all(
            coord_cursor,
            "SET INSTANCE instance_1 TO MAIN;",
        )
    assert str(e.value) == "Couldn't set instance to main since there is already a main instance in cluster!"
if __name__ == "__main__":
    # Run every test in this file and print the full short summary (-rA).
    # A preceding sys.exit() with a "-k test_replication_works_with_snapshot"
    # filter made this call unreachable and limited script runs to one test.
    sys.exit(pytest.main([__file__, "-rA"]))