memgraph/tests/manual/test_isolation_level.py

608 lines
13 KiB
Python
Executable File

#!/usr/bin/env python3
"""
This script verifies whether the server is able to correctly
detect and abort certain classes of transactional anomaly,
inspired by the hermitage tests found at:
https://github.com/ept/hermitage
and based on the correctness definitions laid out in:
https://pmg.csail.mit.edu/papers/adya-phd.pdf
As of the time of committing this script, the database
is able to meet the conditions required for Snapshot Isolation
and Consistent View.
All of these tests begin with a dataset of:
Kv { key: 1, value: 10 }
Kv { key: 2, value: 20 }
and then assert various transactional correctness properties.
"""
import argparse
import mgclient
def setup(conn):
conn.autocommit = True
cursor = conn.cursor()
cursor.execute("CREATE CONSTRAINT ON (kv: Kv) ASSERT kv.key IS UNIQUE;")
conn.commit()
conn.autocommit = False
cursor = conn.cursor()
cursor.execute("MATCH (kv:Kv) DETACH DELETE kv;")
conn.commit()
update(cursor, 1, 10, "instantiate key 1")
conn.commit()
update(cursor, 2, 20, "instantiate key 2")
conn.commit()
assert_eq(get(cursor, 1), [10])
assert_eq(get(cursor, 2), [20])
def update(cursor, key, value, comment=""):
cursor.execute(
"""
// %s
MERGE (kv:Kv{ key: $key })
SET kv.value = $value
RETURN kv;
"""
% comment,
{"key": key, "value": value},
)
ret = cursor.fetchall()
if len(ret) != 1:
print(f"expected ret to be len 1, but it's {ret}")
assert len(ret) == 1
return ret[0][0].properties["value"]
def get(cursor, key):
cursor.execute(
"""
MATCH (kv: Kv { key: $key })
RETURN kv;
""",
{"key": key},
)
ret = cursor.fetchall()
return [r[0].properties["value"] for r in ret]
def assert_eq(a, b):
if a != b:
print("expected", a, "to be", b)
assert a == b
def assert_commit_fails(conn, failure="conflicting transactions"):
try:
conn.commit()
# should always abort
print("expected transaction to fail with", failure, "but it incorrectly committed successfully")
assert False
except mgclient.DatabaseError as e:
assert failure in str(e), "expected exception containing text {failure} but instead it was {e}"
def conflicting_update(cursor, key, value):
try:
update(cursor, key, value)
assert False
except mgclient.DatabaseError as e:
assert str(e).startswith("Cannot resolve conflicting transactions")
def select(cursor, key, expected):
actual = get(cursor, key)
if actual != expected:
print("expected key", key, "to have value", expected, "but instead it had value", actual)
assert False
def select_all(cursor, expected):
cursor.execute("MATCH (kv: Kv {}) RETURN kv")
actual = [r[0].properties["value"] for r in cursor.fetchall()]
assert actual == expected, "expected values {expected}, but instead it had values {actual}"
"""
G0: Write Cycles (dirty writes)
"""
def g0(c1, c2):
cursor_1 = c1.cursor()
cursor_2 = c2.cursor()
update(cursor_1, 1, 11)
conflicting_update(cursor_2, 1, 12)
update(cursor_1, 2, 21)
c1.commit()
get(cursor_1, 1)
get(cursor_1, 2)
# if the above write to 1:12 was
# able to be staged without throwing
# an exception, these lines would be
# necessary. but the interactive txn
# from c2 is already aborted before
# this point, so there's nothing we
# need to do here.
# update(cursor_2, 2, 22)
# assert_commit_fails(c2)
select(cursor_1, 1, [11])
select(cursor_1, 2, [21])
select(cursor_2, 1, [11])
select(cursor_2, 2, [21])
c1.commit()
c2.commit()
print("✓ g0 test passed")
return True
"""
G1a: Aborted Reads (dirty reads, cascaded aborts)
"""
def g1a(c1, c2):
cursor_1 = c1.cursor()
cursor_2 = c2.cursor()
update(cursor_1, 1, 101)
select(cursor_2, 1, [10])
c1.rollback()
select(cursor_2, 1, [10])
c2.commit()
print("✓ g1a test passed")
return True
"""
G1b: Intermediate Reads (dirty reads)
"""
def g1b(c1, c2):
cursor_1 = c1.cursor()
cursor_2 = c2.cursor()
update(cursor_1, 1, 101)
select(cursor_2, 1, [10])
update(cursor_1, 1, 11)
c1.commit()
select(cursor_2, 1, [10])
c2.commit()
print("✓ g1b test passed")
return True
"""
G1c: Circular Information Flow (dirty reads)
"""
def g1c(c1, c2):
cursor_1 = c1.cursor()
cursor_2 = c2.cursor()
update(cursor_1, 1, 11)
update(cursor_2, 2, 22)
select(cursor_1, 2, [20])
select(cursor_2, 1, [10])
c1.commit()
c2.commit()
print("✓ g1c test passed")
return True
"""
OTV: Observed Transaction Vanishes
"""
def otv(c1, c2, c3):
cursor_1 = c1.cursor()
cursor_2 = c2.cursor()
cursor_3 = c3.cursor()
update(cursor_1, 1, 11)
update(cursor_1, 2, 19)
conflicting_update(cursor_2, 1, 12)
c1.commit()
select(cursor_3, 1, [11])
select(cursor_3, 2, [19])
# cursor_2 update not required due to its early-abort above
select(cursor_3, 1, [11])
select(cursor_3, 2, [19])
c3.commit()
print("✓ otv test passed")
return True
"""
Predicate-Many-Preceders
"""
def pmp(c1, c2):
cursor_1 = c1.cursor()
cursor_2 = c2.cursor()
select(cursor_1, 3, [])
update(cursor_2, 3, 30)
c2.commit()
select(cursor_1, 3, [])
c1.commit()
print("✓ pmp test passed")
return True
"""
Predicate-Many-Preceders (write)
"""
def pmp_write(c1, c2):
cursor_1 = c1.cursor()
cursor_2 = c2.cursor()
cursor_1.execute("MATCH (kv: Kv {}) SET kv.value = kv.value + 10 RETURN kv")
select(cursor_2, 1, [10])
select(cursor_2, 2, [20])
try:
cursor_2.execute("MATCH (kv:Kv { value: 20 }) DETACH DELETE kv;")
assert False
except mgclient.DatabaseError as e:
assert "conflicting transactions" in str(e)
c1.commit()
select(cursor_1, 1, [20])
select(cursor_1, 2, [30])
c1.commit()
print("✓ pmp_write test passed")
return True
"""
P4: Lost Update
"""
def p4(c1, c2):
cursor_1 = c1.cursor()
cursor_2 = c2.cursor()
select(cursor_1, 1, [10])
select(cursor_2, 1, [10])
update(cursor_1, 1, 11)
conflicting_update(cursor_2, 1, 11)
c1.commit()
print("✓ p4 test passed")
return True
"""
G-single: Single Anti-dependency Cycles (read skew)
"""
def g_single(c1, c2):
cursor_1 = c1.cursor()
cursor_2 = c2.cursor()
select(cursor_1, 1, [10])
select(cursor_2, 1, [10])
select(cursor_2, 2, [20])
update(cursor_2, 1, 12)
update(cursor_2, 2, 18)
c2.commit()
select(cursor_1, 2, [20])
c1.commit()
print("✓ g_single test passed")
return True
"""
G-single: Single Anti-dependency Cycles (read skew) (dependencies)
"""
def g_single_dependencies(c1, c2):
cursor_1 = c1.cursor()
cursor_2 = c2.cursor()
cursor_1.execute("MATCH (kv: Kv {}) WHERE kv.value % 5 = 0 RETURN kv")
found = [r[0].properties["value"] for r in cursor_1.fetchall()]
assert found == [10, 20]
cursor_2.execute("MATCH (kv: Kv { value: 10 }) SET kv.value = 12 RETURN kv")
c2.commit()
cursor_1.execute("MATCH (kv: Kv {}) WHERE kv.value % 3 = 0 RETURN kv")
found = [r[0].properties["value"] for r in cursor_1.fetchall()]
assert found == []
c1.commit()
print("✓ g_single_dependencies test passed")
return True
"""
G-single: Single Anti-dependency Cycles (read skew) (write 1)
"""
def g_single_write_1(c1, c2):
cursor_1 = c1.cursor()
cursor_2 = c2.cursor()
select(cursor_1, 1, [10])
select_all(cursor_2, [10, 20])
update(cursor_2, 1, 12)
update(cursor_2, 2, 18)
c2.commit()
try:
cursor_1.execute("MATCH (kv:Kv { value: 20 }) DETACH DELETE kv;")
assert False
except mgclient.DatabaseError as e:
assert "conflicting transactions" in str(e)
select_all(cursor_2, [12, 18])
c2.commit()
print("✓ g_single_write_1 test passed")
return True
"""
G-single: Single Anti-dependency Cycles (read skew) (write 2)
"""
def g_single_write_2(c1, c2):
cursor_1 = c1.cursor()
cursor_2 = c2.cursor()
select(cursor_1, 1, [10])
select_all(cursor_2, [10, 20])
update(cursor_2, 1, 12)
cursor_1.execute("MATCH (kv:Kv { value: 20 }) DETACH DELETE kv;")
conflicting_update(cursor_2, 2, 18)
c1.rollback()
# c2.commit()
print("✓ g_single_write_2 test passed (although the abort rate is pessimistically high)")
return True
"""
G2-item: Item Anti-dependency Cycles (write skew on disjoint read)
"""
def g2_item(c1, c2):
cursor_1 = c1.cursor()
cursor_2 = c2.cursor()
cursor_1.execute("MATCH (kv: Kv {}) WHERE kv.key = 1 OR kv.key = 2 RETURN kv")
found = [r[0].properties["value"] for r in cursor_1.fetchall()]
assert found == [10, 20]
cursor_2.execute("MATCH (kv: Kv {}) WHERE kv.key = 1 OR kv.key = 2 RETURN kv")
found = [r[0].properties["value"] for r in cursor_2.fetchall()]
assert found == [10, 20]
update(cursor_1, 1, 11)
update(cursor_2, 2, 21)
c1.commit()
try:
assert_commit_fails(c2)
print("✓ g2_item test passed")
return True
except:
print(
"X g2_item test failed - database exhibits write skew -",
"writes based on invalidated reads should have failed,",
"causing repeatable read (PL-2.99) and serializability (PL-3)",
"to fail to be achieved",
)
return False
"""
G2: Anti-Dependency Cycles (write skew on predicate read)
"""
def g2(c1, c2):
cursor_1 = c1.cursor()
cursor_2 = c2.cursor()
cursor_1.execute("MATCH (kv: Kv {}) WHERE kv.value % 3 = 0 RETURN kv")
found = [r[0].properties["value"] for r in cursor_1.fetchall()]
assert found == []
cursor_2.execute("MATCH (kv: Kv {}) WHERE kv.value % 3 = 0 RETURN kv")
found = [r[0].properties["value"] for r in cursor_2.fetchall()]
assert found == []
update(cursor_1, 3, 30)
update(cursor_2, 4, 42)
c1.commit()
try:
assert_commit_fails(c2)
print("✓ g2 test passed")
return True
except:
print(
"X g2 test failed - database exhibits write skew on predicate read -",
"concurrent transactions that should have caused a predicate read to",
"return data in one transaction actually returned nothing in both.",
"Both transactions committed, but one of them should have failed due",
"to having a predicate invalidated",
)
return False
"""
G2: Anti-Dependency Cycles (write skew on predicate read) (two edges)
"""
def g2_two_edges(c1, c2, c3):
cursor_1 = c1.cursor()
select_all(cursor_1, [10, 20])
cursor_2 = c2.cursor()
cursor_2.execute("MATCH (kv: Kv { key: 2 }) SET kv.value = kv.value + 5 RETURN kv;")
found = [r[0].properties["value"] for r in cursor_2.fetchall()]
assert found == [25]
c2.commit()
cursor_3 = c3.cursor()
select_all(cursor_3, [10, 25])
c3.commit()
try:
conflicting_update(cursor_1, 1, 0)
print("✓ g2_two_edges test passed")
return True
except:
c3.rollback()
print(
"X g2_two_edges test failed: database exhibits write skew on predicate read -",
"a transaction's read set was invalidated in two concurrent transactions,"
"and it should have failed to commit if we want to be serializable",
)
return False
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("-s", "--host", default="127.0.0.1")
parser.add_argument("-p", "--port", default=7687)
args = parser.parse_args()
c1 = mgclient.connect(host=args.host, port=args.port)
c1.autocommit = False
c2 = mgclient.connect(host=args.host, port=args.port)
c2.autocommit = False
c3 = mgclient.connect(host=args.host, port=args.port)
c3.autocommit = False
setup(c1)
g0 = g0(c1, c2)
setup(c1)
g1a = g1a(c1, c2)
setup(c1)
g1b = g1b(c1, c2)
setup(c1)
g1c = g1c(c1, c2)
setup(c1)
otv = otv(c1, c2, c3)
setup(c1)
pmp = pmp(c1, c2)
setup(c1)
pmp_write = pmp_write(c1, c2)
setup(c1)
p4 = p4(c1, c2)
setup(c1)
g_single = g_single(c1, c2)
setup(c1)
g_single_dependencies = g_single_dependencies(c1, c2)
setup(c1)
g_single_write_1 = g_single_write_1(c1, c2)
setup(c1)
g_single_write_2 = g_single_write_2(c1, c2)
setup(c1)
g2_item = g2_item(c1, c2)
setup(c1)
g2 = g2(c1, c2)
setup(c1)
g2_two_edges = g2_two_edges(c1, c2, c3)
g1 = all([g1a, g1b, g1c])
repeatable_read = all([g1, g2_item])
snapshot_isolation = all(
[g0, g1, otv, pmp, pmp_write, p4, g_single, g_single_dependencies, g_single_write_1, g_single_write_2]
)
full_serializability = all([snapshot_isolation, g2_item, g2, g2_two_edges])
print("")
print("results:")
print("consistent view (PL-2+):", g1 and g_single)
print("snapshot isolation (PL-SI):", snapshot_isolation)
print("repeatable read (PL-2.99):", repeatable_read)
print("full serializability (PL-3):", full_serializability)
print("")
print("more information about these anomalies can be found at:")
print("\thttps://github.com/ept/hermitage")
print("\thttps://pmg.csail.mit.edu/papers/adya-phd.pdf")