#!/usr/bin/env python3

"""
This script verifies whether the server is able to correctly detect and
abort certain classes of transactional anomaly, inspired by the hermitage
tests found at:

    https://github.com/ept/hermitage

and based on the correctness definitions laid out in:

    https://pmg.csail.mit.edu/papers/adya-phd.pdf

As of the time of committing this script, the database is able to meet the
conditions required for Snapshot Isolation and Consistent View.

All of these tests begin with a dataset of:

    Kv { key: 1, value: 10 }
    Kv { key: 2, value: 20 }

and then assert various transactional correctness properties.
"""

import argparse

import mgclient


def setup(conn):
    """Reset the database to the two-row baseline dataset using `conn`.

    Creates the uniqueness constraint on `Kv.key` (in autocommit mode),
    deletes every `Kv` node, then writes key 1 -> 10 and key 2 -> 20 and
    verifies both values can be read back.
    """
    conn.autocommit = True
    cursor = conn.cursor()
    cursor.execute("CREATE CONSTRAINT ON (kv: Kv) ASSERT kv.key IS UNIQUE;")
    conn.commit()

    conn.autocommit = False
    cursor = conn.cursor()
    cursor.execute("MATCH (kv:Kv) DETACH DELETE kv;")
    conn.commit()

    update(cursor, 1, 10, "instantiate key 1")
    conn.commit()
    update(cursor, 2, 20, "instantiate key 2")
    conn.commit()

    assert_eq(get(cursor, 1), [10])
    assert_eq(get(cursor, 2), [20])


def update(cursor, key, value, comment=""):
    """Upsert the `Kv` node for `key` with `value` and return the stored value.

    `comment` is embedded in the query text as a Cypher `//` line comment.
    Asserts that exactly one row is returned by the MERGE.
    """
    # The comment sits on a `//` line; flatten it so embedded newlines cannot
    # spill the remainder of the comment into the executable statement.
    comment = " ".join(str(comment).splitlines())
    cursor.execute(
        """
        // %s
        MERGE (kv:Kv{ key: $key })
        SET kv.value = $value
        RETURN kv;
        """
        % comment,
        {"key": key, "value": value},
    )

    ret = cursor.fetchall()

    if len(ret) != 1:
        print(f"expected ret to be len 1, but it's {ret}")
    assert len(ret) == 1

    return ret[0][0].properties["value"]


def get(cursor, key):
    """Return the list of `value` properties of all `Kv` nodes matching `key`."""
    cursor.execute(
        """
        MATCH (kv: Kv { key: $key })
        RETURN kv;
        """,
        {"key": key},
    )

    ret = cursor.fetchall()

    return [r[0].properties["value"] for r in ret]


def assert_eq(a, b):
    """Assert `a == b`, printing both values first when they differ."""
    if a != b:
        print("expected", a, "to be", b)
    assert a == b


def assert_commit_fails(conn, failure="conflicting transactions"):
    """Commit on `conn` and require it to fail with a message containing `failure`.

    Raises:
        AssertionError: if the commit succeeds, or if it fails with a
            `mgclient.DatabaseError` whose text does not contain `failure`.
    """
    try:
        conn.commit()
    except mgclient.DatabaseError as e:
        assert failure in str(e), f"expected exception containing text {failure} but instead it was {e}"
    else:
        # The commit should always abort; reaching this point is the failure.
        print("expected transaction to fail with", failure, "but it incorrectly committed successfully")
        raise AssertionError(f"expected transaction to fail with {failure} but it incorrectly committed successfully")


def conflicting_update(cursor, key, value):
    """Attempt `update(cursor, key, value)` and require a conflict error.

    Raises:
        AssertionError: if the update succeeds, or if the raised
            `mgclient.DatabaseError` does not start with
            "Cannot resolve conflicting transactions".
    """
    try:
        update(cursor, key, value)
    except mgclient.DatabaseError as e:
        assert str(e).startswith("Cannot resolve conflicting transactions")
    else:
        raise AssertionError(f"expected update of key {key} to conflict, but it succeeded")


def select(cursor, key, expected):
    """Require that reading `key` through `cursor` yields exactly `expected`."""
    actual = get(cursor, key)
    if actual != expected:
        print("expected key", key, "to have value", expected, "but instead it had value", actual)
        raise AssertionError(f"expected key {key} to have value {expected} but instead it had value {actual}")


def select_all(cursor, expected):
    """Require that the values of all `Kv` nodes equal `expected`.

    The comparison is order-sensitive: it uses the order in which the rows
    are returned by the query.
    """
    cursor.execute("MATCH (kv: Kv {}) RETURN kv")
    actual = [r[0].properties["value"] for r in cursor.fetchall()]
    assert actual == expected, f"expected values {expected}, but instead it had values {actual}"


def g0(c1, c2):
    """G0: Write Cycles (dirty writes)"""
    cursor_1 = c1.cursor()
    cursor_2 = c2.cursor()

    update(cursor_1, 1, 11)
    conflicting_update(cursor_2, 1, 12)
    update(cursor_1, 2, 21)
    c1.commit()
    get(cursor_1, 1)
    get(cursor_1, 2)

    # If the above write of 1:12 had been staged without throwing an
    # exception, c2 would additionally need to `update(cursor_2, 2, 22)` and
    # then `assert_commit_fails(c2)`. But the interactive txn from c2 is
    # already aborted before this point, so there's nothing we need to do.

    select(cursor_1, 1, [11])
    select(cursor_1, 2, [21])
    select(cursor_2, 1, [11])
    select(cursor_2, 2, [21])

    c1.commit()
    c2.commit()

    print("✓ g0 test passed")
    return True


def g1a(c1, c2):
    """G1a: Aborted Reads (dirty reads, cascaded aborts)"""
    cursor_1 = c1.cursor()
    cursor_2 = c2.cursor()

    update(cursor_1, 1, 101)
    select(cursor_2, 1, [10])
    c1.rollback()
    select(cursor_2, 1, [10])
    c2.commit()

    print("✓ g1a test passed")
    return True


def g1b(c1, c2):
    """G1b: Intermediate Reads (dirty reads)"""
    cursor_1 = c1.cursor()
    cursor_2 = c2.cursor()

    update(cursor_1, 1, 101)
    select(cursor_2, 1, [10])
    update(cursor_1, 1, 11)
    c1.commit()
    select(cursor_2, 1, [10])
    c2.commit()

    print("✓ g1b test passed")
    return True


def g1c(c1, c2):
    """G1c: Circular Information Flow (dirty reads)"""
    cursor_1 = c1.cursor()
    cursor_2 = c2.cursor()

    update(cursor_1, 1, 11)
    update(cursor_2, 2, 22)
    select(cursor_1, 2, [20])
    select(cursor_2, 1, [10])
    c1.commit()
    c2.commit()

    print("✓ g1c test passed")
    return True


def otv(c1, c2, c3):
    """OTV: Observed Transaction Vanishes"""
    cursor_1 = c1.cursor()
    cursor_2 = c2.cursor()
    cursor_3 = c3.cursor()

    update(cursor_1, 1, 11)
    update(cursor_1, 2, 19)
    conflicting_update(cursor_2, 1, 12)
    c1.commit()
    select(cursor_3, 1, [11])
    select(cursor_3, 2, [19])
    # cursor_2 update not required due to its early-abort above
    select(cursor_3, 1, [11])
    select(cursor_3, 2, [19])
    c3.commit()

    print("✓ otv test passed")
    return True


def pmp(c1, c2):
    """Predicate-Many-Preceders"""
    cursor_1 = c1.cursor()
    cursor_2 = c2.cursor()

    select(cursor_1, 3, [])
    update(cursor_2, 3, 30)
    c2.commit()
    select(cursor_1, 3, [])
    c1.commit()

    print("✓ pmp test passed")
    return True


def pmp_write(c1, c2):
    """Predicate-Many-Preceders (write)"""
    cursor_1 = c1.cursor()
    cursor_2 = c2.cursor()

    cursor_1.execute("MATCH (kv: Kv {}) SET kv.value = kv.value + 10 RETURN kv")
    select(cursor_2, 1, [10])
    select(cursor_2, 2, [20])

    try:
        cursor_2.execute("MATCH (kv:Kv { value: 20 }) DETACH DELETE kv;")
    except mgclient.DatabaseError as e:
        assert "conflicting transactions" in str(e)
    else:
        raise AssertionError("expected the delete on c2 to conflict, but it succeeded")

    c1.commit()
    select(cursor_1, 1, [20])
    select(cursor_1, 2, [30])
    c1.commit()

    print("✓ pmp_write test passed")
    return True


def p4(c1, c2):
    """P4: Lost Update"""
    cursor_1 = c1.cursor()
    cursor_2 = c2.cursor()

    select(cursor_1, 1, [10])
    select(cursor_2, 1, [10])
    update(cursor_1, 1, 11)
    conflicting_update(cursor_2, 1, 11)
    c1.commit()

    print("✓ p4 test passed")
    return True


def g_single(c1, c2):
    """G-single: Single Anti-dependency Cycles (read skew)"""
    cursor_1 = c1.cursor()
    cursor_2 = c2.cursor()

    select(cursor_1, 1, [10])
    select(cursor_2, 1, [10])
    select(cursor_2, 2, [20])
    update(cursor_2, 1, 12)
    update(cursor_2, 2, 18)
    c2.commit()
    select(cursor_1, 2, [20])
    c1.commit()

    print("✓ g_single test passed")
    return True


def g_single_dependencies(c1, c2):
    """G-single: Single Anti-dependency Cycles (read skew) (dependencies)"""
    cursor_1 = c1.cursor()
    cursor_2 = c2.cursor()

    cursor_1.execute("MATCH (kv: Kv {}) WHERE kv.value % 5 = 0 RETURN kv")
    found = [r[0].properties["value"] for r in cursor_1.fetchall()]
    assert found == [10, 20]

    cursor_2.execute("MATCH (kv: Kv { value: 10 }) SET kv.value = 12 RETURN kv")
    c2.commit()

    cursor_1.execute("MATCH (kv: Kv {}) WHERE kv.value % 3 = 0 RETURN kv")
    found = [r[0].properties["value"] for r in cursor_1.fetchall()]
    assert found == []
    c1.commit()

    print("✓ g_single_dependencies test passed")
    return True


def g_single_write_1(c1, c2):
    """G-single: Single Anti-dependency Cycles (read skew) (write 1)"""
    cursor_1 = c1.cursor()
    cursor_2 = c2.cursor()

    select(cursor_1, 1, [10])
    select_all(cursor_2, [10, 20])
    update(cursor_2, 1, 12)
    update(cursor_2, 2, 18)
    c2.commit()

    try:
        cursor_1.execute("MATCH (kv:Kv { value: 20 }) DETACH DELETE kv;")
    except mgclient.DatabaseError as e:
        assert "conflicting transactions" in str(e)
    else:
        raise AssertionError("expected the delete on c1 to conflict, but it succeeded")

    select_all(cursor_2, [12, 18])
    c2.commit()

    print("✓ g_single_write_1 test passed")
    return True


def g_single_write_2(c1, c2):
    """G-single: Single Anti-dependency Cycles (read skew) (write 2)"""
    cursor_1 = c1.cursor()
    cursor_2 = c2.cursor()

    select(cursor_1, 1, [10])
    select_all(cursor_2, [10, 20])
    update(cursor_2, 1, 12)
    cursor_1.execute("MATCH (kv:Kv { value: 20 }) DETACH DELETE kv;")
    conflicting_update(cursor_2, 2, 18)
    c1.rollback()
    # NOTE(review): no c2.commit() here (it was commented out in the original);
    # presumably c2's transaction is already aborted by the conflict above.

    print("✓ g_single_write_2 test passed (although the abort rate is pessimistically high)")
    return True


def g2_item(c1, c2):
    """G2-item: Item Anti-dependency Cycles (write skew on disjoint read)

    Returns True when c2's commit is rejected, False (after printing a
    diagnostic) otherwise.
    """
    cursor_1 = c1.cursor()
    cursor_2 = c2.cursor()

    cursor_1.execute("MATCH (kv: Kv {}) WHERE kv.key = 1 OR kv.key = 2 RETURN kv")
    found = [r[0].properties["value"] for r in cursor_1.fetchall()]
    assert found == [10, 20]

    cursor_2.execute("MATCH (kv: Kv {}) WHERE kv.key = 1 OR kv.key = 2 RETURN kv")
    found = [r[0].properties["value"] for r in cursor_2.fetchall()]
    assert found == [10, 20]

    update(cursor_1, 1, 11)
    update(cursor_2, 2, 21)
    c1.commit()

    try:
        assert_commit_fails(c2)
    except Exception:
        # Deliberately broad: any failure here is reported as a failed test,
        # but Ctrl-C / SystemExit are no longer swallowed.
        print(
            "X g2_item test failed - database exhibits write skew -",
            "writes based on invalidated reads should have failed,",
            "causing repeatable read (PL-2.99) and serializability (PL-3)",
            "to fail to be achieved",
        )
        return False

    print("✓ g2_item test passed")
    return True


def g2(c1, c2):
    """G2: Anti-Dependency Cycles (write skew on predicate read)

    Returns True when c2's commit is rejected, False (after printing a
    diagnostic) otherwise.
    """
    cursor_1 = c1.cursor()
    cursor_2 = c2.cursor()

    cursor_1.execute("MATCH (kv: Kv {}) WHERE kv.value % 3 = 0 RETURN kv")
    found = [r[0].properties["value"] for r in cursor_1.fetchall()]
    assert found == []

    cursor_2.execute("MATCH (kv: Kv {}) WHERE kv.value % 3 = 0 RETURN kv")
    found = [r[0].properties["value"] for r in cursor_2.fetchall()]
    assert found == []

    update(cursor_1, 3, 30)
    update(cursor_2, 4, 42)
    c1.commit()

    try:
        assert_commit_fails(c2)
    except Exception:
        # Deliberately broad: any failure here is reported as a failed test,
        # but Ctrl-C / SystemExit are no longer swallowed.
        print(
            "X g2 test failed - database exhibits write skew on predicate read -",
            "concurrent transactions that should have caused a predicate read to",
            "return data in one transaction actually returned nothing in both.",
            "Both transactions committed, but one of them should have failed due",
            "to having a predicate invalidated",
        )
        return False

    print("✓ g2 test passed")
    return True


def g2_two_edges(c1, c2, c3):
    """G2: Anti-Dependency Cycles (write skew on predicate read) (two edges)

    Returns True when c1's final update is rejected as conflicting, False
    (after printing a diagnostic) otherwise.
    """
    cursor_1 = c1.cursor()
    select_all(cursor_1, [10, 20])

    cursor_2 = c2.cursor()
    cursor_2.execute("MATCH (kv: Kv { key: 2 }) SET kv.value = kv.value + 5 RETURN kv;")
    found = [r[0].properties["value"] for r in cursor_2.fetchall()]
    assert found == [25]
    c2.commit()

    cursor_3 = c3.cursor()
    select_all(cursor_3, [10, 25])
    c3.commit()

    try:
        conflicting_update(cursor_1, 1, 0)
    except Exception:
        # NOTE(review): the original rolls back c3 here even though c3 has
        # already committed and c1 holds the staged write - confirm whether
        # c1.rollback() was intended.
        c3.rollback()
        print(
            "X g2_two_edges test failed: database exhibits write skew on predicate read -",
            "a transaction's read set was invalidated in two concurrent transactions,",
            "and it should have failed to commit if we want to be serializable",
        )
        return False

    print("✓ g2_two_edges test passed")
    return True


def _connect(host, port):
    """Open a connection to the server with autocommit disabled."""
    conn = mgclient.connect(host=host, port=port)
    conn.autocommit = False
    return conn


def main():
    """Run every anomaly test against the server and print a summary."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-s", "--host", default="127.0.0.1")
    # type=int so a port given on the command line matches the int default.
    parser.add_argument("-p", "--port", type=int, default=7687)
    args = parser.parse_args()

    c1 = _connect(args.host, args.port)
    c2 = _connect(args.host, args.port)
    c3 = _connect(args.host, args.port)

    two = (c1, c2)
    three = (c1, c2, c3)
    plan = [
        (g0, two),
        (g1a, two),
        (g1b, two),
        (g1c, two),
        (otv, three),
        (pmp, two),
        (pmp_write, two),
        (p4, two),
        (g_single, two),
        (g_single_dependencies, two),
        (g_single_write_1, two),
        (g_single_write_2, two),
        (g2_item, two),
        (g2, two),
        (g2_two_edges, three),
    ]

    # Each test starts from a freshly reset dataset; results are keyed by the
    # test function's name instead of rebinding the function names themselves.
    results = {}
    for test, conns in plan:
        setup(c1)
        results[test.__name__] = test(*conns)

    g1 = all([results["g1a"], results["g1b"], results["g1c"]])
    repeatable_read = all([g1, results["g2_item"]])
    snapshot_isolation = all(
        [
            results["g0"],
            g1,
            results["otv"],
            results["pmp"],
            results["pmp_write"],
            results["p4"],
            results["g_single"],
            results["g_single_dependencies"],
            results["g_single_write_1"],
            results["g_single_write_2"],
        ]
    )
    full_serializability = all([snapshot_isolation, results["g2_item"], results["g2"], results["g2_two_edges"]])

    print("")
    print("results:")
    print("consistent view (PL-2+):", g1 and results["g_single"])
    print("snapshot isolation (PL-SI):", snapshot_isolation)
    print("repeatable read (PL-2.99):", repeatable_read)
    print("full serializability (PL-3):", full_serializability)

    print("")
    print("more information about these anomalies can be found at:")
    print("\thttps://github.com/ept/hermitage")
    print("\thttps://pmg.csail.mit.edu/papers/adya-phd.pdf")


if __name__ == "__main__":
    main()