mirror of
https://github.com/pingcap/tla-plus.git
synced 2024-12-27 13:00:31 +08:00
460 lines
16 KiB
Plaintext
460 lines
16 KiB
Plaintext
-------------------------- MODULE OptimizedCommitTS -------------------------

\* Specification of a two-phase-commit transaction protocol in which a
\* transaction's commit timestamp is derived from its start timestamp and the
\* last-read timestamps of the keys it prewrites (see lockKey and Prewrite).
EXTENDS Integers, FiniteSets, Sequences, TLC

\* The set of keys that transactions operate on.
CONSTANTS KEY
ASSUME KEY # {}  \* The key set must not be empty.

\* The set of clients. Each client executes one transaction.
CONSTANTS CLIENT

\* Primary key of every client (transaction): a function from CLIENT to KEY.
CONSTANTS CLIENT_PRIMARY_KEY
ASSUME CLIENT_PRIMARY_KEY \in [CLIENT -> KEY]

\* $next_ts$ is the global timestamp counter. Start(c) increments it and uses
\* the new value as the transaction's start ts, so every started transaction
\* has a unique start ts.
VARIABLES next_ts

\* $client_state[c]$ is the state of client $c$: one of "init", "working",
\* "prewriting", "committing", "committed" or "aborted".
VARIABLES client_state

\* $client_ts[c]$ is a record of [start_ts: ts, commit_ts: ts].
\* $commit_ts$ is set to $start_ts$ at first, then raised at every prewrite to
\* at least the last read timestamp of the prewritten key, and finally
\* increased by 1 when the client enters the commit phase.
VARIABLES client_ts

\* $client_key[c]$ is a record of [primary: key, secondary: {key},
\* pending: {key}]. Here, "pending" denotes the keys that have not been
\* prewritten (locked) yet.
VARIABLES client_key

\* $key_data[k]$ is the set of multi-version data of key $k$.
\* Since we don't care about the concrete value of the data, a record of
\* [ts: start_ts] is sufficient to represent one data version.
VARIABLES key_data

\* $key_lock[k]$ is the set of locks on key $k$. A lock is a record of
\* [ts: start_ts, primary: key]. If $primary$ equals $k$, it is a primary
\* lock, otherwise it is a secondary lock.
VARIABLES key_lock

\* $key_write[k]$ is the sequence of committed versions of key $k$.
\* A committed version is a record of [ts: ts, type: type,
\* start_ts: start_ts]. $type$ is "write" or "rollback" depending on the
\* record type. The $start_ts$ field only exists if type is "write". For a
\* "write" record, $ts$ denotes commit_ts; for a "rollback" record, $ts$
\* denotes start_ts.
VARIABLES key_write

\* $key_last_read_ts[k]$ denotes the last read timestamp of key $k$; it
\* should be monotonic. It is used to verify the snapshot isolation invariant
\* and to calculate the commit ts. In a real-world implementation, each region
\* will have one $region_last_read_ts$.
VARIABLES key_last_read_ts

\* $key_si[k]$ denotes whether the snapshot isolation invariant has been
\* preserved for key $k$ so far. It is a bookkeeping variable of the model and
\* will not appear in a real-world implementation.
VARIABLES key_si

\* Variable groups, used to keep UNCHANGED clauses short.
client_vars == <<client_state, client_ts, client_key>>
key_vars == <<key_data, key_lock, key_write, key_last_read_ts, key_si>>
\* All variables of the specification.
vars == <<next_ts, client_vars, key_vars>>

--------------------------------------------------------------------------------
\* Returns the set of values taken by function (or sequence) $m$.
Range(m) ==
  {m[i] : i \in DOMAIN m}

\* Returns the larger of $a$ and $b$.
Max(a, b) == IF a < b THEN b ELSE a

--------------------------------------------------------------------------------
\* Checks whether key $k$ holds a lock whose $ts$ is less than or equal to
\* the given $ts$.
hasLockLE(k, ts) ==
  \E l \in key_lock[k] : l.ts <= ts

\* Checks whether key $k$ holds a lock whose $ts$ equals the given $ts$.
hasLockEQ(k, ts) ==
  \E l \in key_lock[k] : l.ts = ts

\* Returns TRUE if lock $l$ can be cleaned up by a reader at timestamp $ts$.
\* A lock can be cleaned up iff its ts is less than or equal to $ts$.
isStaleLock(l, ts) ==
  l.ts <= ts

\* Returns TRUE if key $k$ holds at least one lock that is stale with respect
\* to $ts$ (see isStaleLock).
hasStaleLock(k, ts) ==
  \E l \in key_lock[k] : isStaleLock(l, ts)

\* Returns the set of "write" records of key $k$ whose start_ts equals $ts$.
\* The type test comes first because "rollback" records have no start_ts
\* field.
findWriteWithStartTS(k, ts) ==
  {w \in Range(key_write[k]) : (w.type = "write" /\ w.start_ts = ts)}

\* Returns the set of "write" records of key $k$ whose commit ts (the $ts$
\* field) equals $ts$.
findWriteWithCommitTS(k, ts) ==
  {w \in Range(key_write[k]) : (w.type = "write" /\ w.ts = ts)}

\* Returns TRUE if key $k$ has a "rollback" record at timestamp $ts$.
hasRollback(k, ts) ==
  \E r \in Range(key_write[k]) : r.type = "rollback" /\ r.ts = ts

\* Updates $key_si$ for key $k$ when a version of $k$ is committed at
\* $commit_ts$. If $commit_ts$ is less than or equal to the last read
\* timestamp of $k$, the snapshot isolation invariant for $k$ is considered
\* violated and $key_si[k]$ becomes FALSE; otherwise $key_si$ is unchanged.
\* This operator constrains only $key_si'$.
checkSnapshotIsolation(k, commit_ts) ==
  IF key_last_read_ts[k] >= commit_ts
  THEN
    key_si' = [key_si EXCEPT ![k] = FALSE]
  ELSE
    UNCHANGED <<key_si>>

\* Cleans up one stale lock of key $k$ (stale with respect to $ts$) and its
\* data. Constrains all key variables and leaves the client variables and
\* $next_ts$ to the caller.
\*  - A primary lock is always rolled back, because a lock that is still
\*    present has not been committed.
\*  - For a secondary lock, the outcome follows the associated primary key:
\*    1. If the primary key is committed, the secondary key is committed too.
\*    2. Otherwise the primary key is rolled back first (one step), and the
\*       secondary lock and its data are cleaned up in a later step.
cleanupStaleLock(k, ts) ==
  LET
    \* Erases lock $l$ from $key$: removes the data version and the lock, and
    \* appends a rollback record at the lock's ts.
    eraseLock(key, l) ==
      /\ key_data' = [key_data EXCEPT ![key] = @ \ {[ts |-> l.ts]}]
      /\ key_lock' = [key_lock EXCEPT ![key] = @ \ {l}]
      /\ key_write' = [key_write EXCEPT ![key] =
                         Append(@, [ts |-> l.ts, type |-> "rollback"])]
  IN
    \E l \in key_lock[k] :
      /\ isStaleLock(l, ts)
      /\ UNCHANGED <<key_last_read_ts>>
      /\ \/ /\ l.primary = k  \* this is a primary key, always rollback
                              \* because it is not committed.
            /\ eraseLock(k, l)
            /\ UNCHANGED <<key_si>>
         \/ /\ l.primary # k  \* this is a secondary key.
            /\ LET
                 ws == findWriteWithStartTS(l.primary, l.ts)
               IN
                 IF ws = {}
                 THEN
                   \* The primary key is not committed, clean up the data.
                   \* Note we should always clean up the corresponding
                   \* primary lock first, then this secondary lock.
                   IF ~hasRollback(l.primary, l.ts)
                   THEN
                     \* lockKey writes the same [ts, primary] record on
                     \* every key of a transaction, so removing $l$ from the
                     \* primary key removes the primary lock if it is there.
                     \* The rollback record is appended either way.
                     /\ eraseLock(l.primary, l)
                     /\ UNCHANGED <<key_si>>
                   ELSE
                     \* The primary key is already rolled back; roll back
                     \* this secondary key.
                     /\ eraseLock(k, l)
                     /\ UNCHANGED <<key_si>>
                 ELSE
                   \* The primary key is committed, commit the secondary key
                   \* with the same write record.
                   \E w \in ws :
                     /\ key_lock' = [key_lock EXCEPT ![k] = @ \ {l}]
                     /\ key_write' = [key_write EXCEPT ![k] = Append(@, w)]
                     /\ checkSnapshotIsolation(k, w.ts)
                     /\ UNCHANGED <<key_data>>

\* Cleans up one stale lock that client $c$ encounters on its primary key or
\* on one of its secondary keys. Staleness is judged against the client's
\* start ts.
cleanup(c) ==
  LET
    start_ts == client_ts[c].start_ts
    primary == client_key[c].primary
    secondary == client_key[c].secondary
  IN
    \/ /\ hasStaleLock(primary, start_ts)
       /\ cleanupStaleLock(primary, start_ts)
    \/ \E k \in secondary :
         /\ hasStaleLock(k, start_ts)
         /\ cleanupStaleLock(k, start_ts)

\* Reads one of the client's keys if that key has no stale lock, and advances
\* the key's last read timestamp to the client's start ts. The step is only
\* enabled while the last read timestamp is below the start ts, so
\* $key_last_read_ts[k]$ never decreases here.
readKey(c) ==
  LET
    start_ts == client_ts[c].start_ts
    primary == client_key[c].primary
    secondary == client_key[c].secondary
  IN
    \E k \in {primary} \union secondary :
      /\ ~hasStaleLock(k, start_ts)
      /\ key_last_read_ts[k] < start_ts
      /\ key_last_read_ts' = [key_last_read_ts EXCEPT ![k] = start_ts]
      /\ UNCHANGED <<key_data, key_lock, key_write, key_si>>

\* Returns TRUE if key $k$ holds no lock at all and has no write or rollback
\* record whose $ts$ is greater than or equal to the given $ts$.
canLockKey(k, ts) ==
  LET
    \* Indexes of the records in key_write[k] that are at or after $ts$.
    writes == {w \in DOMAIN key_write[k] : key_write[k][w].ts >= ts}
  IN
    /\ key_lock[k] = {}  \* no lock on the key.
    /\ writes = {}       \* no write or rollback at or after $ts$.

\* Locks key $k$ for client $c$ and places the data version. It also raises
\* the client's commit ts to at least the last read timestamp of $k$.
\*   c        - the client performing the prewrite
\*   k        - the key being locked
\*   start_ts - start ts of the transaction, stored in the lock and the data
\*   primary  - primary key of the transaction, stored in the lock
lockKey(c, k, start_ts, primary) ==
  /\ key_lock' = [key_lock EXCEPT ![k] =
                    @ \union {[ts |-> start_ts, primary |-> primary]}]
  /\ key_data' = [key_data EXCEPT ![k] = @ \union {[ts |-> start_ts]}]
  /\ client_ts' = [client_ts EXCEPT ![c].commit_ts =
                     Max(@, key_last_read_ts[k])]
  /\ UNCHANGED <<key_write, key_last_read_ts, key_si>>

\* Tries to lock one of the client's pending keys, primary or secondary, in
\* no particular order, and removes it from the pending set.
lock(c) ==
  LET
    start_ts == client_ts[c].start_ts
    primary == client_key[c].primary
    pending == client_key[c].pending
  IN
    \* Different from the normal Percolator protocol, which issues a prewrite
    \* on the primary lock first and then on the secondary locks, here we
    \* issue prewrites on both primary and secondary locks concurrently. The
    \* rollback mechanism ensures its correctness.
    \E k \in pending :
      /\ canLockKey(k, start_ts)
      /\ lockKey(c, k, start_ts, primary)
      /\ client_key' = [client_key EXCEPT ![c].pending = @ \ {k}]
      /\ UNCHANGED <<client_state>>

\* Commits the primary key of client $c$: requires the primary lock to still
\* exist, appends the "write" record at the client's commit ts, removes the
\* primary lock and updates the snapshot isolation flag of the primary key.
commitPrimary(c) ==
  LET
    start_ts == client_ts[c].start_ts
    commit_ts == client_ts[c].commit_ts
    primary == client_key[c].primary
  IN
    /\ hasLockEQ(primary, start_ts)
    /\ key_write' = [key_write EXCEPT ![primary] =
                       Append(@, [ts |-> commit_ts,
                                  type |-> "write",
                                  start_ts |-> start_ts])]
    /\ key_lock' = [key_lock EXCEPT ![primary] = @ \ {[ts |-> start_ts,
                                                       primary |-> primary]}]
    /\ checkSnapshotIsolation(primary, commit_ts)
    /\ UNCHANGED <<key_data, key_last_read_ts>>

\* Assigns $start_ts$ to the transaction of client $c$: advances $next_ts$ by
\* one and uses the new value as both the start ts and the initial commit ts.
Start(c) ==
  LET
    ts == next_ts + 1
  IN
    /\ client_state[c] = "init"
    /\ next_ts' = ts
    /\ client_state' = [client_state EXCEPT ![c] = "working"]
    /\ client_ts' = [client_ts EXCEPT ![c] = [start_ts |-> ts,
                                              commit_ts |-> ts]]
    /\ UNCHANGED <<key_vars, client_key>>

\* Does one of the following three things while the client is "working":
\* 1. Advances to the prewrite phase,
\* 2. Tries to clean up one stale lock,
\* 3. Reads one key that has no stale lock.
Get(c) ==
  /\ client_state[c] = "working"
  /\ \/ /\ client_state' = [client_state EXCEPT ![c] = "prewriting"]
        /\ UNCHANGED <<next_ts, key_vars, client_ts, client_key>>
     \/ /\ cleanup(c)
        /\ UNCHANGED <<next_ts, client_vars>>
     \/ /\ readKey(c)
        /\ UNCHANGED <<next_ts, client_vars>>

\* Enters the commit phase if all keys have been locked, bumping the commit
\* ts by 1; otherwise tries to lock one more pending key.
Prewrite(c) ==
  /\ client_state[c] = "prewriting"
  /\ IF client_key[c].pending = {}
     THEN  \* all keys have been pre-written
       /\ client_state' = [client_state EXCEPT ![c] = "committing"]
       /\ client_ts' = [client_ts EXCEPT ![c].commit_ts = @ + 1]
       /\ UNCHANGED <<key_vars, client_key, next_ts>>
     ELSE
       /\ lock(c)
       /\ UNCHANGED <<next_ts>>

\* Commits the primary key. Once the primary key is committed successfully,
\* the transaction is considered committed.
Commit(c) ==
  /\ client_state[c] = "committing"
  /\ commitPrimary(c)
  /\ client_state' = [client_state EXCEPT ![c] = "committed"]
  /\ UNCHANGED <<next_ts, client_ts, client_key>>

\* We can choose to abort at any time if not committed. Here, the aborted
\* state unifies client crash, client abort and transaction failure. The
\* client simply halts when aborted, and leaves cleanup to future
\* transactions. The guard also admits an already aborted client, in which
\* case the step changes nothing.
Abort(c) ==
  /\ client_state[c] # "committed"
  /\ client_state' = [client_state EXCEPT ![c] = "aborted"]
  /\ UNCHANGED <<next_ts, client_ts, client_key, key_vars>>

\* One step of client $c$: any of its enabled actions.
ClientOp(c) ==
  \/ Start(c)
  \/ Get(c)
  \/ Prewrite(c)
  \/ Commit(c)
  \/ Abort(c)

\* Next-state relation: some client takes one step.
Next == \E c \in CLIENT : ClientOp(c)

\* Initial state: no timestamp handed out, every client in "init" with zero
\* timestamps, no locks, data or write records, and snapshot isolation intact
\* for every key.
Init ==
  LET
    \* Builds the key record of client $c$ over key set $ks$: the primary key
    \* comes from CLIENT_PRIMARY_KEY, the rest of $ks$ are secondary keys,
    \* and all of $ks$ is pending for prewrite.
    chooseKey(ks, c) ==
      LET
        primary == CLIENT_PRIMARY_KEY[c]
      IN
        [primary |-> primary,
         secondary |-> ks \ {primary},
         pending |-> ks]
  IN
    /\ next_ts = 0
    /\ client_state = [c \in CLIENT |-> "init"]
    /\ client_ts = [c \in CLIENT |-> [start_ts |-> 0, commit_ts |-> 0]]
    /\ client_key = [c \in CLIENT |-> chooseKey(KEY, c)]
    /\ key_lock = [k \in KEY |-> {}]
    /\ key_data = [k \in KEY |-> {}]
    /\ key_write = [k \in KEY |-> <<>>]
    /\ key_last_read_ts = [k \in KEY |-> 0]
    /\ key_si = [k \in KEY |-> TRUE]

\* The complete specification: Init holds initially and every step is either
\* a Next step or leaves all variables unchanged.
PercolatorSpec == Init /\ [][Next]_vars

--------------------------------------------------------------------------------
\* $next_ts$ is a natural number.
NextTsTypeInv ==
  next_ts \in Nat

\* Every client is in one of the six known states.
ClientStateTypeInv ==
  client_state \in [CLIENT -> {"init", "working", "prewriting",
                               "committing", "committed", "aborted"}]

\* Every client carries a [start_ts, commit_ts] record of natural numbers.
ClientTsTypeInv ==
  client_ts \in [CLIENT -> [start_ts : Nat, commit_ts : Nat]]

\* Every client carries a primary key and sets of secondary and pending keys.
ClientKeyTypeInv ==
  client_key \in [CLIENT -> [primary : KEY,
                             secondary : SUBSET KEY,
                             pending : SUBSET KEY]]

\* Every key maps to a set of [ts] data versions.
KeyDataTypeInv ==
  key_data \in [KEY -> SUBSET [ts : Nat]]

\* Every key maps to a set of [ts, primary] locks.
KeyLockTypeInv ==
  key_lock \in [KEY -> SUBSET [ts : Nat, primary : KEY]]

\* Every key maps to a sequence of records, each either a "write" record
\* (with start_ts) or a "rollback" record (without start_ts).
KeyWriteTypeInv ==
  key_write \in [KEY -> Seq([ts : Nat, type : {"write"}, start_ts : Nat] \union
                            [ts : Nat, type : {"rollback"}])
                ]

\* Every key has a natural-number last read timestamp.
KeyLastReadTsTypeInv ==
  key_last_read_ts \in [KEY -> Nat]

\* Every key has a Boolean snapshot isolation flag.
KeySiTypeInv ==
  key_si \in [KEY -> BOOLEAN]

\* Conjunction of the type invariants of all variables.
TypeInvariant ==
  /\ NextTsTypeInv
  /\ ClientStateTypeInv
  /\ ClientTsTypeInv
  /\ ClientKeyTypeInv
  /\ KeyDataTypeInv
  /\ KeyLockTypeInv
  /\ KeyWriteTypeInv
  /\ KeyLastReadTsTypeInv
  /\ KeySiTypeInv

--------------------------------------------------------------------------------
\* The committed writes of one key must be in order, and no two writes can
\* overlap: an earlier "write" record's commit ts is strictly less than a
\* later "write" record's start ts. For each write, the commit ts must be
\* strictly greater than the start ts.
WriteConsistency ==
  /\ \A k \in KEY :
       \A i, j \in 1..Len(key_write[k]) :
         (/\ i < j
          /\ key_write[k][i].type = "write"
          /\ key_write[k][j].type = "write"
         ) =>
           key_write[k][i].ts < key_write[k][j].start_ts
  /\ \A k \in KEY :
       \A w \in Range(key_write[k]) :
         w.type = "write" => w.start_ts < w.ts

\* Invariants on the lock column.
LockConsistency ==
  \* There should be at most one lock for each key.
  /\ \A k \in KEY :
       Cardinality(key_lock[k]) <= 1
  \* When the client finishes prewriting and is ready for commit, if the
  \* primary lock exists, all secondary locks should exist.
  /\ \A c \in CLIENT :
       (/\ client_state[c] = "committing"
        /\ hasLockEQ(client_key[c].primary, client_ts[c].start_ts)
       ) =>
         \A k \in client_key[c].secondary :
           hasLockEQ(k, client_ts[c].start_ts)

\* For every committed client: the primary key carries exactly the client's
\* write record and no lock at or before start_ts, and every secondary key is
\* either committed with the same record or still locked and not yet written.
\* The data version of the transaction is present on all of its keys.
CommittedConsistency ==
  \A c \in CLIENT :
    LET
      start_ts == client_ts[c].start_ts
      commit_ts == client_ts[c].commit_ts
      primary == client_key[c].primary
      secondary == client_key[c].secondary
      \* The write record this transaction is expected to leave on its keys.
      w == [ts |-> commit_ts, type |-> "write", start_ts |-> start_ts]
    IN
      client_state[c] = "committed" =>
        \* The primary key lock must be cleaned up, and there is no older
        \* lock either.
        /\ ~hasLockLE(primary, start_ts)
        /\ findWriteWithCommitTS(primary, commit_ts) = {w}
        /\ [ts |-> start_ts] \in key_data[primary]
        /\ \A k \in secondary :
             \* The secondary key lock may or may not have been cleaned up.
             /\ \/ /\ ~hasLockEQ(k, start_ts)
                   /\ findWriteWithCommitTS(k, commit_ts) = {w}
                   /\ ~hasLockLE(k, start_ts - 1)
                \/ /\ hasLockEQ(k, start_ts)
                   /\ findWriteWithCommitTS(k, commit_ts) = {}
                   /\ (Len(key_write[k]) > 0 =>
                        \* Lock has not been cleaned up, so the committed
                        \* timestamp of the last write must be less than the
                        \* lock's start_ts.
                        key_write[k][Len(key_write[k])].ts < start_ts)
             /\ [ts |-> start_ts] \in key_data[k]

\* If a transaction is aborted, its primary key must not be committed.
\* Clients whose commit_ts is still the initial 0 (aborted before Start) are
\* skipped.
AbortedConsistency ==
  \A c \in CLIENT :
    (/\ client_state[c] = "aborted"
     /\ client_ts[c].commit_ts # 0
    ) =>
      findWriteWithStartTS(client_key[c].primary, client_ts[c].start_ts) = {}

\* For each transaction that has been assigned a start ts, we cannot have
\* both committed and rolled back keys.
RollbackConsistency ==
  \A c \in CLIENT :
    LET
      start_ts == client_ts[c].start_ts
      hasWriteKey == \E k \in KEY : findWriteWithStartTS(k, start_ts) # {}
      hasRollbackKey == \E k \in KEY : hasRollback(k, start_ts)
    IN
      start_ts > 0 => ~ (hasWriteKey /\ hasRollbackKey)

\* For each key, each write or rollback record in the write column should
\* have a unique start_ts: the number of distinct start timestamps equals the
\* number of records.
UniqueWrite ==
  LET
    \* Start ts of a record: the start_ts field of a "write" record, the ts
    \* field of a "rollback" record.
    getStartTs(w) ==
      IF w.type = "write"
      THEN w.start_ts
      ELSE w.ts
  IN
    \A k \in KEY :
      Cardinality({getStartTs(w) : w \in Range(key_write[k])}) = Len(key_write[k])

\* The snapshot isolation invariant should be preserved for every key, i.e.
\* no $key_si[k]$ flag has been cleared by checkSnapshotIsolation.
SnapshotIsolation ==
  \A k \in KEY :
    key_si[k]

--------------------------------------------------------------------------------
\* Safety: every behavior of the specification always satisfies the
\* invariants listed below.
\* NOTE(review): RollbackConsistency and UniqueWrite are defined in this
\* module but are not part of this theorem; confirm whether they are meant
\* to be checked separately or should be added here.
THEOREM Safety ==
  PercolatorSpec => [](/\ TypeInvariant
                       /\ WriteConsistency
                       /\ LockConsistency
                       /\ CommittedConsistency
                       /\ AbortedConsistency
                       /\ SnapshotIsolation)

================================================================================