#include "gtest/gtest.h"

#include <mutex>
#include <optional>
#include <unordered_map>
#include <vector>

#include "durability/single_node_ha/state_delta.hpp"
#include "raft/raft_interface.hpp"
#include "transactions/single_node_ha/engine.hpp"
#include "transactions/transaction.hpp"

using namespace tx;
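
// Minimal in-memory mock of raft::RaftInterface: it always reports itself as
// a stable leader (term 1, everything replicated and safe to commit) and
// records each emplaced StateDelta per transaction so tests can inspect the
// resulting log.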
class RaftMock final : public raft::RaftInterface {
 public:
  raft::DeltaStatus Emplace(const database::StateDelta &delta) override {
    // Record the delta under its transaction ID. The parameter is a const
    // reference, so it is copied into the log.
    log_[delta.transaction_id].emplace_back(delta);
    return {true, std::nullopt};
  }

  bool SafeToCommit(const tx::TransactionId &) override {
    return true;
  }

  bool IsLeader() override { return true; }

  uint64_t TermId() override { return 1; }

  raft::TxStatus TransactionStatus(uint64_t term_id,
                                   uint64_t log_index) override {
    return raft::TxStatus::REPLICATED;
  }

  // Test helper (not part of RaftInterface): returns the deltas recorded for
  // the given transaction.
  std::vector<database::StateDelta> GetLogForTx(
      const tx::TransactionId &tx_id) {
    return log_[tx_id];
  }

  std::mutex &WithLock() override { return lock_; }

 private:
  std::unordered_map<tx::TransactionId, std::vector<database::StateDelta>> log_;
  std::mutex lock_;
};
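
// Engine::Reset() should restart the transaction ID counter: a transaction
// begun after the reset must get ID 1 again.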
TEST(Engine, Reset) {
  RaftMock raft;
  Engine engine{&raft};

  auto t0 = engine.Begin();
  EXPECT_EQ(t0->id_, 1);
  engine.Commit(*t0);

  engine.Reset();

  auto t1 = engine.Begin();
  EXPECT_EQ(t1->id_, 1);
  engine.Commit(*t1);
}
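
// A transaction's lifetime should appear in the Raft log as a
// TRANSACTION_BEGIN delta followed by a TRANSACTION_COMMIT delta, both
// carrying the transaction's ID.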
TEST(Engine, TxStateDelta) {
  RaftMock raft;
  Engine engine{&raft};

  auto t0 = engine.Begin();
  tx::TransactionId tx_id = t0->id_;
  engine.Commit(*t0);

  auto t0_log = raft.GetLogForTx(tx_id);
  EXPECT_EQ(t0_log.size(), 2);

  using Type = database::StateDelta::Type;
  EXPECT_EQ(t0_log[0].type, Type::TRANSACTION_BEGIN);
  EXPECT_EQ(t0_log[0].transaction_id, tx_id);
  EXPECT_EQ(t0_log[1].type, Type::TRANSACTION_COMMIT);
  EXPECT_EQ(t0_log[1].transaction_id, tx_id);
}
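
// Note: this file defines no main(); it is expected to be linked against
// gtest_main or a harness that calls RUN_ALL_TESTS().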