talent-plan-tinykv/scheduler/server/member/leader.go
// Copyright 2016 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package member

import (
	"context"
	"fmt"
	"math/rand"
	"path"
	"strconv"
	"strings"
	"sync/atomic"
	"time"

	"github.com/pingcap-incubator/tinykv/proto/pkg/schedulerpb"
	"github.com/pingcap-incubator/tinykv/scheduler/pkg/etcdutil"
	"github.com/pingcap-incubator/tinykv/scheduler/server/config"
	"github.com/pingcap-incubator/tinykv/scheduler/server/kv"
	"github.com/pingcap/log"
	"github.com/pkg/errors"
	"go.etcd.io/etcd/clientv3"
	"go.etcd.io/etcd/embed"
	"go.etcd.io/etcd/mvcc/mvccpb"
	"go.uber.org/zap"
)

const (
	// The timeout to wait for the etcd leader transfer to complete.
	moveLeaderTimeout = 5 * time.Second
	requestTimeout    = etcdutil.DefaultRequestTimeout
	slowRequestTime   = etcdutil.DefaultSlowRequestTime
)

// Member is used for the election related logic.
type Member struct {
	leader atomic.Value
	// Etcd and cluster information.
	etcd     *embed.Etcd
	client   *clientv3.Client
	id       uint64              // etcd server id.
	member   *schedulerpb.Member // current PD's info.
	rootPath string
	// memberValue is the serialized string of `member`. It will be saved in the
	// etcd leader key when the PD node is successfully elected as the leader
	// of the cluster. Every write will use it to check leadership.
	memberValue string
}

// NewMember creates a new Member.
func NewMember(etcd *embed.Etcd, client *clientv3.Client, id uint64) *Member {
	return &Member{
		etcd:   etcd,
		client: client,
		id:     id,
	}
}

// ID returns the unique etcd ID of this server in the etcd cluster.
func (m *Member) ID() uint64 {
	return m.id
}

// MemberValue returns the member value.
func (m *Member) MemberValue() string {
	return m.memberValue
}

// Member returns the member.
func (m *Member) Member() *schedulerpb.Member {
	return m.member
}

// Etcd returns etcd related information.
func (m *Member) Etcd() *embed.Etcd {
	return m.etcd
}

// IsLeader returns whether the server is the leader or not.
func (m *Member) IsLeader() bool {
	// If the server is not started yet, both the leader ID and the member ID could be 0.
	return m.GetLeaderID() == m.ID()
}

// GetLeaderID returns the current leader's member ID.
func (m *Member) GetLeaderID() uint64 {
	return m.GetLeader().GetMemberId()
}

// GetLeader returns the current leader of the PD cluster.
func (m *Member) GetLeader() *schedulerpb.Member {
	leader := m.leader.Load()
	if leader == nil {
		return nil
	}
	member := leader.(*schedulerpb.Member)
	if member.GetMemberId() == 0 {
		return nil
	}
	return member
}

// EnableLeader sets the member itself as the leader.
func (m *Member) EnableLeader() {
	m.leader.Store(m.member)
}

// DisableLeader resets the leader value.
func (m *Member) DisableLeader() {
	m.leader.Store(&schedulerpb.Member{})
}

// GetLeaderPath returns the path of the leader.
func (m *Member) GetLeaderPath() string {
	return path.Join(m.rootPath, "leader")
}

// CheckLeader checks who the current leader is. The returned bool is true when
// the caller should retry the check later.
func (m *Member) CheckLeader(name string) (*schedulerpb.Member, int64, bool) {
	if m.GetEtcdLeader() == 0 {
		log.Error("no etcd leader, check leader later")
		time.Sleep(200 * time.Millisecond)
		return nil, 0, true
	}

	leader, rev, err := getLeader(m.client, m.GetLeaderPath())
	if err != nil {
		log.Error("get leader meet error", zap.Error(err))
		time.Sleep(200 * time.Millisecond)
		return nil, 0, true
	}
	if leader != nil {
		if m.isSameLeader(leader) {
			// We are already the leader, which means something went wrong in the
			// previous CampaignLeader. Delete the stale leader key and campaign again.
			log.Warn("the leader has not changed, delete and campaign again", zap.Stringer("old-leader", leader))
			if err = m.deleteLeaderKey(); err != nil {
				log.Error("delete leader key meet error", zap.Error(err))
				time.Sleep(200 * time.Millisecond)
				return nil, 0, true
			}
		}
	}
	return leader, rev, false
}

// CheckPriority checks whether the etcd leader should be moved according to
// the members' leader priorities, and transfers the etcd leadership if this
// member has a higher priority than the current etcd leader.
func (m *Member) CheckPriority(ctx context.Context) {
	etcdLeader := m.GetEtcdLeader()
	if etcdLeader == m.ID() || etcdLeader == 0 {
		return
	}
	myPriority, err := m.GetMemberLeaderPriority(m.ID())
	if err != nil {
		log.Error("failed to load leader priority", zap.Error(err))
		return
	}
	leaderPriority, err := m.GetMemberLeaderPriority(etcdLeader)
	if err != nil {
		log.Error("failed to load etcd leader priority", zap.Error(err))
		return
	}
	if myPriority > leaderPriority {
		err := m.MoveEtcdLeader(ctx, etcdLeader, m.ID())
		if err != nil {
			log.Error("failed to transfer etcd leader", zap.Error(err))
		} else {
			log.Info("transfer etcd leader",
				zap.Uint64("from", etcdLeader),
				zap.Uint64("to", m.ID()))
		}
	}
}

// MoveEtcdLeader tries to transfer the etcd leadership.
func (m *Member) MoveEtcdLeader(ctx context.Context, old, new uint64) error {
	moveCtx, cancel := context.WithTimeout(ctx, moveLeaderTimeout)
	defer cancel()
	return errors.WithStack(m.etcd.Server.MoveLeader(moveCtx, old, new))
}

// getLeader gets the server leader from etcd.
func getLeader(c *clientv3.Client, leaderPath string) (*schedulerpb.Member, int64, error) {
	leader := &schedulerpb.Member{}
	ok, rev, err := etcdutil.GetProtoMsgWithModRev(c, leaderPath, leader)
	if err != nil {
		return nil, 0, err
	}
	if !ok {
		return nil, 0, nil
	}
	return leader, rev, nil
}

// GetEtcdLeader returns the etcd leader ID.
func (m *Member) GetEtcdLeader() uint64 {
	return m.etcd.Server.Lead()
}
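
// isSameLeader returns whether the given leader record carries this member's ID.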
func (m *Member) isSameLeader(leader *schedulerpb.Member) bool {
	return leader.GetMemberId() == m.ID()
}

// MemberInfo initializes the member info.
func (m *Member) MemberInfo(cfg *config.Config, name string, rootPath string) {
	leader := &schedulerpb.Member{
		Name:       name,
		MemberId:   m.ID(),
		ClientUrls: strings.Split(cfg.AdvertiseClientUrls, ","),
		PeerUrls:   strings.Split(cfg.AdvertisePeerUrls, ","),
	}

	data, err := leader.Marshal()
	if err != nil {
		// can't fail, so panic here.
		log.Fatal("marshal leader meet error", zap.Stringer("leader", leader), zap.Error(err))
	}
	m.member = leader
	m.memberValue = string(data)
	m.rootPath = rootPath
}

// CampaignLeader is used to campaign for the PD leadership.
func (m *Member) CampaignLeader(lease *LeaderLease, leaseTimeout int64) error {
	err := lease.Grant(leaseTimeout)
	if err != nil {
		return err
	}

	leaderKey := m.GetLeaderPath()
	// The leader key must not exist, so the CreateRevision is 0.
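	// The put is bound to the lease, so if this member later fails to keep the
	// lease alive, etcd removes the leader key automatically and the other
	// members can campaign again.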
	resp, err := kv.NewSlowLogTxn(m.client).
		If(clientv3.Compare(clientv3.CreateRevision(leaderKey), "=", 0)).
		Then(clientv3.OpPut(leaderKey, m.memberValue, clientv3.WithLease(lease.ID))).
		Commit()
	if err != nil {
		return errors.WithStack(err)
	}
	if !resp.Succeeded {
		return errors.New("failed to campaign leader, other server may campaign ok")
	}
	return nil
}

// ResignLeader resigns current PD's leadership. If nextLeader is empty, all
// other pd-servers can campaign.
func (m *Member) ResignLeader(ctx context.Context, from string, nextLeader string) error {
	log.Info("try to resign leader to next leader", zap.String("from", from), zap.String("to", nextLeader))
	// Determine next leaders.
	var leaderIDs []uint64
	res, err := etcdutil.ListEtcdMembers(m.client)
	if err != nil {
		return err
	}
	for _, member := range res.Members {
		if (nextLeader == "" && member.ID != m.id) || (nextLeader != "" && member.Name == nextLeader) {
			leaderIDs = append(leaderIDs, member.GetID())
		}
	}
	if len(leaderIDs) == 0 {
		return errors.New("no valid pd to transfer leader")
	}
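	// Pick one of the candidates at random and transfer the etcd leadership to it.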
	nextLeaderID := leaderIDs[rand.Intn(len(leaderIDs))]
	return m.MoveEtcdLeader(ctx, m.ID(), nextLeaderID)
}

// LeaderTxn returns txn() with a leader comparison to guarantee that
// the transaction can be executed only if the server is leader.
func (m *Member) LeaderTxn(cs ...clientv3.Cmp) clientv3.Txn {
	txn := kv.NewSlowLogTxn(m.client)
	return txn.If(append(cs, m.leaderCmp())...)
}
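
// getMemberLeaderPriorityPath returns the etcd key that stores the leader
// priority of the given member ID.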
func (m *Member) getMemberLeaderPriorityPath(id uint64) string {
	return path.Join(m.rootPath, fmt.Sprintf("member/%d/leader_priority", id))
}

// SetMemberLeaderPriority saves a member's priority to be elected as the etcd leader.
func (m *Member) SetMemberLeaderPriority(id uint64, priority int) error {
	key := m.getMemberLeaderPriorityPath(id)
	res, err := m.LeaderTxn().Then(clientv3.OpPut(key, strconv.Itoa(priority))).Commit()
	if err != nil {
		return errors.WithStack(err)
	}
	if !res.Succeeded {
		return errors.New("save leader priority failed, maybe not leader")
	}
	return nil
}

// DeleteMemberLeaderPriority removes a member's priority config.
func (m *Member) DeleteMemberLeaderPriority(id uint64) error {
	key := m.getMemberLeaderPriorityPath(id)
	res, err := m.LeaderTxn().Then(clientv3.OpDelete(key)).Commit()
	if err != nil {
		return errors.WithStack(err)
	}
	if !res.Succeeded {
		return errors.New("delete leader priority failed, maybe not leader")
	}
	return nil
}

// GetMemberLeaderPriority loads a member's priority to be elected as the etcd leader.
func (m *Member) GetMemberLeaderPriority(id uint64) (int, error) {
	key := m.getMemberLeaderPriorityPath(id)
	res, err := etcdutil.EtcdKVGet(m.client, key)
	if err != nil {
		return 0, err
	}
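	// A missing key means the member uses the default priority 0.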
	if len(res.Kvs) == 0 {
		return 0, nil
	}
	priority, err := strconv.ParseInt(string(res.Kvs[0].Value), 10, 32)
	if err != nil {
		return 0, errors.WithStack(err)
	}
	return int(priority), nil
}
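
// deleteLeaderKey removes this member's leader key through a leader-guarded
// transaction, so the delete only succeeds while we still hold the leadership.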
func (m *Member) deleteLeaderKey() error {
	// delete leader itself and let others start a new election again.
	leaderKey := m.GetLeaderPath()
	resp, err := m.LeaderTxn().Then(clientv3.OpDelete(leaderKey)).Commit()
	if err != nil {
		return errors.WithStack(err)
	}
	if !resp.Succeeded {
		return errors.New("resign leader failed, we are not leader already")
	}
	return nil
}
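
// leaderCmp returns a comparison on the leader key's value against this
// member's serialized info, so a guarded transaction only succeeds while this
// member is still the leader.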
func (m *Member) leaderCmp() clientv3.Cmp {
	return clientv3.Compare(clientv3.Value(m.GetLeaderPath()), "=", m.memberValue)
}

// WatchLeader is used to watch the changes of the leader.
func (m *Member) WatchLeader(serverCtx context.Context, leader *schedulerpb.Member, revision int64) {
	m.leader.Store(leader)
	defer m.leader.Store(&schedulerpb.Member{})

	watcher := clientv3.NewWatcher(m.client)
	defer watcher.Close()
	ctx, cancel := context.WithCancel(serverCtx)
	defer cancel()

	// The revision is the revision of the last modification on the leader key.
	// If that revision has already been compacted, the watch fails with a
	// "required revision has been compacted" error; in this case, re-watch the
	// key from the compact revision.
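	// The outer loop re-creates the watch whenever the inner loop exits without
	// observing a deletion of the leader key, e.g. after a compaction.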
	for {
		rch := watcher.Watch(ctx, m.GetLeaderPath(), clientv3.WithRev(revision))
		for wresp := range rch {
			// meet compacted error, use the compact revision.
			if wresp.CompactRevision != 0 {
				log.Warn("required revision has been compacted, use the compact revision",
					zap.Int64("required-revision", revision),
					zap.Int64("compact-revision", wresp.CompactRevision))
				revision = wresp.CompactRevision
				break
			}
			if wresp.Canceled {
				log.Error("leader watcher is canceled with", zap.Int64("revision", revision), zap.Error(wresp.Err()))
				return
			}
			for _, ev := range wresp.Events {
				if ev.Type == mvccpb.DELETE {
					log.Info("leader is deleted")
					return
				}
			}
		}

		select {
		case <-ctx.Done():
			// server closed, return
			return
		default:
		}
	}
}

// Close gracefully shuts down all servers/listeners.
func (m *Member) Close() {
	m.Etcd().Close()
}