talent-plan-tinykv/scheduler/server/schedulers/balance_test.go

500 lines
16 KiB
Go
Raw Normal View History

// Copyright 2017 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.
package schedulers
import (
"context"
"github.com/pingcap-incubator/tinykv/proto/pkg/metapb"
"github.com/pingcap-incubator/tinykv/scheduler/pkg/mock/mockcluster"
"github.com/pingcap-incubator/tinykv/scheduler/pkg/mock/mockoption"
"github.com/pingcap-incubator/tinykv/scheduler/pkg/testutil"
"github.com/pingcap-incubator/tinykv/scheduler/server/core"
"github.com/pingcap-incubator/tinykv/scheduler/server/kv"
"github.com/pingcap-incubator/tinykv/scheduler/server/schedule"
"github.com/pingcap-incubator/tinykv/scheduler/server/schedule/checker"
"github.com/pingcap-incubator/tinykv/scheduler/server/schedule/operator"
. "github.com/pingcap/check"
)
// newTestReplication overwrites the MaxReplicas field of the given mock
// schedule options with maxReplicas.
func newTestReplication(mso *mockoption.ScheduleOptions, maxReplicas int) {
mso.MaxReplicas = maxReplicas
}
// testBalanceRegionSchedulerSuite groups the balance-region scheduler tests.
// It holds a cancellable context (and its cancel function) that the tests
// pass to the operator controllers they create.
type testBalanceRegionSchedulerSuite struct {
	ctx    context.Context
	cancel context.CancelFunc
}

// Register the suite with the gocheck runner.
var _ = Suite(&testBalanceRegionSchedulerSuite{})
// SetUpSuite is the gocheck suite-level fixture. It derives a cancellable
// context from the background context and stores both the context and its
// cancel function on the suite.
func (s *testBalanceRegionSchedulerSuite) SetUpSuite(c *C) {
	ctx, cancel := context.WithCancel(context.Background())
	s.ctx = ctx
	s.cancel = cancel
}
// TearDownSuite is the gocheck suite-level teardown. It cancels the context
// created in SetUpSuite.
func (s *testBalanceRegionSchedulerSuite) TearDownSuite(c *C) {
s.cancel()
}
// TestReplicas13C drives the balance-region scheduler on a four-store cluster
// with MaxReplicas set to 1. It expects the single peer (together with the
// leader) to move from the fullest store to the emptiest usable one, an
// offline store to be skipped, and no operator at all while MaxReplicas is 3.
func (s *testBalanceRegionSchedulerSuite) TestReplicas13C(c *C) {
	opts := mockoption.NewScheduleOptions()
	cluster := mockcluster.NewCluster(opts)
	opController := schedule.NewOperatorController(s.ctx, nil, nil)
	storage := core.NewStorage(kv.NewMemoryKV())

	balancer, err := schedule.CreateScheduler("balance-region", opController, storage, nil)
	c.Assert(err, IsNil)
	// nextOp asks the scheduler for one operator against the mock cluster.
	nextOp := func() *operator.Operator { return balancer.Schedule(cluster) }

	opts.SetMaxReplicas(1)

	// Stores 1-4 hold 6, 8, 8 and 16 regions respectively.
	for _, st := range []struct {
		id      uint64
		regions int
	}{{1, 6}, {2, 8}, {3, 8}, {4, 16}} {
		cluster.AddRegionStore(st.id, st.regions)
	}
	// Region 1 has its leader on store 4.
	cluster.AddLeaderRegion(1, 4)
	testutil.CheckTransferPeerWithLeaderTransfer(c, nextOp(), operator.OpBalance, 4, 1)

	// stateFilter: once store 1 is offline it is filtered out, and store 2
	// (now 6 regions) becomes the store with the fewest regions.
	cluster.SetStoreOffline(1)
	cluster.UpdateRegionCount(2, 6)
	testutil.CheckTransferPeerWithLeaderTransfer(c, nextOp(), operator.OpBalance, 4, 2)

	// With MaxReplicas raised to 3 the scheduler must return nothing ...
	opts.SetMaxReplicas(3)
	c.Assert(nextOp(), IsNil)

	// ... and it schedules again once MaxReplicas is back to 1.
	opts.SetMaxReplicas(1)
	c.Assert(nextOp(), NotNil)
}
// TestReplicas33C drives the balance-region scheduler with MaxReplicas set to
// 3. Region 1 has peers on stores 1, 2 and 3; the test adds progressively
// emptier stores and expects the peer on store 1 to be moved to the emptiest
// store that is still up.
func (s *testBalanceRegionSchedulerSuite) TestReplicas33C(c *C) {
	opts := mockoption.NewScheduleOptions()
	cluster := mockcluster.NewCluster(opts)
	opController := schedule.NewOperatorController(s.ctx, nil, nil)
	newTestReplication(opts, 3)
	storage := core.NewStorage(kv.NewMemoryKV())

	balancer, err := schedule.CreateScheduler("balance-region", opController, storage, nil)
	c.Assert(err, IsNil)
	// nextOp asks the scheduler for one operator against the mock cluster.
	nextOp := func() *operator.Operator { return balancer.Schedule(cluster) }
	// expectMoveFromStore1 checks that the next operator moves a peer from
	// store 1 to the given target store.
	expectMoveFromStore1 := func(target uint64) {
		testutil.CheckTransferPeer(c, nextOp(), operator.OpBalance, 1, target)
	}

	// Store 1 has the largest region score, so the balancer tries to replace
	// the peer on store 1.
	cluster.AddRegionStore(1, 16)
	cluster.AddRegionStore(2, 15)
	cluster.AddRegionStore(3, 14)
	cluster.AddLeaderRegion(1, 1, 2, 3)
	// There is no other store to move the peer to yet.
	c.Assert(nextOp(), IsNil)

	// Store 4 has a smaller region score than store 1.
	cluster.AddRegionStore(4, 2)
	expectMoveFromStore1(4)

	// Store 5 has a smaller region score than store 4.
	cluster.AddRegionStore(5, 1)
	expectMoveFromStore1(5)

	// Store 6 has a smaller region score than store 5.
	cluster.AddRegionStore(6, 0)
	expectMoveFromStore1(6)

	// If store 6 is not available, store 5 is chosen instead.
	cluster.SetStoreDown(6)
	expectMoveFromStore1(5)

	// Take down stores 4, 5 and 6 (store 6 is marked down a second time).
	for _, id := range []uint64{4, 5, 6} {
		cluster.SetStoreDown(id)
	}

	// Store 7 is the only remaining candidate, but it has a larger region
	// score than store 1, so nothing is scheduled.
	cluster.AddRegionStore(7, 20)
	c.Assert(nextOp(), IsNil)
}
// TestReplicas53C drives the balance-region scheduler with MaxReplicas set to
// 5. It re-registers region 1 on different store sets and checks which peer
// the scheduler moves and where it moves it to.
func (s *testBalanceRegionSchedulerSuite) TestReplicas53C(c *C) {
	opts := mockoption.NewScheduleOptions()
	cluster := mockcluster.NewCluster(opts)
	opController := schedule.NewOperatorController(s.ctx, nil, nil)
	newTestReplication(opts, 5)
	storage := core.NewStorage(kv.NewMemoryKV())

	balancer, err := schedule.CreateScheduler("balance-region", opController, storage, nil)
	c.Assert(err, IsNil)
	// expectMove checks that the next operator moves a peer from one store
	// to another.
	expectMove := func(from, to uint64) {
		testutil.CheckTransferPeer(c, balancer.Schedule(cluster), operator.OpBalance, from, to)
	}

	// Stores 1-5 hold 4, 5, 6, 7 and 28 regions; region 1 has a peer on each.
	for i, regions := range []int{4, 5, 6, 7, 28} {
		cluster.AddRegionStore(uint64(i+1), regions)
	}
	cluster.AddLeaderRegion(1, 1, 2, 3, 4, 5)

	// Store 6 has a smaller region score than any store holding a peer.
	cluster.AddRegionStore(6, 1)
	expectMove(5, 6)

	// Store 7 has a larger region score than store 6, so store 6 is still
	// the expected target.
	cluster.AddRegionStore(7, 5)
	expectMove(5, 6)

	// Region 1 is re-registered on stores 2-6; store 1 no longer holds a
	// peer and is now the expected target.
	cluster.AddLeaderRegion(1, 2, 3, 4, 5, 6)
	expectMove(5, 1)

	// Region 1 is re-registered on stores 2, 3, 11, 12 and 13. Store 11
	// holds the most regions, and the peer is expected to go to store 6.
	cluster.AddRegionStore(11, 29)
	cluster.AddRegionStore(12, 8)
	cluster.AddRegionStore(13, 7)
	cluster.AddLeaderRegion(1, 2, 3, 11, 12, 13)
	expectMove(11, 6)
}
// TestReplacePendingRegion3C checks that, among three regions sharing the same
// stores, the balance-region scheduler picks the one with a pending peer
// (region 3) and moves its peer from store 1 to store 4.
func (s *testBalanceRegionSchedulerSuite) TestReplacePendingRegion3C(c *C) {
	opt := mockoption.NewScheduleOptions()
	tc := mockcluster.NewCluster(opt)
	oc := schedule.NewOperatorController(s.ctx, nil, nil)
	newTestReplication(opt, 3)

	sb, err := schedule.CreateScheduler("balance-region", oc, core.NewStorage(kv.NewMemoryKV()), nil)
	c.Assert(err, IsNil)

	tc.AddRegionStore(1, 16)
	tc.AddRegionStore(2, 7)
	tc.AddRegionStore(3, 15)
	tc.AddRegionStore(4, 10)

	// Regions 1-3 all have peers on stores 1, 2 and 3.
	tc.AddLeaderRegion(1, 1, 2, 3)
	tc.AddLeaderRegion(2, 1, 2, 3)
	tc.AddLeaderRegion(3, 2, 1, 3)

	// Mark region 3's peer on store 1 as pending.
	region := tc.GetRegion(3)
	region = region.Clone(core.WithPendingPeers([]*metapb.Peer{region.GetStorePeer(1)}))
	tc.PutRegion(region)

	// Region 3 has a pending peer, so it is the region expected to be picked.
	// Assert the operator exists before dereferencing it, so that a missing
	// operator is reported as a failed assertion instead of a nil-pointer panic.
	op := sb.Schedule(tc)
	c.Assert(op, NotNil)
	c.Assert(op.RegionID(), Equals, uint64(3))

	// The peers of region 3 are on stores 2, 1 and 3, so store 4 is the only
	// possible target.
	testutil.CheckTransferPeer(c, sb.Schedule(tc), operator.OpBalance, 1, 4)
}
// testBalanceLeaderSchedulerSuite groups the balance-leader scheduler tests.
// SetUpTest rebuilds every field, so each test starts from a fresh mock
// cluster, operator controller and scheduler.
type testBalanceLeaderSchedulerSuite struct {
	ctx    context.Context
	cancel context.CancelFunc
	tc     *mockcluster.Cluster
	lb     schedule.Scheduler
	oc     *schedule.OperatorController
}

// Register the suite with the gocheck runner.
var _ = Suite(&testBalanceLeaderSchedulerSuite{})
// SetUpTest is the gocheck per-test fixture. It creates a cancellable context,
// a mock cluster with fresh schedule options, an operator controller, and a
// "balance-leader" scheduler backed by an in-memory KV store.
func (s *testBalanceLeaderSchedulerSuite) SetUpTest(c *C) {
	ctx, cancel := context.WithCancel(context.Background())
	s.ctx, s.cancel = ctx, cancel

	s.tc = mockcluster.NewCluster(mockoption.NewScheduleOptions())
	s.oc = schedule.NewOperatorController(ctx, nil, nil)

	storage := core.NewStorage(kv.NewMemoryKV())
	leaderBalancer, err := schedule.CreateScheduler("balance-leader", s.oc, storage, nil)
	c.Assert(err, IsNil)
	s.lb = leaderBalancer
}
// TearDownTest is the gocheck per-test teardown. It cancels the context
// created in SetUpTest.
func (s *testBalanceLeaderSchedulerSuite) TearDownTest(c *C) {
s.cancel()
}
// schedule runs the suite's balance-leader scheduler once against the suite's
// mock cluster and returns the operator it produced (nil when it produced none).
func (s *testBalanceLeaderSchedulerSuite) schedule() *operator.Operator {
return s.lb.Schedule(s.tc)
}
// TestBalanceLimit checks that the balance-leader scheduler only produces an
// operator when the leader counts differ enough: small gaps yield nothing,
// large gaps yield an operator.
func (s *testBalanceLeaderSchedulerSuite) TestBalanceLimit(c *C) {
	cluster := s.tc

	// Stores:     1    2    3    4
	// Leaders:    1    0    0    0
	// Region1:    L    F    F    F
	for i, leaders := range []int{1, 0, 0, 0} {
		cluster.AddLeaderStore(uint64(i+1), leaders)
	}
	cluster.AddLeaderRegion(1, 1, 2, 3, 4)
	c.Check(s.schedule(), IsNil)

	// Stores:     1    2    3    4
	// Leaders:    16   0    0    0
	// Region1:    L    F    F    F
	cluster.UpdateLeaderCount(1, 16)
	c.Check(s.schedule(), NotNil)

	// Stores:     1    2    3    4
	// Leaders:    7    8    9    10
	// Region1:    F    F    F    L
	for i, leaders := range []int{7, 8, 9, 10} {
		cluster.UpdateLeaderCount(uint64(i+1), leaders)
	}
	cluster.AddLeaderRegion(1, 4, 1, 2, 3)
	c.Check(s.schedule(), IsNil)

	// Stores:     1    2    3    4
	// Leaders:    7    8    9    18
	// Region1:    F    F    F    L
	cluster.UpdateLeaderCount(4, 18)
	c.Check(s.schedule(), NotNil)
}
// TestBalanceLeaderScheduleStrategy gives every store the same leader count
// but a very different leader size on store 1, and expects the balance-leader
// scheduler to produce no operator.
func (s *testBalanceLeaderSchedulerSuite) TestBalanceLeaderScheduleStrategy(c *C) {
	cluster := s.tc

	// Stores:         1      2    3    4
	// Leader Count:   10     10   10   10
	// Leader Size :   10000  100  100  100
	// Region1:        L      F    F    F
	for i, size := range []int64{10000, 100, 100, 100} {
		cluster.AddLeaderStore(uint64(i+1), 10, size)
	}
	cluster.AddLeaderRegion(1, 1, 2, 3, 4)

	op := s.schedule()
	c.Check(op, IsNil)
}
// TestBalanceLeaderTolerantRatio checks that a leader-count gap of 4 (14 vs 10)
// produces no operator, while a gap of 11 (21 vs 10) does.
func (s *testBalanceLeaderSchedulerSuite) TestBalanceLeaderTolerantRatio(c *C) {
	cluster := s.tc

	// The default leader tolerant ratio is 5 when scheduling by count.
	// Stores:         1       2    3    4
	// Leader Count:   14->21  10   10   10
	// Leader Size :   100     100  100  100
	// Region1:        L       F    F    F
	for i, leaders := range []int{14, 10, 10, 10} {
		cluster.AddLeaderStore(uint64(i+1), leaders, 100)
	}
	cluster.AddLeaderRegion(1, 1, 2, 3, 4)

	// 14 leaders on store 1: no operator is expected.
	c.Check(s.schedule(), IsNil)
	leadersOnStore1 := cluster.GetStore(1).GetLeaderCount()
	c.Assert(leadersOnStore1, Equals, 14)

	// Re-adding store 1 with 21 leaders must be reflected by the cluster and
	// must make the scheduler produce an operator.
	cluster.AddLeaderStore(1, 21, 100)
	leadersOnStore1 = cluster.GetStore(1).GetLeaderCount()
	c.Assert(leadersOnStore1, Equals, 21)
	c.Check(s.schedule(), NotNil)
}
// TestBalanceFilter checks how store state affects the balance-leader target:
// an offline source is still used, while down, busy and disconnected stores
// are skipped as targets.
func (s *testBalanceLeaderSchedulerSuite) TestBalanceFilter(c *C) {
	cluster := s.tc
	// expectLeaderFrom4 checks that the next operator transfers the leader
	// from store 4 to the given target store.
	expectLeaderFrom4 := func(target uint64) {
		testutil.CheckTransferLeader(c, s.schedule(), operator.OpBalance, 4, target)
	}

	// Stores:     1    2    3    4
	// Leaders:    1    2    3    16
	// Region1:    F    F    F    L
	for i, leaders := range []int{1, 2, 3, 16} {
		cluster.AddLeaderStore(uint64(i+1), leaders)
	}
	cluster.AddLeaderRegion(1, 4, 1, 2, 3)
	expectLeaderFrom4(1)

	// stateFilter: an offline store 4 is still considered, because it still
	// provides services.
	cluster.SetStoreOffline(4)
	expectLeaderFrom4(1)

	// A down store 1 is filtered out, so store 2 becomes the store with the
	// fewest leaders.
	cluster.SetStoreDown(1)
	expectLeaderFrom4(2)

	// healthFilter: a busy store 2 is filtered out, so store 3 becomes the
	// store with the fewest leaders.
	cluster.SetStoreBusy(2, true)
	expectLeaderFrom4(3)

	// disconnectFilter: with store 3 disconnected no operator can be created.
	cluster.SetStoreDisconnect(3)
	c.Assert(s.schedule(), IsNil)
}
// TestBalanceSelector checks which source and target stores the balance-leader
// scheduler selects as leader counts and region placements change.
func (s *testBalanceLeaderSchedulerSuite) TestBalanceSelector(c *C) {
	cluster := s.tc
	// setLeaders (re-)adds stores 1..len(counts) with the given leader counts.
	setLeaders := func(counts ...int) {
		for i, leaders := range counts {
			cluster.AddLeaderStore(uint64(i+1), leaders)
		}
	}
	// expectTransfer checks that the next operator transfers a leader from
	// one store to another.
	expectTransfer := func(from, to uint64) {
		testutil.CheckTransferLeader(c, s.schedule(), operator.OpBalance, from, to)
	}

	// Stores:     1    2    3    4
	// Leaders:    1    2    3    16
	// Region1:    -    F    F    L
	// Region2:    F    F    L    -
	setLeaders(1, 2, 3, 16)
	cluster.AddLeaderRegion(1, 4, 2, 3)
	cluster.AddLeaderRegion(2, 3, 1, 2)
	// Store 4 has the max leader score and store 1 the min leader score.
	// The scheduler tries to move a leader out of store 4 first.
	expectTransfer(4, 2)

	// Stores:     1    2    3    4
	// Leaders:    1    14   15   16
	// Region1:    -    F    F    L
	// Region2:    F    F    L    -
	cluster.UpdateLeaderCount(2, 14)
	cluster.UpdateLeaderCount(3, 15)
	// A leader cannot be moved out of store 4, so one is moved into store 1.
	expectTransfer(3, 1)

	// Stores:     1    2    3    4
	// Leaders:    1    2    15   16
	// Region1:    -    F    L    F
	// Region2:    L    F    F    -
	cluster.AddLeaderStore(2, 2)
	cluster.AddLeaderRegion(1, 3, 2, 4)
	cluster.AddLeaderRegion(2, 1, 2, 3)
	// No leader on store 4 and no follower on store 1, so the source and
	// target are now store 3 and store 2.
	expectTransfer(3, 2)

	// Stores:     1    2    3    4
	// Leaders:    10   10   10   10   (as passed to AddLeaderStore)
	// Region1:    -    F    F    L
	// Region2:    L    F    F    -
	setLeaders(10, 10, 10, 10)
	cluster.AddLeaderRegion(1, 4, 2, 3)
	cluster.AddLeaderRegion(2, 1, 2, 3)
	// The cluster is balanced: two consecutive rounds yield nothing.
	for round := 0; round < 2; round++ {
		c.Assert(s.schedule(), IsNil)
	}

	// Store 3's leader count drops:
	// Stores:     1    2    3    4
	// Leaders:    11   13   0    16
	// Region1:    -    F    F    L
	// Region2:    L    F    F    -
	setLeaders(11, 13, 0, 16)
	expectTransfer(4, 3)
}
// testReplicaCheckerSuite groups the replica checker tests. It carries no
// state; every test builds its own mock cluster.
type testReplicaCheckerSuite struct{}

// Register the suite with the gocheck runner.
var _ = Suite(&testReplicaCheckerSuite{})
// TestBasic walks the replica checker through its basic decisions on one
// region: adding a missing peer, skipping a down store, doing nothing when
// there are enough replicas, removing a redundant peer, and replacing a peer
// that sits on an offline store.
func (s *testReplicaCheckerSuite) TestBasic(c *C) {
	opt := mockoption.NewScheduleOptions()
	tc := mockcluster.NewCluster(opt)
	rc := checker.NewReplicaChecker(tc)
	opt.MaxSnapshotCount = 2

	// Add stores 1,2,3,4.
	tc.AddRegionStore(1, 4)
	tc.AddRegionStore(2, 3)
	tc.AddRegionStore(3, 2)
	tc.AddRegionStore(4, 1)
	// Add region 1 with leader in store 1 and follower in store 2.
	tc.AddLeaderRegion(1, 1, 2)

	// Region has 2 peers, we need to add a new peer.
	region := tc.GetRegion(1)
	testutil.CheckAddPeer(c, rc.Check(region), operator.OpReplica, 4)

	// Test healthFilter.
	// If store 4 is down, we add to store 3.
	tc.SetStoreDown(4)
	testutil.CheckAddPeer(c, rc.Check(region), operator.OpReplica, 3)
	tc.SetStoreUp(4)
	testutil.CheckAddPeer(c, rc.Check(region), operator.OpReplica, 4)

	// Add peer in store 4, and we have enough replicas.
	// The allocation result is checked so that a failure surfaces here
	// rather than through a nil peer further down.
	peer4, err := tc.AllocPeer(4)
	c.Assert(err, IsNil)
	region = region.Clone(core.WithAddPeer(peer4))
	c.Assert(rc.Check(region), IsNil)

	// Add peer in store 3, and we have redundant replicas.
	peer3, err := tc.AllocPeer(3)
	c.Assert(err, IsNil)
	region = region.Clone(core.WithAddPeer(peer3))
	testutil.CheckRemovePeer(c, rc.Check(region), 1)
	region = region.Clone(core.WithRemoveStorePeer(1))

	// Peer in store 3 is offline, transfer peer to store 1.
	tc.SetStoreOffline(3)
	testutil.CheckTransferPeer(c, rc.Check(region), operator.OpReplica, 3, 1)
}
// TestLostStore checks that the replica checker neither panics nor produces an
// operator when a region references a store the cluster does not know about.
func (s *testReplicaCheckerSuite) TestLostStore(c *C) {
	cluster := mockcluster.NewCluster(mockoption.NewScheduleOptions())
	cluster.AddRegionStore(1, 1)
	cluster.AddRegionStore(2, 1)
	replicaChecker := checker.NewReplicaChecker(cluster)

	// Region 1 has peers on stores 1, 2 and 3, but only stores 1 and 2 exist.
	// This only happens while recovering the PD cluster; the check must not
	// panic.
	cluster.AddLeaderRegion(1, 1, 2, 3)
	c.Assert(replicaChecker.Check(cluster.GetRegion(1)), IsNil)
}
// TestOffline checks how the replica checker, with MaxReplicas set to 3, grows
// a region from one peer to three, trims a fourth peer, skips a busy store,
// and handles a peer whose store goes offline.
func (s *testReplicaCheckerSuite) TestOffline(c *C) {
	opt := mockoption.NewScheduleOptions()
	tc := mockcluster.NewCluster(opt)
	newTestReplication(opt, 3)
	rc := checker.NewReplicaChecker(tc)

	// Stores 1-4 hold 1, 2, 3 and 4 regions; region 1 starts with a single
	// peer, the leader, on store 1.
	tc.AddRegionStore(1, 1)
	tc.AddRegionStore(2, 2)
	tc.AddRegionStore(3, 3)
	tc.AddRegionStore(4, 4)
	tc.AddLeaderRegion(1, 1)
	region := tc.GetRegion(1)

	// Store 2 has the smallest region count among stores without a peer.
	testutil.CheckAddPeer(c, rc.Check(region), operator.OpReplica, 2)
	// Allocation results are checked so that a failure surfaces here rather
	// than through a nil peer further down.
	peer2, err := tc.AllocPeer(2)
	c.Assert(err, IsNil)
	region = region.Clone(core.WithAddPeer(peer2))

	// Store 3 is now the store with the smallest region count without a peer.
	testutil.CheckAddPeer(c, rc.Check(region), operator.OpReplica, 3)
	peer3, err := tc.AllocPeer(3)
	c.Assert(err, IsNil)
	region = region.Clone(core.WithAddPeer(peer3))

	// A fourth peer on store 4 exceeds MaxReplicas; store 4 has the largest
	// region count and its peer is the one expected to be removed.
	peer4, err := tc.AllocPeer(4)
	c.Assert(err, IsNil)
	region = region.Clone(core.WithAddPeer(peer4))
	testutil.CheckRemovePeer(c, rc.Check(region), 4)

	// Test healthFilter: while store 4 is busy no operator is produced.
	tc.SetStoreBusy(4, true)
	c.Assert(rc.Check(region), IsNil)
	tc.SetStoreBusy(4, false)
	testutil.CheckRemovePeer(c, rc.Check(region), 4)

	// Test offline.
	// The region has more peers than MaxReplicas, so the peer on the offline
	// store is removed.
	tc.SetStoreOffline(3)
	testutil.CheckRemovePeer(c, rc.Check(region), 3)
	region = region.Clone(core.WithRemoveStorePeer(4))

	// The number of region peers now equals MaxReplicas, so the peer on the
	// offline store is transferred to store 4 instead.
	testutil.CheckTransferPeer(c, rc.Check(region), operator.OpReplica, 3, 4)

	// Store 5 has a smaller region score than store 4, so store 5 is chosen.
	tc.AddRegionStore(5, 3)
	testutil.CheckTransferPeer(c, rc.Check(region), operator.OpReplica, 3, 5)
}