Created
September 1, 2022 15:10
-
-
Save morgo/8915fee322fd43ec719860340d247465 to your computer and use it in GitHub Desktop.
pd fast region transfer (doesn't work yet)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/server/cluster/cluster.go b/server/cluster/cluster.go | |
index 3897f926..efbe7bf0 100644 | |
--- a/server/cluster/cluster.go | |
+++ b/server/cluster/cluster.go | |
@@ -49,6 +49,7 @@ import ( | |
"github.com/tikv/pd/server/schedule/checker" | |
"github.com/tikv/pd/server/schedule/hbstream" | |
"github.com/tikv/pd/server/schedule/labeler" | |
+ "github.com/tikv/pd/server/schedule/operator" | |
"github.com/tikv/pd/server/schedule/placement" | |
"github.com/tikv/pd/server/schedulers" | |
"github.com/tikv/pd/server/statistics" | |
@@ -817,6 +818,9 @@ func (c *RaftCluster) processRegionHeartbeat(region *core.RegionInfo) error { | |
c.Unlock() | |
return err | |
} | |
+ if c.core.GetStore(region.GetLeader().GetStoreId()).GetLeaderWeight() == 0 { | |
+ c.moveRegionFromZeroLeaderStore(region) | |
+ } | |
overlaps = c.core.PutRegion(region) | |
for _, item := range overlaps { | |
if c.regionStats != nil { | |
@@ -886,6 +890,38 @@ func (c *RaftCluster) processRegionHeartbeat(region *core.RegionInfo) error { | |
return nil | |
} | |
+// moveRegionFromZeroLeaderStore transfers the leader of region away from its current store to | |
+// the first peer store that has a non-zero leader weight and is not disconnected. | |
+// | |
+// It is invoked from region heartbeat processing rather than from the leader balance scheduler | |
+// because the transfer should happen immediately. A common scenario is when placement rules are | |
+// used and a leader fails: the region elects a new leader through raft, but that election does | |
+// not consider leader weight, so a store that was only intended for quorum could get leaders. | |
+// | |
+// If no peer store is eligible, or the transfer-leader operator cannot be created, the function | |
+// does nothing beyond logging. | |
+func (c *RaftCluster) moveRegionFromZeroLeaderStore(region *core.RegionInfo) { | |
+	leaderStoreID := region.GetLeader().GetStoreId() | |
+ | |
+	// Pick the first peer whose store may hold leaders and is reachable. | |
+	var newStoreID uint64 | |
+	for _, p := range region.GetPeers() { | |
+		store := c.core.GetStore(p.GetStoreId()) | |
+		// NOTE(review): GetStore presumably returns nil for a store the cluster does not know | |
+		// about; skip such peers instead of dereferencing a nil store. | |
+		if store == nil || store.GetLeaderWeight() == 0 || store.IsDisconnected() { | |
+			continue | |
+		} | |
+		newStoreID = p.GetStoreId() | |
+		break | |
+	} | |
+	if newStoreID == 0 { | |
+		// No eligible target store was found; leave the leader where it is. | |
+		return | |
+	} | |
+ | |
+	log.Warn("moving region from zero leader store", | |
+		zap.Uint64("region-id", region.GetID()), | |
+		zap.Uint64("store-id", leaderStoreID), | |
+		zap.Uint64("new-store-id", newStoreID)) | |
+	op, err := operator.CreateTransferLeaderOperator("transfer-leader-zero-weight-store", c, region, leaderStoreID, newStoreID, []uint64{}, operator.OpAdmin) | |
+	if err != nil { | |
+		log.Debug("failed to create transfer leader operator", errs.ZapError(err)) | |
+		// op is not usable when creation failed; do not hand it to the operator controller. | |
+		return | |
+	} | |
+	if ok := c.GetOperatorController().AddOperator(op); !ok { | |
+		log.Warn("fail to add transfer leader operator") | |
+	} | |
+} | |
+ | |
func (c *RaftCluster) updateStoreStatusLocked(id uint64) { | |
leaderCount := c.core.GetStoreLeaderCount(id) | |
regionCount := c.core.GetStoreRegionCount(id) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment