diff --git a/gossip/state/config.go b/gossip/state/config.go index e2a3337e714..18071972d98 100644 --- a/gossip/state/config.go +++ b/gossip/state/config.go @@ -30,6 +30,8 @@ type StateConfig struct { StateBlockBufferSize int StateChannelSize int StateEnabled bool + UseLeaderElection bool + OrgLeader bool } func GlobalConfig() *StateConfig { @@ -68,4 +70,8 @@ func (c *StateConfig) loadStateConfig() { if viper.IsSet("peer.gossip.state.enabled") { c.StateEnabled = viper.GetBool("peer.gossip.state.enabled") } + // The below two configuration parameters are used for straggler() which warns + // if our peer is lagging behind the rest and has no way to catch up. + c.UseLeaderElection = viper.GetBool("peer.gossip.useLeaderElection") + c.OrgLeader = viper.GetBool("peer.gossip.orgLeader") } diff --git a/gossip/state/state.go b/gossip/state/state.go index 5261d639b60..2fee9e3c0c0 100644 --- a/gossip/state/state.go +++ b/gossip/state/state.go @@ -45,8 +45,9 @@ const ( defAntiEntropyStateResponseTimeout = 3 * time.Second defAntiEntropyBatchSize = 10 - defChannelBufferSize = 100 - defAntiEntropyMaxRetries = 3 + defChannelBufferSize = 100 + defAntiEntropyMaxRetries = 3 + stragglerWarningThreshold = 100 defMaxBlockDistance = 20 @@ -766,6 +767,13 @@ func (s *GossipStateProviderImpl) addPayload(payload *proto.Payload, blockingMod } if !blockingMode && payload.SeqNum-height >= uint64(s.config.StateBlockBufferSize) { + if s.straggler(height, payload) { + s.logger.Warningf("[%s] Current block height (%d) is too far behind other peers at height (%d) to be able to receive blocks "+ + "without state transfer which is disabled in the configuration "+ + "(peer.gossip.state.enabled = false). 
Consider enabling it or setting the peer explicitly to be a leader (peer.gossip.orgLeader = true) "+ + "in order to pull blocks directly from the ordering service.", + s.chainID, height, payload.SeqNum+1) + } return errors.Errorf("Ledger height is at %d, cannot enqueue block with sequence of %d", height, payload.SeqNum) } @@ -778,6 +786,16 @@ func (s *GossipStateProviderImpl) addPayload(payload *proto.Payload, blockingMod return nil } +func (s *GossipStateProviderImpl) straggler(currHeight uint64, receivedPayload *proto.Payload) bool { + // If state transfer is disabled, there is no way to request blocks from peers that their ledger has advanced too far. + stateDisabled := !s.config.StateEnabled + // We are too far behind if we received a block with a sequence number more than stragglerWarningThreshold ahead of our height. + tooFarBehind := currHeight+stragglerWarningThreshold < receivedPayload.SeqNum + // We depend on other peers for blocks if we use leader election, or we are not explicitly configured to be an org leader. 
+ peerDependent := s.config.UseLeaderElection || !s.config.OrgLeader + return stateDisabled && tooFarBehind && peerDependent +} + func (s *GossipStateProviderImpl) commitBlock(block *common.Block, pvtData util.PvtDataCollections) error { t1 := time.Now() diff --git a/gossip/state/state_test.go b/gossip/state/state_test.go index 81bf97e1ac8..506a674d9af 100644 --- a/gossip/state/state_test.go +++ b/gossip/state/state_test.go @@ -484,6 +484,61 @@ func newBootNode(id int, committer committer.Committer, acceptor peerIdentityAcc return newPeerNodeWithGossipWithValidatorWithMetrics(logger, id, committer, acceptor, nil, v, gossipMetrics) } +// TestStraggler checks that straggler() reports a lagging peer only when state transfer is disabled, the received block is more than stragglerWarningThreshold ahead of the ledger height, and the peer depends on other peers for blocks. +func TestStraggler(t *testing.T) { + for _, testCase := range []struct { + stateEnabled bool + orgLeader bool + leaderElection bool + height uint64 + receivedSeq uint64 + expected bool + }{ + { + height: 100, + receivedSeq: 300, + leaderElection: true, + expected: true, + }, + { + height: 100, + receivedSeq: 300, + expected: true, + }, + { + height: 100, + receivedSeq: 300, + orgLeader: true, + }, + { + height: 100, + receivedSeq: 105, + leaderElection: true, + }, + { + height: 100, + receivedSeq: 300, + leaderElection: true, + stateEnabled: true, + }, + } { + description := fmt.Sprintf("%+v", testCase) + t.Run(description, func(t *testing.T) { + s := &GossipStateProviderImpl{ + config: &StateConfig{ + StateEnabled: testCase.stateEnabled, + OrgLeader: testCase.orgLeader, + UseLeaderElection: testCase.leaderElection, + }, + } + + if actual := s.straggler(testCase.height, &proto.Payload{SeqNum: testCase.receivedSeq}); actual != testCase.expected { + t.Fatalf("straggler() returned %v, expected %v", actual, testCase.expected) + } + }) + } +} + func TestNilDirectMsg(t *testing.T) { mc := &mockCommitter{Mock: &mock.Mock{}} mc.On("LedgerHeight", mock.Anything).Return(uint64(1), nil) diff --git a/sampleconfig/core.yaml b/sampleconfig/core.yaml index 5e90d2ac071..17523ce51b0 100644 --- a/sampleconfig/core.yaml +++ b/sampleconfig/core.yaml @@ -230,7 +230,13 @@ peer: # indicates whenever state transfer is enabled or not # default value is true, i.e. 
state transfer is active # and takes care to sync up missing blocks allowing - # lagging peer to catch up to speed with rest network + # lagging peer to catch up to speed with rest network. + # Keep in mind that when peer.gossip.useLeaderElection is true + # and there are several peers in the organization, + # or peer.gossip.useLeaderElection is false together with + # peer.gossip.orgLeader being false, the peer's ledger may lag behind + # the rest of the peers and will never catch up due to state transfer + # being disabled. enabled: false # checkInterval interval to check whether peer is lagging behind enough to # request blocks via state transfer from another peer.