Skip to content

Commit

Permalink
Allow tick interval override via orderer.yaml
Browse files Browse the repository at this point in the history
If a raft network becomes unstable, adjusting the tick interval can
sometimes be effective in restoring it.  However, the tick interval is
stored in the channel config, so if the network is not operational,
modifying it is very challenging.  This commit adds a new option to the
orderer etcdraft consensus config, allowing the channel config parameter
to be overridden from the local configuration.

Signed-off-by: Jason Yellick <jyellick@us.ibm.com>
  • Loading branch information
Jason Yellick authored and Brett Logan committed Oct 8, 2020
1 parent 32cb396 commit e05c443
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 8 deletions.
8 changes: 6 additions & 2 deletions docs/source/raft_configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,8 @@ used to further fine tune the cluster communication or replication mechanisms:
* `SnapDir`: specifies the location at which snapshots for `etcd/raft` are stored.
Each channel will have its own subdirectory named after the channel ID.

There is also a hidden configuration parameter that can be set by adding it to
the consensus section in the `orderer.yaml`:
There are also two hidden configuration parameters that can each be set by adding
them to the consensus section in the `orderer.yaml`:

* `EvictionSuspicion`: The cumulative period of time of channel eviction
suspicion that triggers the node to pull blocks from other nodes and see if it
Expand All @@ -143,6 +143,10 @@ the consensus section in the `orderer.yaml`:
certificate), the node halts its operation for that channel. A node suspects
its channel eviction when it doesn't know about any elected leader nor can be
elected as leader in the channel. Defaults to 10 minutes.
* `TickIntervalOverride`: If set, this value will be preferred over the tick
interval configured in all channels where this ordering node is a consenter.
This value should be set only with great care, as a mismatch in tick interval
across orderers could result in a loss of quorum for one or more channels.

### Channel configuration

Expand Down
1 change: 1 addition & 0 deletions integration/nwo/fabricconfig/orderer.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ type Orderer struct {
Kafka *Kafka `yaml:"Kafka,omitempty"`
Operations *OrdererOperations `yaml:"Operations,omitempty"`
ChannelParticipation *ChannelParticipation `yaml:"ChannelParticipation,omitempty"`
Consensus map[string]string `yaml:"Consensus,omitempty"`

ExtraProperties map[string]interface{} `yaml:",inline,omitempty"`
}
Expand Down
24 changes: 24 additions & 0 deletions integration/raft/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,30 @@ var _ = Describe("EndToEnd reconfiguration and onboarding", func() {
})
})

When("a single node cluster has the tick interval overridden", func() {
It("reflects this in its startup logs", func() {
network = nwo.New(nwo.BasicEtcdRaft(), testDir, client, StartPort(), components)
network.GenerateConfigTree()
network.Bootstrap()

orderer := network.Orderer("orderer")
ordererConfig := network.ReadOrdererConfig(orderer)
ordererConfig.Consensus["TickIntervalOverride"] = "642ms"
network.WriteOrdererConfig(orderer, ordererConfig)

By("Launching the orderer")
runner := network.OrdererRunner(orderer)
ordererRunners = append(ordererRunners, runner)

process := ifrit.Invoke(runner)
Eventually(process.Ready(), network.EventuallyTimeout).Should(BeClosed())
ordererProcesses = append(ordererProcesses, process)

Eventually(runner.Err()).Should(gbytes.Say("TickIntervalOverride is set, overriding channel configuration tick interval to 642ms"))

})
})

When("the orderer certificates are all rotated", func() {
It("is possible to rotate certificate by adding & removing cert in single config", func() {
layout := nwo.MultiNodeEtcdRaft()
Expand Down
22 changes: 16 additions & 6 deletions orderer/consensus/etcdraft/consenter.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,10 @@ type ChainGetter interface {

// Config contains etcdraft configurations
type Config struct {
WALDir string // WAL data of <my-channel> is stored in WALDir/<my-channel>
SnapDir string // Snapshots of <my-channel> are stored in SnapDir/<my-channel>
EvictionSuspicion string // Duration threshold that the node samples in order to suspect its eviction from the channel.
WALDir string // WAL data of <my-channel> is stored in WALDir/<my-channel>
SnapDir string // Snapshots of <my-channel> are stored in SnapDir/<my-channel>
EvictionSuspicion string // Duration threshold that the node samples in order to suspect its eviction from the channel.
TickIntervalOverride string // Duration to use for tick interval instead of what is specified in the channel config.
}

// Consenter implements etcdraft consenter
Expand Down Expand Up @@ -181,9 +182,18 @@ func (c *Consenter) HandleChain(support consensus.ConsenterSupport, metadata *co
}
}

tickInterval, err := time.ParseDuration(m.Options.TickInterval)
if err != nil {
return nil, errors.Errorf("failed to parse TickInterval (%s) to time duration", m.Options.TickInterval)
var tickInterval time.Duration
if c.EtcdRaftConfig.TickIntervalOverride == "" {
tickInterval, err = time.ParseDuration(m.Options.TickInterval)
if err != nil {
return nil, errors.Errorf("failed to parse TickInterval (%s) to time duration", m.Options.TickInterval)
}
} else {
tickInterval, err = time.ParseDuration(c.EtcdRaftConfig.TickIntervalOverride)
if err != nil {
return nil, errors.Errorf("failed parsing Consensus.TickIntervalOverride: %s: %v", c.EtcdRaftConfig.TickIntervalOverride, err)
}
c.Logger.Infof("TickIntervalOverride is set, overriding channel configuration tick interval to %v", tickInterval)
}

opts := Options{
Expand Down
32 changes: 32 additions & 0 deletions orderer/consensus/etcdraft/consenter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,38 @@ var _ = Describe("Consenter", func() {
Expect(err).To(MatchError("failed to parse TickInterval (500) to time duration"))
})

When("the TickIntervalOverride is invalid", func() {
It("returns an error", func() {
m := &etcdraftproto.ConfigMetadata{
Consenters: []*etcdraftproto.Consenter{
{ServerTlsCert: certAsPEM},
},
Options: &etcdraftproto.Options{
TickInterval: "500s",
ElectionTick: 10,
HeartbeatTick: 1,
MaxInflightBlocks: 5,
},
}
metadata := protoutil.MarshalOrPanic(m)
mockOrderer := &mocks.OrdererConfig{}
mockOrderer.ConsensusMetadataReturns(metadata)
mockOrderer.BatchSizeReturns(
&orderer.BatchSize{
PreferredMaxBytes: 2 * 1024 * 1024,
},
)
mockOrderer.CapabilitiesReturns(&mocks.OrdererCapabilities{})
support.SharedConfigReturns(mockOrderer)

consenter := newConsenter(chainGetter)
consenter.EtcdRaftConfig.TickIntervalOverride = "seven"

_, err := consenter.HandleChain(support, nil)
Expect(err).To(MatchError("failed parsing Consensus.TickIntervalOverride: seven: time: invalid duration seven"))
})
})

It("constructs a follower chain if no matching cert found", func() {
m := &etcdraftproto.ConfigMetadata{
Consenters: []*etcdraftproto.Consenter{
Expand Down

0 comments on commit e05c443

Please sign in to comment.