Skip to content

Commit

Permalink
FAB-13669 consensus migration: kafka2raft green path #4
Browse files Browse the repository at this point in the history
This is the fourth of four (4/4) sub-tasks that focus on
the "green" path of consensus-type migration from Kafka to Raft.

By "green" we mean that there are no failures or aborts along the
way. The 4 sub-tasks are staged in a way that minimizes dependencies
between them.

In this sub-task we introduce changes to the etcd/raft-base OSNs such
that they can restart from a ledger that was started as Kafka, migrated,
and restarted. This change concludes all the changes needed to implement
the green path on the "Raft" side.

See respective JIRA item for further details.

Change-Id: I5b408e1cfcb8cf42c39bed4df6c5496792175ef0
Signed-off-by: Yoav Tock <tock@il.ibm.com>
  • Loading branch information
tock-ibm committed Feb 17, 2019
1 parent 4950edd commit 0504983
Show file tree
Hide file tree
Showing 6 changed files with 159 additions and 8 deletions.
56 changes: 56 additions & 0 deletions orderer/common/multichannel/chainsupport.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/hyperledger/fabric/orderer/common/msgprocessor"
"github.com/hyperledger/fabric/orderer/consensus"
cb "github.com/hyperledger/fabric/protos/common"
"github.com/hyperledger/fabric/protos/orderer"
"github.com/hyperledger/fabric/protos/utils"
"github.com/pkg/errors"
)
Expand Down Expand Up @@ -68,6 +69,14 @@ func newChainSupport(
// Set up the block writer
cs.BlockWriter = newBlockWriter(lastBlock, registrar, cs)

// TODO Identify recovery after crash in the middle of consensus-type migration
if cs.detectMigration(lastBlock) {
// We do this because the last block after migration (COMMIT/CONTEXT) carries Kafka metadata.
// This prevents the code down the line from unmarshaling it as Raft, and panicking.
metadata.Value = nil
logger.Debugf("[channel: %s] Consensus-type migration: restart on to Raft, resetting Kafka block metadata", cs.ChainID())
}

// Set up the consenter
consenterType := ledgerResources.SharedConfig().ConsensusType()
consenter, ok := consenters[consenterType]
Expand All @@ -85,6 +94,53 @@ func newChainSupport(
return cs
}

// detectMigration identifies restart after consensus-type migration was committed (green path).
// Restart after migration is detected by:
// 1. The Kafka2RaftMigration capability in on
// 2. The last block carries a config-tx
// 3. In the config-tx, you have:
// - (system-channel && state=COMMIT), OR
// - (standard-channel && state=CONTEXT)
// This assumes that migration was successful (green path). When migration ends successfully,
// every channel will have a config block as the last block. On the system channel, containing state=COMMIT;
// on standard channels, containing state=CONTEXT.
func (cs *ChainSupport) detectMigration(lastBlock *cb.Block) bool {
isMigration := false

if !cs.ledgerResources.SharedConfig().Capabilities().Kafka2RaftMigration() {
return isMigration
}

lastConfigIndex, err := utils.GetLastConfigIndexFromBlock(lastBlock)
if err != nil {
logger.Panicf("Chain did not have appropriately encoded last config in its latest block: %s", err)
}

logger.Debugf("[channel: %s], sysChan=%v, lastConfigIndex=%d, H=%d, mig-state: %s",
cs.ChainID(), cs.systemChannel, lastConfigIndex, cs.ledgerResources.Height(),
cs.ledgerResources.SharedConfig().ConsensusMigrationState())

if lastConfigIndex == lastBlock.Header.Number { //The last block was a config-tx
state := cs.ledgerResources.SharedConfig().ConsensusMigrationState()
if cs.systemChannel {
if state == orderer.ConsensusType_MIG_STATE_COMMIT {
isMigration = true
}
} else {
if state == orderer.ConsensusType_MIG_STATE_CONTEXT {
isMigration = true
}
}

if isMigration {
logger.Infof("[channel: %s], Restarting after consensus-type migration. New consensus-type is: %s",
cs.ChainID(), cs.ledgerResources.SharedConfig().ConsensusType())
}
}

return isMigration
}

// Block returns a block with the following number,
// or nil if such a block doesn't exist.
func (cs *ChainSupport) Block(number uint64) *cb.Block {
Expand Down
54 changes: 53 additions & 1 deletion orderer/consensus/etcdraft/chain.go
Original file line number Diff line number Diff line change
Expand Up @@ -289,14 +289,57 @@ func (c *Chain) Start() {
return
}

c.node.start(c.fresh, c.support.Height() > 1)
isJoin := c.support.Height() > 1
isMigration := false
if isJoin {
isMigration = c.detectMigration()
}
c.node.start(c.fresh, isJoin, isMigration)
close(c.startC)
close(c.errorC)

go c.gc()
go c.serveRequest()
}

// detectMigration detects if the orderer restarts right after consensus-type migration,
// in which the Height>1 but previous blocks were created by Kafka.
// If this is the case, Raft should be started like it is joining a new channel.
func (c *Chain) detectMigration() bool {
startOfChain := false
if c.support.SharedConfig().Capabilities().Kafka2RaftMigration() {
lastBlock := c.support.Block(c.support.Height() - 1)
lastConfigIndex, err := utils.GetLastConfigIndexFromBlock(lastBlock)
if err != nil {
c.logger.Panicf("Chain did not have appropriately encoded last config in its latest block: %s", err)
}

c.logger.Debugf("[channel: %s], detecting if consensus-type migration, sysChan=%v, lastConfigIndex=%d, H=%d, mig-state: %s",
c.support.ChainID(), c.support.IsSystemChannel(), lastConfigIndex, c.support.Height(), c.support.SharedConfig().ConsensusMigrationState().String())

if lastConfigIndex != c.support.Height()-1 { // The last block is not a config-tx
return startOfChain
}

// The last block was a config-tx
if c.support.IsSystemChannel() {
if c.support.SharedConfig().ConsensusMigrationState() == orderer.ConsensusType_MIG_STATE_COMMIT {
startOfChain = true
}
} else {
if c.support.SharedConfig().ConsensusMigrationState() == orderer.ConsensusType_MIG_STATE_CONTEXT {
startOfChain = true
}
}

if startOfChain {
c.logger.Infof("[channel: %s], Restarting after consensus-type migration. Type: %s, just starting the channel.",
c.support.ChainID(), c.support.SharedConfig().ConsensusType())
}
}
return startOfChain
}

// Order submits normal type transactions for ordering.
func (c *Chain) Order(env *common.Envelope, configSeq uint64) error {
return c.Submit(&orderer.SubmitRequest{LastValidationSeq: configSeq, Payload: env, Channel: c.channelID}, 0)
Expand Down Expand Up @@ -1038,6 +1081,15 @@ func (c *Chain) getInFlightConfChange() *raftpb.ConfChange {
return nil
}

// Detect if it is a restart right after consensus-type migration. If yes, return early in order to avoid using
// the block metadata as etcdraft.RaftMetadata (see below). Right after migration the block metadata will carry
// Kafka metadata. The etcdraft.RaftMetadata should be extracted from the ConsensusType.Metadata, instead.
if c.detectMigration() {
c.logger.Infof("[channel: %s], Restarting after consensus-type migration. Type: %s, just starting the chain.",
c.support.ChainID(), c.support.SharedConfig().ConsensusType())
return nil
}

// extract membership mapping from configuration block metadata
// and compare with Raft configuration
metadata, err := utils.GetMetadataFromBlock(lastBlock, common.BlockMetadataIndex_ORDERER)
Expand Down
7 changes: 6 additions & 1 deletion orderer/consensus/etcdraft/chain_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2697,7 +2697,12 @@ func newChain(timeout time.Duration, channel string, dataDir string, id uint64,

support := &consensusmocks.FakeConsenterSupport{}
support.ChainIDReturns(channel)
support.SharedConfigReturns(&mockconfig.Orderer{BatchTimeoutVal: timeout})
support.SharedConfigReturns(&mockconfig.Orderer{
BatchTimeoutVal: timeout,
CapabilitiesVal: &mockconfig.OrdererCapabilities{
Kafka2RaftMigVal: false,
},
})

cutter := mockblockcutter.NewReceiver()
close(cutter.Block)
Expand Down
17 changes: 17 additions & 0 deletions orderer/consensus/etcdraft/consenter.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package etcdraft

import (
"bytes"
"encoding/hex"
"path"
"reflect"
"time"
Expand Down Expand Up @@ -118,11 +119,27 @@ func (c *Consenter) detectSelfID(consenters map[uint64]*etcdraft.Consenter) (uin

// HandleChain returns a new Chain instance or an error upon failure
func (c *Consenter) HandleChain(support consensus.ConsenterSupport, metadata *common.Metadata) (consensus.Chain, error) {

if support.SharedConfig().Capabilities().Kafka2RaftMigration() {
c.Logger.Debugf("SharedConfig.ConsensusType fields: Type=%s, ConsensusMigrationState=%s, ConsensusMigrationContext=%d, ConsensusMetadata length=%d",
support.SharedConfig().ConsensusType(), support.SharedConfig().ConsensusMigrationState(),
support.SharedConfig().ConsensusMigrationContext(), len(support.SharedConfig().ConsensusMetadata()))
if support.SharedConfig().ConsensusMigrationState() != orderer.ConsensusType_MIG_STATE_NONE {
c.Logger.Debugf("SharedConfig.ConsensusType: ConsensusMetadata dump:\n%s", hex.Dump(support.SharedConfig().ConsensusMetadata()))
}
}

m := &etcdraft.Metadata{}
if err := proto.Unmarshal(support.SharedConfig().ConsensusMetadata(), m); err != nil {
return nil, errors.Wrap(err, "failed to unmarshal consensus metadata")
}

if support.SharedConfig().Capabilities().Kafka2RaftMigration() &&
support.SharedConfig().ConsensusMigrationState() != orderer.ConsensusType_MIG_STATE_NONE {
c.Logger.Debugf("SharedConfig().ConsensusMetadata(): %s", m.String())
c.Logger.Debugf("block metadata.Value dump: \n%s", hex.Dump(metadata.Value))
}

if m.Options == nil {
return nil, errors.New("etcdraft options have not been provided")
}
Expand Down
21 changes: 18 additions & 3 deletions orderer/consensus/etcdraft/consenter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,12 @@ var _ = Describe("Consenter", func() {
},
}
metadata := utils.MarshalOrPanic(m)
support.SharedConfigReturns(&mockconfig.Orderer{ConsensusMetadataVal: metadata})
support.SharedConfigReturns(&mockconfig.Orderer{
ConsensusMetadataVal: metadata,
CapabilitiesVal: &mockconfig.OrdererCapabilities{
Kafka2RaftMigVal: false,
},
})

consenter := newConsenter(chainGetter)
consenter.EtcdRaftConfig.WALDir = walDir
Expand Down Expand Up @@ -181,7 +186,12 @@ var _ = Describe("Consenter", func() {
}
metadata := utils.MarshalOrPanic(m)
support := &consensusmocks.FakeConsenterSupport{}
support.SharedConfigReturns(&mockconfig.Orderer{ConsensusMetadataVal: metadata})
support.SharedConfigReturns(&mockconfig.Orderer{
ConsensusMetadataVal: metadata,
CapabilitiesVal: &mockconfig.OrdererCapabilities{
Kafka2RaftMigVal: false,
},
})
support.ChainIDReturns("foo")

consenter := newConsenter(chainGetter)
Expand All @@ -200,7 +210,12 @@ var _ = Describe("Consenter", func() {
},
}
metadata := utils.MarshalOrPanic(m)
support.SharedConfigReturns(&mockconfig.Orderer{ConsensusMetadataVal: metadata})
support.SharedConfigReturns(&mockconfig.Orderer{
ConsensusMetadataVal: metadata,
CapabilitiesVal: &mockconfig.OrdererCapabilities{
Kafka2RaftMigVal: false,
},
})

consenter := newConsenter(chainGetter)

Expand Down
12 changes: 9 additions & 3 deletions orderer/consensus/etcdraft/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,19 @@ type node struct {
raft.Node
}

func (n *node) start(fresh, join bool) {
func (n *node) start(fresh, join, migration bool) {
raftPeers := RaftPeers(n.metadata.Consenters)
n.logger.Debugf("Starting raft node: #peers: %v", len(raftPeers))

if fresh {
if join {
raftPeers = nil
n.logger.Info("Starting raft node to join an existing channel")
if !migration {
raftPeers = nil
n.logger.Info("Starting raft node to join an existing channel")

} else {
n.logger.Info("Starting raft node to join an existing channel, after consensus-type migration")
}
} else {
n.logger.Info("Starting raft node as part of a new channel")
}
Expand Down

0 comments on commit 0504983

Please sign in to comment.