From c63d62133bcd1a596716a6cabac7b4ae3175c064 Mon Sep 17 00:00:00 2001 From: eriknordmark Date: Thu, 22 Feb 2024 11:53:15 +0100 Subject: [PATCH 1/5] Remove interlock with controlled for EdgeNodeCerts In https://github.com/lf-edge/eve/commit/f40d6d43cb17101267507f2ee9be1e716409e6d3 and modified in https://github.com/lf-edge/eve/commit/9ef0f0571d79812b6ac3b032542d78a2bd230683 EVE got an interlock to not fetch configuration from the controller until after it has sent the EdgeNodeCerts. This was done since some controller would send incomplete information when it could not do ECDH and hence not do object encryption. That controller has now been fixed (and is producing errors of the form "empty device certificate in create cipher block method" in this case). Thus we can remove the interlock. However, we still keep the retry logic from the second commit. Signed-off-by: eriknordmark --- pkg/pillar/cmd/zedagent/handleconfig.go | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pkg/pillar/cmd/zedagent/handleconfig.go b/pkg/pillar/cmd/zedagent/handleconfig.go index 1cc5cb1482..5fedcb888e 100644 --- a/pkg/pillar/cmd/zedagent/handleconfig.go +++ b/pkg/pillar/cmd/zedagent/handleconfig.go @@ -608,14 +608,6 @@ func requestConfigByURL(getconfigCtx *getconfigContext, url string, configProcessingRetval, []netdump.TracedNetRequest) { log.Tracef("getLatestConfig(%s, %d)", url, iteration) - // On first boot, if we haven't yet published our certificates we defer - // to ensure that the controller has our certs and can add encrypted - // secrets to our config. - if getconfigCtx.zedagentCtx.bootReason == types.BootReasonFirst && - !getconfigCtx.zedagentCtx.publishedEdgeNodeCerts { - log.Noticef("Defer fetching config until our EdgeNodeCerts have been published") - return deferConfig, nil - } ctx := getconfigCtx.zedagentCtx const bailOnHTTPErr = false // For 4xx and 5xx HTTP errors we try other interfaces // except http.StatusForbidden(which returns error From 4104424bea62e9217688b14c39cb8fb0b919cb7b Mon Sep 17 00:00:00 2001 From: eriknordmark Date: Thu, 22 Feb 2024 21:53:31 +0100 Subject: [PATCH 2/5] vaultmgr: export IsTpmEnabled with EncryptedVaultKeyFromDevice Signed-off-by: eriknordmark --- pkg/pillar/cmd/vaultmgr/vaultmgr.go | 5 ++++- pkg/pillar/types/vaultmgrtypes.go | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/pkg/pillar/cmd/vaultmgr/vaultmgr.go b/pkg/pillar/cmd/vaultmgr/vaultmgr.go index 86e58971a6..4781b22d6a 100644 --- a/pkg/pillar/cmd/vaultmgr/vaultmgr.go +++ b/pkg/pillar/cmd/vaultmgr/vaultmgr.go @@ -492,7 +492,8 @@ func publishVaultKey(ctx *vaultMgrContext, vaultName string) error { var encryptedVaultKey []byte //we try to fill EncryptedVaultKey only in case of tpm enabled //otherwise we leave it empty - if etpm.IsTpmEnabled() { + isTpmEnabled := etpm.IsTpmEnabled() + if isTpmEnabled { if !ctx.defaultVaultUnlocked { log.Errorf("Vault is not yet unlocked, waiting for Controller key") return nil @@ -504,6 +505,7 @@ func publishVaultKey(ctx *vaultMgrContext, vaultName string) error { encryptedKey, err := etpm.EncryptDecryptUsingTpm(keyBytes, true) if err != nil { + // XXX should we still send with no key? return fmt.Errorf("Failed to encrypt vault key %w", err) } @@ -524,6 +526,7 @@ func publishVaultKey(ctx *vaultMgrContext, vaultName string) error { keyFromDevice := types.EncryptedVaultKeyFromDevice{} keyFromDevice.Name = vaultName keyFromDevice.EncryptedVaultKey = encryptedVaultKey + keyFromDevice.IsTpmEnabled = isTpmEnabled key := keyFromDevice.Key() log.Tracef("Publishing EncryptedVaultKeyFromDevice %s\n", key) pub := ctx.pubVaultKeyFromDevice diff --git a/pkg/pillar/types/vaultmgrtypes.go b/pkg/pillar/types/vaultmgrtypes.go index 5d8a793798..e5975ddd47 100644 --- a/pkg/pillar/types/vaultmgrtypes.go +++ b/pkg/pillar/types/vaultmgrtypes.go @@ -83,6 +83,7 @@ func (status VaultStatus) LogKey() string { type EncryptedVaultKeyFromDevice struct { Name string EncryptedVaultKey []byte // empty if no TPM enabled + IsTpmEnabled bool } // Key returns name of the vault corresponding to this object From aa75d9524773d9d52f93116d5a41ee09c0a4f3f4 Mon Sep 17 00:00:00 2001 From: eriknordmark Date: Thu, 22 Feb 2024 21:54:24 +0100 Subject: [PATCH 3/5] zedagent: track publishedAttestEscrow for purposes of logging Signed-off-by: eriknordmark --- pkg/pillar/cmd/zedagent/attesttask.go | 6 ++++-- pkg/pillar/cmd/zedagent/handlecertconfig.go | 2 -- pkg/pillar/cmd/zedagent/handleconfig.go | 3 +++ pkg/pillar/cmd/zedagent/zedagent.go | 5 ++++- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/pkg/pillar/cmd/zedagent/attesttask.go b/pkg/pillar/cmd/zedagent/attesttask.go index a0be0be1c2..f5e2b7abc6 100644 --- a/pkg/pillar/cmd/zedagent/attesttask.go +++ b/pkg/pillar/cmd/zedagent/attesttask.go @@ -450,13 +450,14 @@ func (server *VerifierImpl) SendAttestEscrow(ctx *zattest.Context) error { } // bail if V2API is not supported if !zedcloud.UseV2API() { + attestCtx.zedagentCtx.publishedAttestEscrow = true return zattest.ErrNoVerifier } if attestCtx.SkipEscrow { log.Notice("[ATTEST] Escrow successful skipped") return nil } - if attestCtx.EscrowData == nil { + if attestCtx.EscrowData == nil || len(attestCtx.EscrowData) == 0 { errorDescription := types.ErrorDescription{Error: "[ATTEST] No escrow data"} log.Error(errorDescription.Error) setAttestErrorAndTriggerInfo(ctx, errorDescription) @@ -537,6 +538,7 @@ func (server *VerifierImpl) SendAttestEscrow(ctx *zattest.Context) error { return zattest.ErrITokenMismatch case attest.AttestStorageKeysResponseCode_ATTEST_STORAGE_KEYS_RESPONSE_CODE_SUCCESS: log.Notice("[ATTEST] Escrow successful") + attestCtx.zedagentCtx.publishedAttestEscrow = true ctx.ClearError() triggerPublishDevInfo(attestCtx.zedagentCtx) // we sent storage keys successfully @@ -800,7 +802,7 @@ func handleEncryptedKeyFromDeviceImpl(ctxArg interface{}, key string, attestCtx := ctx.attestCtx attestCtx.EscrowData = vaultKey.EncryptedVaultKey attestCtx.SkipEscrow = false - if len(attestCtx.EscrowData) == 0 { + if !vaultKey.IsTpmEnabled { attestCtx.SkipEscrow = true } diff --git a/pkg/pillar/cmd/zedagent/handlecertconfig.go b/pkg/pillar/cmd/zedagent/handlecertconfig.go index 210c1ef5e9..44eb5aa5e5 100644 --- a/pkg/pillar/cmd/zedagent/handlecertconfig.go +++ b/pkg/pillar/cmd/zedagent/handlecertconfig.go @@ -493,8 +493,6 @@ func publishEdgeNodeCertsToController(ctx *zedagentContext) { log.Tracef("publishEdgeNodeCertsToController: after send, total elapse sec %v", time.Since(startPubTime).Seconds()) ctx.cipherCtx.iteration++ - // XXX remove log? - log.Noticef("Maybe sent EdgeNodeCerts") // The getDeferredSentHandlerFunction will set ctx.publishedEdgeNodeCerts // when the message has been sent. } diff --git a/pkg/pillar/cmd/zedagent/handleconfig.go b/pkg/pillar/cmd/zedagent/handleconfig.go index 5fedcb888e..4c04086c47 100644 --- a/pkg/pillar/cmd/zedagent/handleconfig.go +++ b/pkg/pillar/cmd/zedagent/handleconfig.go @@ -470,6 +470,9 @@ func configTimerTask(getconfigCtx *getconfigContext, handleChannel chan interfac if getconfigCtx.configProcessingRV != configOK { log.Noticef("config processing flag is not OK: %s", getconfigCtx.configProcessingRV) } + if ctx.publishedEdgeNodeCerts && !ctx.publishedAttestEscrow { + log.Warning("Not yet published AttestEscrow") + } } ctx.ps.StillRunning(wdName, warningTime, errorTime) } diff --git a/pkg/pillar/cmd/zedagent/zedagent.go b/pkg/pillar/cmd/zedagent/zedagent.go index 5e6eaa6747..bf45104699 100644 --- a/pkg/pillar/cmd/zedagent/zedagent.go +++ b/pkg/pillar/cmd/zedagent/zedagent.go @@ -216,9 +216,12 @@ type zedagentContext struct { // Track the counter from force.fallback.counter to detect changes forceFallbackCounter int - // Interlock with controller to ensure we get the encrypted secrets + // Used for retry of EdgeNodeCerts publishedEdgeNodeCerts bool + // Used for retry of SendAttestEscrow + publishedAttestEscrow bool + attestationTryCount int // cli options versionPtr *bool From a0934b49113a82fc4a873b195d0bf1848474e144 Mon Sep 17 00:00:00 2001 From: eriknordmark Date: Thu, 22 Feb 2024 22:27:05 +0100 Subject: [PATCH 4/5] Fix 'Skip triggering attest state machine before entering main loop' The refactoring in https://github.com/lf-edge/eve/commit/5832ab1c294e863b57915cb2fbb99da43a44070a accidentally activated the subscriptions to AttestQuote and EncryptedKeyFromDevice before the attest engine was started. As a result we get the above log message and the encrypted storage key is not reliably escrowed in the controller. Here we start the attest engine before the subscriptions. Signed-off-by: eriknordmark --- pkg/pillar/cmd/zedagent/zedagent.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pkg/pillar/cmd/zedagent/zedagent.go b/pkg/pillar/cmd/zedagent/zedagent.go index bf45104699..d323560602 100644 --- a/pkg/pillar/cmd/zedagent/zedagent.go +++ b/pkg/pillar/cmd/zedagent/zedagent.go @@ -442,6 +442,10 @@ func Run(ps *pubsub.PubSub, loggerArg *logrus.Logger, logArg *base.LogObject, ar // XXX defer this until we have some config from cloud or saved copy getconfigCtx.pubAppInstanceConfig.SignalRestarted() + // Initialize remote attestation context. Do this before we get events + // from the AttestQuote and EncryptedKeyFromDevice subscriptions + attestModuleInitialize(zedagentCtx) + // With device UUID, zedagent is ready to initialize and activate all subscriptions. initPostOnboardSubs(zedagentCtx) @@ -453,9 +457,6 @@ func Run(ps *pubsub.PubSub, loggerArg *logrus.Logger, logArg *base.LogObject, ar //initialize cipher processing block cipherModuleInitialize(zedagentCtx) - //initialize remote attestation context - attestModuleInitialize(zedagentCtx) - // Pick up debug aka log level before we start real work waitUntilGCReady(zedagentCtx, stillRunning) From e13e8303a3b1203cd93b2bf25d11bb9534ae1098 Mon Sep 17 00:00:00 2001 From: eriknordmark Date: Fri, 23 Feb 2024 11:26:22 +0100 Subject: [PATCH 5/5] Add more clear log when EdgeNodeCerts not accepted by controller Signed-off-by: eriknordmark --- pkg/pillar/cmd/zedagent/zedagent.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pkg/pillar/cmd/zedagent/zedagent.go b/pkg/pillar/cmd/zedagent/zedagent.go index d323560602..4c5bf22f7c 100644 --- a/pkg/pillar/cmd/zedagent/zedagent.go +++ b/pkg/pillar/cmd/zedagent/zedagent.go @@ -2556,7 +2556,7 @@ func getDeferredSentHandlerFunction(ctx *zedagentContext) *zedcloud.SentHandlerF ctx.publishedEdgeNodeCerts = true } } else { - if _, ok := itemType.(attest.ZAttestReqType); ok { + if el, ok := itemType.(attest.ZAttestReqType); ok { switch result { case types.SenderStatusUpgrade: log.Functionf("sendAttestReqProtobuf: Controller upgrade in progress") @@ -2570,6 +2570,13 @@ func getDeferredSentHandlerFunction(ctx *zedagentContext) *zedcloud.SentHandlerF log.Functionf("sendAttestReqProtobuf: Controller SenderStatusNotFound") potentialUUIDUpdate(ctx.getconfigCtx) } + if el == attest.ZAttestReqType_ATTEST_REQ_CERT { + log.Warnf("sendAttestReqProtobuf: Failed to send EdgeNodeCerts: %s", + result.String()) + // XXX should we declare maintenance mode? + // We get SenderStatusNotFound when a cert can + // not be replaced in the controller for security reasons. + } if !ctx.publishedEdgeNodeCerts { // Attestation request does not clog the send queue (issued // with the `ignoreErr` set to true), but once fails has to