From f0992f9a9f3314562df0c938815a2185f23da5a9 Mon Sep 17 00:00:00 2001
From: "Lau, Luke"
Date: Thu, 3 Oct 2024 15:26:47 -0400
Subject: [PATCH] add logic to check state before pause and resume

---
 go.mod                       |  2 +-
 go.sum                       |  2 +
 pkg/controller/controller.go | 98 +++++++++++++++++++++++++++++++-----
 3 files changed, 88 insertions(+), 14 deletions(-)

diff --git a/go.mod b/go.mod
index e42fb84..7cabf77 100644
--- a/go.mod
+++ b/go.mod
@@ -18,7 +18,7 @@ require (
 	github.com/dell/gofsutil v1.16.1
 	github.com/dell/goiscsi v1.9.0
 	github.com/dell/gonvme v1.8.1
-	github.com/dell/gopowerstore v1.15.2-0.20240924141025-1c719e612669
+	github.com/dell/gopowerstore v1.15.2-0.20241001202140-db2de61a4338
 	github.com/fsnotify/fsnotify v1.7.0
 	github.com/go-openapi/strfmt v0.23.0
 	github.com/golang/mock v1.6.0
diff --git a/go.sum b/go.sum
index 41b190c..483b326 100644
--- a/go.sum
+++ b/go.sum
@@ -139,6 +139,8 @@ github.com/dell/gonvme v1.8.1 h1:46M5lPqj7+Xjen+qxooRN9cx/+uJG4xtK9TpwduWDgE=
 github.com/dell/gonvme v1.8.1/go.mod h1:ajbuF+fswq+ty2tRTG5FN4ecIMJsG7aDu/bkMynTKAs=
 github.com/dell/gopowerstore v1.15.2-0.20240924141025-1c719e612669 h1:XktIu9B0VskV/nLyDFtsurvCVedi3czY0ziq52lf5RU=
 github.com/dell/gopowerstore v1.15.2-0.20240924141025-1c719e612669/go.mod h1:vyN1JAZ+TO7Px+gNVa61a23/KwlI/Nj/6ttzMOQFyG0=
+github.com/dell/gopowerstore v1.15.2-0.20241001202140-db2de61a4338 h1:YwWUcphY/HY5rPYY2g9vJnLMbOAgD/ITEdz6nlVW9nI=
+github.com/dell/gopowerstore v1.15.2-0.20241001202140-db2de61a4338/go.mod h1:vyN1JAZ+TO7Px+gNVa61a23/KwlI/Nj/6ttzMOQFyG0=
 github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
 github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no=
 github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE=
diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go
index ec00d66..7a92d6a 100644
--- a/pkg/controller/controller.go
+++ b/pkg/controller/controller.go
@@ -1378,9 +1378,71 @@ func GetMetroSessionID(ctx context.Context, client gopowerstore.Client, volumeID
 	return metroSessionID, nil
 }
 
+// pauseMetroSession pauses the metro replication session identified by
+// metroSessionID using the client of arr. A session that is already paused is
+// left untouched. It returns an error if the session cannot be fetched, is in a
+// state it cannot be paused from, or the pause action fails.
+func pauseMetroSession(ctx context.Context, metroSessionID string, arr *array.PowerStoreArray) error {
+	metroSession, err := arr.Client.GetReplicationSessionByID(ctx, metroSessionID)
+	if err != nil {
+		return fmt.Errorf("could not get metro replication session %s: %w", metroSessionID, err)
+	}
+
+	// confirm the session is in a state we can pause from.
+	// 'Fractured' state is normal state flow after deleting a volume from the group and resuming replication.
+	if metroSession.State != gopowerstore.RsStateOk &&
+		metroSession.State != gopowerstore.RsStateSynchronizing &&
+		metroSession.State != gopowerstore.RsStatePaused &&
+		metroSession.State != gopowerstore.RsStateSystemPaused &&
+		metroSession.State != gopowerstore.RsStateFractured {
+		return fmt.Errorf("could not pause the metro replication session, %s, because the session is not in expected state to pause", metroSession.ID)
+	}
+
+	// nothing to do if the session is already paused
+	if metroSession.State == gopowerstore.RsStatePaused {
+		log.Debugf("metro replication session, %s, already paused", metroSession.ID)
+		return nil
+	}
+
+	log.Debugf("pausing metro replication session, %s", metroSession.ID)
+	if _, err = arr.Client.ExecuteActionOnReplicationSession(ctx, metroSession.ID, gopowerstore.RsActionPause, nil); err != nil {
+		return fmt.Errorf("metro replication session, %s, could not be paused: %w", metroSession.ID, err)
+	}
+
+	return nil
+}
+
+// resumeMetroSession resumes the metro replication session identified by
+// metroSessionID using the client of arr. A session already in the Ok state is
+// left untouched; a session in any state other than Ok or Paused is rejected
+// with an error.
+func resumeMetroSession(ctx context.Context, metroSessionID string, arr *array.PowerStoreArray) error {
+	metroSession, err := arr.Client.GetReplicationSessionByID(ctx, metroSessionID)
+	if err != nil {
+		return fmt.Errorf("could not get metro replication session: %w", err)
+	}
+
+	// nothing to do if the session is already in the Ok state
+	if metroSession.State == gopowerstore.RsStateOk {
+		return nil
+	}
+
+	// metro session can only be resumed if it is in 'paused' state
+	if metroSession.State != gopowerstore.RsStatePaused {
+		return errors.New("the metro session must be in 'paused' state before resuming")
+	}
+
+	log.Debugf("resuming metro replication session %s", metroSession.ID)
+	if _, err = arr.Client.ExecuteActionOnReplicationSession(ctx, metroSession.ID, gopowerstore.RsActionResume, nil); err != nil {
+		return fmt.Errorf("metro replication session, %s, could not be resumed: %w", metroSession.ID, err)
+	}
+
+	return nil
+}
+
 // ControllerExpandVolume resizes Volume or FileSystem by increasing available volume capacity in the storage array.
 func (s *Service) ControllerExpandVolume(ctx context.Context, req *csi.ControllerExpandVolumeRequest) (*csi.ControllerExpandVolumeResponse, error) {
-	id, arrayID, protocol, _, remoteArrayID, err := array.ParseVolumeID(ctx, req.VolumeId, s.DefaultArray(), nil)
+	id, arrayID, protocol, _, _, err := array.ParseVolumeID(ctx, req.VolumeId, s.DefaultArray(), nil)
 	if err != nil {
 		return nil, status.Errorf(codes.OutOfRange, "unable to parse the volume id")
 	}
@@ -1402,17 +1464,15 @@ func (s *Service) ControllerExpandVolume(ctx context.Context, req *csi.Controlle
 			return nil, status.Error(codes.NotFound, "detected SCSI protocol but wasn't able to fetch the volume info")
 		}
 
-		if vol.Size < requiredBytes {
-			// get replication session ID if Metro is configured. Pause the session and resume after updating size.
-			replicationSessionID, err := GetMetroSessionID(ctx, client, id, vol, remoteArrayID)
-			if err != nil {
-				return nil, status.Error(codes.Internal, "error while identifying metro configuration on volume")
-			}
+		isMetro := vol.MetroReplicationSessionID != ""
 
-			if replicationSessionID != "" {
-				_, err := client.ExecuteActionOnReplicationSession(ctx, replicationSessionID, gopowerstore.RsActionPause, nil)
+		if vol.Size < requiredBytes {
+			if isMetro {
+				// must pause metro session before modifying the volume
+				err = pauseMetroSession(ctx, vol.MetroReplicationSessionID, array)
 				if err != nil {
-					return nil, status.Error(codes.Internal, "unable to pause metro session")
+					return nil, status.Errorf(codes.FailedPrecondition,
+						"failed to expand the volume %s because the metro replication session could not be paused: %s", vol.Name, err.Error())
 				}
 			}
 
@@ -1421,15 +1481,27 @@ func (s *Service) ControllerExpandVolume(ctx context.Context, req *csi.Controlle
 				return nil, status.Error(codes.Internal, "unable to modify volume size")
 			}
 
-			if replicationSessionID != "" {
-				_, err := client.ExecuteActionOnReplicationSession(ctx, replicationSessionID, gopowerstore.RsActionResume, nil)
+			if isMetro {
+				// resume the paused metro session
+				err := resumeMetroSession(ctx, vol.MetroReplicationSessionID, array)
 				if err != nil {
-					return nil, status.Error(codes.Internal, "unable to resume metro session")
+					return nil, status.Errorf(codes.Internal, "failed to resume metro session for volume %s: %s", vol.Name, err.Error())
 				}
 			}
 
 			return &csi.ControllerExpandVolumeResponse{CapacityBytes: requiredBytes, NodeExpansionRequired: true}, nil
 		}
+
+		// in case the previous request failed after expanding the volume,
+		// check the metro session state and resume if necessary
+		if isMetro {
+			err = resumeMetroSession(ctx, vol.MetroReplicationSessionID, array)
+			if err != nil {
+				return nil, status.Errorf(codes.Internal, "failed to resume metro session for volume %s: %s", vol.Name, err.Error())
+			}
+
+			return &csi.ControllerExpandVolumeResponse{CapacityBytes: requiredBytes, NodeExpansionRequired: true}, nil
+		}
 		return &csi.ControllerExpandVolumeResponse{}, nil
 	}
 