Skip to content

Commit

Permalink
br/ebs: added log for EBS snapshot creation, increase backoff time fo…
Browse files Browse the repository at this point in the history
…r creating snapshot (#55667) (#55940)

close #55672
  • Loading branch information
ti-chi-bot committed Sep 9, 2024
1 parent ef4a74a commit b334993
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 0 deletions.
3 changes: 3 additions & 0 deletions br/pkg/aws/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,12 @@ go_library(
deps = [
"//br/pkg/config",
"//br/pkg/glue",
"//br/pkg/logutil",
"//br/pkg/utils",
"@com_github_aws_aws_sdk_go//aws",
"@com_github_aws_aws_sdk_go//aws/awserr",
"@com_github_aws_aws_sdk_go//aws/client",
"@com_github_aws_aws_sdk_go//aws/request",
"@com_github_aws_aws_sdk_go//aws/session",
"@com_github_aws_aws_sdk_go//service/cloudwatch",
"@com_github_aws_aws_sdk_go//service/ec2",
Expand Down
47 changes: 47 additions & 0 deletions br/pkg/aws/ebs.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ import (

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/awserr"
"github.com/aws/aws-sdk-go/aws/client"
"github.com/aws/aws-sdk-go/aws/request"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/cloudwatch"
"github.com/aws/aws-sdk-go/service/ec2"
Expand All @@ -19,6 +21,7 @@ import (
"github.com/pingcap/log"
"github.com/pingcap/tidb/br/pkg/config"
"github.com/pingcap/tidb/br/pkg/glue"
"github.com/pingcap/tidb/br/pkg/logutil"
"github.com/pingcap/tidb/br/pkg/utils"
"go.uber.org/atomic"
"go.uber.org/zap"
Expand All @@ -40,11 +43,55 @@ type EC2Session struct {

type VolumeAZs map[string]string

// ebsBackupRetryer wraps another request.Retryer. MaxRetries and ShouldRetry
// are forwarded to the wrapped retryer unchanged; RetryRules additionally
// raises the back-off for the operations listed in backOffTimeOverride and
// logs every retry.
type ebsBackupRetryer struct {
// delegate is the wrapped retryer that every method consults first.
delegate request.Retryer
}

// MaxRetries reports the retry limit of the wrapped retryer, unmodified.
func (e *ebsBackupRetryer) MaxRetries() int {
	limit := e.delegate.MaxRetries()
	return limit
}

// backOffTimeOverride maps an AWS operation name to the minimum back-off that
// RetryRules applies before the operation is retried.
//
// The keys are spelled out as string literals because the SDK does not export
// its operation-name constants (for example
// `const opCreateSnapshots = "CreateSnapshots"`).
var backOffTimeOverride = map[string]time.Duration{
	// Mirrors the SDK's unexported `const opCreateVolume = "CreateVolume"`.
	"CreateVolume": time.Second * 20,
	// The quota for creating snapshots is 5 per minute; back off for a
	// longer time so we won't exceed it.
	"CreateSnapshots": time.Second * 20,
}

// RetryRules returns how long to wait before retrying r.
//
// The wrapped retryer is asked first. If backOffTimeOverride has an entry for
// the request's operation name and that entry is longer than the wrapped
// retryer's answer, the entry is used instead. Every call emits a warning log
// carrying the request error, the chosen back-off and a stack trace.
func (e *ebsBackupRetryer) RetryRules(r *request.Request) time.Duration {
	delay := e.delegate.RetryRules(r)
	// The override only ever lengthens the delay; it never shortens it.
	if floor, found := backOffTimeOverride[r.Operation.Name]; found && floor > delay {
		delay = floor
	}
	log.Warn(
		"Retrying an operation.",
		logutil.ShortError(r.Error),
		zap.Duration("backoff", delay),
		zap.StackSkip("stack", 1),
	)
	return delay
}

// ShouldRetry forwards the retry decision for r to the wrapped retryer and
// returns its answer as-is.
func (e *ebsBackupRetryer) ShouldRetry(r *request.Request) bool {
	retry := e.delegate.ShouldRetry(r)
	return retry
}

func NewEC2Session(concurrency uint, region string) (*EC2Session, error) {
// aws-sdk has builtin exponential backoff retry mechanism, see:
// https://github.com/aws/aws-sdk-go/blob/db4388e8b9b19d34dcde76c492b17607cd5651e2/aws/client/default_retryer.go#L12-L16
// with default retryer & max-retry=9, we will wait for at least 30s in total
awsConfig := aws.NewConfig().WithMaxRetries(9).WithRegion(region)
defRetry := new(client.DefaultRetryer)
ourRetry := ebsBackupRetryer{
delegate: defRetry,
}
awsConfig.Retryer = ourRetry
// TiDB Operator needs to make sure we have the correct permissions to call the AWS API (through AWS environment variables).
// We may change this behaviour in the future.
sessionOptions := session.Options{Config: *awsConfig}
Expand Down

0 comments on commit b334993

Please sign in to comment.