From 4d888ce22b08b9d2bbc2b66890d35195f666e90b Mon Sep 17 00:00:00 2001 From: ti-srebot <66930949+ti-srebot@users.noreply.github.com> Date: Tue, 31 May 2022 12:52:26 +0800 Subject: [PATCH] br: add error into retry list when backup (#34930) (#34983) close pingcap/tidb#34865 --- br/pkg/backup/push.go | 7 +++++++ br/pkg/utils/retry.go | 1 + br/tests/br_full/run.sh | 16 +++++++++++++--- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/br/pkg/backup/push.go b/br/pkg/backup/push.go index c1d4cd7664be1..f5f9ac536a63b 100644 --- a/br/pkg/backup/push.go +++ b/br/pkg/backup/push.go @@ -127,6 +127,13 @@ func (push *pushDown) pushBackup( // Finished. return res, nil } + failpoint.Inject("backup-timeout-error", func(val failpoint.Value) { + msg := val.(string) + logutil.CL(ctx).Debug("failpoint backup-timeout-error injected.", zap.String("msg", msg)) + resp.Error = &backuppb.Error{ + Msg: msg, + } + }) failpoint.Inject("backup-storage-error", func(val failpoint.Value) { msg := val.(string) logutil.CL(ctx).Debug("failpoint backup-storage-error injected.", zap.String("msg", msg)) diff --git a/br/pkg/utils/retry.go b/br/pkg/utils/retry.go index bda305aaf11ac..8d356a41eb970 100644 --- a/br/pkg/utils/retry.go +++ b/br/pkg/utils/retry.go @@ -32,6 +32,7 @@ var retryableServerError = []string{ "error during dispatch", "put object timeout", "internalerror", + "not read from or written to within the timeout period", } // RetryableFunc presents a retryable operation. diff --git a/br/tests/br_full/run.sh b/br/tests/br_full/run.sh index 21f2e2143a002..f099cebe6b014 100755 --- a/br/tests/br_full/run.sh +++ b/br/tests/br_full/run.sh @@ -46,14 +46,24 @@ if ps -q $pid ; then exit 1 fi - # backup full echo "backup with lz4 start..." 
-export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/backup/backup-storage-error=1*return(\"connection refused\")->1*return(\"InternalError\")" -run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/$DB-lz4" --concurrency 4 --compression lz4 +test_log="${TEST_DIR}/${DB}_test.log" +error_str="not read from or written to within the timeout period" +unset BR_LOG_TO_TERM + +export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/backup/backup-storage-error=1*return(\"connection refused\")->1*return(\"InternalError\");github.com/pingcap/tidb/br/pkg/backup/backup-timeout-error=1*return(\"${error_str}\")" +run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/$DB-lz4" --concurrency 4 --compression lz4 --log-file "$test_log" + export GO_FAILPOINTS="" size_lz4=$(du -d 0 $TEST_DIR/$DB-lz4 | awk '{print $1}') +if ! grep -i "$error_str" "$test_log"; then + echo "${error_str} not found in log" + echo "TEST: [$TEST_NAME] test backup failed!" + exit 1 +fi + echo "backup with zstd start..." run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/$DB-zstd" --concurrency 4 --compression zstd --compression-level 6 size_zstd=$(du -d 0 $TEST_DIR/$DB-zstd | awk '{print $1}')