Skip to content

Commit

Permalink
restore: fix gc life time not recovered after table restore (pingcap#218
Browse files Browse the repository at this point in the history
)

* restore: fix gc life time not recovered after table restore

* empty commit to refresh cla

* address comment

* address comment

* address comment

* fix ci
  • Loading branch information
amyangfei authored and kennytm committed Jul 25, 2019
1 parent 034ba60 commit ae59251
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 5 deletions.
25 changes: 20 additions & 5 deletions lightning/restore/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ var (
// DeliverPauser is a shared pauser to pause progress to (*chunkRestore).encodeLoop
var DeliverPauser = common.NewPauser()

// gcLifeTimeKey is a package-level sentinel whose address (&gcLifeTimeKey) is used
// as the context key under which the original GC life time string is stored, so
// that it can be read back from the context instead of being queried again.
var gcLifeTimeKey struct{}

func init() {
cfg := tidbcfg.GetGlobalConfig()
cfg.Log.SlowThreshold = 3000
Expand Down Expand Up @@ -490,12 +492,17 @@ func (rc *RestoreController) restoreTables(ctx context.Context) error {
}
taskCh := make(chan task, rc.cfg.App.IndexConcurrency)
defer close(taskCh)
oriGCLifeTime, err := ObtainGCLifeTime(ctx, rc.tidbMgr.db)
if err != nil {
return err
}
ctx2 := context.WithValue(ctx, &gcLifeTimeKey, oriGCLifeTime)
for i := 0; i < rc.cfg.App.IndexConcurrency; i++ {
go func() {
for task := range taskCh {
tableLogTask := task.tr.logger.Begin(zap.InfoLevel, "restore table")
web.BroadcastTableCheckpoint(task.tr.tableName, task.cp)
err := task.tr.restoreTable(ctx, rc, task.cp)
err := task.tr.restoreTable(ctx2, rc, task.cp)
tableLogTask.End(zap.ErrorLevel, err)
web.BroadcastError(task.tr.tableName, err)
metric.RecordTableCount("completed", err)
Expand Down Expand Up @@ -541,7 +548,7 @@ func (rc *RestoreController) restoreTables(ctx context.Context) error {
wg.Wait()
stopPeriodicActions <- struct{}{}

err := restoreErr.Get()
err = restoreErr.Get()
logTask.End(zap.ErrorLevel, err)
return err
}
Expand Down Expand Up @@ -1366,9 +1373,15 @@ func DoChecksum(ctx context.Context, db *sql.DB, table string) (*RemoteChecksum,
func increaseGCLifeTime(ctx context.Context, db *sql.DB) (oriGCLifeTime string, err error) {
// checksum command usually takes a long time to execute,
// so here need to increase the gcLifeTime for single transaction.
oriGCLifeTime, err = ObtainGCLifeTime(ctx, db)
if err != nil {
return "", errors.Trace(err)
// try to get gcLifeTime from context first.
gcLifeTime, ok := ctx.Value(&gcLifeTimeKey).(string)
if !ok {
oriGCLifeTime, err = ObtainGCLifeTime(ctx, db)
if err != nil {
return "", err
}
} else {
oriGCLifeTime = gcLifeTime
}

var increaseGCLifeTime bool
Expand All @@ -1391,6 +1404,8 @@ func increaseGCLifeTime(ctx context.Context, db *sql.DB) (oriGCLifeTime string,
}
}

failpoint.Inject("IncreaseGCUpdateDuration", nil)

return oriGCLifeTime, nil
}

Expand Down
23 changes: 23 additions & 0 deletions tests/concurrent-restore/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Lightning configuration for the concurrent-restore integration test.
[lightning]
# NOTE(review): values above 1 are presumably what make several tables get
# restored (and checksummed) at the same time — confirm against restoreTables.
table-concurrency = 4
index-concurrency = 4
check-requirements = false
file = "/tmp/lightning_test_result/lightning.log"
level = "info"

[tikv-importer]
addr = "127.0.0.1:8808"

[mydumper]
# NOTE(review): expected to match the directory run.sh populates
# ("$TEST_DIR/restore_conc.mydump") — assumes TEST_DIR is /tmp/lightning_test_result.
data-source-dir = "/tmp/lightning_test_result/restore_conc.mydump"

[tidb]
host = "127.0.0.1"
user = "root"
status-port = 10080
log-level = "error"

[post-restore]
# NOTE(review): checksum presumably has to stay enabled, since the GC life time
# adjustment this test exercises is tied to the checksum step — confirm.
checksum = true
compact = false
analyze = false
48 changes: 48 additions & 0 deletions tests/concurrent-restore/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/bin/sh
#
# Copyright 2019 PingCAP, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# See the License for the specific language governing permissions and
# limitations under the License.

# Integration test: restore several tables concurrently and verify that
# tikv_gc_life_time holds its original value once the import has finished.
# Relies on the test harness for TEST_DIR, TEST_NAME, run_sql, run_lightning
# and check_contains.

set -eu

# Populate the mydumper source
DBPATH="$TEST_DIR/restore_conc.mydump"
TABLE_COUNT=8

# Quote the path so a TEST_DIR containing spaces or glob characters is not split.
mkdir -p "$DBPATH"
echo 'CREATE DATABASE restore_conc;' > "$DBPATH/restore_conc-schema-create.sql"
for i in $(seq "$TABLE_COUNT"); do
    echo "CREATE TABLE tbl$i(i TINYINT);" > "$DBPATH/restore_conc.tbl$i-schema.sql"
    echo "INSERT INTO tbl$i VALUES (1);" > "$DBPATH/restore_conc.tbl$i.sql"
done

# Remember the GC life time before the import: take the last field of the last
# line of the SQL result file.
run_sql 'select VARIABLE_VALUE from mysql.tidb where VARIABLE_NAME = "tikv_gc_life_time"'
ORIGINAL_TIKV_GC_LIFE_TIME=$(tail -n 1 "$TEST_DIR/sql_res.$TEST_NAME.txt" | awk '{print $(NF)}')

# add a delay after increasing tikv_gc_life_time, in order to increase conflict possibility
export GO_FAILPOINTS='github.com/pingcap/tidb-lightning/lightning/restore/IncreaseGCUpdateDuration=sleep(200)'

# Start importing
run_sql 'DROP DATABASE IF EXISTS restore_conc'
run_lightning
echo "Import finished"

# Verify all data are imported
for i in $(seq "$TABLE_COUNT"); do
    run_sql "SELECT sum(i) FROM restore_conc.tbl$i;"
    check_contains 'sum(i): 1'
done

# check tikv_gc_life_time is recovered to the original value
run_sql 'select VARIABLE_VALUE from mysql.tidb where VARIABLE_NAME = "tikv_gc_life_time"'
check_contains "VARIABLE_VALUE: $ORIGINAL_TIKV_GC_LIFE_TIME"

0 comments on commit ae59251

Please sign in to comment.