diff --git a/executor/analyze.go b/executor/analyze.go index 188b2a48ffd10..5fc072918698e 100644 --- a/executor/analyze.go +++ b/executor/analyze.go @@ -219,7 +219,18 @@ func (e *AnalyzeExec) Next(ctx context.Context, req *chunk.Chunk) error { for i := 0; i < globalStats.Num; i++ { hg, cms, topN, fms := globalStats.Hg[i], globalStats.Cms[i], globalStats.TopN[i], globalStats.Fms[i] // fms for global stats doesn't need to dump to kv. - err = statsHandle.SaveStatsToStorage(globalStatsID.tableID, globalStats.Count, info.isIndex, hg, cms, topN, fms, info.statsVersion, 1, false, true) + err = statsHandle.SaveStatsToStorage(globalStatsID.tableID, + globalStats.Count, + globalStats.ModifyCount, + info.isIndex, + hg, + cms, + topN, + fms, + info.statsVersion, + 1, + false, + true) if err != nil { logutil.Logger(ctx).Error("save global-level stats to storage failed", zap.Error(err)) } diff --git a/statistics/handle/ddl.go b/statistics/handle/ddl.go index d8b8c893cac4b..aef8c174a5bc1 100644 --- a/statistics/handle/ddl.go +++ b/statistics/handle/ddl.go @@ -121,7 +121,7 @@ func (h *Handle) updateGlobalStats(tblInfo *model.TableInfo) error { for i := 0; i < newColGlobalStats.Num; i++ { hg, cms, topN, fms := newColGlobalStats.Hg[i], newColGlobalStats.Cms[i], newColGlobalStats.TopN[i], newColGlobalStats.Fms[i] // fms for global stats doesn't need to dump to kv. - err = h.SaveStatsToStorage(tableID, newColGlobalStats.Count, 0, hg, cms, topN, fms, 2, 1, false, false) + err = h.SaveStatsToStorage(tableID, newColGlobalStats.Count, newColGlobalStats.ModifyCount, 0, hg, cms, topN, fms, 2, 1, false, false) if err != nil { return err } @@ -151,7 +151,7 @@ func (h *Handle) updateGlobalStats(tblInfo *model.TableInfo) error { for i := 0; i < newIndexGlobalStats.Num; i++ { hg, cms, topN, fms := newIndexGlobalStats.Hg[i], newIndexGlobalStats.Cms[i], newIndexGlobalStats.TopN[i], newIndexGlobalStats.Fms[i] // fms for global stats doesn't need to dump to kv. 
- err = h.SaveStatsToStorage(tableID, newIndexGlobalStats.Count, 1, hg, cms, topN, fms, 2, 1, false, false) + err = h.SaveStatsToStorage(tableID, newIndexGlobalStats.Count, newIndexGlobalStats.ModifyCount, 1, hg, cms, topN, fms, 2, 1, false, false) if err != nil { return err } diff --git a/statistics/handle/dump.go b/statistics/handle/dump.go index 71038cd9a74c5..47c207df76afe 100644 --- a/statistics/handle/dump.go +++ b/statistics/handle/dump.go @@ -236,14 +236,18 @@ func (h *Handle) loadStatsFromJSON(tableInfo *model.TableInfo, physicalID int64, for _, col := range tbl.Columns { // loadStatsFromJSON doesn't support partition table now. - err = h.SaveStatsToStorage(tbl.PhysicalID, tbl.Count, 0, &col.Histogram, col.CMSketch, col.TopN, col.FMSketch, int(col.StatsVer), 1, false, false) + // The table level Count and Modify_count would be overridden by the SaveMetaToStorage below, so we don't need + // to care about them here. + err = h.SaveStatsToStorage(tbl.PhysicalID, tbl.Count, 0, 0, &col.Histogram, col.CMSketch, col.TopN, col.FMSketch, int(col.StatsVer), 1, false, false) if err != nil { return errors.Trace(err) } } for _, idx := range tbl.Indices { // loadStatsFromJSON doesn't support partition table now. - err = h.SaveStatsToStorage(tbl.PhysicalID, tbl.Count, 1, &idx.Histogram, idx.CMSketch, idx.TopN, nil, int(idx.StatsVer), 1, false, false) + // The table level Count and Modify_count would be overridden by the SaveMetaToStorage below, so we don't need + // to care about them here. 
+ err = h.SaveStatsToStorage(tbl.PhysicalID, tbl.Count, 0, 1, &idx.Histogram, idx.CMSketch, idx.TopN, nil, int(idx.StatsVer), 1, false, false) if err != nil { return errors.Trace(err) } diff --git a/statistics/handle/dump_test.go b/statistics/handle/dump_test.go index d2c67d49dcdbe..d81fd659664fd 100644 --- a/statistics/handle/dump_test.go +++ b/statistics/handle/dump_test.go @@ -267,7 +267,7 @@ func TestDumpCMSketchWithTopN(t *testing.T) { cms, _, _, _ := statistics.NewCMSketchAndTopN(5, 2048, fakeData, 20, 100) stat := h.GetTableStats(tableInfo) - err = h.SaveStatsToStorage(tableInfo.ID, 1, 0, &stat.Columns[tableInfo.Columns[0].ID].Histogram, cms, nil, nil, statistics.Version2, 1, false, false) + err = h.SaveStatsToStorage(tableInfo.ID, 1, 0, 0, &stat.Columns[tableInfo.Columns[0].ID].Histogram, cms, nil, nil, statistics.Version2, 1, false, false) require.NoError(t, err) require.Nil(t, h.Update(is)) diff --git a/statistics/handle/handle.go b/statistics/handle/handle.go index c213338d44b8e..fa49b87560e7d 100644 --- a/statistics/handle/handle.go +++ b/statistics/handle/handle.go @@ -383,12 +383,13 @@ func (h *Handle) UpdateSessionVar() error { // In the column statistics, the variable `num` is equal to the number of columns in the partition table. // In the index statistics, the variable `num` is always equal to one. type GlobalStats struct { - Num int - Count int64 - Hg []*statistics.Histogram - Cms []*statistics.CMSketch - TopN []*statistics.TopN - Fms []*statistics.FMSketch + Num int + Count int64 + ModifyCount int64 + Hg []*statistics.Histogram + Cms []*statistics.CMSketch + TopN []*statistics.TopN + Fms []*statistics.FMSketch } // MergePartitionStats2GlobalStatsByTableID merge the partition-level stats to global-level stats based on the tableID. 
@@ -472,7 +473,7 @@ func (h *Handle) mergePartitionStats2GlobalStats(sc sessionctx.Context, opts map return } for i := 0; i < globalStats.Num; i++ { - count, hg, cms, topN, fms := partitionStats.GetStatsInfo(histIDs[i], isIndex == 1) + _, hg, cms, topN, fms := partitionStats.GetStatsInfo(histIDs[i], isIndex == 1) // partition stats is not empty but column stats(hist, topn) is missing if partitionStats.Count > 0 && (hg == nil || hg.TotalRowCount() <= 0) && (topN == nil || topN.TotalCount() <= 0) { var errMsg string @@ -486,7 +487,8 @@ func (h *Handle) mergePartitionStats2GlobalStats(sc sessionctx.Context, opts map } if i == 0 { // In a partition, we will only update globalStats.Count once - globalStats.Count += count + globalStats.Count += partitionStats.Count + globalStats.ModifyCount += partitionStats.ModifyCount } allHg[i] = append(allHg[i], hg) allCms[i] = append(allCms[i], cms) @@ -1197,8 +1199,10 @@ func (h *Handle) SaveTableStatsToStorage(results *statistics.AnalyzeResults, nee } // SaveStatsToStorage saves the stats to storage. +// If count is negative, both count and modify count would not be used and not be written to the table. Otherwise, corresponding +// fields in the stats_meta table will be updated. 
// TODO: refactor to reduce the number of parameters -func (h *Handle) SaveStatsToStorage(tableID int64, count int64, isIndex int, hg *statistics.Histogram, cms *statistics.CMSketch, topN *statistics.TopN, fms *statistics.FMSketch, statsVersion int, isAnalyzed int64, needDumpFMS bool, updateAnalyzeTime bool) (err error) { +func (h *Handle) SaveStatsToStorage(tableID int64, count, modifyCount int64, isIndex int, hg *statistics.Histogram, cms *statistics.CMSketch, topN *statistics.TopN, fms *statistics.FMSketch, statsVersion int, isAnalyzed int64, needDumpFMS bool, updateAnalyzeTime bool) (err error) { statsVer := uint64(0) defer func() { if err == nil && statsVer != 0 { @@ -1224,7 +1228,7 @@ func (h *Handle) SaveStatsToStorage(tableID int64, count int64, isIndex int, hg version := txn.StartTS() // If the count is less than 0, then we do not want to update the modify count and count. if count >= 0 { - _, err = exec.ExecuteInternal(ctx, "replace into mysql.stats_meta (version, table_id, count) values (%?, %?, %?)", version, tableID, count) + _, err = exec.ExecuteInternal(ctx, "replace into mysql.stats_meta (version, table_id, count, modify_count) values (%?, %?, %?, %?)", version, tableID, count, modifyCount) } else { _, err = exec.ExecuteInternal(ctx, "update mysql.stats_meta set version = %? 
where table_id = %?", version, tableID) } diff --git a/statistics/handle/handle_test.go b/statistics/handle/handle_test.go index ce77cce0c9238..c8c493f8ba87c 100644 --- a/statistics/handle/handle_test.go +++ b/statistics/handle/handle_test.go @@ -1635,6 +1635,7 @@ partition by range (a) ( partition p0 values less than (10), partition p1 values less than (20) )`) + require.NoError(t, dom.StatsHandle().HandleDDLEvent(<-dom.StatsHandle().DDLEventCh())) tk.MustExec("insert into t values (1), (5), (null), (11), (15)") require.NoError(t, dom.StatsHandle().DumpStatsDeltaToKV(handle.DumpAll)) @@ -1670,14 +1671,15 @@ partition by range (a) ( require.NoError(t, err) tableInfo := tbl.Meta() globalStats := h.GetTableStats(tableInfo) - // global.count = p0.count(3) + p1.count(2) + p2.count(2) - // We did not analyze partition p1, so the value here has not changed - require.Equal(t, int64(7), globalStats.Count) + // global.count = p0.count(3) + p1.count(4) + p2.count(2) + // modify count is 2 because we didn't analyze p1 after the second insert + require.Equal(t, int64(9), globalStats.Count) + require.Equal(t, int64(2), globalStats.ModifyCount) tk.MustExec("analyze table t partition p1;") globalStats = h.GetTableStats(tableInfo) // global.count = p0.count(3) + p1.count(4) + p2.count(4) - // The value of p1.Count is correct now. + // The value of modify count is 0 now. 
require.Equal(t, int64(9), globalStats.Count) require.Equal(t, int64(0), globalStats.ModifyCount) diff --git a/statistics/handle/update.go b/statistics/handle/update.go index d66e5afab79f5..e76ee7d5a0a08 100644 --- a/statistics/handle/update.go +++ b/statistics/handle/update.go @@ -882,7 +882,7 @@ func (h *Handle) deleteOutdatedFeedback(tableID, histID, isIndex int64) error { func (h *Handle) dumpStatsUpdateToKV(tableID, isIndex int64, q *statistics.QueryFeedback, hist *statistics.Histogram, cms *statistics.CMSketch, topN *statistics.TopN, fms *statistics.FMSketch, statsVersion int64) error { hist = statistics.UpdateHistogram(hist, q, int(statsVersion)) // feedback for partition is not ready. - err := h.SaveStatsToStorage(tableID, -1, int(isIndex), hist, cms, topN, fms, int(statsVersion), 0, false, false) + err := h.SaveStatsToStorage(tableID, -1, 0, int(isIndex), hist, cms, topN, fms, int(statsVersion), 0, false, false) metrics.UpdateStatsCounter.WithLabelValues(metrics.RetLabel(err)).Inc() return errors.Trace(err) }