Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

statistics, executor: merge Count and ModifyCount for global stats #39014

Merged
merged 9 commits into from
Nov 11, 2022
12 changes: 11 additions & 1 deletion executor/analyze_global_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,17 @@ func (e *AnalyzeExec) handleGlobalStats(ctx context.Context, needGlobalStats boo
for i := 0; i < globalStats.Num; i++ {
hg, cms, topN := globalStats.Hg[i], globalStats.Cms[i], globalStats.TopN[i]
// fms for global stats doesn't need to dump to kv.
err = statsHandle.SaveStatsToStorage(globalStatsID.tableID, globalStats.Count, info.isIndex, hg, cms, topN, info.statsVersion, 1, true)
err = statsHandle.SaveStatsToStorage(globalStatsID.tableID,
globalStats.Count,
globalStats.ModifyCount,
info.isIndex,
hg,
cms,
topN,
info.statsVersion,
1,
true,
)
if err != nil {
logutil.Logger(ctx).Error("save global-level stats to storage failed", zap.Error(err))
}
Expand Down
4 changes: 2 additions & 2 deletions statistics/handle/ddl.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ func (h *Handle) updateGlobalStats(tblInfo *model.TableInfo) error {
for i := 0; i < newColGlobalStats.Num; i++ {
hg, cms, topN := newColGlobalStats.Hg[i], newColGlobalStats.Cms[i], newColGlobalStats.TopN[i]
// fms for global stats doesn't need to dump to kv.
err = h.SaveStatsToStorage(tableID, newColGlobalStats.Count, 0, hg, cms, topN, 2, 1, false)
err = h.SaveStatsToStorage(tableID, newColGlobalStats.Count, newColGlobalStats.ModifyCount, 0, hg, cms, topN, 2, 1, false)
if err != nil {
return err
}
Expand Down Expand Up @@ -186,7 +186,7 @@ func (h *Handle) updateGlobalStats(tblInfo *model.TableInfo) error {
for i := 0; i < newIndexGlobalStats.Num; i++ {
hg, cms, topN := newIndexGlobalStats.Hg[i], newIndexGlobalStats.Cms[i], newIndexGlobalStats.TopN[i]
// fms for global stats doesn't need to dump to kv.
err = h.SaveStatsToStorage(tableID, newIndexGlobalStats.Count, 1, hg, cms, topN, 2, 1, false)
err = h.SaveStatsToStorage(tableID, newIndexGlobalStats.Count, newIndexGlobalStats.ModifyCount, 1, hg, cms, topN, 2, 1, false)
if err != nil {
return err
}
Expand Down
8 changes: 6 additions & 2 deletions statistics/handle/dump.go
Original file line number Diff line number Diff line change
Expand Up @@ -252,14 +252,18 @@ func (h *Handle) loadStatsFromJSON(tableInfo *model.TableInfo, physicalID int64,

for _, col := range tbl.Columns {
// loadStatsFromJSON doesn't support partition table now.
err = h.SaveStatsToStorage(tbl.PhysicalID, tbl.Count, 0, &col.Histogram, col.CMSketch, col.TopN, int(col.StatsVer), 1, false)
// The table level Count and Modify_count would be overridden by the SaveMetaToStorage below, so we don't need
// to care about them here.
err = h.SaveStatsToStorage(tbl.PhysicalID, tbl.Count, 0, 0, &col.Histogram, col.CMSketch, col.TopN, int(col.StatsVer), 1, false)
if err != nil {
return errors.Trace(err)
}
}
for _, idx := range tbl.Indices {
// loadStatsFromJSON doesn't support partition table now.
err = h.SaveStatsToStorage(tbl.PhysicalID, tbl.Count, 1, &idx.Histogram, idx.CMSketch, idx.TopN, int(idx.StatsVer), 1, false)
// The table level Count and Modify_count would be overridden by the SaveMetaToStorage below, so we don't need
// to care about them here.
err = h.SaveStatsToStorage(tbl.PhysicalID, tbl.Count, 0, 1, &idx.Histogram, idx.CMSketch, idx.TopN, int(idx.StatsVer), 1, false)
if err != nil {
return errors.Trace(err)
}
Expand Down
2 changes: 1 addition & 1 deletion statistics/handle/dump_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ func TestDumpCMSketchWithTopN(t *testing.T) {
cms, _, _, _ := statistics.NewCMSketchAndTopN(5, 2048, fakeData, 20, 100)

stat := h.GetTableStats(tableInfo)
err = h.SaveStatsToStorage(tableInfo.ID, 1, 0, &stat.Columns[tableInfo.Columns[0].ID].Histogram, cms, nil, statistics.Version2, 1, false)
err = h.SaveStatsToStorage(tableInfo.ID, 1, 0, 0, &stat.Columns[tableInfo.Columns[0].ID].Histogram, cms, nil, statistics.Version2, 1, false)
require.NoError(t, err)
require.Nil(t, h.Update(is))

Expand Down
24 changes: 14 additions & 10 deletions statistics/handle/handle.go
Original file line number Diff line number Diff line change
Expand Up @@ -399,12 +399,13 @@ func (h *Handle) UpdateSessionVar() error {
// In the column statistics, the variable `num` is equal to the number of columns in the partition table.
// In the index statistics, the variable `num` is always equal to one.
type GlobalStats struct {
Num int
Count int64
Hg []*statistics.Histogram
Cms []*statistics.CMSketch
TopN []*statistics.TopN
Fms []*statistics.FMSketch
Num int
Count int64
ModifyCount int64
Hg []*statistics.Histogram
Cms []*statistics.CMSketch
TopN []*statistics.TopN
Fms []*statistics.FMSketch
}

// MergePartitionStats2GlobalStatsByTableID merge the partition-level stats to global-level stats based on the tableID.
Expand Down Expand Up @@ -513,7 +514,7 @@ func (h *Handle) mergePartitionStats2GlobalStats(sc sessionctx.Context,
allPartitionStats[partitionID] = partitionStats
}
for i := 0; i < globalStats.Num; i++ {
count, hg, cms, topN, fms := partitionStats.GetStatsInfo(histIDs[i], isIndex == 1)
_, hg, cms, topN, fms := partitionStats.GetStatsInfo(histIDs[i], isIndex == 1)
// partition stats is not empty but column stats(hist, topn) is missing
if partitionStats.Count > 0 && (hg == nil || hg.TotalRowCount() <= 0) && (topN == nil || topN.TotalCount() <= 0) {
var errMsg string
Expand All @@ -527,7 +528,8 @@ func (h *Handle) mergePartitionStats2GlobalStats(sc sessionctx.Context,
}
if i == 0 {
// In a partition, we will only update globalStats.Count once
globalStats.Count += count
globalStats.Count += partitionStats.Count
globalStats.ModifyCount += partitionStats.ModifyCount
}
allHg[i] = append(allHg[i], hg)
allCms[i] = append(allCms[i], cms)
Expand Down Expand Up @@ -1532,8 +1534,10 @@ func SaveTableStatsToStorage(sctx sessionctx.Context, results *statistics.Analyz
}

// SaveStatsToStorage saves the stats to storage.
// If count is negative, neither count nor modify count is used or written to the table. Otherwise, the corresponding
// fields in the stats_meta table will be updated.
// TODO: refactor to reduce the number of parameters
func (h *Handle) SaveStatsToStorage(tableID int64, count int64, isIndex int, hg *statistics.Histogram, cms *statistics.CMSketch, topN *statistics.TopN, statsVersion int, isAnalyzed int64, updateAnalyzeTime bool) (err error) {
func (h *Handle) SaveStatsToStorage(tableID int64, count, modifyCount int64, isIndex int, hg *statistics.Histogram, cms *statistics.CMSketch, topN *statistics.TopN, statsVersion int, isAnalyzed int64, updateAnalyzeTime bool) (err error) {
statsVer := uint64(0)
defer func() {
if err == nil && statsVer != 0 {
Expand All @@ -1559,7 +1563,7 @@ func (h *Handle) SaveStatsToStorage(tableID int64, count int64, isIndex int, hg
version := txn.StartTS()
// If the count is less than 0, then we do not want to update the modify count and count.
if count >= 0 {
_, err = exec.ExecuteInternal(ctx, "replace into mysql.stats_meta (version, table_id, count) values (%?, %?, %?)", version, tableID, count)
_, err = exec.ExecuteInternal(ctx, "replace into mysql.stats_meta (version, table_id, count, modify_count) values (%?, %?, %?, %?)", version, tableID, count, modifyCount)
} else {
_, err = exec.ExecuteInternal(ctx, "update mysql.stats_meta set version = %? where table_id = %?", version, tableID)
}
Expand Down
10 changes: 6 additions & 4 deletions statistics/handle/handle_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1659,6 +1659,7 @@ partition by range (a) (
partition p0 values less than (10),
partition p1 values less than (20)
)`)
require.NoError(t, dom.StatsHandle().HandleDDLEvent(<-dom.StatsHandle().DDLEventCh()))
tk.MustExec("insert into t values (1), (5), (null), (11), (15)")
require.NoError(t, dom.StatsHandle().DumpStatsDeltaToKV(handle.DumpAll))

Expand Down Expand Up @@ -1694,14 +1695,15 @@ partition by range (a) (
require.NoError(t, err)
tableInfo := tbl.Meta()
globalStats := h.GetTableStats(tableInfo)
// global.count = p0.count(3) + p1.count(2) + p2.count(2)
// We did not analyze partition p1, so the value here has not changed
require.Equal(t, int64(7), globalStats.Count)
// global.count = p0.count(3) + p1.count(4) + p2.count(2)
// modify count is 2 because we didn't analyze p1 after the second insert
require.Equal(t, int64(9), globalStats.Count)
require.Equal(t, int64(2), globalStats.ModifyCount)

tk.MustExec("analyze table t partition p1;")
globalStats = h.GetTableStats(tableInfo)
// global.count = p0.count(3) + p1.count(4) + p2.count(2)
// The value of p1.Count is correct now.
// The value of modify count is 0 now.
require.Equal(t, int64(9), globalStats.Count)
require.Equal(t, int64(0), globalStats.ModifyCount)

Expand Down
2 changes: 1 addition & 1 deletion statistics/handle/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -886,7 +886,7 @@ func (h *Handle) deleteOutdatedFeedback(tableID, histID, isIndex int64) error {
func (h *Handle) dumpStatsUpdateToKV(tableID, isIndex int64, q *statistics.QueryFeedback, hist *statistics.Histogram, cms *statistics.CMSketch, topN *statistics.TopN, statsVersion int64) error {
hist = statistics.UpdateHistogram(hist, q, int(statsVersion))
// feedback for partition is not ready.
err := h.SaveStatsToStorage(tableID, -1, int(isIndex), hist, cms, topN, int(statsVersion), 0, false)
err := h.SaveStatsToStorage(tableID, -1, 0, int(isIndex), hist, cms, topN, int(statsVersion), 0, false)
metrics.UpdateStatsCounter.WithLabelValues(metrics.RetLabel(err)).Inc()
return errors.Trace(err)
}
Expand Down