diff --git a/cmd/explaintest/r/explain_complex_stats.result b/cmd/explaintest/r/explain_complex_stats.result index f00603e25c4ad..b0723109e8eb1 100644 --- a/cmd/explaintest/r/explain_complex_stats.result +++ b/cmd/explaintest/r/explain_complex_stats.result @@ -158,11 +158,11 @@ Projection_5 39.28 root test.st.cm, test.st.p1, test.st.p2, test.st.p3, test.st. └─TableScan_14 160.23 cop table:st, keep order:false explain select dt.id as id, dt.aid as aid, dt.pt as pt, dt.dic as dic, dt.cm as cm, rr.gid as gid, rr.acd as acd, rr.t as t,dt.p1 as p1, dt.p2 as p2, dt.p3 as p3, dt.p4 as p4, dt.p5 as p5, dt.p6_md5 as p6, dt.p7_md5 as p7 from dt dt join rr rr on (rr.pt = 'ios' and rr.t > 1478185592 and dt.aid = rr.aid and dt.dic = rr.dic) where dt.pt = 'ios' and dt.t > 1478185592 and dt.bm = 0 limit 2000; id count task operator info -Projection_9 428.55 root dt.id, dt.aid, dt.pt, dt.dic, dt.cm, rr.gid, rr.acd, rr.t, dt.p1, dt.p2, dt.p3, dt.p4, dt.p5, dt.p6_md5, dt.p7_md5 -└─Limit_12 428.55 root offset:0, count:2000 - └─IndexJoin_18 428.55 root inner join, inner:IndexLookUp_17, outer key:dt.aid, dt.dic, inner key:rr.aid, rr.dic - ├─TableReader_42 428.55 root data:Selection_41 - │ └─Selection_41 428.55 cop eq(dt.bm, 0), eq(dt.pt, "ios"), gt(dt.t, 1478185592) +Projection_9 428.32 root dt.id, dt.aid, dt.pt, dt.dic, dt.cm, rr.gid, rr.acd, rr.t, dt.p1, dt.p2, dt.p3, dt.p4, dt.p5, dt.p6_md5, dt.p7_md5 +└─Limit_12 428.32 root offset:0, count:2000 + └─IndexJoin_18 428.32 root inner join, inner:IndexLookUp_17, outer key:dt.aid, dt.dic, inner key:rr.aid, rr.dic + ├─TableReader_42 428.32 root data:Selection_41 + │ └─Selection_41 428.32 cop eq(dt.bm, 0), eq(dt.pt, "ios"), gt(dt.t, 1478185592) │ └─TableScan_40 2000.00 cop table:dt, range:[0,+inf], keep order:false └─IndexLookUp_17 970.00 root ├─IndexScan_14 1.00 cop table:rr, index:aid, dic, range: decided by [dt.aid dt.dic], keep order:false diff --git a/cmd/explaintest/r/explain_easy_stats.result b/cmd/explaintest/r/explain_easy_stats.result index b93c0e4b6f3fd..73cbf7877b685 100644 --- a/cmd/explaintest/r/explain_easy_stats.result +++ b/cmd/explaintest/r/explain_easy_stats.result @@ -47,10 +47,10 @@ explain select * from t1 left join t2 on t1.c2 = t2.c1 where t1.c1 > 1; id count task operator info Projection_6 2481.25 root test.t1.c1, test.t1.c2, test.t1.c3, test.t2.c1, test.t2.c2 └─MergeJoin_7 2481.25 root left outer join, left key:test.t1.c2, right key:test.t2.c1 - ├─IndexLookUp_17 1999.00 root - │ ├─Selection_16 1999.00 cop gt(test.t1.c1, 1) + ├─IndexLookUp_17 1998.00 root + │ ├─Selection_16 1998.00 cop gt(test.t1.c1, 1) │ │ └─IndexScan_14 1999.00 cop table:t1, index:c2, range:[NULL,+inf], keep order:true - │ └─TableScan_15 1999.00 cop table:t1, keep order:false + │ └─TableScan_15 1998.00 cop table:t1, keep order:false └─IndexLookUp_21 1985.00 root ├─IndexScan_19 1985.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true └─TableScan_20 1985.00 cop table:t2, keep order:false diff --git a/cmd/explaintest/r/tpch.result b/cmd/explaintest/r/tpch.result index 1efe1d727b6ec..25c5be78f56b2 100644 --- a/cmd/explaintest/r/tpch.result +++ b/cmd/explaintest/r/tpch.result @@ -251,7 +251,7 @@ limit 10; id count task operator info Projection_14 10.00 root tpch.lineitem.l_orderkey, 7_col_0, tpch.orders.o_orderdate, tpch.orders.o_shippriority └─TopN_17 10.00 root 7_col_0:desc, tpch.orders.o_orderdate:asc, offset:0, count:10 - └─HashAgg_20 40256361.71 root group by:tpch.lineitem.l_orderkey, tpch.orders.o_orderdate, tpch.orders.o_shippriority, funcs:sum(mul(tpch.lineitem.l_extendedprice, minus(1, tpch.lineitem.l_discount))), firstrow(tpch.orders.o_orderdate), firstrow(tpch.orders.o_shippriority), firstrow(tpch.lineitem.l_orderkey) + └─HashAgg_20 40227041.09 root group by:tpch.lineitem.l_orderkey, tpch.orders.o_orderdate, tpch.orders.o_shippriority, funcs:sum(mul(tpch.lineitem.l_extendedprice, minus(1, tpch.lineitem.l_discount))), firstrow(tpch.orders.o_orderdate), firstrow(tpch.orders.o_shippriority), firstrow(tpch.lineitem.l_orderkey) └─IndexJoin_26 91515927.49 root inner join, inner:IndexLookUp_25, outer key:tpch.orders.o_orderkey, inner key:tpch.lineitem.l_orderkey ├─HashRightJoin_46 22592975.51 root inner join, inner:TableReader_52, equal:[eq(tpch.customer.c_custkey, tpch.orders.o_custkey)] │ ├─TableReader_52 1498236.00 root data:Selection_51 @@ -260,9 +260,9 @@ Projection_14 10.00 root tpch.lineitem.l_orderkey, 7_col_0, tpch.orders.o_orderd │ └─TableReader_49 36870000.00 root data:Selection_48 │ └─Selection_48 36870000.00 cop lt(tpch.orders.o_orderdate, 1995-03-13 00:00:00.000000) │ └─TableScan_47 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false - └─IndexLookUp_25 163063881.42 root + └─IndexLookUp_25 162945114.27 root ├─IndexScan_22 1.00 cop table:lineitem, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.orders.o_orderkey], keep order:false - └─Selection_24 163063881.42 cop gt(tpch.lineitem.l_shipdate, 1995-03-13 00:00:00.000000) + └─Selection_24 162945114.27 cop gt(tpch.lineitem.l_shipdate, 1995-03-13 00:00:00.000000) └─TableScan_23 1.00 cop table:lineitem, keep order:false /* Q4 Order Priority Checking Query @@ -922,13 +922,13 @@ p_brand, p_type, p_size; id count task operator info -Sort_13 15.00 root supplier_cnt:desc, tpch.part.p_brand:asc, tpch.part.p_type:asc, tpch.part.p_size:asc -└─Projection_14 15.00 root tpch.part.p_brand, tpch.part.p_type, tpch.part.p_size, 9_col_0 - └─HashAgg_17 15.00 root group by:tpch.part.p_brand, tpch.part.p_size, tpch.part.p_type, funcs:count(distinct tpch.partsupp.ps_suppkey), firstrow(tpch.part.p_brand), firstrow(tpch.part.p_type), firstrow(tpch.part.p_size) - └─HashLeftJoin_22 4022816.68 root anti semi join, inner:TableReader_46, equal:[eq(tpch.partsupp.ps_suppkey, tpch.supplier.s_suppkey)] - ├─IndexJoin_26 5028520.85 root inner join, inner:IndexReader_25, outer key:tpch.part.p_partkey, inner key:tpch.partsupp.ps_partkey - │ ├─TableReader_41 1249969.60 root data:Selection_40 - │ │ └─Selection_40 1249969.60 cop in(tpch.part.p_size, 48, 19, 12, 4, 41, 7, 21, 39), ne(tpch.part.p_brand, "Brand#34"), not(like(tpch.part.p_type, "LARGE BRUSHED%", 92)) +Sort_13 14.41 root supplier_cnt:desc, tpch.part.p_brand:asc, tpch.part.p_type:asc, tpch.part.p_size:asc +└─Projection_14 14.41 root tpch.part.p_brand, tpch.part.p_type, tpch.part.p_size, 9_col_0 + └─HashAgg_17 14.41 root group by:tpch.part.p_brand, tpch.part.p_size, tpch.part.p_type, funcs:count(distinct tpch.partsupp.ps_suppkey), firstrow(tpch.part.p_brand), firstrow(tpch.part.p_type), firstrow(tpch.part.p_size) + └─HashLeftJoin_22 3863988.24 root anti semi join, inner:TableReader_46, equal:[eq(tpch.partsupp.ps_suppkey, tpch.supplier.s_suppkey)] + ├─IndexJoin_26 4829985.30 root inner join, inner:IndexReader_25, outer key:tpch.part.p_partkey, inner key:tpch.partsupp.ps_partkey + │ ├─TableReader_41 1200618.43 root data:Selection_40 + │ │ └─Selection_40 1200618.43 cop in(tpch.part.p_size, 48, 19, 12, 4, 41, 7, 21, 39), ne(tpch.part.p_brand, "Brand#34"), not(like(tpch.part.p_type, "LARGE BRUSHED%", 92)) │ │ └─TableScan_39 10000000.00 cop table:part, range:[-inf,+inf], keep order:false │ └─IndexReader_25 1.00 root index:IndexScan_24 │ └─IndexScan_24 1.00 cop table:partsupp, index:PS_PARTKEY, PS_SUPPKEY, range: decided by [tpch.part.p_partkey], keep order:false diff --git a/statistics/ddl_test.go b/statistics/ddl_test.go index a226eb97ae9e9..b5906e7224c18 100644 --- a/statistics/ddl_test.go +++ b/statistics/ddl_test.go @@ -137,7 +137,7 @@ func (s *testStatsCacheSuite) TestDDLHistogram(c *C) { c.Assert(count, Equals, float64(2)) count, err = statsTbl.ColumnEqualRowCount(sc, types.NewIntDatum(1), tableInfo.Columns[3].ID) c.Assert(err, IsNil) - c.Assert(count, Equals, float64(2)) + c.Assert(count, Equals, float64(0)) testKit.MustExec("alter table t add column c4 datetime NOT NULL default CURRENT_TIMESTAMP") err = h.HandleDDLEvent(<-h.DDLEventCh()) diff --git a/statistics/histogram.go b/statistics/histogram.go index ca52273b20e1a..56ae7c104bf35 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -729,7 +729,7 @@ func (c *Column) String() string { return c.Histogram.ToString(0) } -func (c *Column) equalRowCount(sc *stmtctx.StatementContext, val types.Datum) (float64, error) { +func (c *Column) equalRowCount(sc *stmtctx.StatementContext, val types.Datum, modifyCount int64) (float64, error) { if val.IsNull() { return float64(c.NullCount), nil } @@ -738,7 +738,7 @@ func (c *Column) equalRowCount(sc *stmtctx.StatementContext, val types.Datum) (f return 0.0, nil } if c.NDV > 0 && c.outOfRange(val) { - return c.totalRowCount() / (float64(c.NDV)), nil + return float64(modifyCount) / float64(c.NDV), nil } if c.CMSketch != nil { count, err := c.CMSketch.queryValue(sc, val) @@ -759,7 +759,7 @@ func (c *Column) getColumnRowCount(sc *stmtctx.StatementContext, ranges []*range // the point case. if !rg.LowExclude && !rg.HighExclude { var cnt float64 - cnt, err = c.equalRowCount(sc, rg.LowVal[0]) + cnt, err = c.equalRowCount(sc, rg.LowVal[0], modifyCount) if err != nil { return 0, errors.Trace(err) } @@ -773,14 +773,14 @@ func (c *Column) getColumnRowCount(sc *stmtctx.StatementContext, ranges []*range cnt += float64(modifyCount) / outOfRangeBetweenRate } if rg.LowExclude { - lowCnt, err := c.equalRowCount(sc, rg.LowVal[0]) + lowCnt, err := c.equalRowCount(sc, rg.LowVal[0], modifyCount) if err != nil { return 0, errors.Trace(err) } cnt -= lowCnt } if !rg.HighExclude { - highCnt, err := c.equalRowCount(sc, rg.HighVal[0]) + highCnt, err := c.equalRowCount(sc, rg.HighVal[0], modifyCount) if err != nil { return 0, errors.Trace(err) } @@ -809,10 +809,10 @@ func (idx *Index) String() string { return idx.Histogram.ToString(len(idx.Info.Columns)) } -func (idx *Index) equalRowCount(sc *stmtctx.StatementContext, b []byte) float64 { +func (idx *Index) equalRowCount(sc *stmtctx.StatementContext, b []byte, modifyCount int64) float64 { val := types.NewBytesDatum(b) if idx.NDV > 0 && idx.outOfRange(val) { - return idx.totalRowCount() / (float64(idx.NDV)) + return float64(modifyCount) / (float64(idx.NDV)) } if idx.CMSketch != nil { return float64(idx.CMSketch.QueryBytes(b)) @@ -834,7 +834,7 @@ func (idx *Index) getRowCount(sc *stmtctx.StatementContext, indexRanges []*range fullLen := len(indexRange.LowVal) == len(indexRange.HighVal) && len(indexRange.LowVal) == len(idx.Info.Columns) if fullLen && bytes.Equal(lb, rb) { if !indexRange.LowExclude && !indexRange.HighExclude { - totalCount += idx.equalRowCount(sc, lb) + totalCount += idx.equalRowCount(sc, lb, modifyCount) } continue } diff --git a/statistics/selectivity_test.go b/statistics/selectivity_test.go index 77225890c2fca..0a2a4e13c07a9 100644 --- a/statistics/selectivity_test.go +++ b/statistics/selectivity_test.go @@ -158,7 +158,7 @@ func (s *testSelectivitySuite) TestSelectivity(c *C) { }, { exprs: "a >= 1 and b > 1 and a < 2", - selectivity: 0.01817558299, + selectivity: 0.01783264746, }, { exprs: "a >= 1 and c > 1 and a < 2", @@ -174,7 +174,7 @@ func (s *testSelectivitySuite) TestSelectivity(c *C) { }, { exprs: "b > 1", - selectivity: 0.98148148148, + selectivity: 0.96296296296, }, { exprs: "a > 1 and b < 2 and c > 3 and d < 4 and e > 5", @@ -304,6 +304,24 @@ func (s *testSelectivitySuite) TestEstimationForUnknownValues(c *C) { count, err = statsTbl.GetRowCountByColumnRanges(sc, colID, getRange(1, 30)) c.Assert(err, IsNil) c.Assert(count, Equals, 0.0) + + testKit.MustExec("drop table t") + testKit.MustExec("create table t(a int, b int, index idx(b))") + testKit.MustExec("insert into t values (1,1)") + testKit.MustExec("analyze table t") + table, err = s.dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + c.Assert(err, IsNil) + statsTbl = h.GetTableStats(table.Meta()) + + colID = table.Meta().Columns[0].ID + count, err = statsTbl.GetRowCountByColumnRanges(sc, colID, getRange(2, 2)) + c.Assert(err, IsNil) + c.Assert(count, Equals, 0.0) + + idxID = table.Meta().Indices[0].ID + count, err = statsTbl.GetRowCountByIndexRanges(sc, idxID, getRange(2, 2)) + c.Assert(err, IsNil) + c.Assert(count, Equals, 0.0) } func BenchmarkSelectivity(b *testing.B) { diff --git a/statistics/table.go b/statistics/table.go index 3592b95fea1ef..1d05d1b284e6d 100644 --- a/statistics/table.go +++ b/statistics/table.go @@ -394,7 +394,7 @@ func (t *Table) ColumnEqualRowCount(sc *stmtctx.StatementContext, value types.Da return float64(t.Count) / pseudoEqualRate, nil } c := t.Columns[colID] - result, err := c.equalRowCount(sc, value) + result, err := c.equalRowCount(sc, value, t.ModifyCount) result *= c.getIncreaseFactor(t.Count) return result, errors.Trace(err) } @@ -551,7 +551,7 @@ func (coll *HistColl) getIndexRowCount(sc *stmtctx.StatementContext, idxID int64 // so we use heuristic methods to estimate the selectivity. if idx.NDV > 0 && len(ran.LowVal) == len(idx.Info.Columns) && rangePosition == len(ran.LowVal) { // for equality queries - selectivity = 1.0 / float64(idx.NDV) + selectivity = float64(coll.ModifyCount) / float64(idx.NDV) / idx.totalRowCount() } else { // for range queries selectivity = float64(coll.ModifyCount) / outOfRangeBetweenRate / idx.totalRowCount()