Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

plan: push down constant filters over join properly #9848

Merged
merged 3 commits into from
Apr 4, 2019
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions cmd/explaintest/r/explain_easy.result
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,50 @@ Projection_11 10000.00 root 9_aux_0
│ └─TableScan_27 10.00 cop table:t, keep order:false, stats:pseudo
└─TableReader_33 1.00 root data:TableScan_32
└─TableScan_32 1.00 cop table:t1, range: decided by [s.c], keep order:false, stats:pseudo
insert into t values(1, 1, 1), (2, 2 ,2), (3, 3, 3), (4, 3, 4),(5,3,5);
analyze table t;
explain select t.c in (select count(*) from t s, t t1 where s.b = t.a and s.b = 3 and s.a = t1.a) from t;
id count task operator info
Projection_11 5.00 root 9_aux_0
└─Apply_13 5.00 root left outer semi join, inner:StreamAgg_20, other cond:eq(test.t.c, 7_col_0)
├─TableReader_15 5.00 root data:TableScan_14
│ └─TableScan_14 5.00 cop table:t, range:[-inf,+inf], keep order:false
└─StreamAgg_20 1.00 root funcs:count(1)
└─IndexJoin_49 2.40 root inner join, inner:TableReader_48, outer key:s.a, inner key:t1.a
├─IndexReader_41 2.40 root index:Selection_40
│ └─Selection_40 2.40 cop eq(3, test.t.a)
│ └─IndexScan_39 3.00 cop table:s, index:b, range:[3,3], keep order:false
└─TableReader_48 0.80 root data:Selection_47
└─Selection_47 0.80 cop eq(3, test.t.a)
└─TableScan_46 1.00 cop table:t1, range: decided by [s.a], keep order:false
explain select t.c in (select count(*) from t s left join t t1 on s.a = t1.a where 3 = t.a and s.b = 3) from t;
id count task operator info
Projection_10 5.00 root 9_aux_0
└─Apply_12 5.00 root left outer semi join, inner:StreamAgg_19, other cond:eq(test.t.c, 7_col_0)
├─TableReader_14 5.00 root data:TableScan_13
│ └─TableScan_13 5.00 cop table:t, range:[-inf,+inf], keep order:false
└─StreamAgg_19 1.00 root funcs:count(1)
└─IndexJoin_43 2.40 root left outer join, inner:TableReader_42, outer key:s.a, inner key:t1.a
├─IndexReader_35 2.40 root index:Selection_34
│ └─Selection_34 2.40 cop eq(3, test.t.a)
│ └─IndexScan_33 3.00 cop table:s, index:b, range:[3,3], keep order:false
└─TableReader_42 0.80 root data:Selection_41
└─Selection_41 0.80 cop eq(3, test.t.a)
└─TableScan_40 1.00 cop table:t1, range: decided by [s.a], keep order:false
explain select t.c in (select count(*) from t s right join t t1 on s.a = t1.a where 3 = t.a and t1.b = 3) from t;
id count task operator info
Projection_10 5.00 root 9_aux_0
└─Apply_12 5.00 root left outer semi join, inner:StreamAgg_19, other cond:eq(test.t.c, 7_col_0)
├─TableReader_14 5.00 root data:TableScan_13
│ └─TableScan_13 5.00 cop table:t, range:[-inf,+inf], keep order:false
└─StreamAgg_19 1.00 root funcs:count(1)
└─IndexJoin_43 2.40 root right outer join, inner:TableReader_42, outer key:t1.a, inner key:s.a
├─TableReader_42 0.80 root data:Selection_41
│ └─Selection_41 0.80 cop eq(3, test.t.a)
│ └─TableScan_40 1.00 cop table:s, range: decided by [t1.a], keep order:false
└─IndexReader_35 2.40 root index:Selection_34
└─Selection_34 2.40 cop eq(3, test.t.a)
└─IndexScan_33 3.00 cop table:t1, index:b, range:[3,3], keep order:false
drop table if exists t;
create table t(a int unsigned);
explain select t.a = '123455' from t;
Expand Down
6 changes: 6 additions & 0 deletions cmd/explaintest/t/explain_easy.test
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ explain select t.c in (select count(*) from t s ignore index(idx), t t1 where s.
explain select t.c in (select count(*) from t s use index(idx), t t1 where s.b = t.a and s.a = t1.a) from t;
explain select t.c in (select count(*) from t s use index(idx), t t1 where s.b = t.a and s.c = t1.a) from t;

insert into t values(1, 1, 1), (2, 2 ,2), (3, 3, 3), (4, 3, 4),(5,3,5);
analyze table t;
explain select t.c in (select count(*) from t s, t t1 where s.b = t.a and s.b = 3 and s.a = t1.a) from t;
explain select t.c in (select count(*) from t s left join t t1 on s.a = t1.a where 3 = t.a and s.b = 3) from t;
explain select t.c in (select count(*) from t s right join t t1 on s.a = t1.a where 3 = t.a and t1.b = 3) from t;

drop table if exists t;
create table t(a int unsigned);
explain select t.a = '123455' from t;
Expand Down
8 changes: 7 additions & 1 deletion expression/constant_propagation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,13 @@ func (s *testSuite) TestOuterJoinPropConst(c *C) {
"│ └─TableScan_7 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo",
"└─TableDual_9 8000.00 root rows:0",
))
tk.MustQuery("explain select * from t1 left join t2 on t1.a =1 and t1.a = 2;").Check(testkit.Rows(
tk.MustQuery("explain select * from t1 right join t2 on false;").Check(testkit.Rows(
"HashRightJoin_6 80000000.00 root right outer join, inner:TableDual_7",
"├─TableDual_7 8000.00 root rows:0",
"└─TableReader_9 10000.00 root data:TableScan_8",
" └─TableScan_8 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo",
))
tk.MustQuery("explain select * from t1 left join t2 on t1.a = 1 and t1.a = 2;").Check(testkit.Rows(
"HashLeftJoin_6 80000000.00 root left outer join, inner:TableDual_9",
"├─TableReader_8 10000.00 root data:TableScan_7",
"│ └─TableScan_7 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo",
Expand Down
35 changes: 35 additions & 0 deletions planner/core/logical_plan_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,35 @@ func (b *PlanBuilder) buildResultSetNode(node ast.ResultSetNode) (p LogicalPlan,
}
}

// pushDownConstExpr checks if the condition is from filter condition, if true, push it down to both
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does filter here mean?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To differentiate the where conditions from join conditions, I use filter condition here. I can change it to another name if it is misleading.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So it should mean the conditions excluding the join key conditions? If so, does `deriveLeft || deriveRight` match that meaning?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

extractOnCondition is called by PredicatePushDown and attachOnConds. In PredicatePushDown, the input conditions are treated as filter conditions, and at least one of deriveLeft or deriveRight is true; in attachOnConds, the input conditions are join conditions, and deriveLeft and deriveRight are both false, so it should be correct?

// children of join, whatever the join type is; if false, push it down to inner child of outer join,
// and both children of non-outer-join.
func (p *LogicalJoin) pushDownConstExpr(expr expression.Expression, leftCond []expression.Expression,
rightCond []expression.Expression, filterCond bool) ([]expression.Expression, []expression.Expression) {
switch p.JoinType {
case LeftOuterJoin, LeftOuterSemiJoin, AntiLeftOuterSemiJoin:
if filterCond {
leftCond = append(leftCond, expr)
// Append the expr to right join condition instead of `rightCond`, to make it able to be
// pushed down to children of join.
p.RightConditions = append(p.RightConditions, expr)
zz-jason marked this conversation as resolved.
Show resolved Hide resolved
} else {
rightCond = append(rightCond, expr)
}
case RightOuterJoin:
if filterCond {
rightCond = append(rightCond, expr)
p.LeftConditions = append(p.LeftConditions, expr)
} else {
leftCond = append(leftCond, expr)
}
case SemiJoin, AntiSemiJoin, InnerJoin:
leftCond = append(leftCond, expr)
rightCond = append(rightCond, expr)
}
return leftCond, rightCond
}

// extractOnCondition divides conditions in CNF of join node into 4 groups.
// These conditions can be where conditions, join conditions, or collection of both.
// If deriveLeft/deriveRight is set, we would try to derive more conditions for left/right plan.
Expand Down Expand Up @@ -233,6 +262,12 @@ func (p *LogicalJoin) extractOnCondition(conditions []expression.Expression, der
}
}
columns := expression.ExtractColumns(expr)
// `columns` may be empty, if the condition is like `correlated_column op constant`, or `constant`,
// push this kind of constant condition down according to join type.
if len(columns) == 0 {
leftCond, rightCond = p.pushDownConstExpr(expr, leftCond, rightCond, deriveLeft || deriveRight)
continue
}
allFromLeft, allFromRight := true, true
for _, col := range columns {
if !left.Schema().Contains(col) {
Expand Down
2 changes: 1 addition & 1 deletion planner/core/logical_plan_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -906,7 +906,7 @@ func (s *testPlanSuite) TestJoinReOrder(c *C) {
},
{
sql: "select * from t o where o.b in (select t3.c from t t1, t t2, t t3 where t1.a = t3.a and t2.a = t3.a and t2.a = o.a and t1.a = 1)",
best: "Apply{DataScan(o)->Join{Join{DataScan(t3)->DataScan(t1)}->DataScan(t2)}->Projection}->Projection",
best: "Apply{DataScan(o)->Join{Join{DataScan(t1)->DataScan(t2)}->DataScan(t3)}->Projection}->Projection",
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@winoros Please confirm this join order change does not affect correctness.

Copy link
Member

@winoros winoros Mar 27, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The join order changed since some filter that was pushed down before is no longer pushed down.

},
}
for _, tt := range tests {
Expand Down