From a926d1027f8364ba0113b049822c389f42230a38 Mon Sep 17 00:00:00 2001 From: wjHuang Date: Tue, 31 Jan 2023 20:29:55 +0800 Subject: [PATCH] This is an automated cherry-pick of #38826 Signed-off-by: ti-chi-bot --- ...lation_check_use_collation_disabled.result | 181 ++++++++++++++++ ...llation_check_use_collation_enabled.result | 200 ++++++++++++++++++ cmd/explaintest/r/subquery.result | 29 +++ .../t/collation_check_use_collation.test | 128 +++++++++++ expression/collation.go | 8 - expression/integration_test.go | 9 + expression/util.go | 33 +++ planner/core/exhaust_physical_plans.go | 9 + 8 files changed, 589 insertions(+), 8 deletions(-) create mode 100644 cmd/explaintest/r/collation_check_use_collation_disabled.result create mode 100644 cmd/explaintest/r/collation_check_use_collation_enabled.result create mode 100644 cmd/explaintest/t/collation_check_use_collation.test diff --git a/cmd/explaintest/r/collation_check_use_collation_disabled.result b/cmd/explaintest/r/collation_check_use_collation_disabled.result new file mode 100644 index 0000000000000..9e633133b1f4f --- /dev/null +++ b/cmd/explaintest/r/collation_check_use_collation_disabled.result @@ -0,0 +1,181 @@ +set tidb_cost_model_version=1; +create database collation_check_use_collation; +use collation_check_use_collation; +CREATE TABLE `t` ( +`a` char(10) DEFAULT NULL +); +CREATE TABLE `t1` ( +`a` char(10) COLLATE utf8mb4_general_ci DEFAULT NULL +); +insert into t values ("A"); +insert into t1 values ("a"); +select a as a_col from t where t.a = all (select a collate utf8mb4_general_ci from t1); +a_col +select a as a_col from t where t.a != any (select a collate utf8mb4_general_ci from t1); +a_col +A +select a as a_col from t where t.a <= all (select a collate utf8mb4_general_ci from t1); +a_col +A +select a as a_col from t where t.a <= any (select a collate utf8mb4_general_ci from t1); +a_col +A +select a as a_col from t where t.a = (select a collate utf8mb4_general_ci from t1); +a_col +drop table if exists t; +create table t(a enum('a', 'b'), b varchar(20)); +insert into t values ("a", "b"); +select * from t where a in (a); +a b +a b +drop table if exists t; +create table t(a enum('a', 'b') charset utf8mb4 collate utf8mb4_general_ci, b varchar(20)); +insert into t values ("b", "c"); +insert into t values ("B", "b"); +Error 1265 (01000): Data truncated for column 'a' at row 1 +select * from t where 'B' collate utf8mb4_general_ci in (a); +a b +select * from t where 'B' collate utf8mb4_bin in (a); +a b +select * from t where 'B' collate utf8mb4_bin in (a, b); +a b +select * from t where 'B' collate utf8mb4_bin in (a, "a", 1); +a b +select * from t where 'B' collate utf8mb4_bin in (a, "B", 1); +a b +b c +select * from t where 1 in (a); +a b +select * from t where 2 in (a); +a b +b c +select * from t where 1 in (a, 0); +a b +select * from t where a between 1 and 2; +a b +b c +select * from t where a between 1 and "a"; +a b +select * from t where a between "a" and "b"; +a b +b c +select * from t where 2 between a and "c"; +a b +select * from t where 2 between a and 3; +a b +b c +select * from t where "b" between a and a; +a b +b c +select * from t where "b" collate utf8mb4_bin between a and a; +a b +b c +select * from t where "b" between a and 3; +a b +drop table if exists t; +create table t(a set('a', 'b'), b varchar(20)); +insert into t values ("a", "b"); +select * from t where a in (a); +a b +a b +drop table if exists t; +create table t(a set('a', 'b') charset utf8mb4 collate utf8mb4_general_ci, b varchar(20)); +insert into t values ("b", "c"); +insert into t values ("B", "b"); +Error 1265 (01000): Data truncated for column 'a' at row 1 +select * from t where 'B' collate utf8mb4_general_ci in (a); +a b +select * from t where 'B' collate utf8mb4_bin in (a); +a b +select * from t where 'B' collate utf8mb4_bin in (a, b); +a b +select * from t where 'B' collate utf8mb4_bin in (a, "a", 1); +a b +select * from t where 'B' collate utf8mb4_bin in (a, "B", 1); +a b +b c +select * from t where 1 in (a); +a b +select * from t where 2 in (a); +a b +b c +select * from t where 1 in (a, 0); +a b +select * from t where a between 1 and 2; +a b +b c +select * from t where a between 1 and "a"; +a b +select * from t where a between "a" and "b"; +a b +b c +select * from t where 2 between a and "c"; +a b +select * from t where 2 between a and 3; +a b +b c +select * from t where "b" between a and a; +a b +b c +select * from t where "b" collate utf8mb4_bin between a and a; +a b +b c +select * from t where "b" between a and 3; +a b +drop table if exists tbl_2; +create table tbl_2 ( col_20 bigint not null , col_21 smallint not null , col_22 decimal(24,10) default null , col_23 tinyint default 71 not null , col_24 bigint not null , col_25 tinyint default 18 , col_26 varchar(330) collate utf8_bin not null , col_27 char(77) collate utf8mb4_unicode_ci , col_28 char(46) collate utf8_general_ci not null , col_29 smallint unsigned not null , primary key idx_13 ( col_27(5) ) , key idx_14 ( col_24 ) , unique key idx_15 ( col_23,col_21,col_28,col_29,col_24 ) ) collate utf8_bin ; +insert ignore into tbl_2 values ( 5888267793391993829,5371,94.63,-109,5728076076919247337,89,'WUicqUTgdGJcjbC','SapBPqczTWWSN','xUSwH',49462 ); +select col_25 from tbl_2 where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27 between 'CsWIuxlSjU' and 'SfwoyjUEzgg' ) ) and ( tbl_2.col_23 <= -95); +col_25 +select col_25 from tbl_2 use index(primary) where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27 between 'CsWIuxlSjU' and 'SfwoyjUEzgg' ) ) and ( tbl_2.col_23 <= -95); +col_25 +drop table if exists t1; +drop table if exists t2; +create table t1(a char(20)); +create table t2(b binary(20), c binary(20)); +insert into t1 value('-1'); +insert into t2 value(0x2D31, 0x67); +insert into t2 value(0x2D31, 0x73); +select a from t1, t2 where t1.a between t2.b and t2.c; +a +select a from t1, t2 where cast(t1.a as binary(20)) between t2.b and t2.c; +a +-1 +-1 +drop table if exists t1; +drop table if exists t2; +create table t1(a char(20)) collate utf8mb4_general_ci; +create table t2(b binary(20), c char(20)) collate utf8mb4_general_ci; +insert into t1 values ('a'); +insert into t2 values (0x0, 'A'); +select * from t1, t2 where t1.a between t2.b and t2.c; +a b c +insert into t1 values ('-1'); +insert into t2 values (0x2d31, ''); +select * from t1, t2 where t1.a in (t2.b, 3); +a b c +drop table if exists t0; +drop table if exists t1; +CREATE TABLE t0(c0 BOOL, c1 INT); +CREATE TABLE t1 LIKE t0; +CREATE VIEW v0(c0) AS SELECT IS_IPV4(t0.c1) FROM t0, t1; +INSERT INTO t0(c0, c1) VALUES (true, 0); +INSERT INTO t1(c0, c1) VALUES (true, 2); +SELECT v0.c0 FROM v0; +c0 +0 +SELECT (v0.c0)NOT LIKE(BINARY v0.c0) FROM v0; +(v0.c0)NOT LIKE(BINARY v0.c0) +0 +SELECT v0.c0 FROM v0 WHERE (v0.c0)NOT LIKE(BINARY v0.c0); +c0 +desc format='brief' SELECT v0.c0 FROM v0 WHERE (v0.c0)NOT LIKE(BINARY v0.c0); +id estRows task access object operator info +Projection 80000000.00 root is_ipv4(cast(collation_check_use_collation.t0.c1, var_string(20)))->Column#7 +└─HashJoin 80000000.00 root CARTESIAN inner join + ├─Selection(Build) 8000.00 root not(like(cast(is_ipv4(cast(collation_check_use_collation.t0.c1, var_string(20))), var_string(20)), cast(is_ipv4(cast(collation_check_use_collation.t0.c1, var_string(20))), binary(1)), 92)) + │ └─TableReader 10000.00 root data:TableFullScan + │ └─TableFullScan 10000.00 cop[tikv] table:t0 keep order:false, stats:pseudo + └─TableReader(Probe) 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo +use test diff --git a/cmd/explaintest/r/collation_check_use_collation_enabled.result b/cmd/explaintest/r/collation_check_use_collation_enabled.result new file mode 100644 index 0000000000000..3f1113fbcd868 --- /dev/null +++ b/cmd/explaintest/r/collation_check_use_collation_enabled.result @@ -0,0 +1,200 @@ +set tidb_cost_model_version=1; +create database collation_check_use_collation; +use collation_check_use_collation; +CREATE TABLE `t` ( +`a` char(10) DEFAULT NULL +); +CREATE TABLE `t1` ( +`a` char(10) COLLATE utf8mb4_general_ci DEFAULT NULL +); +insert into t values ("A"); +insert into t1 values ("a"); +select a as a_col from t where t.a = all (select a collate utf8mb4_general_ci from t1); +a_col +A +select a as a_col from t where t.a != any (select a collate utf8mb4_general_ci from t1); +a_col +select a as a_col from t where t.a <= all (select a collate utf8mb4_general_ci from t1); +a_col +A +select a as a_col from t where t.a <= any (select a collate utf8mb4_general_ci from t1); +a_col +A +select a as a_col from t where t.a = (select a collate utf8mb4_general_ci from t1); +a_col +A +drop table if exists t; +create table t(a enum('a', 'b'), b varchar(20)); +insert into t values ("a", "b"); +select * from t where a in (a); +a b +a b +drop table if exists t; +create table t(a enum('a', 'b') charset utf8mb4 collate utf8mb4_general_ci, b varchar(20)); +insert into t values ("b", "c"); +insert into t values ("B", "b"); +select * from t where 'B' collate utf8mb4_general_ci in (a); +a b +b c +b b +select * from t where 'B' collate utf8mb4_bin in (a); +a b +select * from t where 'B' collate utf8mb4_bin in (a, b); +a b +select * from t where 'B' collate utf8mb4_bin in (a, "a", 1); +a b +select * from t where 'B' collate utf8mb4_bin in (a, "B", 1); +a b +b c +b b +select * from t where 1 in (a); +a b +select * from t where 2 in (a); +a b +b c +b b +select * from t where 1 in (a, 0); +a b +select * from t where a between 1 and 2; +a b +b c +b b +select * from t where a between 1 and "a"; +a b +select * from t where a between "a" and "b"; +a b +b c +b b +select * from t where 2 between a and "c"; +a b +select * from t where 2 between a and 3; +a b +b c +b b +select * from t where "b" between a and a; +a b +b c +b b +select * from t where "b" collate utf8mb4_bin between a and a; +a b +b c +b b +select * from t where "b" between a and 3; +a b +drop table if exists t; +create table t(a set('a', 'b'), b varchar(20)); +insert into t values ("a", "b"); +select * from t where a in (a); +a b +a b +drop table if exists t; +create table t(a set('a', 'b') charset utf8mb4 collate utf8mb4_general_ci, b varchar(20)); +insert into t values ("b", "c"); +insert into t values ("B", "b"); +select * from t where 'B' collate utf8mb4_general_ci in (a); +a b +b c +b b +select * from t where 'B' collate utf8mb4_bin in (a); +a b +select * from t where 'B' collate utf8mb4_bin in (a, b); +a b +select * from t where 'B' collate utf8mb4_bin in (a, "a", 1); +a b +select * from t where 'B' collate utf8mb4_bin in (a, "B", 1); +a b +b c +b b +select * from t where 1 in (a); +a b +select * from t where 2 in (a); +a b +b c +b b +select * from t where 1 in (a, 0); +a b +select * from t where a between 1 and 2; +a b +b c +b b +select * from t where a between 1 and "a"; +a b +select * from t where a between "a" and "b"; +a b +b c +b b +select * from t where 2 between a and "c"; +a b +select * from t where 2 between a and 3; +a b +b c +b b +select * from t where "b" between a and a; +a b +b c +b b +select * from t where "b" collate utf8mb4_bin between a and a; +a b +b c +b b +select * from t where "b" between a and 3; +a b +drop table if exists tbl_2; +create table tbl_2 ( col_20 bigint not null , col_21 smallint not null , col_22 decimal(24,10) default null , col_23 tinyint default 71 not null , col_24 bigint not null , col_25 tinyint default 18 , col_26 varchar(330) collate utf8_bin not null , col_27 char(77) collate utf8mb4_unicode_ci , col_28 char(46) collate utf8_general_ci not null , col_29 smallint unsigned not null , primary key idx_13 ( col_27(5) ) , key idx_14 ( col_24 ) , unique key idx_15 ( col_23,col_21,col_28,col_29,col_24 ) ) collate utf8_bin ; +insert ignore into tbl_2 values ( 5888267793391993829,5371,94.63,-109,5728076076919247337,89,'WUicqUTgdGJcjbC','SapBPqczTWWSN','xUSwH',49462 ); +select col_25 from tbl_2 where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27 between 'CsWIuxlSjU' and 'SfwoyjUEzgg' ) ) and ( tbl_2.col_23 <= -95); +col_25 +89 +select col_25 from tbl_2 use index(primary) where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27 between 'CsWIuxlSjU' and 'SfwoyjUEzgg' ) ) and ( tbl_2.col_23 <= -95); +col_25 +89 +drop table if exists t1; +drop table if exists t2; +create table t1(a char(20)); +create table t2(b binary(20), c binary(20)); +insert into t1 value('-1'); +insert into t2 value(0x2D31, 0x67); +insert into t2 value(0x2D31, 0x73); +select a from t1, t2 where t1.a between t2.b and t2.c; +a +select a from t1, t2 where cast(t1.a as binary(20)) between t2.b and t2.c; +a +-1 +-1 +drop table if exists t1; +drop table if exists t2; +create table t1(a char(20)) collate utf8mb4_general_ci; +create table t2(b binary(20), c char(20)) collate utf8mb4_general_ci; +insert into t1 values ('a'); +insert into t2 values (0x0, 'A'); +select * from t1, t2 where t1.a between t2.b and t2.c; +a b c +insert into t1 values ('-1'); +insert into t2 values (0x2d31, ''); +select * from t1, t2 where t1.a in (t2.b, 3); +a b c +drop table if exists t0; +drop table if exists t1; +CREATE TABLE t0(c0 BOOL, c1 INT); +CREATE TABLE t1 LIKE t0; +CREATE VIEW v0(c0) AS SELECT IS_IPV4(t0.c1) FROM t0, t1; +INSERT INTO t0(c0, c1) VALUES (true, 0); +INSERT INTO t1(c0, c1) VALUES (true, 2); +SELECT v0.c0 FROM v0; +c0 +0 +SELECT (v0.c0)NOT LIKE(BINARY v0.c0) FROM v0; +(v0.c0)NOT LIKE(BINARY v0.c0) +0 +SELECT v0.c0 FROM v0 WHERE (v0.c0)NOT LIKE(BINARY v0.c0); +c0 +desc format='brief' SELECT v0.c0 FROM v0 WHERE (v0.c0)NOT LIKE(BINARY v0.c0); +id estRows task access object operator info +Projection 80000000.00 root is_ipv4(cast(collation_check_use_collation.t0.c1, var_string(20)))->Column#7 +└─HashJoin 80000000.00 root CARTESIAN inner join + ├─Selection(Build) 8000.00 root not(like(cast(is_ipv4(cast(collation_check_use_collation.t0.c1, var_string(20))), var_string(20)), cast(is_ipv4(cast(collation_check_use_collation.t0.c1, var_string(20))), binary(1)), 92)) + │ └─TableReader 10000.00 root data:TableFullScan + │ └─TableFullScan 10000.00 cop[tikv] table:t0 keep order:false, stats:pseudo + └─TableReader(Probe) 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo +use test diff --git a/cmd/explaintest/r/subquery.result b/cmd/explaintest/r/subquery.result index 84bac87bb1d23..4c2b5f3207712 100644 --- a/cmd/explaintest/r/subquery.result +++ b/cmd/explaintest/r/subquery.result @@ -46,3 +46,32 @@ create table t1(a int(11)); create table t2(a decimal(40,20) unsigned, b decimal(40,20)); select count(*) as x from t1 group by a having x not in (select a from t2 where x = t2.b); x +<<<<<<< HEAD +======= +drop table if exists stu; +drop table if exists exam; +create table stu(id int, name varchar(100)); +insert into stu values(1, null); +create table exam(stu_id int, course varchar(100), grade int); +insert into exam values(1, 'math', 100); +set names utf8 collate utf8_general_ci; +explain format = 'brief' select * from stu where stu.name not in (select 'guo' from exam where exam.stu_id = stu.id); +id estRows task access object operator info +HashJoin 8000.00 root anti semi join, equal:[eq(test.stu.id, test.exam.stu_id)], other cond:eq(test.stu.name, "guo") +├─TableReader(Build) 10000.00 root data:TableFullScan +│ └─TableFullScan 10000.00 cop[tikv] table:exam keep order:false, stats:pseudo +└─TableReader(Probe) 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:stu keep order:false, stats:pseudo +select * from stu where stu.name not in (select 'guo' from exam where exam.stu_id = stu.id); +id name +set names utf8mb4; +explain format = 'brief' select * from stu where stu.name not in (select 'guo' from exam where exam.stu_id = stu.id); +id estRows task access object operator info +HashJoin 8000.00 root anti semi join, equal:[eq(test.stu.id, test.exam.stu_id)], other cond:eq(test.stu.name, "guo") +├─TableReader(Build) 10000.00 root data:TableFullScan +│ └─TableFullScan 10000.00 cop[tikv] table:exam keep order:false, stats:pseudo +└─TableReader(Probe) 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:stu keep order:false, stats:pseudo +select * from stu where stu.name not in (select 'guo' from exam where exam.stu_id = stu.id); +id name +>>>>>>> 00617c96ef (expression, cmd: fix ColumnSubstitute and allow some cases to substitute (#38826)) diff --git a/cmd/explaintest/t/collation_check_use_collation.test b/cmd/explaintest/t/collation_check_use_collation.test new file mode 100644 index 0000000000000..adcd8695b38c0 --- /dev/null +++ b/cmd/explaintest/t/collation_check_use_collation.test @@ -0,0 +1,128 @@ +set tidb_cost_model_version=1; +# These tests check that the used collation is correct. + +# prepare database +create database collation_check_use_collation; +use collation_check_use_collation; + +# Check subquery. +CREATE TABLE `t` ( + `a` char(10) DEFAULT NULL +); +CREATE TABLE `t1` ( + `a` char(10) COLLATE utf8mb4_general_ci DEFAULT NULL +); +insert into t values ("A"); +# Ignore error for the disabled new-collation case. +--error 1265 +insert into t1 values ("a"); +select a as a_col from t where t.a = all (select a collate utf8mb4_general_ci from t1); +select a as a_col from t where t.a != any (select a collate utf8mb4_general_ci from t1); +select a as a_col from t where t.a <= all (select a collate utf8mb4_general_ci from t1); +select a as a_col from t where t.a <= any (select a collate utf8mb4_general_ci from t1); +select a as a_col from t where t.a = (select a collate utf8mb4_general_ci from t1); + +## Check rewrite in expression + +# enum part +drop table if exists t; +create table t(a enum('a', 'b'), b varchar(20)); +insert into t values ("a", "b"); +select * from t where a in (a); +drop table if exists t; +create table t(a enum('a', 'b') charset utf8mb4 collate utf8mb4_general_ci, b varchar(20)); +insert into t values ("b", "c"); +# Ignore error for the disabled new-collation case. +--error 1265 +insert into t values ("B", "b"); +select * from t where 'B' collate utf8mb4_general_ci in (a); +select * from t where 'B' collate utf8mb4_bin in (a); +select * from t where 'B' collate utf8mb4_bin in (a, b); +select * from t where 'B' collate utf8mb4_bin in (a, "a", 1); +select * from t where 'B' collate utf8mb4_bin in (a, "B", 1); +select * from t where 1 in (a); +select * from t where 2 in (a); +select * from t where 1 in (a, 0); +select * from t where a between 1 and 2; +select * from t where a between 1 and "a"; +select * from t where a between "a" and "b"; +select * from t where 2 between a and "c"; +select * from t where 2 between a and 3; +select * from t where "b" between a and a; +select * from t where "b" collate utf8mb4_bin between a and a; +select * from t where "b" between a and 3; + +# set part +drop table if exists t; +create table t(a set('a', 'b'), b varchar(20)); +insert into t values ("a", "b"); +select * from t where a in (a); +drop table if exists t; +create table t(a set('a', 'b') charset utf8mb4 collate utf8mb4_general_ci, b varchar(20)); +insert into t values ("b", "c"); +# Ignore error for the disabled new-collation case. +--error 1265 +insert into t values ("B", "b"); +select * from t where 'B' collate utf8mb4_general_ci in (a); +select * from t where 'B' collate utf8mb4_bin in (a); +select * from t where 'B' collate utf8mb4_bin in (a, b); +select * from t where 'B' collate utf8mb4_bin in (a, "a", 1); +select * from t where 'B' collate utf8mb4_bin in (a, "B", 1); +select * from t where 1 in (a); +select * from t where 2 in (a); +select * from t where 1 in (a, 0); +select * from t where a between 1 and 2; +select * from t where a between 1 and "a"; +select * from t where a between "a" and "b"; +select * from t where 2 between a and "c"; +select * from t where 2 between a and 3; +select * from t where "b" between a and a; +select * from t where "b" collate utf8mb4_bin between a and a; +select * from t where "b" between a and 3; + +# check build range +drop table if exists tbl_2; +create table tbl_2 ( col_20 bigint not null , col_21 smallint not null , col_22 decimal(24,10) default null , col_23 tinyint default 71 not null , col_24 bigint not null , col_25 tinyint default 18 , col_26 varchar(330) collate utf8_bin not null , col_27 char(77) collate utf8mb4_unicode_ci , col_28 char(46) collate utf8_general_ci not null , col_29 smallint unsigned not null , primary key idx_13 ( col_27(5) ) , key idx_14 ( col_24 ) , unique key idx_15 ( col_23,col_21,col_28,col_29,col_24 ) ) collate utf8_bin ; +insert ignore into tbl_2 values ( 5888267793391993829,5371,94.63,-109,5728076076919247337,89,'WUicqUTgdGJcjbC','SapBPqczTWWSN','xUSwH',49462 ); +select col_25 from tbl_2 where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27 between 'CsWIuxlSjU' and 'SfwoyjUEzgg' ) ) and ( tbl_2.col_23 <= -95); +select col_25 from tbl_2 use index(primary) where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27 between 'CsWIuxlSjU' and 'SfwoyjUEzgg' ) ) and ( tbl_2.col_23 <= -95); + +# check implicit binary collation cast +drop table if exists t1; +drop table if exists t2; +# issue 34823 +create table t1(a char(20)); +create table t2(b binary(20), c binary(20)); +insert into t1 value('-1'); +insert into t2 value(0x2D31, 0x67); +insert into t2 value(0x2D31, 0x73); +select a from t1, t2 where t1.a between t2.b and t2.c; +select a from t1, t2 where cast(t1.a as binary(20)) between t2.b and t2.c; +# binary collation in single side +drop table if exists t1; +drop table if exists t2; +create table t1(a char(20)) collate utf8mb4_general_ci; +create table t2(b binary(20), c char(20)) collate utf8mb4_general_ci; +insert into t1 values ('a'); +insert into t2 values (0x0, 'A'); +select * from t1, t2 where t1.a between t2.b and t2.c; +insert into t1 values ('-1'); +insert into t2 values (0x2d31, ''); +select * from t1, t2 where t1.a in (t2.b, 3); + +# issue 38736 +drop table if exists t0; +drop table if exists t1; +CREATE TABLE t0(c0 BOOL, c1 INT); +CREATE TABLE t1 LIKE t0; +CREATE VIEW v0(c0) AS SELECT IS_IPV4(t0.c1) FROM t0, t1; +INSERT INTO t0(c0, c1) VALUES (true, 0); +INSERT INTO t1(c0, c1) VALUES (true, 2); + +SELECT v0.c0 FROM v0; +SELECT (v0.c0)NOT LIKE(BINARY v0.c0) FROM v0; +SELECT v0.c0 FROM v0 WHERE (v0.c0)NOT LIKE(BINARY v0.c0); +desc format='brief' SELECT v0.c0 FROM v0 WHERE (v0.c0)NOT LIKE(BINARY v0.c0); + +# cleanup environment +use test diff --git a/expression/collation.go b/expression/collation.go index 79bb84a0f8533..92ed1cfdd7f65 100644 --- a/expression/collation.go +++ b/expression/collation.go @@ -275,14 +275,6 @@ func deriveCollation(ctx sessionctx.Context, funcName string, args []Expression, return ec, nil } -// DeriveCollationFromExprs derives collation information from these expressions. -// Deprecated, use CheckAndDeriveCollationFromExprs instead. -// TODO: remove this function after the all usage is replaced by CheckAndDeriveCollationFromExprs -func DeriveCollationFromExprs(ctx sessionctx.Context, exprs ...Expression) (dstCharset, dstCollation string) { - collation := inferCollation(exprs...) - return collation.Charset, collation.Collation -} - // CheckAndDeriveCollationFromExprs derives collation information from these expressions, return error if derives collation error. func CheckAndDeriveCollationFromExprs(ctx sessionctx.Context, funcName string, evalType types.EvalType, args ...Expression) (et *ExprCollation, err error) { ec := inferCollation(args...) diff --git a/expression/integration_test.go b/expression/integration_test.go index 5b577279dbe7f..99d1e158bd294 100644 --- a/expression/integration_test.go +++ b/expression/integration_test.go @@ -10599,3 +10599,12 @@ func (s *testIntegrationSuite) TestIssue33397(c *C) { tk.MustExec("set @@tidb_enable_vectorized_expression = true;") tk.MustQuery("select compress(a) from t").Check(testkit.Rows("", "")) } + +func TestIssue40536(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("CREATE TABLE `6bf9e76d-ab44-4031-8a07-418b10741580` (\n `e0b5f703-6cfe-49b4-bc21-16a6455e43a7` set('7','va','ung60','ow','1g','gxwz5','uhnh','k','5la1','q8d9c','1f') NOT NULL DEFAULT '7,1g,uhnh,5la1,q8d9c',\n `fbc3527f-9617-4b9d-a5dc-4be31c00d8a5` datetime DEFAULT '6449-09-28 14:39:04',\n PRIMARY KEY (`e0b5f703-6cfe-49b4-bc21-16a6455e43a7`) /*T![clustered_index] CLUSTERED */\n) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin;") + tk.MustExec("CREATE TABLE `8919f3f4-25be-4a1a-904a-bb5e863d8fc8` (\n `9804d5f2-cbc7-43b7-b241-ea2656dc941a` enum('s951','36d','ua65','49yru','6l2em','4ea','jf2d2','vprsc','3yl7n','hz','ov') DEFAULT '4ea',\n `323cdbcb-0c14-4362-90ab-ea42caaed6a5` year(4) NOT NULL DEFAULT '1983',\n `b9b70f39-1a02-4114-9d7d-fa6259c1b691` time DEFAULT '20:18:04',\n PRIMARY KEY (`323cdbcb-0c14-4362-90ab-ea42caaed6a5`) /*T![clustered_index] CLUSTERED */,\n KEY `a704d6bb-772b-44ea-8cb0-6f7491c1aaa6` (`323cdbcb-0c14-4362-90ab-ea42caaed6a5`,`9804d5f2-cbc7-43b7-b241-ea2656dc941a`)\n) ENGINE=InnoDB DEFAULT CHARSET=ascii COLLATE=ascii_bin;") + tk.MustExec("delete from `6bf9e76d-ab44-4031-8a07-418b10741580` where not( `6bf9e76d-ab44-4031-8a07-418b10741580`.`e0b5f703-6cfe-49b4-bc21-16a6455e43a7` in ( select `9804d5f2-cbc7-43b7-b241-ea2656dc941a` from `8919f3f4-25be-4a1a-904a-bb5e863d8fc8` where `6bf9e76d-ab44-4031-8a07-418b10741580`.`e0b5f703-6cfe-49b4-bc21-16a6455e43a7` in ( '1f' ) and `6bf9e76d-ab44-4031-8a07-418b10741580`.`e0b5f703-6cfe-49b4-bc21-16a6455e43a7` in ( '1g' ,'va' ,'uhnh' ) ) ) and not( IsNull( `6bf9e76d-ab44-4031-8a07-418b10741580`.`e0b5f703-6cfe-49b4-bc21-16a6455e43a7` ) );\n") +} diff --git a/expression/util.go b/expression/util.go index a21e4049f620a..08fae6dfaf06c 100644 --- a/expression/util.go +++ b/expression/util.go @@ -221,8 +221,12 @@ func ColumnSubstituteImpl(expr Expression, schema *Schema, newExprs []Expression if v.InOperand { newExpr = setExprColumnInOperand(newExpr) } +<<<<<<< HEAD newExpr.SetCoercibility(v.Coercibility()) return true, newExpr +======= + return true, false, newExpr +>>>>>>> 00617c96ef (expression, cmd: fix ColumnSubstitute and allow some cases to substitute (#38826)) case *ScalarFunction: substituted := false if v.FuncName.L == ast.Cast { @@ -239,11 +243,24 @@ func ColumnSubstituteImpl(expr Expression, schema *Schema, newExprs []Expression // cowExprRef is a copy-on-write util, args array allocation happens only // when expr in args is changed refExprArr := cowExprRef{v.GetArgs(), nil} +<<<<<<< HEAD _, coll := DeriveCollationFromExprs(v.GetCtx(), v.GetArgs()...) +======= + oldCollEt, err := CheckAndDeriveCollationFromExprs(v.GetCtx(), v.FuncName.L, v.RetType.EvalType(), v.GetArgs()...) + if err != nil { + logutil.BgLogger().Error("Unexpected error happened during ColumnSubstitution", zap.Stack("stack")) + return false, false, v + } + var tmpArgForCollCheck []Expression + if collate.NewCollationEnabled() { + tmpArgForCollCheck = make([]Expression, len(v.GetArgs())) + } +>>>>>>> 00617c96ef (expression, cmd: fix ColumnSubstitute and allow some cases to substitute (#38826)) for idx, arg := range v.GetArgs() { changed, newFuncExpr := ColumnSubstituteImpl(arg, schema, newExprs) if collate.NewCollationEnabled() { // Make sure the collation used by the ScalarFunction isn't changed and its result collation is not weaker than the collation used by the ScalarFunction. +<<<<<<< HEAD if changed { changed = false tmpArgs := make([]Expression, 0, len(v.GetArgs())) @@ -251,6 +268,22 @@ func ColumnSubstituteImpl(expr Expression, schema *Schema, newExprs []Expression _, newColl := DeriveCollationFromExprs(v.GetCtx(), append(v.GetArgs(), newFuncExpr)...) if coll == newColl { changed = checkCollationStrictness(coll, newFuncExpr.GetType().Collate) +======= + changed = false + copy(tmpArgForCollCheck, refExprArr.Result()) + tmpArgForCollCheck[idx] = newFuncExpr + newCollEt, err := CheckAndDeriveCollationFromExprs(v.GetCtx(), v.FuncName.L, v.RetType.EvalType(), tmpArgForCollCheck...) + if err != nil { + logutil.BgLogger().Error("Unexpected error happened during ColumnSubstitution", zap.Stack("stack")) + return false, failed, v + } + if oldCollEt.Collation == newCollEt.Collation { + if newFuncExpr.GetType().GetCollate() == arg.GetType().GetCollate() && newFuncExpr.Coercibility() == arg.Coercibility() { + // It's safe to use the new expression, otherwise some cases in projection push-down will be wrong. + changed = true + } else { + changed = checkCollationStrictness(oldCollEt.Collation, newFuncExpr.GetType().GetCollate()) +>>>>>>> 00617c96ef (expression, cmd: fix ColumnSubstitute and allow some cases to substitute (#38826)) } } } diff --git a/planner/core/exhaust_physical_plans.go b/planner/core/exhaust_physical_plans.go index f07e15e288af3..e9d7babbed585 100644 --- a/planner/core/exhaust_physical_plans.go +++ b/planner/core/exhaust_physical_plans.go @@ -1252,9 +1252,18 @@ func (ijHelper *indexJoinBuildHelper) resetContextForIndex(innerKeys []*expressi ijHelper.curIdxOff2KeyOff[i] = tmpSchema.ColumnIndex(idxCol) if ijHelper.curIdxOff2KeyOff[i] >= 0 { // Don't use the join columns if their collations are unmatched and the new collation is enabled. +<<<<<<< HEAD if collate.NewCollationEnabled() && types.IsString(idxCol.RetType.Tp) && types.IsString(outerKeys[ijHelper.curIdxOff2KeyOff[i]].RetType.Tp) { _, coll := expression.DeriveCollationFromExprs(nil, idxCol, outerKeys[ijHelper.curIdxOff2KeyOff[i]]) if !collate.CompatibleCollate(idxCol.GetType().Collate, coll) { +======= + if collate.NewCollationEnabled() && types.IsString(idxCol.RetType.GetType()) && types.IsString(outerKeys[ijHelper.curIdxOff2KeyOff[i]].RetType.GetType()) { + et, err := expression.CheckAndDeriveCollationFromExprs(ijHelper.innerPlan.ctx, "equal", types.ETInt, idxCol, outerKeys[ijHelper.curIdxOff2KeyOff[i]]) + if err != nil { + logutil.BgLogger().Error("Unexpected error happened during constructing index join", zap.Stack("stack")) + } + if !collate.CompatibleCollate(idxCol.GetType().GetCollate(), et.Collation) { +>>>>>>> 00617c96ef (expression, cmd: fix ColumnSubstitute and allow some cases to substitute (#38826)) ijHelper.curIdxOff2KeyOff[i] = -1 } }