diff --git a/mysql-test/main/full_join.result b/mysql-test/main/full_join.result new file mode 100644 index 0000000000000..185b6ee425339 --- /dev/null +++ b/mysql-test/main/full_join.result @@ -0,0 +1,5088 @@ +# +# MDEV-37932 / MDEV-39014: FULL [OUTER] JOIN +# +# Tests are grouped by feature. Within each group, a FULL JOIN +# query is generally paired with an equivalent LEFT JOIN UNION +# RIGHT JOIN formulation to verify correctness. +# +# ======================================================== +# Section 1: Parser and syntax acceptance +# +# FULL JOIN, FULL OUTER JOIN, NATURAL FULL [OUTER] JOIN, and +# their appearance inside views, derived tables, UNIONs, and +# CTEs. +# ======================================================== +create table t1 (a int); +insert into t1 (a) values (1),(2); +create table t2 (a int); +insert into t2 (a) values (1),(3); +create table t3 (a int); +insert into t3 (a) values (1),(4); +# Basic FULL [OUTER] JOIN syntax. +select * from t1 full join t2 on t1.a = t2.a; +a a +1 1 +2 NULL +NULL 3 +select * from t1 left join t2 on t1.a = t2.a union select * from t1 right join t2 on t1.a = t2.a; +a a +1 1 +2 NULL +NULL 3 +explain extended select * from t1 full join t2 on t1.a = t2.a; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 2 100.00 +1 SIMPLE t2 ALL NULL NULL NULL NULL 2 100.00 Using where +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t2`.`a` AS `a` from `test`.`t1` full join `test`.`t2` on(`test`.`t2`.`a` = `test`.`t1`.`a`) where 1 +select * from t1 full outer join t2 on t1.a = t2.a; +a a +1 1 +2 NULL +NULL 3 +select * from t1 left outer join t2 on t1.a = t2.a union select * from t1 right outer join t2 on t1.a = t2.a; +a a +1 1 +2 NULL +NULL 3 +explain extended select * from t1 full outer join t2 on t1.a = t2.a; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 2 100.00 +1 SIMPLE t2 ALL NULL NULL 
NULL NULL 2 100.00 Using where +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t2`.`a` AS `a` from `test`.`t1` full join `test`.`t2` on(`test`.`t2`.`a` = `test`.`t1`.`a`) where 1 +# NATURAL FULL [OUTER] JOIN. +select * from t1 natural full outer join t2; +a +1 +2 +3 +select * from t1 natural left outer join t2 union select * from t1 natural right outer join t2; +a +1 +2 +3 +explain extended select * from t1 natural full outer join t2; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 2 100.00 +1 SIMPLE t2 ALL NULL NULL NULL NULL 2 100.00 Using where +Warnings: +Note 1003 select coalesce(`test`.`t1`.`a`,`test`.`t2`.`a`) AS `a` from `test`.`t1` full join `test`.`t2` on(`test`.`t2`.`a` = `test`.`t1`.`a`) where 1 +select * from t1 natural full join t2; +a +1 +2 +3 +select * from t1 natural left join t2 union select * from t1 natural right join t2; +a +1 +2 +3 +explain extended select * from t1 natural full join t2; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 2 100.00 +1 SIMPLE t2 ALL NULL NULL NULL NULL 2 100.00 Using where +Warnings: +Note 1003 select coalesce(`test`.`t1`.`a`,`test`.`t2`.`a`) AS `a` from `test`.`t1` full join `test`.`t2` on(`test`.`t2`.`a` = `test`.`t1`.`a`) where 1 +# FULL JOIN inside a view. 
+create view v1 as select t1.a as t1a, t2.a as t2a from t1 full join t2 on t1.a = t2.a; +select * from v1; +t1a t2a +1 1 +2 NULL +NULL 3 +select t1.a as t1a, t2.a as t2a from t1 left join t2 on t1.a = t2.a union select t1.a as t1a, t2.a as t2a from t1 right join t2 on t1.a = t2.a; +t1a t2a +1 1 +2 NULL +NULL 3 +explain extended select * from v1; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY ALL NULL NULL NULL NULL 4 100.00 +2 DERIVED t1 ALL NULL NULL NULL NULL 2 100.00 +2 DERIVED t2 ALL NULL NULL NULL NULL 2 100.00 Using where +Warnings: +Note 1003 /* select#1 */ select `v1`.`t1a` AS `t1a`,`v1`.`t2a` AS `t2a` from `test`.`v1` +drop view v1; +create view v1 as select t1.a as t1a, t2.a as t2a from t1 full outer join t2 on t1.a = t2.a; +select * from v1; +t1a t2a +1 1 +2 NULL +NULL 3 +select t1.a as t1a, t2.a as t2a from t1 left outer join t2 on t1.a = t2.a union select t1.a as t1a, t2.a as t2a from t1 right outer join t2 on t1.a = t2.a; +t1a t2a +1 1 +2 NULL +NULL 3 +explain extended select * from v1; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY ALL NULL NULL NULL NULL 4 100.00 +2 DERIVED t1 ALL NULL NULL NULL NULL 2 100.00 +2 DERIVED t2 ALL NULL NULL NULL NULL 2 100.00 Using where +Warnings: +Note 1003 /* select#1 */ select `v1`.`t1a` AS `t1a`,`v1`.`t2a` AS `t2a` from `test`.`v1` +drop view v1; +create view v1 as select t1.a as t1a, t2.a as t2a from t1 natural full join t2; +select * from v1; +t1a t2a +1 1 +2 NULL +NULL 3 +select t1.a as t1a, t2.a as t2a from t1 natural left join t2 union select t1.a as t1a, t2.a as t2a from t1 natural right join t2; +t1a t2a +1 1 +2 NULL +NULL 3 +explain extended select * from v1; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY ALL NULL NULL NULL NULL 4 100.00 +2 DERIVED t1 ALL NULL NULL NULL NULL 2 100.00 +2 DERIVED t2 ALL NULL NULL NULL NULL 2 100.00 Using where +Warnings: +Note 1003 /* select#1 */ select 
`v1`.`t1a` AS `t1a`,`v1`.`t2a` AS `t2a` from `test`.`v1` +drop view v1; +create view v1 as select t1.a as t1a, t2.a as t2a from t1 natural full outer join t2; +select * from v1; +t1a t2a +1 1 +2 NULL +NULL 3 +select t1.a as t1a, t2.a as t2a from t1 natural left outer join t2 union select t1.a as t1a, t2.a as t2a from t1 natural right outer join t2; +t1a t2a +1 1 +2 NULL +NULL 3 +explain extended select * from v1; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY ALL NULL NULL NULL NULL 4 100.00 +2 DERIVED t1 ALL NULL NULL NULL NULL 2 100.00 +2 DERIVED t2 ALL NULL NULL NULL NULL 2 100.00 Using where +Warnings: +Note 1003 /* select#1 */ select `v1`.`t1a` AS `t1a`,`v1`.`t2a` AS `t2a` from `test`.`v1` +drop view v1; +# FULL JOIN inside a derived table combined with UNION. +select * from (select t1.a from t1 full join t2 on t1.a = t2.a union select * from t1) dt; +a +1 +2 +NULL +select * from (select t1.a from t1 left join t2 on t1.a = t2.a union select t1.a from t1 right join t2 on t1.a = t2.a union select * from t1) dt; +a +1 +2 +NULL +select * from (select t1.a from t1 full outer join t2 on t1.a = t2.a union select * from t1) dt; +a +1 +2 +NULL +select * from (select t1.a from t1 left outer join t2 on t1.a = t2.a union select t1.a from t1 right outer join t2 on t1.a = t2.a union select * from t1) dt; +a +1 +2 +NULL +select * from (select t1.a from t1 natural full join t2 union select * from t1) dt; +a +1 +2 +NULL +select * from (select t1.a from t1 natural left join t2 union select t1.a from t1 natural right join t2 union select * from t1) dt; +a +1 +2 +NULL +explain extended select * from (select t1.a from t1 natural full join t2 union select * from t1) dt; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY ALL NULL NULL NULL NULL 6 100.00 +2 DERIVED t1 ALL NULL NULL NULL NULL 2 100.00 +2 DERIVED t2 ALL NULL NULL NULL NULL 2 100.00 Using where +3 UNION t1 ALL NULL NULL NULL NULL 2 100.00 
+NULL UNION RESULT ALL NULL NULL NULL NULL NULL NULL +Warnings: +Note 1003 /* select#1 */ select `dt`.`a` AS `a` from (/* select#2 */ select `test`.`t1`.`a` AS `a` from `test`.`t1` full join `test`.`t2` on(`test`.`t2`.`a` = `test`.`t1`.`a`) where 1 union /* select#3 */ select `test`.`t1`.`a` AS `a` from `test`.`t1`) `dt` +select * from (select t1.a from t1 natural full outer join t2 union select * from t1) dt; +a +1 +2 +NULL +select * from (select t1.a from t1 natural left outer join t2 union select t1.a from t1 natural right outer join t2 union select * from t1) dt; +a +1 +2 +NULL +explain extended select * from (select t1.a from t1 natural full outer join t2 union select * from t1) dt; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY ALL NULL NULL NULL NULL 6 100.00 +2 DERIVED t1 ALL NULL NULL NULL NULL 2 100.00 +2 DERIVED t2 ALL NULL NULL NULL NULL 2 100.00 Using where +3 UNION t1 ALL NULL NULL NULL NULL 2 100.00 +NULL UNION RESULT ALL NULL NULL NULL NULL NULL NULL +Warnings: +Note 1003 /* select#1 */ select `dt`.`a` AS `a` from (/* select#2 */ select `test`.`t1`.`a` AS `a` from `test`.`t1` full join `test`.`t2` on(`test`.`t2`.`a` = `test`.`t1`.`a`) where 1 union /* select#3 */ select `test`.`t1`.`a` AS `a` from `test`.`t1`) `dt` +# FULL JOIN inside a CTE. 
+with cte as (select t1.a from t1 natural full join t2) select * from cte; +a +1 +2 +NULL +with cte as (select t1.a from t1 natural left join t2 union select t1.a from t1 natural right join t2) select * from cte; +a +1 +2 +NULL +explain extended with cte as (select t1.a from t1 natural full join t2) select * from cte; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 2 100.00 +1 SIMPLE t2 ALL NULL NULL NULL NULL 2 100.00 Using where +Warnings: +Note 1003 with cte as (select `test`.`t1`.`a` AS `a` from `test`.`t1` full join `test`.`t2` on(`test`.`t2`.`a` = `test`.`t1`.`a`))select `test`.`t1`.`a` AS `a` from `test`.`t1` full join `test`.`t2` on(`test`.`t2`.`a` = `test`.`t1`.`a`) where 1 +# FULL JOIN referencing a missing table must error cleanly. +select * from t1, t2 full join t_not_exist on t2.c=t_not_exist.e and t_not_exist.f=t1.a; +ERROR 42S02: Table 'test.t_not_exist' doesn't exist +select * from t1, t2 full outer join t_not_exist on t2.c=t_not_exist.e and t_not_exist.f=t1.a; +ERROR 42S02: Table 'test.t_not_exist' doesn't exist +select * from t1, t2 natural full join t_not_exist; +ERROR 42S02: Table 'test.t_not_exist' doesn't exist +select * from t1, t2 natural full outer join t_not_exist; +ERROR 42S02: Table 'test.t_not_exist' doesn't exist +# FULL JOIN where one operand is a derived table (on the left). +select * from (select * from t1) dt natural full join t2; +a +1 +2 +3 +select * from (select * from t1) dt natural left join t2 union select * from (select * from t1) dt natural right join t2; +a +1 +2 +3 +select * from (select * from t2) du natural full join t1; +a +1 +3 +2 +select * from (select * from t2) du natural left join t1 union select * from (select * from t2) du natural right join t1; +a +1 +3 +2 +# FULL JOIN with a constant ON clause. 
+select * from t1 full join t2 on true; +a a +1 1 +1 3 +2 1 +2 3 +select * from t1 left join t2 on true union select * from t1 right join t2 on true; +a a +1 1 +2 1 +1 3 +2 3 +# ======================================================== +# Section 2: Basic FULL JOIN with nested joins on the left +# +# Each FULL JOIN query is followed by an equivalent +# LEFT/RIGHT/UNION formulation; the two must match. +# ======================================================== +select * from t1 inner join t2 full join t3 on t1.a=t3.a; +a a a +1 1 1 +1 3 1 +2 1 NULL +2 3 NULL +NULL NULL 4 +select * from t1 inner join t2 left join t3 on t1.a=t3.a union select * from t1 inner join t2 right join t3 on t1.a=t3.a; +a a a +1 1 1 +1 3 1 +2 1 NULL +2 3 NULL +NULL NULL 4 +select * from t1 inner join t2 on t1.a=t2.a full join t3 on t1.a=t3.a; +a a a +1 1 1 +NULL NULL 4 +select * from t1 inner join t2 on t1.a=t2.a left join t3 on t1.a=t3.a union select * from t1 inner join t2 on t1.a=t2.a right join t3 on t1.a=t3.a; +a a a +1 1 1 +NULL NULL 4 +select * from t1 cross join t2 full join t3 on t1.a=t3.a; +a a a +1 1 1 +1 3 1 +2 1 NULL +2 3 NULL +NULL NULL 4 +select * from t1 cross join t2 left join t3 on t1.a=t3.a union select * from t1 cross join t2 right join t3 on t1.a=t3.a; +a a a +1 1 1 +1 3 1 +2 1 NULL +2 3 NULL +NULL NULL 4 +select * from t1 cross join t2 on t1.a=t2.a full join t3 on t1.a=t3.a; +a a a +1 1 1 +NULL NULL 4 +select * from t1 cross join t2 on t1.a=t2.a left join t3 on t1.a=t3.a union select * from t1 cross join t2 on t1.a=t2.a right join t3 on t1.a=t3.a; +a a a +1 1 1 +NULL NULL 4 +select * from (t1 left join t2 on t1.a=t2.a) full join t3 on t1.a=t3.a; +a a a +1 1 1 +2 NULL NULL +NULL NULL 4 +select * from (t1 left join t2 on t1.a=t2.a) left join t3 on t1.a=t3.a union select * from (t1 left join t2 on t1.a=t2.a) right join t3 on t1.a=t3.a; +a a a +1 1 1 +2 NULL NULL +NULL NULL 4 +select * from (t1 right join t2 on t1.a=t2.a) full join t3 on t1.a=t3.a; +a a a +1 1 1 +NULL 3 NULL 
+NULL NULL 4 +select * from (t1 right join t2 on t1.a=t2.a) left join t3 on t1.a=t3.a union select * from (t1 right join t2 on t1.a=t2.a) right join t3 on t1.a=t3.a; +a a a +1 1 1 +NULL 3 NULL +NULL NULL 4 +# Nested NATURAL JOIN on the left of FULL JOIN. +select * from (t1 natural join t2) full join t3 on t1.a=t3.a; +a a +1 1 +NULL 4 +select * from (t1 natural join t2) left join t3 on t1.a=t3.a union select * from (t1 natural join t2) right join t3 on t1.a=t3.a; +a a +1 1 +NULL 4 +# Nested FULL JOIN on the left of FULL JOIN. +# The inner FULL JOIN's unmatched right-side rows must appear +# in the result even when the outer FULL JOIN condition does +# not reference the inner right-side table. +# Data: t1(1,2) t2(1,3) t3(1,4) +select * from (t1 full join t2 on t1.a=t2.a) full join t3 on t1.a=t3.a; +a a a +1 1 1 +2 NULL NULL +NULL 3 NULL +NULL NULL 4 +select * from (t1 left join t2 on t1.a=t2.a) left join t3 on t1.a=t3.a union select * from (t1 right join t2 on t1.a=t2.a) left join t3 on t1.a=t3.a union select * from (t1 right join t2 on t1.a=t2.a) right join t3 on t1.a=t3.a; +a a a +1 1 1 +2 NULL NULL +NULL 3 NULL +NULL NULL 4 +# Chained FULL JOINs with the second ON referencing the middle table. +select * from t1 full join t2 on t1.a=t2.a full join t3 on t2.a=t3.a; +a a a +1 1 1 +2 NULL NULL +NULL 3 NULL +NULL NULL 4 +select * from t1 left join t2 on t1.a=t2.a left join t3 on t2.a=t3.a union select * from t1 right join t2 on t1.a=t2.a left join t3 on t2.a=t3.a union select * from (t1 left join t2 on t1.a=t2.a) right join t3 on t2.a=t3.a union select * from (t1 right join t2 on t1.a=t2.a) right join t3 on t2.a=t3.a; +a a a +1 1 1 +2 NULL NULL +NULL 3 NULL +NULL NULL 4 +# Nested FULL JOIN with duplicate rows. 
+create table d1 (a int); +insert into d1 values (1),(1),(2); +create table d2 (a int); +insert into d2 values (1),(3),(3); +select * from (d1 full join d2 on d1.a=d2.a) full join t3 on d1.a=t3.a; +a a a +1 1 1 +1 1 1 +2 NULL NULL +NULL 3 NULL +NULL 3 NULL +NULL NULL 4 +select * from (d1 left join d2 on d1.a=d2.a) left join t3 on d1.a=t3.a union all select * from (d1 right join d2 on d1.a=d2.a) left join t3 on d1.a=t3.a where d1.a is null union all select * from (d1 right join d2 on d1.a=d2.a) right join t3 on d1.a=t3.a where d1.a is null and d2.a is null; +a a a +1 1 1 +1 1 1 +2 NULL NULL +NULL 3 NULL +NULL 3 NULL +NULL NULL 4 +drop table d1, d2; +drop table t1, t2, t3; +# ======================================================== +# Section 3: FULL JOIN rewrites to LEFT, RIGHT, and INNER +# +# When a NULL-rejecting WHERE predicate selects one or both +# sides, simplify_joins() rewrites the FULL JOIN accordingly. +# The (re)written form must produce the same result as the +# direct LEFT/RIGHT/INNER formulation. 
+# ======================================================== +create table t1 (pk int auto_increment, x int, y int, primary key (pk)); +create table t2 (pk int auto_increment, x int, y int, primary key (pk)); +insert into t1 (x, y) values (-5,-5),(-4,-4),(-3,-3),(-2,-2),(-1,-1),(0,0),(1,1),(2,2),(3,3),(4,4),(5,5); +insert into t2 (x, y) values (-5,25),(-4,16),(-3,9),(-2,4),(-1,1),(0,0),(1,1),(2,4),(3,9),(4,16),(5,25); +# FULL to RIGHT JOIN, these two queries should be equal: +select * from t1 full join t2 on t1.y = t2.y where t2.pk is not null; +pk x y pk x y +6 0 0 6 0 0 +7 1 1 5 -1 1 +7 1 1 7 1 1 +10 4 4 4 -2 4 +10 4 4 8 2 4 +NULL NULL NULL 1 -5 25 +NULL NULL NULL 2 -4 16 +NULL NULL NULL 3 -3 9 +NULL NULL NULL 9 3 9 +NULL NULL NULL 10 4 16 +NULL NULL NULL 11 5 25 +select * from t1 right join t2 on t1.y = t2.y; +pk x y pk x y +6 0 0 6 0 0 +7 1 1 5 -1 1 +7 1 1 7 1 1 +10 4 4 4 -2 4 +10 4 4 8 2 4 +NULL NULL NULL 1 -5 25 +NULL NULL NULL 2 -4 16 +NULL NULL NULL 3 -3 9 +NULL NULL NULL 9 3 9 +NULL NULL NULL 10 4 16 +NULL NULL NULL 11 5 25 +# FULL to RIGHT JOIN, these two queries should be equal: +select * from t1 full join t2 on t1.x >= 0 and t1.x <= 1 and t2.x >= 0 and t2.x <= 1 where t2.pk is not null; +pk x y pk x y +6 0 0 6 0 0 +6 0 0 7 1 1 +7 1 1 6 0 0 +7 1 1 7 1 1 +NULL NULL NULL 1 -5 25 +NULL NULL NULL 2 -4 16 +NULL NULL NULL 3 -3 9 +NULL NULL NULL 4 -2 4 +NULL NULL NULL 5 -1 1 +NULL NULL NULL 8 2 4 +NULL NULL NULL 9 3 9 +NULL NULL NULL 10 4 16 +NULL NULL NULL 11 5 25 +select * from t1 right join t2 on t1.x >= 0 and t1.x <= 1 and t2.x >= 0 and t2.x <= 1; +pk x y pk x y +6 0 0 6 0 0 +6 0 0 7 1 1 +7 1 1 6 0 0 +7 1 1 7 1 1 +NULL NULL NULL 1 -5 25 +NULL NULL NULL 2 -4 16 +NULL NULL NULL 3 -3 9 +NULL NULL NULL 4 -2 4 +NULL NULL NULL 5 -1 1 +NULL NULL NULL 8 2 4 +NULL NULL NULL 9 3 9 +NULL NULL NULL 10 4 16 +NULL NULL NULL 11 5 25 +# FULL to INNER JOIN, these two queries should be equal: +select * from t1 full join t2 on t1.x >= 0 and t1.x <= 1 and t2.x >= 0 and t2.x <= 
1 where t1.pk is not null and t2.pk is not null; +pk x y pk x y +6 0 0 6 0 0 +7 1 1 6 0 0 +6 0 0 7 1 1 +7 1 1 7 1 1 +select * from t1 inner join t2 on t1.x >= 0 and t1.x <= 1 and t2.x >= 0 and t2.x <= 1; +pk x y pk x y +6 0 0 6 0 0 +7 1 1 6 0 0 +6 0 0 7 1 1 +7 1 1 7 1 1 +# FULL to LEFT JOIN, these two queries should be equal: +select * from t1 full join t2 on t1.x >= 0 and t1.x <= 1 and t2.x >= 0 and t2.x <= 1 where t1.pk is not null; +pk x y pk x y +6 0 0 6 0 0 +7 1 1 6 0 0 +6 0 0 7 1 1 +7 1 1 7 1 1 +1 -5 -5 NULL NULL NULL +2 -4 -4 NULL NULL NULL +3 -3 -3 NULL NULL NULL +4 -2 -2 NULL NULL NULL +5 -1 -1 NULL NULL NULL +8 2 2 NULL NULL NULL +9 3 3 NULL NULL NULL +10 4 4 NULL NULL NULL +11 5 5 NULL NULL NULL +select * from t1 left join t2 on t1.x >= 0 and t1.x <= 1 and t2.x >= 0 and t2.x <= 1; +pk x y pk x y +6 0 0 6 0 0 +7 1 1 6 0 0 +6 0 0 7 1 1 +7 1 1 7 1 1 +1 -5 -5 NULL NULL NULL +2 -4 -4 NULL NULL NULL +3 -3 -3 NULL NULL NULL +4 -2 -2 NULL NULL NULL +5 -1 -1 NULL NULL NULL +8 2 2 NULL NULL NULL +9 3 3 NULL NULL NULL +10 4 4 NULL NULL NULL +11 5 5 NULL NULL NULL +# FULL NATURAL to INNER JOIN, these two queries should be equal: +select * from t1 natural full join t2 where t1.pk is not null and t2.pk is not null; +pk x y +6 0 0 +7 1 1 +select * from t1 inner join t2 on t1.x = t2.x and t1.y = t2.y; +pk x y pk x y +6 0 0 6 0 0 +7 1 1 7 1 1 +# FULL NATURAL to LEFT JOIN, these two queries should be equal: +select * from t1 natural full join t2 where t1.pk is not null; +pk x y +1 -5 -5 +2 -4 -4 +3 -3 -3 +4 -2 -2 +5 -1 -1 +6 0 0 +7 1 1 +8 2 2 +9 3 3 +10 4 4 +11 5 5 +select * from t1 left join t2 on t2.pk = t1.pk and t2.x = t1.x and t2.y = t1.y; +pk x y pk x y +1 -5 -5 NULL NULL NULL +2 -4 -4 NULL NULL NULL +3 -3 -3 NULL NULL NULL +4 -2 -2 NULL NULL NULL +5 -1 -1 NULL NULL NULL +6 0 0 6 0 0 +7 1 1 7 1 1 +8 2 2 NULL NULL NULL +9 3 3 NULL NULL NULL +10 4 4 NULL NULL NULL +11 5 5 NULL NULL NULL +# FULL NATURAL to RIGHT JOIN +select * from t1 natural full join t2 where t2.pk 
is not null; +pk x y +1 -5 25 +2 -4 16 +3 -3 9 +4 -2 4 +5 -1 1 +6 0 0 +7 1 1 +8 2 4 +9 3 9 +10 4 16 +11 5 25 +select * from t1 right join t2 on t1.pk = t2.pk and t1.x = t2.x and t1.y = t2.y; +pk x y pk x y +NULL NULL NULL 1 -5 25 +NULL NULL NULL 2 -4 16 +NULL NULL NULL 3 -3 9 +NULL NULL NULL 4 -2 4 +NULL NULL NULL 5 -1 1 +6 0 0 6 0 0 +7 1 1 7 1 1 +NULL NULL NULL 8 2 4 +NULL NULL NULL 9 3 9 +NULL NULL NULL 10 4 16 +NULL NULL NULL 11 5 25 +select * from t1 full join t2 on t1.x >= 0 and t1.x <= 1 and t2.x >= 0 and t2.x <= 1; +pk x y pk x y +1 -5 -5 NULL NULL NULL +10 4 4 NULL NULL NULL +11 5 5 NULL NULL NULL +2 -4 -4 NULL NULL NULL +3 -3 -3 NULL NULL NULL +4 -2 -2 NULL NULL NULL +5 -1 -1 NULL NULL NULL +6 0 0 6 0 0 +6 0 0 7 1 1 +7 1 1 6 0 0 +7 1 1 7 1 1 +8 2 2 NULL NULL NULL +9 3 3 NULL NULL NULL +NULL NULL NULL 1 -5 25 +NULL NULL NULL 10 4 16 +NULL NULL NULL 11 5 25 +NULL NULL NULL 2 -4 16 +NULL NULL NULL 3 -3 9 +NULL NULL NULL 4 -2 4 +NULL NULL NULL 5 -1 1 +NULL NULL NULL 8 2 4 +NULL NULL NULL 9 3 9 +select * from t1 left join t2 on t1.x >= 0 and t1.x <= 1 and t2.x >= 0 and t2.x <= 1 union select * from t1 right join t2 on t1.x >= 0 and t1.x <= 1 and t2.x >= 0 and t2.x <= 1; +pk x y pk x y +1 -5 -5 NULL NULL NULL +10 4 4 NULL NULL NULL +11 5 5 NULL NULL NULL +2 -4 -4 NULL NULL NULL +3 -3 -3 NULL NULL NULL +4 -2 -2 NULL NULL NULL +5 -1 -1 NULL NULL NULL +6 0 0 6 0 0 +6 0 0 7 1 1 +7 1 1 6 0 0 +7 1 1 7 1 1 +8 2 2 NULL NULL NULL +9 3 3 NULL NULL NULL +NULL NULL NULL 1 -5 25 +NULL NULL NULL 10 4 16 +NULL NULL NULL 11 5 25 +NULL NULL NULL 2 -4 16 +NULL NULL NULL 3 -3 9 +NULL NULL NULL 4 -2 4 +NULL NULL NULL 5 -1 1 +NULL NULL NULL 8 2 4 +NULL NULL NULL 9 3 9 +select * from t1 natural full join t2; +pk x y +1 -5 -5 +1 -5 25 +10 4 16 +10 4 4 +11 5 25 +11 5 5 +2 -4 -4 +2 -4 16 +3 -3 -3 +3 -3 9 +4 -2 -2 +4 -2 4 +5 -1 -1 +5 -1 1 +6 0 0 +7 1 1 +8 2 2 +8 2 4 +9 3 3 +9 3 9 +select * from t1 natural left join t2 union select * from t1 natural right join t2; +pk x y +1 -5 -5 +1 -5 
25 +10 4 16 +10 4 4 +11 5 25 +11 5 5 +2 -4 -4 +2 -4 16 +3 -3 -3 +3 -3 9 +4 -2 -2 +4 -2 4 +5 -1 -1 +5 -1 1 +6 0 0 +7 1 1 +8 2 2 +8 2 4 +9 3 3 +9 3 9 +drop table t1, t2; +# Rewrites with nested joins. +create table t1 (v int); +insert into t1 (v) values (1); +create table t2 (v int); +insert into t2 (v) values (2); +create table t3 (v int); +insert into t3 (v) values (3); +# (FULL)FULL to (INNER)INNER JOIN +select * from t1 full join t2 on t1.v = t2.v full join t3 on t2.v = t3.v where t1.v is not null and t2.v is not null and t3.v is not null; +v v v +select * from t1 inner join t2 on t1.v = t2.v inner join t3 on t2.v = t3.v; +v v v +# (FULL)FULL to (RIGHT)LEFT JOIN +select * from t1 full join t2 on t1.v = t2.v full join t3 on t1.v = t3.v where t2.v is not null; +v v v +NULL 2 NULL +select * from t1 right join t2 on t1.v = t2.v left join t3 on t1.v = t3.v; +v v v +NULL 2 NULL +# (FULL)FULL to (LEFT)LEFT JOIN +select * from t1 full join t2 on t1.v = t2.v full join t3 on t1.v = t3.v where t1.v is not null; +v v v +1 NULL NULL +select * from t1 left join t2 on t2.v = t1.v left join t3 on t3.v = t1.v; +v v v +1 NULL NULL +# (FULL)LEFT to (LEFT)LEFT JOIN +select * from t1 full join t2 on t1.v = t2.v left join t3 on t2.v = t3.v where t1.v is not null; +v v v +1 NULL NULL +select * from t1 left join t2 on t1.v = t2.v left join t3 on t2.v = t3.v; +v v v +1 NULL NULL +# (FULL)LEFT to (RIGHT)LEFT JOIN +select * from t1 full join t2 on t1.v = t2.v left join t3 on t2.v = t3.v where t2.v is not null; +v v v +NULL 2 NULL +select * from t1 right join t2 on t1.v = t2.v left join t3 on t3.v = t2.v; +v v v +NULL 2 NULL +# (LEFT)FULL to (LEFT)RIGHT JOIN +select * from t1 left join t2 on t1.v = t2.v full join t3 on t2.v = t3.v where t3.v is not null; +v v v +NULL NULL 3 +select * from t1 left join t2 on t1.v = t2.v right join t3 on t2.v = t3.v; +v v v +NULL NULL 3 +# (LEFT)FULL to (LEFT)LEFT JOIN +insert into t1 (v) values (2),(3); +insert into t2 (v) values (1); +truncate t3; +insert 
into t3 (v) values (1); +select * from t1; +v +1 +2 +3 +select * from t2; +v +2 +1 +select * from t3; +v +1 +select * from t1 left join t2 on t1.v = t2.v full join t3 on t2.v = t3.v where t3.v = 1; +v v v +1 1 1 +select * from t3 left join t1 on t1.v = 1 left join t2 on t2.v = 1; +v v v +1 1 1 +# FULL to INNER, two variables. +select * from (select t1.v from t1 full join t2 on t1.v = t2.v where t1.v > 1 and t2.v > 1) as dt; +v +2 +select t1.v from t2 inner join t1 where t2.v = t1.v and t1.v > 1 and t1.v > 1; +v +2 +# FULL to INNER with a UNION. +select t1.v from t1 full join t2 on t1.v = t2.v where t1.v > 1 and t2.v > 1 union select * from t1; +v +2 +1 +3 +select t1.v from t2 inner join t1 where t1.v = t2.v and t2.v > 1 and t2.v > 1 union select * from t1; +v +2 +1 +3 +drop table t1, t2, t3; +# ======================================================== +# Section 4: NATURAL FULL JOIN and COALESCE +# +# Common columns surface as COALESCE expressions rather than +# plain fields. +# ======================================================== +create table t1 (a int, b int); +create table t2 (a int, b int); +create table t3 (a int, b int); +insert into t1 (a,b) values (1,1),(2,2); +insert into t2 (a,b) values (1,1),(3,3); +insert into t3 (a,b) values (3,3),(4,4); +select * from t1 natural full join t2 where +t1.a is not null and t1.b is not null and +t2.a is not null and t2.b is not null; +a b +1 1 +select * from t1 natural left join t2 where +t1.a is not null and t1.b is not null and +t2.a is not null and t2.b is not null +union +select * from t1 natural right join t2 where +t1.a is not null and t1.b is not null and +t2.a is not null and t2.b is not null; +a b +1 1 +explain extended +select * from t1 natural full join t2 where +t1.a is not null and t1.b is not null and +t2.a is not null and t2.b is not null; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 2 100.00 Using where +1 SIMPLE t2 ALL NULL NULL NULL 
NULL 2 100.00 Using where; Using join buffer (flat, BNL join) +Warnings: +Note 1003 select coalesce(`test`.`t1`.`a`,`test`.`t2`.`a`) AS `a`,coalesce(`test`.`t1`.`b`,`test`.`t2`.`b`) AS `b` from `test`.`t2` join `test`.`t1` where `test`.`t2`.`a` = `test`.`t1`.`a` and `test`.`t2`.`b` = `test`.`t1`.`b` and `test`.`t1`.`a` is not null and `test`.`t1`.`b` is not null and `test`.`t1`.`a` is not null and `test`.`t1`.`b` is not null +select coalesce(t1.a, t2.a) AS a, coalesce(t1.b, t2.b) AS b from +t2 join t1 where +t2.a = t1.a and t2.b = t1.b and +t1.a is not null and t1.b is not null; +a b +1 1 +select * from t1 natural full join t2 where t1.a is not null; +a b +1 1 +2 2 +select * from t1 natural left join t2 where t1.a is not null +union +select * from t1 natural right join t2 where t1.a is not null; +a b +1 1 +2 2 +explain extended select * from t1 natural full join t2 where t1.a is not null; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 2 100.00 Using where +1 SIMPLE t2 ALL NULL NULL NULL NULL 2 100.00 Using where; Using join buffer (flat, BNL join) +Warnings: +Note 1003 select coalesce(`test`.`t1`.`a`,`test`.`t2`.`a`) AS `a`,coalesce(`test`.`t1`.`b`,`test`.`t2`.`b`) AS `b` from `test`.`t1` left join `test`.`t2` on(`test`.`t2`.`a` = `test`.`t1`.`a` and `test`.`t2`.`b` = `test`.`t1`.`b`) where `test`.`t1`.`a` is not null +select coalesce(t1.a, t2.a) AS a, coalesce(t1.b, t2.b) AS b from +t1 left join t2 on t2.a = t1.a and t2.b = t1.b where t1.a is not null; +a b +1 1 +2 2 +select * from t1 natural full join t2 where t2.a is not null; +a b +1 1 +3 3 +select * from t1 natural left join t2 where t2.a is not null +union +select * from t1 natural right join t2 where t2.a is not null; +a b +1 1 +3 3 +explain extended select * from t1 natural full join t2 where t2.a is not null; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 2 100.00 Using 
where +1 SIMPLE t1 ALL NULL NULL NULL NULL 2 100.00 Using where; Using join buffer (flat, BNL join) +Warnings: +Note 1003 select coalesce(`test`.`t1`.`a`,`test`.`t2`.`a`) AS `a`,coalesce(`test`.`t1`.`b`,`test`.`t2`.`b`) AS `b` from `test`.`t2` left join `test`.`t1` on(`test`.`t1`.`a` = `test`.`t2`.`a` and `test`.`t1`.`b` = `test`.`t2`.`b`) where `test`.`t2`.`a` is not null +select coalesce(t1.a, t2.a) AS a, coalesce(t1.b, t2.b) AS b from +t2 left join t1 on t1.a = t2.a and t1.b = t2.b where t2.a is not null; +a b +1 1 +3 3 +select * from (t1 natural join t2) right join t2 t3 on t1.a=t3.a; +a b a b +1 1 1 1 +NULL NULL 3 3 +select * from (t1 natural full join t2) right join t2 t3 on t1.a=t3.a; +ERROR HY000: FULL JOIN is not allowed on the inner side of a LEFT or RIGHT JOIN +select t1.a from t1 natural full join (t2 natural join t3) where +t1.a is not null; +a +1 +2 +select t1.a from t1 natural left join (t2 natural join t3) where t1.a is not null +union +select t1.a from t1 natural right join (t2 natural join t3) where t1.a is not null; +a +1 +2 +explain extended select t1.a from +t1 natural full join (t2 natural join t3) where t1.a is not null; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 2 100.00 Using where +1 SIMPLE t2 ALL NULL NULL NULL NULL 2 100.00 Using where; Using join buffer (flat, BNL join) +1 SIMPLE t3 ALL NULL NULL NULL NULL 2 100.00 Using where; Using join buffer (incremental, BNL join) +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` left join (`test`.`t2` join `test`.`t3`) on(`test`.`t2`.`a` = `test`.`t1`.`a` and `test`.`t3`.`a` = `test`.`t1`.`a` and `test`.`t2`.`b` = `test`.`t1`.`b` and `test`.`t3`.`b` = `test`.`t1`.`b`) where `test`.`t1`.`a` is not null +select t1.a AS a from t1 left join (t2 join t3) on +t2.a = t1.a and t3.a = t1.a and t2.b = t1.b and t3.b = t1.b where +t1.a is not null; +a +1 +2 +explain extended select *, avg(t2.a) from t1 natural full join 
t2 group by t1.a; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 2 100.00 Using temporary; Using filesort +1 SIMPLE t2 ALL NULL NULL NULL NULL 2 100.00 Using where +Warnings: +Note 1003 select coalesce(`test`.`t1`.`a`,`test`.`t2`.`a`) AS `a`,coalesce(`test`.`t1`.`b`,`test`.`t2`.`b`) AS `b`,avg(`test`.`t2`.`a`) AS `avg(t2.a)` from `test`.`t1` full join `test`.`t2` on(`test`.`t2`.`a` = `test`.`t1`.`a` and `test`.`t2`.`b` = `test`.`t1`.`b`) where 1 group by `test`.`t1`.`a` +select *, avg(t2.a) from t1 natural full join t2 where t1.a is not null group by t1.a; +a b avg(t2.a) +1 1 1.0000 +2 2 NULL +# UNION equivalent of the aggregate above. +select dt.a, dt.b, avg(dt.t2a) as `avg(t2.a)` from ( +select t1.a as a, t1.b as b, t2.a as t2a from t1 natural left join t2 +union +select t1.a, t1.b, t2.a from t1 natural right join t2) dt +where dt.a is not null group by dt.a; +a b avg(t2.a) +1 1 1.0000 +2 2 NULL +explain extended select *, avg(t2.a) from t1 natural full join t2 where +t1.a is not null group by t1.a; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 2 100.00 Using where; Using temporary; Using filesort +1 SIMPLE t2 ALL NULL NULL NULL NULL 2 100.00 Using where; Using join buffer (flat, BNL join) +Warnings: +Note 1003 select coalesce(`test`.`t1`.`a`,`test`.`t2`.`a`) AS `a`,coalesce(`test`.`t1`.`b`,`test`.`t2`.`b`) AS `b`,avg(`test`.`t2`.`a`) AS `avg(t2.a)` from `test`.`t1` left join `test`.`t2` on(`test`.`t2`.`a` = `test`.`t1`.`a` and `test`.`t2`.`b` = `test`.`t1`.`b`) where `test`.`t1`.`a` is not null group by `test`.`t1`.`a` +select coalesce(t1.a, t2.a) AS a, coalesce(t1.b, t2.b) AS b, +avg(t2.a) AS avg_t2_a from +t1 left join t2 on t2.a = t1.a and t2.b = t1.b where +t1.a is not null group by t1.a; +a b avg_t2_a +1 1 1.0000 +2 2 NULL +# -------------------------------------------------------- +# 4b: unqualified references see the 
COALESCE +# +# An unqualified name of a NATURAL FULL JOIN common column +# resolves to COALESCE(left_col, right_col) wherever it +# appears -- WHERE, HAVING, GROUP BY, ORDER BY, USING, +# views, prepared statements, and outer queries over a +# derived table. Qualified names like t1.a stay raw. +# -------------------------------------------------------- +drop table t1, t2, t3; +create table t1 (a int, b int); +create table t2 (a int, c int); +insert into t1 (a,b) values (1,10),(2,20); +insert into t2 (a,c) values (2,200),(3,300); +select * from t1 natural full join t2 where a = 3; +a b c +3 NULL 300 +select coalesce(t1.a, t2.a) AS a, t1.b, t2.c from +t1 left join t2 on t1.a = t2.a where coalesce(t1.a, t2.a) = 3 +union +select coalesce(t1.a, t2.a) AS a, t1.b, t2.c from +t1 right join t2 on t1.a = t2.a where coalesce(t1.a, t2.a) = 3; +a b c +3 NULL 300 +select * from t1 natural full join t2 where a = 1; +a b c +1 10 NULL +select coalesce(t1.a, t2.a) AS a, t1.b, t2.c from +t1 left join t2 on t1.a = t2.a where coalesce(t1.a, t2.a) = 1 +union +select coalesce(t1.a, t2.a) AS a, t1.b, t2.c from +t1 right join t2 on t1.a = t2.a where coalesce(t1.a, t2.a) = 1; +a b c +1 10 NULL +select * from t1 natural full join t2 where a = 2; +a b c +2 20 200 +select * from t1 natural full join t2 where t2.a is null; +a b c +1 10 NULL +select a, count(*) from t1 natural full join t2 group by a; +a count(*) +1 1 +2 1 +3 1 +select a, count(*) from t1 natural full join t2 group by a having a = 3; +a count(*) +3 1 +select a from t1 natural full join t2 order by a; +a +1 +2 +3 +select * from t1 full join t2 using(a) where a = 3; +a b c +3 NULL 300 +select * from (select * from t1 natural full join t2) dt where a = 3; +a b c +3 NULL 300 +select * from t1 natural full join t2 where a in (select 3); +a b c +3 NULL 300 +create view v1 as select * from t1 natural full join t2; +select * from v1 where a = 3; +a b c +3 NULL 300 +drop view v1; +prepare stmt from 'select * from t1 natural full join t2 
where a = ?'; +set @v = 3; +execute stmt using @v; +a b c +3 NULL 300 +set @v = 1; +execute stmt using @v; +a b c +1 10 NULL +set @v = 2; +execute stmt using @v; +a b c +2 20 200 +deallocate prepare stmt; +create procedure sp_find_a(in p int) +select * from t1 natural full join t2 where a = p| +call sp_find_a(3); +a b c +3 NULL 300 +call sp_find_a(1); +a b c +1 10 NULL +drop procedure sp_find_a; +select * from t1 natural left join t2 where a = 1; +a b c +1 10 NULL +select * from t1 natural right join t2 where a = 3; +a c b +3 300 NULL +create table t3 (a int, d int); +insert into t3 (a,d) values (1,1000),(3,3000); +select * from (t1 natural full join t2) join t3 on t1.a = t3.a +order by t3.a; +a b c a d +1 10 NULL 1 1000 +drop table t1, t2, t3; +# -------------------------------------------------------- +# 4c: chained NATURAL FULL JOIN nests the COALESCE +# +# (t1 NFJ t2) NFJ t3 is left-associative, so the common +# column of the inner join is COALESCE(t1.a, t2.a) and the +# outer join coalesces that with t3.a, giving +# COALESCE(COALESCE(t1.a, t2.a), t3.a). The inner COALESCE +# must drive both the output column and the synthesized +# outer join equality, otherwise a row that came only from +# t2 carries a NULL a into the outer join and fails to match +# an equal t3 row. +# -------------------------------------------------------- +create table t1 (a int, b int); +create table t2 (a int, c int); +create table t3 (a int, d int); +create table t4 (a int, e int); +insert into t1 (a,b) values (1,10),(2,20); +insert into t2 (a,c) values (2,200),(5,500),(6,600); +insert into t3 (a,d) values (5,5000),(4,4000); +insert into t4 (a,e) values (6,60000),(7,70000); +# No UNION companion: the LEFT/RIGHT permutation UNION form +# over-approximates for chained joins (see the note above +# Section 6). The recorded result is the oracle. +# a=5 lives in t2 and t3; the inner COALESCE carries 5 from +# t2 into the outer equality so the two rows match. 
a=6 is +# t2-only with no t3 match and must surface with a=6, not +# NULL. a=1 is t1-only, a=4 is t3-only. +select a, b, c, d from (t1 natural full join t2) natural full join t3; +a b c d +1 10 NULL NULL +2 20 200 NULL +4 NULL NULL 4000 +5 NULL 500 5000 +6 NULL 600 NULL +# The unqualified a in WHERE sees the nested COALESCE. +select a, b, c, d from (t1 natural full join t2) natural full join t3 +where a = 6; +a b c d +6 NULL 600 NULL +# Three levels deep: a=6 originates in t2, survives the t3 +# level as the nested COALESCE, and matches t4 at the third +# level. a=7 is t4-only. +select a, b, c, d, e from +((t1 natural full join t2) natural full join t3) natural full join t4; +a b c d e +1 10 NULL NULL NULL +2 20 200 NULL NULL +4 NULL NULL 4000 NULL +5 NULL 500 5000 NULL +6 NULL 600 NULL 60000 +7 NULL NULL NULL 70000 +drop table t1, t2, t3, t4; +# -------------------------------------------------------- +# 4d: node aliasing of the shared COALESCE +# +# The COALESCE built for a common column is shared by every +# reference to that column. The oracle for each query is the +# explicit FULL JOIN with COALESCE(t1.col, t2.col) written out, +# so the two row sets match when the unqualified name resolves +# to that COALESCE. +# -------------------------------------------------------- +create table t1 (a int, b int); +create table t2 (a int, c int); +insert into t1 (a,b) values (1,10),(2,20); +insert into t2 (a,c) values (2,200),(3,300); +# An explicit alias must name only its own reference. The +# COALESCE is shared, so renaming it in place would change the +# column name for resolution and would make a second reference +# by the original name fail. The original name and an aliased +# reference appear together here with distinct names. 
+select a, a as a2 from t1 natural full join t2; +a a2 +1 1 +2 2 +3 3 +select coalesce(t1.a, t2.a) as a, coalesce(t1.a, t2.a) as a2 +from t1 full join t2 on t1.a = t2.a; +a a2 +1 1 +2 2 +3 3 +# Aliased reference first, then the original name in WHERE. +select a as a2 from t1 natural full join t2 where a >= 2; +a2 +2 +3 +select coalesce(t1.a, t2.a) as a2 from t1 full join t2 on t1.a = t2.a +where coalesce(t1.a, t2.a) >= 2; +a2 +2 +3 +# GROUP BY, COUNT, and SUM over the coalesced column. +select a, count(*) as cnt, sum(a) as s from t1 natural full join t2 +group by a; +a cnt s +1 1 1 +2 1 2 +3 1 3 +select coalesce(t1.a, t2.a) as a, count(*) as cnt, +sum(coalesce(t1.a, t2.a)) as s +from t1 full join t2 on t1.a = t2.a group by coalesce(t1.a, t2.a); +a cnt s +1 1 1 +2 1 2 +3 1 3 +# Aggregates directly over the coalesced column, no GROUP BY. +select count(distinct a) as cd, min(a) as mn, max(a) as mx +from t1 natural full join t2; +cd mn mx +3 1 3 +select count(distinct coalesce(t1.a, t2.a)) as cd, +min(coalesce(t1.a, t2.a)) as mn, max(coalesce(t1.a, t2.a)) as mx +from t1 full join t2 on t1.a = t2.a; +cd mn mx +3 1 3 +# HAVING over the coalesced column. +select a, count(*) as cnt from t1 natural full join t2 +group by a having a >= 2; +a cnt +2 1 +3 1 +select * from (select coalesce(t1.a, t2.a) as a, count(*) as cnt +from t1 full join t2 on t1.a = t2.a group by coalesce(t1.a, t2.a)) dt +where a >= 2; +a cnt +2 1 +3 1 +# Window functions over the coalesced column. +select a, row_number() over (order by a) as rn, count(*) over () as tot +from t1 natural full join t2 order by a; +a rn tot +1 1 3 +2 2 3 +3 3 3 +select coalesce(t1.a, t2.a) as a, +row_number() over (order by coalesce(t1.a, t2.a)) as rn, +count(*) over () as tot +from t1 full join t2 on t1.a = t2.a order by coalesce(t1.a, t2.a); +a rn tot +1 1 3 +2 2 3 +3 3 3 +# DISTINCT over the coalesced column. 
+select distinct a from t1 natural full join t2; +a +1 +2 +3 +select distinct coalesce(t1.a, t2.a) as a +from t1 full join t2 on t1.a = t2.a; +a +1 +2 +3 +# The same column in SELECT, WHERE, and ORDER BY at once. +select a, a + 0 as a2 from t1 natural full join t2 +where a is not null order by a desc; +a a2 +3 3 +2 2 +1 1 +select coalesce(t1.a, t2.a) as a, coalesce(t1.a, t2.a) + 0 as a2 +from t1 full join t2 on t1.a = t2.a +where coalesce(t1.a, t2.a) is not null +order by coalesce(t1.a, t2.a) desc; +a a2 +3 3 +2 2 +1 1 +drop table t1, t2; +# -------------------------------------------------------- +# 4e: multi-column NATURAL FULL JOIN +# +# Two common columns each get their own COALESCE. Unqualified +# references to either resolve to its COALESCE, including under +# aliases. +# -------------------------------------------------------- +create table t1 (a int, b int, x int); +create table t2 (a int, b int, y int); +insert into t1 values (1,1,10),(2,2,20); +insert into t2 values (2,2,200),(3,3,300); +select a, b, x, y from t1 natural full join t2; +a b x y +1 1 10 NULL +2 2 20 200 +3 3 NULL 300 +select coalesce(t1.a, t2.a) as a, coalesce(t1.b, t2.b) as b, x, y +from t1 full join t2 on t1.a = t2.a and t1.b = t2.b; +a b x y +1 1 10 NULL +2 2 20 200 +3 3 NULL 300 +# Aliased references to both common columns, with the original +# names used again in WHERE. +select a as ka, b as kb from t1 natural full join t2 +where a >= 2 and b >= 2; +ka kb +2 2 +3 3 +select coalesce(t1.a, t2.a) as ka, coalesce(t1.b, t2.b) as kb +from t1 full join t2 on t1.a = t2.a and t1.b = t2.b +where coalesce(t1.a, t2.a) >= 2 and coalesce(t1.b, t2.b) >= 2; +ka kb +2 2 +3 3 +drop table t1, t2; +# ======================================================== +# Section 5: NULL handling +# +# NULL = NULL is false, so rows whose join key is NULL never +# match and must surface from their side unmatched. The +# NULL-safe <=> operator matches NULL to NULL. 
+# ======================================================== +create table t1 (a int, b int); +insert into t1 values (NULL, NULL), (1, 10), (NULL, NULL); +create table t2 (a int, b int); +insert into t2 values (NULL, NULL), (2, 20), (NULL, NULL); +# Both sides have all-NULL rows; no match possible. +select * from t1 full join t2 on t1.a = t2.a; +a b a b +1 10 NULL NULL +NULL NULL 2 20 +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +# The UNION formulation eliminates duplicate all-NULL rows; this +# is expected to differ. PostgreSQL agrees with the FULL JOIN +# result above. +select * from t1 left join t2 on t1.a = t2.a union select * from t1 right join t2 on t1.a = t2.a; +a b a b +NULL NULL NULL NULL +1 10 NULL NULL +NULL NULL 2 20 +# IS NULL in the ON clause — all-NULL rows now match. +select * from t1 full join t2 on t1.a is null and t2.a is null; +a b a b +1 10 NULL NULL +NULL NULL 2 20 +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +select * from t1 left join t2 on t1.a is null and t2.a is null union select * from t1 right join t2 on t1.a is null and t2.a is null; +a b a b +1 10 NULL NULL +NULL NULL 2 20 +NULL NULL NULL NULL +# NULL-safe equality operator (<=>). +select * from t1 full join t2 on t1.a <=> t2.a; +a b a b +1 10 NULL NULL +NULL NULL 2 20 +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +select * from t1 left join t2 on t1.a <=> t2.a union select * from t1 right join t2 on t1.a <=> t2.a; +a b a b +1 10 NULL NULL +NULL NULL 2 20 +NULL NULL NULL NULL +# Table with only all-NULL rows on one side. 
+create table t3 (a int, b int); +insert into t3 values (NULL, NULL), (NULL, NULL); +select * from t1 full join t3 on t1.a = t3.a; +a b a b +1 10 NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL +select * from t1 left join t3 on t1.a = t3.a union select * from t1 right join t3 on t1.a = t3.a; +a b a b +1 10 NULL NULL +NULL NULL NULL NULL +drop table t1, t2, t3; +# ======================================================== +# Section 6: Deeply nested FULL JOINs +# ======================================================== +create table t1 (a int); +insert into t1 values (1), (2); +create table t2 (a int); +insert into t2 values (2), (3); +create table t3 (a int); +insert into t3 values (3), (4); +create table t4 (a int); +insert into t4 values (1), (4); +create table t5 (a int); +insert into t5 values (2), (5); +# The LEFT/RIGHT-permutation UNION form is not a valid oracle +# for chained FULL JOINs: it over-approximates by emitting a +# right-side null-complement row for C rows that were already +# matched against the inner FJ's own null-complement row. Per +# SQL:2016 §7.10, (A FJ B) FJ C is left-associative and treats +# R1 = (A FJ B) as a single relation; a C row matched against +# any R1 row (including a null-complement row) is matched, and +# must not appear again as unmatched. Therefore the chained +# cases below have no UNION companion; the recorded result is +# the oracle. +# Three-level nested FULL JOINs. +select * from t1 +full join t2 on t1.a = t2.a +full join t3 on t2.a = t3.a; +a a a +1 NULL NULL +2 2 NULL +NULL 3 3 +NULL NULL 4 +# Four-level chained FULL JOINs. +select * from t1 +full join t2 on t1.a = t2.a +full join t3 on t2.a = t3.a +full join t4 on t3.a = t4.a; +a a a a +1 NULL NULL NULL +2 2 NULL NULL +NULL 3 3 NULL +NULL NULL 4 4 +NULL NULL NULL 1 +# Mixed FULL and INNER joins, deeply nested. 
+select * from t1 +inner join t2 on t1.a = t2.a +full join t3 on t2.a = t3.a +full join t4 on t3.a = t4.a; +a a a a +2 2 NULL NULL +NULL NULL 3 NULL +NULL NULL 4 4 +NULL NULL NULL 1 +drop table t1, t2, t3, t4, t5; +# ======================================================== +# Section 7: Mixed data types +# ======================================================== +create table t1 ( +id int, +str_val varchar(20), +dec_val decimal(10,2), +dt_val date +); +insert into t1 values +(1, 'hello', 10.50, '2024-01-01'), +(2, 'world', 20.75, '2024-06-15'), +(3, NULL, NULL, NULL); +create table t2 ( +id int, +str_val varchar(20), +dec_val decimal(10,2), +dt_val date +); +insert into t2 values +(2, 'WORLD', 20.75, '2024-06-15'), +(4, 'test', 99.99, '2025-12-31'), +(NULL, NULL, NULL, NULL); +# FULL JOIN on integer column with mixed-type rows. +select * from t1 full join t2 on t1.id = t2.id; +id str_val dec_val dt_val id str_val dec_val dt_val +1 hello 10.50 2024-01-01 NULL NULL NULL NULL +2 world 20.75 2024-06-15 2 WORLD 20.75 2024-06-15 +3 NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL 4 test 99.99 2025-12-31 +NULL NULL NULL NULL NULL NULL NULL NULL +select * from t1 left join t2 on t1.id = t2.id union select * from t1 right join t2 on t1.id = t2.id; +id str_val dec_val dt_val id str_val dec_val dt_val +1 hello 10.50 2024-01-01 NULL NULL NULL NULL +2 world 20.75 2024-06-15 2 WORLD 20.75 2024-06-15 +3 NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL 4 test 99.99 2025-12-31 +NULL NULL NULL NULL NULL NULL NULL NULL +# FULL JOIN on varchar column (case-sensitive match depends on collation). 
+select t1.id as id1, t1.str_val as sv1, t2.id as id2, t2.str_val as sv2 +from t1 full join t2 on t1.str_val = t2.str_val; +id1 sv1 id2 sv2 +1 hello NULL NULL +2 world 2 WORLD +3 NULL NULL NULL +NULL NULL 4 test +NULL NULL NULL NULL +select t1.id as id1, t1.str_val as sv1, t2.id as id2, t2.str_val as sv2 +from t1 left join t2 on t1.str_val = t2.str_val +union +select t1.id as id1, t1.str_val as sv1, t2.id as id2, t2.str_val as sv2 +from t1 right join t2 on t1.str_val = t2.str_val; +id1 sv1 id2 sv2 +1 hello NULL NULL +2 world 2 WORLD +3 NULL NULL NULL +NULL NULL 4 test +NULL NULL NULL NULL +# FULL JOIN on decimal column. +select t1.id as id1, t1.dec_val as d1, t2.id as id2, t2.dec_val as d2 +from t1 full join t2 on t1.dec_val = t2.dec_val; +id1 d1 id2 d2 +1 10.50 NULL NULL +2 20.75 2 20.75 +3 NULL NULL NULL +NULL NULL 4 99.99 +NULL NULL NULL NULL +select t1.id as id1, t1.dec_val as d1, t2.id as id2, t2.dec_val as d2 +from t1 left join t2 on t1.dec_val = t2.dec_val +union +select t1.id as id1, t1.dec_val as d1, t2.id as id2, t2.dec_val as d2 +from t1 right join t2 on t1.dec_val = t2.dec_val; +id1 d1 id2 d2 +1 10.50 NULL NULL +2 20.75 2 20.75 +3 NULL NULL NULL +NULL NULL 4 99.99 +NULL NULL NULL NULL +# FULL JOIN on date column. +select t1.id as id1, t1.dt_val as dt1, t2.id as id2, t2.dt_val as dt2 +from t1 full join t2 on t1.dt_val = t2.dt_val; +id1 dt1 id2 dt2 +1 2024-01-01 NULL NULL +2 2024-06-15 2 2024-06-15 +3 NULL NULL NULL +NULL NULL 4 2025-12-31 +NULL NULL NULL NULL +select t1.id as id1, t1.dt_val as dt1, t2.id as id2, t2.dt_val as dt2 +from t1 left join t2 on t1.dt_val = t2.dt_val +union +select t1.id as id1, t1.dt_val as dt1, t2.id as id2, t2.dt_val as dt2 +from t1 right join t2 on t1.dt_val = t2.dt_val; +id1 dt1 id2 dt2 +1 2024-01-01 NULL NULL +2 2024-06-15 2 2024-06-15 +3 NULL NULL NULL +NULL NULL 4 2025-12-31 +NULL NULL NULL NULL +# FULL JOIN with cross-type comparison (int vs decimal). 
+create table t3 (a int); +insert into t3 values (1), (2), (3); +create table t4 (a decimal(5,1)); +insert into t4 values (1.0), (2.5), (3.0); +select * from t3 full join t4 on t3.a = t4.a; +a a +1 1.0 +2 NULL +3 3.0 +NULL 2.5 +select * from t3 left join t4 on t3.a = t4.a union select * from t3 right join t4 on t3.a = t4.a; +a a +1 1.0 +2 NULL +3 3.0 +NULL 2.5 +# FULL JOIN with cross-type comparison (int vs varchar). +create table t5 (a varchar(10)); +insert into t5 values ('1'), ('2'), ('four'); +select * from t3 full join t5 on t3.a = t5.a; +a a +1 1 +2 2 +3 NULL +NULL four +Warnings: +Warning 1292 Truncated incorrect DECIMAL value: 'four' +Warning 1292 Truncated incorrect DECIMAL value: 'four' +Warning 1292 Truncated incorrect DECIMAL value: 'four' +select * from t3 left join t5 on t3.a = t5.a union select * from t3 right join t5 on t3.a = t5.a; +a a +1 1 +2 2 +3 NULL +NULL four +Warnings: +Warning 1292 Truncated incorrect DECIMAL value: 'four' +Warning 1292 Truncated incorrect DECIMAL value: 'four' +Warning 1292 Truncated incorrect DECIMAL value: 'four' +Warning 1292 Truncated incorrect DECIMAL value: 'four' +Warning 1292 Truncated incorrect DECIMAL value: 'four' +Warning 1292 Truncated incorrect DECIMAL value: 'four' +# FULL JOIN on multiple mixed-type columns simultaneously. 
+select * from t1 full join t2 +on t1.id = t2.id and t1.dec_val = t2.dec_val; +id str_val dec_val dt_val id str_val dec_val dt_val +1 hello 10.50 2024-01-01 NULL NULL NULL NULL +2 world 20.75 2024-06-15 2 WORLD 20.75 2024-06-15 +3 NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL 4 test 99.99 2025-12-31 +NULL NULL NULL NULL NULL NULL NULL NULL +select * from t1 left join t2 +on t1.id = t2.id and t1.dec_val = t2.dec_val +union +select * from t1 right join t2 +on t1.id = t2.id and t1.dec_val = t2.dec_val; +id str_val dec_val dt_val id str_val dec_val dt_val +1 hello 10.50 2024-01-01 NULL NULL NULL NULL +2 world 20.75 2024-06-15 2 WORLD 20.75 2024-06-15 +3 NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL 4 test 99.99 2025-12-31 +NULL NULL NULL NULL NULL NULL NULL NULL +drop table t1, t2, t3, t4, t5; +# ======================================================== +# Section 8: Aggregates with FULL JOIN +# ======================================================== +create table t1 (grp char(1), val int); +insert into t1 values ('a',10), ('a',20), ('b',30), ('c',40); +create table t2 (grp char(1), val int); +insert into t2 values ('b',100), ('c',200), ('c',300), ('d',400); +# COUNT and SUM over a FULL JOIN. +select coalesce(t1.grp, t2.grp) as grp, +count(*) as cnt, +sum(t1.val) as s1, +sum(t2.val) as s2 +from t1 full join t2 on t1.grp = t2.grp +group by coalesce(t1.grp, t2.grp); +grp cnt s1 s2 +a 2 30 NULL +b 1 30 100 +c 2 80 500 +d 1 NULL 400 +select coalesce(dt.grp1, dt.grp2) as grp, +count(*) as cnt, +sum(dt.val1) as s1, +sum(dt.val2) as s2 +from (select t1.grp as grp1, t1.val as val1, t2.grp as grp2, t2.val as val2 +from t1 left join t2 on t1.grp = t2.grp +union +select t1.grp, t1.val, t2.grp, t2.val +from t1 right join t2 on t1.grp = t2.grp) dt +group by coalesce(dt.grp1, dt.grp2); +grp cnt s1 s2 +a 2 30 NULL +b 1 30 100 +c 2 80 500 +d 1 NULL 400 +# AVG and MAX over a FULL JOIN. 
+select coalesce(t1.grp, t2.grp) as grp, +avg(t1.val) as avg1, +max(t2.val) as max2 +from t1 full join t2 on t1.grp = t2.grp +group by coalesce(t1.grp, t2.grp); +grp avg1 max2 +a 15.0000 NULL +b 30.0000 100 +c 40.0000 300 +d NULL 400 +select coalesce(dt.grp1, dt.grp2) as grp, +avg(dt.val1) as avg1, +max(dt.val2) as max2 +from (select t1.grp as grp1, t1.val as val1, t2.grp as grp2, t2.val as val2 +from t1 left join t2 on t1.grp = t2.grp +union +select t1.grp, t1.val, t2.grp, t2.val +from t1 right join t2 on t1.grp = t2.grp) dt +group by coalesce(dt.grp1, dt.grp2); +grp avg1 max2 +a 15.0000 NULL +b 30.0000 100 +c 40.0000 300 +d NULL 400 +# HAVING clause with FULL JOIN aggregate. +select coalesce(t1.grp, t2.grp) as grp, +count(*) as cnt +from t1 full join t2 on t1.grp = t2.grp +group by coalesce(t1.grp, t2.grp) +having count(*) > 1; +grp cnt +a 2 +c 2 +select coalesce(t1.grp, t2.grp) as grp, +count(*) as cnt from t1 +left join t2 on t1.grp = t2.grp group by +coalesce(t1.grp, t2.grp) having count(*) > 1 +union +select coalesce(t1.grp, t2.grp) as grp, +count(*) as cnt from t1 +right join t2 on t1.grp = t2.grp group by +coalesce(t1.grp, t2.grp) having count(*) > 1; +grp cnt +a 2 +c 2 +# COUNT(*) with no GROUP BY — total row count of the FULL JOIN. +select count(*) from t1 full join t2 on t1.grp = t2.grp; +count(*) +6 +select count(*) from (select t1.grp as g1, t1.val as v1, t2.grp as g2, t2.val as v2 +from t1 left join t2 on t1.grp = t2.grp +union +select t1.grp, t1.val, t2.grp, t2.val +from t1 right join t2 on t1.grp = t2.grp) dt; +count(*) +6 +# GROUP_CONCAT over a FULL JOIN. 
+select coalesce(t1.grp, t2.grp) as grp, +group_concat(t1.val order by t1.val) as vals1, +group_concat(t2.val order by t2.val) as vals2 +from t1 full join t2 on t1.grp = t2.grp +group by coalesce(t1.grp, t2.grp); +grp vals1 vals2 +a 10,20 NULL +b 30 100 +c 40,40 200,300 +d NULL 400 +select coalesce(dt.grp1, dt.grp2) as grp, +group_concat(dt.val1 order by dt.val1) as vals1, +group_concat(dt.val2 order by dt.val2) as vals2 +from (select t1.grp as grp1, t1.val as val1, t2.grp as grp2, t2.val as val2 +from t1 left join t2 on t1.grp = t2.grp +union +select t1.grp, t1.val, t2.grp, t2.val +from t1 right join t2 on t1.grp = t2.grp) dt +group by coalesce(dt.grp1, dt.grp2); +grp vals1 vals2 +a 10,20 NULL +b 30 100 +c 40,40 200,300 +d NULL 400 +drop table t1, t2; +# ======================================================== +# Section 9: Window functions with FULL JOIN +# +# Adapted from main.win: ROW_NUMBER, RANK, DENSE_RANK, +# LEAD/LAG, and aggregate windows (SUM/COUNT with frames) +# applied to FULL JOIN result sets. +# ======================================================== +create table t1 (a int, grp int, val int); +insert into t1 values (1,10,100), (2,10,200), (3,20,300), (4,20,400); +create table t2 (a int, grp int, val int); +insert into t2 values (3,20,3000), (4,20,4000), (5,30,5000), (6,30,6000); +# ROW_NUMBER() over a FULL JOIN ordered by the coalesced key. +select coalesce(t1.a, t2.a) as a, +row_number() over (order by coalesce(t1.a, t2.a)) as rn +from t1 full join t2 on t1.a = t2.a +order by a; +a rn +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +# Equivalent: row-number over LEFT UNION RIGHT. +select a, row_number() over (order by a) as rn +from (select coalesce(t1.a, t2.a) as a +from t1 left join t2 on t1.a = t2.a +union +select coalesce(t1.a, t2.a) +from t1 right join t2 on t1.a = t2.a) u +order by a; +a rn +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +# RANK() with PARTITION BY over FULL JOIN. 
+select coalesce(t1.grp, t2.grp) as grp, +coalesce(t1.val, 0) + coalesce(t2.val, 0) as v, +rank() over (partition by coalesce(t1.grp, t2.grp) +order by coalesce(t1.val, 0) + coalesce(t2.val, 0)) as rk +from t1 full join t2 on t1.a = t2.a; +grp v rk +10 100 1 +10 200 2 +20 3300 1 +20 4400 2 +30 5000 1 +30 6000 2 +select coalesce(t1grp, t2grp) as grp, +coalesce(t1val, 0) + coalesce(t2val, 0) as v, +rank() over (partition by coalesce(t1grp, t2grp) +order by coalesce(t1val, 0) + coalesce(t2val, 0)) as rk +from (select t1.a as t1a, t1.grp as t1grp, t1.val as t1val, +t2.a as t2a, t2.grp as t2grp, t2.val as t2val +from t1 left join t2 on t1.a = t2.a +union +select t1.a, t1.grp, t1.val, t2.a, t2.grp, t2.val +from t1 right join t2 on t1.a = t2.a) u; +grp v rk +10 100 1 +10 200 2 +20 3300 1 +20 4400 2 +30 5000 1 +30 6000 2 +# DENSE_RANK() over FULL JOIN. +select coalesce(t1.grp, t2.grp) as grp, +dense_rank() over (order by coalesce(t1.grp, t2.grp)) as dr +from t1 full join t2 on t1.a = t2.a; +grp dr +10 1 +10 1 +20 2 +20 2 +30 3 +30 3 +select coalesce(t1grp, t2grp) as grp, +dense_rank() over (order by coalesce(t1grp, t2grp)) as dr +from (select t1.a as t1a, t1.grp as t1grp, t1.val as t1val, +t2.a as t2a, t2.grp as t2grp, t2.val as t2val +from t1 left join t2 on t1.a = t2.a +union +select t1.a, t1.grp, t1.val, t2.a, t2.grp, t2.val +from t1 right join t2 on t1.a = t2.a) u; +grp dr +10 1 +10 1 +20 2 +20 2 +30 3 +30 3 +# LEAD() and LAG() over FULL JOIN. 
+select coalesce(t1.a, t2.a) as a, +lag(coalesce(t1.a, t2.a)) +over (order by coalesce(t1.a, t2.a)) as prev_a, +lead(coalesce(t1.a, t2.a)) +over (order by coalesce(t1.a, t2.a)) as next_a +from t1 full join t2 on t1.a = t2.a +order by a; +a prev_a next_a +1 NULL 2 +2 1 3 +3 2 4 +4 3 5 +5 4 6 +6 5 NULL +select coalesce(t1a, t2a) as a, +lag(coalesce(t1a, t2a)) +over (order by coalesce(t1a, t2a)) as prev_a, +lead(coalesce(t1a, t2a)) +over (order by coalesce(t1a, t2a)) as next_a +from (select t1.a as t1a, t1.grp as t1grp, t1.val as t1val, +t2.a as t2a, t2.grp as t2grp, t2.val as t2val +from t1 left join t2 on t1.a = t2.a +union +select t1.a, t1.grp, t1.val, t2.a, t2.grp, t2.val +from t1 right join t2 on t1.a = t2.a) u +order by a; +a prev_a next_a +1 NULL 2 +2 1 3 +3 2 4 +4 3 5 +5 4 6 +6 5 NULL +# SUM() window with rows-BETWEEN frame over FULL JOIN. +select coalesce(t1.a, t2.a) as a, +coalesce(t1.val, 0) + coalesce(t2.val, 0) as v, +sum(coalesce(t1.val, 0) + coalesce(t2.val, 0)) +over (order by coalesce(t1.a, t2.a) +rows between 1 preceding and 1 following) as window_sum +from t1 full join t2 on t1.a = t2.a +order by a; +a v window_sum +1 100 300 +2 200 3600 +3 3300 7900 +4 4400 12700 +5 5000 15400 +6 6000 11000 +select coalesce(t1a, t2a) as a, +coalesce(t1val, 0) + coalesce(t2val, 0) as v, +sum(coalesce(t1val, 0) + coalesce(t2val, 0)) +over (order by coalesce(t1a, t2a) +rows between 1 preceding and 1 following) as window_sum +from (select t1.a as t1a, t1.grp as t1grp, t1.val as t1val, +t2.a as t2a, t2.grp as t2grp, t2.val as t2val +from t1 left join t2 on t1.a = t2.a +union +select t1.a, t1.grp, t1.val, t2.a, t2.grp, t2.val +from t1 right join t2 on t1.a = t2.a) u +order by a; +a v window_sum +1 100 300 +2 200 3600 +3 3300 7900 +4 4400 12700 +5 5000 15400 +6 6000 11000 +# COUNT() window partitioned by group, ordered within group. 
+select coalesce(t1.grp, t2.grp) as grp, +coalesce(t1.a, t2.a) as a, +count(*) over (partition by coalesce(t1.grp, t2.grp) +order by coalesce(t1.a, t2.a) +rows between unbounded preceding and current row) as cnt +from t1 full join t2 on t1.a = t2.a; +grp a cnt +10 1 1 +10 2 2 +20 3 1 +20 4 2 +30 5 1 +30 6 2 +select coalesce(t1grp, t2grp) as grp, +coalesce(t1a, t2a) as a, +count(*) over (partition by coalesce(t1grp, t2grp) +order by coalesce(t1a, t2a) +rows between unbounded preceding and current row) as cnt +from (select t1.a as t1a, t1.grp as t1grp, t1.val as t1val, +t2.a as t2a, t2.grp as t2grp, t2.val as t2val +from t1 left join t2 on t1.a = t2.a +union +select t1.a, t1.grp, t1.val, t2.a, t2.grp, t2.val +from t1 right join t2 on t1.a = t2.a) u; +grp a cnt +10 1 1 +10 2 2 +20 3 1 +20 4 2 +30 5 1 +30 6 2 +# Window function combined with GROUP BY on the FULL JOIN. +# Exercises the AGGR_OP::end_send path after the null-complement +# pass completes (this previously asserted in create_sort_index). 
+select coalesce(t1.grp, t2.grp) as grp, +sum(coalesce(t1.val, 0) + coalesce(t2.val, 0)) as s, +rank() over (order by sum(coalesce(t1.val, 0) + coalesce(t2.val, 0))) as rk +from t1 full join t2 on t1.a = t2.a +group by coalesce(t1.grp, t2.grp); +grp s rk +10 300 1 +20 7700 2 +30 11000 3 +select coalesce(t1grp, t2grp) as grp, +sum(coalesce(t1val, 0) + coalesce(t2val, 0)) as s, +rank() over (order by sum(coalesce(t1val, 0) + coalesce(t2val, 0))) as rk +from (select t1.a as t1a, t1.grp as t1grp, t1.val as t1val, +t2.a as t2a, t2.grp as t2grp, t2.val as t2val +from t1 left join t2 on t1.a = t2.a +union +select t1.a, t1.grp, t1.val, t2.a, t2.grp, t2.val +from t1 right join t2 on t1.a = t2.a) u +group by coalesce(t1grp, t2grp); +grp s rk +10 300 1 +20 7700 2 +30 11000 3 +drop table t1, t2; +# ======================================================== +# Section 10: CTEs +# ======================================================== +create table t1 (id int, val varchar(10)); +insert into t1 values (1,'a'), (2,'b'), (3,'c'); +create table t2 (id int, val varchar(10)); +insert into t2 values (2,'x'), (3,'y'), (4,'z'); +# Simple CTE wrapping a FULL JOIN. +with fj as ( +select t1.id as id1, t1.val as v1, t2.id as id2, t2.val as v2 +from t1 full join t2 on t1.id = t2.id +) +select * from fj; +id1 v1 id2 v2 +1 a NULL NULL +2 b 2 x +3 c 3 y +NULL NULL 4 z +select * from t1 left join t2 on t1.id = t2.id union select * from t1 right join t2 on t1.id = t2.id; +id val id val +1 a NULL NULL +2 b 2 x +3 c 3 y +NULL NULL 4 z +# CTE on the left side of a FULL JOIN. 
+with vals as (select id, val from t1 where id <= 2) +select v1.id as id1, v1.val as val1, t2.id as id2, t2.val as val2 +from vals v1 full join t2 on v1.id = t2.id; +id1 val1 id2 val2 +1 a NULL NULL +2 b 2 x +NULL NULL 3 y +NULL NULL 4 z +with vals as (select id, val from t1 where id <= 2) +select v1.id as id1, v1.val as val1, t2.id as id2, t2.val as val2 +from vals v1 left join t2 on v1.id = t2.id +union +select v1.id as id1, v1.val as val1, t2.id as id2, t2.val as val2 +from vals v1 right join t2 on v1.id = t2.id; +id1 val1 id2 val2 +1 a NULL NULL +2 b 2 x +NULL NULL 3 y +NULL NULL 4 z +# Recursive CTE used in a FULL JOIN. +with recursive seq as ( +select 1 as n +union all +select n + 1 from seq where n < 4 +) +select s1.n as n1, s2.id as n2 +from seq s1 full join t2 s2 on s1.n = s2.id; +n1 n2 +1 NULL +2 2 +3 3 +4 4 +with recursive seq as ( +select 1 as n +union all +select n + 1 from seq where n < 4 +) +select s1.n as n1, s2.id as n2 +from seq s1 left join t2 s2 on s1.n = s2.id +union +select s1.n as n1, s2.id as n2 +from seq s1 right join t2 s2 on s1.n = s2.id; +n1 n2 +1 NULL +2 2 +3 3 +4 4 +# CTE on the left side of a FULL JOIN with filtering. 
+with left_cte as (select * from t1 where id in (1,2)) +select l.id as lid, l.val as lval, t2.id as rid, t2.val as rval +from left_cte l full join t2 on l.id = t2.id; +lid lval rid rval +1 a NULL NULL +2 b 2 x +NULL NULL 3 y +NULL NULL 4 z +with left_cte as (select * from t1 where id in (1,2)) +select l.id as lid, l.val as lval, t2.id as rid, t2.val as rval +from left_cte l left join t2 on l.id = t2.id +union +select l.id as lid, l.val as lval, t2.id as rid, t2.val as rval +from left_cte l right join t2 on l.id = t2.id; +lid lval rid rval +1 a NULL NULL +2 b 2 x +NULL NULL 3 y +NULL NULL 4 z +drop table t1, t2; +# ======================================================== +# Section 11: Views over FULL JOIN +# ======================================================== +create table t1 (a int, b int); +insert into t1 values (1,10), (2,20), (3,30); +create table t2 (a int, b int); +insert into t2 values (2,200), (3,300), (4,400); +# Simple view over a FULL JOIN. +create view v_full as +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 full join t2 on t1.a = t2.a; +select * from v_full; +a1 b1 a2 b2 +1 10 NULL NULL +2 20 2 200 +3 30 3 300 +NULL NULL 4 400 +select * from t1 left join t2 on t1.a = t2.a union select * from t1 right join t2 on t1.a = t2.a; +a b a b +1 10 NULL NULL +2 20 2 200 +3 30 3 300 +NULL NULL 4 400 +# Query the view with additional filtering. +select * from v_full where a1 is not null and a2 is not null; +a1 b1 a2 b2 +2 20 2 200 +3 30 3 300 +select * from t1 inner join t2 on t1.a = t2.a; +a b a b +2 20 2 200 +3 30 3 300 +# View joined with another table via FULL JOIN. +create table t3 (a int, c varchar(10)); +insert into t3 values (1,'x'), (2,'y'), (4,'z'); +select v_full.a1, v_full.a2, t3.c +from v_full full join t3 on coalesce(v_full.a1, v_full.a2) = t3.a; +a1 a2 c +1 NULL x +2 2 y +3 3 NULL +NULL 4 z +# The LEFT JOIN is allowed (v_full is the outer side, so the +# FULL JOIN inside v_full is on the outer side of the LEFT JOIN). 
+# The RIGHT JOIN is equivalent to +# `t3 LEFT JOIN v_full`, which puts v_full (the FULL JOIN) on the +# inner side -- rejected. +select v_full.a1, v_full.a2, t3.c +from v_full left join t3 on coalesce(v_full.a1, v_full.a2) = t3.a; +a1 a2 c +1 NULL x +2 2 y +3 3 NULL +NULL 4 z +select v_full.a1, v_full.a2, t3.c +from v_full right join t3 on coalesce(v_full.a1, v_full.a2) = t3.a; +ERROR HY000: FULL JOIN is not allowed on the inner side of a LEFT or RIGHT JOIN +# View that filters the FULL JOIN result. +create view v_full_filtered as +select t1.a as a1, t2.a as a2 +from t1 full join t2 on t1.a = t2.a +where t1.a is not null; +select * from v_full_filtered; +a1 a2 +1 NULL +2 2 +3 3 +select t1.a as a1, t2.a as a2 from t1 left join t2 on t1.a = t2.a; +a1 a2 +1 NULL +2 2 +3 3 +drop view v_full, v_full_filtered; +drop table t1, t2, t3; +# ======================================================== +# Section 12: Prepared statements +# ======================================================== +create table t1 (a int, b varchar(10)); +insert into t1 values (1,'one'), (2,'two'), (3,'three'); +create table t2 (a int, b varchar(10)); +insert into t2 values (2,'TWO'), (3,'THREE'), (4,'FOUR'); +# Basic prepared statement with FULL JOIN. +prepare stmt1 from +'select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 + from t1 full join t2 on t1.a = t2.a'; +execute stmt1; +a1 b1 a2 b2 +1 one NULL NULL +2 two 2 TWO +3 three 3 THREE +NULL NULL 4 FOUR +select * from t1 left join t2 on t1.a = t2.a union select * from t1 right join t2 on t1.a = t2.a; +a b a b +1 one NULL NULL +2 two 2 TWO +3 three 3 THREE +NULL NULL 4 FOUR +# Re-execute to verify PS re-execution stability. +execute stmt1; +a1 b1 a2 b2 +1 one NULL NULL +2 two 2 TWO +3 three 3 THREE +NULL NULL 4 FOUR +deallocate prepare stmt1; +# Parameter in the ON clause. 
+prepare stmt2 from +'select t1.a as a1, t2.a as a2 + from t1 full join t2 on t1.a = t2.a and t1.a > ?'; +set @threshold = 1; +execute stmt2 using @threshold; +a1 a2 +1 NULL +2 2 +3 3 +NULL 4 +select t1.a as a1, t2.a as a2 +from t1 left join t2 on t1.a = t2.a and t1.a > 1 +union +select t1.a as a1, t2.a as a2 +from t1 right join t2 on t1.a = t2.a and t1.a > 1; +a1 a2 +1 NULL +2 2 +3 3 +NULL 4 +# Re-execute with a different parameter value. +set @threshold = 2; +execute stmt2 using @threshold; +a1 a2 +1 NULL +2 NULL +3 3 +NULL 2 +NULL 4 +select t1.a as a1, t2.a as a2 +from t1 left join t2 on t1.a = t2.a and t1.a > 2 +union +select t1.a as a1, t2.a as a2 +from t1 right join t2 on t1.a = t2.a and t1.a > 2; +a1 a2 +1 NULL +2 NULL +3 3 +NULL 2 +NULL 4 +deallocate prepare stmt2; +# Parameter in the WHERE clause. +prepare stmt3 from +'select t1.a, t2.a + from t1 full join t2 on t1.a = t2.a + where t1.a is not null or t2.a > ?'; +set @minval = 3; +execute stmt3 using @minval; +a a +1 NULL +2 2 +3 3 +NULL 4 +execute stmt3 using @minval; +a a +1 NULL +2 2 +3 3 +NULL 4 +deallocate prepare stmt3; +drop table t1, t2; +# ======================================================== +# Section 13: Stored procedures +# ======================================================== +create table t1 (a int, b varchar(20)); +insert into t1 values (1,'alpha'), (2,'beta'), (3,'gamma'); +create table t2 (a int, b varchar(20)); +insert into t2 values (2,'BETA'), (4,'DELTA'), (5,'EPSILON'); +# SP that performs a FULL JOIN. +create procedure sp_full_join() +begin +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 full join t2 on t1.a = t2.a; +end| +# SP with a FULL JOIN and a parameter. +create procedure sp_full_join_param(in min_a int) +begin +select t1.a as a1, t2.a as a2 +from t1 full join t2 on t1.a = t2.a +where coalesce(t1.a, t2.a) >= min_a; +end| +# SP using FULL JOIN with INSERT ... SELECT. 
+create procedure sp_full_join_insert() +begin +create temporary table t3 (a1 int, b1 varchar(20), a2 int, b2 varchar(20)); +insert into t3 +select t1.a, t1.b, t2.a, t2.b +from t1 full join t2 on t1.a = t2.a; +select * from t3; +drop temporary table t3; +end| +call sp_full_join(); +a1 b1 a2 b2 +1 alpha NULL NULL +2 beta 2 BETA +3 gamma NULL NULL +NULL NULL 4 DELTA +NULL NULL 5 EPSILON +select * from t1 left join t2 on t1.a = t2.a union select * from t1 right join t2 on t1.a = t2.a; +a b a b +1 alpha NULL NULL +2 beta 2 BETA +3 gamma NULL NULL +NULL NULL 4 DELTA +NULL NULL 5 EPSILON +call sp_full_join_param(3); +a1 a2 +3 NULL +NULL 4 +NULL 5 +select t1.a as a1, t2.a as a2 +from t1 left join t2 on t1.a = t2.a +where coalesce(t1.a, t2.a) >= 3 +union +select t1.a as a1, t2.a as a2 +from t1 right join t2 on t1.a = t2.a +where coalesce(t1.a, t2.a) >= 3; +a1 a2 +3 NULL +NULL 4 +NULL 5 +# Call the SPs twice to test re-execution. +call sp_full_join(); +a1 b1 a2 b2 +1 alpha NULL NULL +2 beta 2 BETA +3 gamma NULL NULL +NULL NULL 4 DELTA +NULL NULL 5 EPSILON +call sp_full_join_param(1); +a1 a2 +1 NULL +2 2 +3 NULL +NULL 4 +NULL 5 +call sp_full_join_insert(); +a1 b1 a2 b2 +1 alpha NULL NULL +2 beta 2 BETA +3 gamma NULL NULL +NULL NULL 4 DELTA +NULL NULL 5 EPSILON +drop procedure sp_full_join; +drop procedure sp_full_join_param; +drop procedure sp_full_join_insert; +drop table t1, t2; +# ======================================================== +# Section 14: Subqueries and semijoins with FULL JOIN +# ======================================================== +create table t1 (a int, b int); +insert into t1 values (1,10), (2,20), (3,30); +create table t2 (a int, b int); +insert into t2 values (2,200), (3,300), (4,400); +create table t3 (a int); +insert into t3 values (1), (3), (5); +# IN subquery (semijoin) on the result of a FULL JOIN. 
+select * from t1 full join t2 on t1.a = t2.a +where coalesce(t1.a, t2.a) in (select a from t3); +a b a b +1 10 NULL NULL +3 30 3 300 +select * from t1 left join t2 on t1.a = t2.a +where coalesce(t1.a, t2.a) in (select a from t3) +union +select * from t1 right join t2 on t1.a = t2.a +where coalesce(t1.a, t2.a) in (select a from t3); +a b a b +1 10 NULL NULL +3 30 3 300 +# EXISTS subquery filtering a FULL JOIN. +select * from t1 full join t2 on t1.a = t2.a +where exists (select 1 from t3 where t3.a = coalesce(t1.a, t2.a)); +a b a b +1 10 NULL NULL +3 30 3 300 +select * from t1 left join t2 on t1.a = t2.a +where exists (select 1 from t3 where t3.a = coalesce(t1.a, t2.a)) +union +select * from t1 right join t2 on t1.a = t2.a +where exists (select 1 from t3 where t3.a = coalesce(t1.a, t2.a)); +a b a b +1 10 NULL NULL +3 30 3 300 +# NOT IN (anti-semijoin) with a FULL JOIN. +select * from t1 full join t2 on t1.a = t2.a +where coalesce(t1.a, t2.a) not in (select a from t3); +a b a b +2 20 2 200 +NULL NULL 4 400 +select * from t1 left join t2 on t1.a = t2.a +where coalesce(t1.a, t2.a) not in (select a from t3) +union +select * from t1 right join t2 on t1.a = t2.a +where coalesce(t1.a, t2.a) not in (select a from t3); +a b a b +2 20 2 200 +NULL NULL 4 400 +# FULL JOIN inside a subquery used as a semijoin predicate. +select * from t3 +where t3.a in ( +select coalesce(t1.a, t2.a) +from t1 full join t2 on t1.a = t2.a +); +a +1 +3 +select * from t3 +where t3.a in ( +select coalesce(t1.a, t2.a) +from t1 left join t2 on t1.a = t2.a +union +select coalesce(t1.a, t2.a) +from t1 right join t2 on t1.a = t2.a +); +a +1 +3 +# Correlated subquery with FULL JOIN. 
+select * from t3 +where exists ( +select 1 from t1 full join t2 on t1.a = t2.a +where coalesce(t1.a, t2.a) = t3.a +); +a +1 +3 +select * from t3 +where exists ( +select 1 from t1 left join t2 on t1.a = t2.a +where coalesce(t1.a, t2.a) = t3.a +union +select 1 from t1 right join t2 on t1.a = t2.a +where coalesce(t1.a, t2.a) = t3.a +); +a +1 +3 +drop table t1, t2, t3; +# ======================================================== +# Section 15: Indexed access (PK, secondary, composite, unique) +# +# Exercises JT_EQ_REF / JT_REF access paths on the right +# side of a FULL JOIN, including NULLable unique keys. +# ======================================================== +# Primary key join (JT_EQ_REF). +create table t1 (id int primary key, val varchar(10)); +insert into t1 values (1,'a'), (2,'b'), (3,'c'); +create table t2 (id int primary key, val varchar(10)); +insert into t2 values (2,'x'), (3,'y'), (4,'z'); +select t1.id as id1, t1.val as v1, t2.id as id2, t2.val as v2 +from t1 full join t2 on t1.id = t2.id; +id1 v1 id2 v2 +1 a NULL NULL +2 b 2 x +3 c 3 y +NULL NULL 4 z +select t1.id as id1, t1.val as v1, t2.id as id2, t2.val as v2 +from t1 left join t2 on t1.id = t2.id +union +select t1.id as id1, t1.val as v1, t2.id as id2, t2.val as v2 +from t1 right join t2 on t1.id = t2.id; +id1 v1 id2 v2 +1 a NULL NULL +2 b 2 x +3 c 3 y +NULL NULL 4 z +# PK join with WHERE filter. +select t1.id as id1, t2.id as id2 +from t1 full join t2 on t1.id = t2.id +where coalesce(t1.id, t2.id) > 2; +id1 id2 +3 3 +NULL 4 +select t1.id as id1, t2.id as id2 +from t1 left join t2 on t1.id = t2.id +where coalesce(t1.id, t2.id) > 2 +union +select t1.id as id1, t2.id as id2 +from t1 right join t2 on t1.id = t2.id +where coalesce(t1.id, t2.id) > 2; +id1 id2 +3 3 +NULL 4 +# PK join with aggregate. 
+select count(*) from t1 full join t2 on t1.id = t2.id; +count(*) +4 +select count(*) from (select t1.id as id1, t1.val as v1, t2.id as id2, t2.val as v2 +from t1 left join t2 on t1.id = t2.id +union +select t1.id, t1.val, t2.id, t2.val +from t1 right join t2 on t1.id = t2.id) dt; +count(*) +4 +drop table t1, t2; +# Secondary index join (JT_REF) with duplicates. +create table t1 (id int, grp int, val varchar(10), key(grp)); +insert into t1 values (1,10,'a'), (2,20,'b'), (3,20,'c'), (4,30,'d'); +create table t2 (id int, grp int, val varchar(10), key(grp)); +insert into t2 values (5,20,'x'), (6,30,'y'), (7,30,'z'), (8,40,'w'); +select t1.id as id1, t1.grp as g1, t2.id as id2, t2.grp as g2 +from t1 full join t2 on t1.grp = t2.grp; +id1 g1 id2 g2 +1 10 NULL NULL +2 20 5 20 +3 20 5 20 +4 30 6 30 +4 30 7 30 +NULL NULL 8 40 +select t1.id as id1, t1.grp as g1, t2.id as id2, t2.grp as g2 +from t1 left join t2 on t1.grp = t2.grp +union +select t1.id as id1, t1.grp as g1, t2.id as id2, t2.grp as g2 +from t1 right join t2 on t1.grp = t2.grp; +id1 g1 id2 g2 +1 10 NULL NULL +2 20 5 20 +3 20 5 20 +4 30 6 30 +4 30 7 30 +NULL NULL 8 40 +select count(*) from t1 full join t2 on t1.grp = t2.grp; +count(*) +6 +select count(*) from (select t1.id as id1, t1.grp as g1, t1.val as v1, +t2.id as id2, t2.grp as g2, t2.val as v2 +from t1 left join t2 on t1.grp = t2.grp +union +select t1.id, t1.grp, t1.val, t2.id, t2.grp, t2.val +from t1 right join t2 on t1.grp = t2.grp) dt; +count(*) +6 +drop table t1, t2; +# Composite index join. 
+create table t1 (a int, b int, val varchar(10), primary key(a, b)); +insert into t1 values (1,1,'p'), (1,2,'q'), (2,1,'r'); +create table t2 (a int, b int, val varchar(10), primary key(a, b)); +insert into t2 values (1,2,'s'), (2,1,'t'), (2,2,'u'); +select t1.a as a1, t1.b as b1, t1.val as v1, +t2.a as a2, t2.b as b2, t2.val as v2 +from t1 full join t2 on t1.a = t2.a and t1.b = t2.b; +a1 b1 v1 a2 b2 v2 +1 1 p NULL NULL NULL +1 2 q 1 2 s +2 1 r 2 1 t +NULL NULL NULL 2 2 u +select t1.a as a1, t1.b as b1, t1.val as v1, +t2.a as a2, t2.b as b2, t2.val as v2 +from t1 left join t2 on t1.a = t2.a and t1.b = t2.b +union +select t1.a as a1, t1.b as b1, t1.val as v1, +t2.a as a2, t2.b as b2, t2.val as v2 +from t1 right join t2 on t1.a = t2.a and t1.b = t2.b; +a1 b1 v1 a2 b2 v2 +1 1 p NULL NULL NULL +1 2 q 1 2 s +2 1 r 2 1 t +NULL NULL NULL 2 2 u +# Composite index: join on partial key (first column only). +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 full join t2 on t1.a = t2.a; +a1 b1 a2 b2 +1 1 1 2 +1 2 1 2 +2 1 2 1 +2 1 2 2 +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 left join t2 on t1.a = t2.a +union +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 right join t2 on t1.a = t2.a; +a1 b1 a2 b2 +1 1 1 2 +1 2 1 2 +2 1 2 1 +2 1 2 2 +drop table t1, t2; +# UNIQUE index with NULLable column. 
+create table t1 (id int auto_increment primary key, val int, unique key(val)); +insert into t1 (val) values (1), (NULL), (3), (NULL); +create table t2 (id int auto_increment primary key, val int, unique key(val)); +insert into t2 (val) values (2), (3), (NULL), (NULL); +select t1.val as v1, t2.val as v2 +from t1 full join t2 on t1.val = t2.val; +v1 v2 +1 NULL +3 3 +NULL 2 +NULL NULL +NULL NULL +NULL NULL +NULL NULL +select t1.val as v1, t2.val as v2 +from t1 left join t2 on t1.val = t2.val +union +select t1.val as v1, t2.val as v2 +from t1 right join t2 on t1.val = t2.val; +v1 v2 +1 NULL +3 3 +NULL 2 +NULL NULL +drop table t1, t2; +# Vector index on the right side of FULL JOIN. The vector index +# does not affect FULL JOIN semantics, but the right table now +# carries an hlindex object; this confirms the rowid weedout +# and null-complement pass still work when it does. Three rows +# are expected: one left-only (1, NULL, NULL), one match +# (2, 2, [1,0]), one right-only (NULL, 3, [2,0]). +create table t1 (a int); +insert into t1 values (1), (2); +create table t2 (id int, v vector(2) not null, vector(v)); +insert into t2 values (2, vec_fromtext('[1,0]')), +(3, vec_fromtext('[2,0]')); +select t1.a as a1, t2.id as id2, vec_totext(t2.v) as v2 +from t1 full join t2 on t1.a = t2.id; +a1 id2 v2 +1 NULL NULL +2 2 [1,0] +NULL 3 [2,0] +select t1.a as a1, t2.id as id2, vec_totext(t2.v) as v2 +from t1 left join t2 on t1.a = t2.id +union +select t1.a as a1, t2.id as id2, vec_totext(t2.v) as v2 +from t1 right join t2 on t1.a = t2.id; +a1 id2 v2 +1 NULL NULL +2 2 [1,0] +NULL 3 [2,0] +drop table t1, t2; +# Vector indexes on both sides of FULL JOIN. Three rows are +# expected: one left-only (1, [1,0], NULL, NULL), one match +# (2, [2,0], 2, [3,0]), one right-only (NULL, NULL, 3, [4,0]). 
+create table t1 (id int, v vector(2) not null, vector(v)); +insert into t1 values (1, vec_fromtext('[1,0]')), +(2, vec_fromtext('[2,0]')); +create table t2 (id int, v vector(2) not null, vector(v)); +insert into t2 values (2, vec_fromtext('[3,0]')), +(3, vec_fromtext('[4,0]')); +select t1.id as id1, vec_totext(t1.v) as v1, +t2.id as id2, vec_totext(t2.v) as v2 +from t1 full join t2 on t1.id = t2.id; +id1 v1 id2 v2 +1 [1,0] NULL NULL +2 [2,0] 2 [3,0] +NULL NULL 3 [4,0] +select t1.id as id1, vec_totext(t1.v) as v1, +t2.id as id2, vec_totext(t2.v) as v2 +from t1 left join t2 on t1.id = t2.id +union +select t1.id as id1, vec_totext(t1.v) as v1, +t2.id as id2, vec_totext(t2.v) as v2 +from t1 right join t2 on t1.id = t2.id; +id1 v1 id2 v2 +1 [1,0] NULL NULL +2 [2,0] 2 [3,0] +NULL NULL 3 [4,0] +drop table t1, t2; +# There were rows missing because not all null-complements were +# generated. +create table t10 (a int, b int, index(a)); +create table t11 (a int, b int, index(a)); +insert into t10 select seq, seq from seq_1_to_10; +insert into t11 select seq*2, seq*2 from seq_1_to_10; +create table t20 (a varchar(100), b varchar(100), index(a)); +create table t21 (a varchar(100), b varchar(100), index(a)); +insert into t20 values('match','match'), ('no-match-t20', 'no-match-t20'); +insert into t21 values('match','match'), ('no-match-t21', 'no-match-t21'); +select * from (t10 full outer join t11 on t10.a=t11.a) , (t20 full outer join t21 on t20.a=t21.a); +a b a b a b a b +1 1 NULL NULL NULL NULL no-match-t21 no-match-t21 +1 1 NULL NULL match match match match +1 1 NULL NULL no-match-t20 no-match-t20 NULL NULL +10 10 10 10 NULL NULL no-match-t21 no-match-t21 +10 10 10 10 match match match match +10 10 10 10 no-match-t20 no-match-t20 NULL NULL +2 2 2 2 NULL NULL no-match-t21 no-match-t21 +2 2 2 2 match match match match +2 2 2 2 no-match-t20 no-match-t20 NULL NULL +3 3 NULL NULL NULL NULL no-match-t21 no-match-t21 +3 3 NULL NULL match match match match +3 3 NULL NULL 
no-match-t20 no-match-t20 NULL NULL +4 4 4 4 NULL NULL no-match-t21 no-match-t21 +4 4 4 4 match match match match +4 4 4 4 no-match-t20 no-match-t20 NULL NULL +5 5 NULL NULL NULL NULL no-match-t21 no-match-t21 +5 5 NULL NULL match match match match +5 5 NULL NULL no-match-t20 no-match-t20 NULL NULL +6 6 6 6 NULL NULL no-match-t21 no-match-t21 +6 6 6 6 match match match match +6 6 6 6 no-match-t20 no-match-t20 NULL NULL +7 7 NULL NULL NULL NULL no-match-t21 no-match-t21 +7 7 NULL NULL match match match match +7 7 NULL NULL no-match-t20 no-match-t20 NULL NULL +8 8 8 8 NULL NULL no-match-t21 no-match-t21 +8 8 8 8 match match match match +8 8 8 8 no-match-t20 no-match-t20 NULL NULL +9 9 NULL NULL NULL NULL no-match-t21 no-match-t21 +9 9 NULL NULL match match match match +9 9 NULL NULL no-match-t20 no-match-t20 NULL NULL +NULL NULL 12 12 NULL NULL no-match-t21 no-match-t21 +NULL NULL 12 12 match match match match +NULL NULL 12 12 no-match-t20 no-match-t20 NULL NULL +NULL NULL 14 14 NULL NULL no-match-t21 no-match-t21 +NULL NULL 14 14 match match match match +NULL NULL 14 14 no-match-t20 no-match-t20 NULL NULL +NULL NULL 16 16 NULL NULL no-match-t21 no-match-t21 +NULL NULL 16 16 match match match match +NULL NULL 16 16 no-match-t20 no-match-t20 NULL NULL +NULL NULL 18 18 NULL NULL no-match-t21 no-match-t21 +NULL NULL 18 18 match match match match +NULL NULL 18 18 no-match-t20 no-match-t20 NULL NULL +NULL NULL 20 20 NULL NULL no-match-t21 no-match-t21 +NULL NULL 20 20 match match match match +NULL NULL 20 20 no-match-t20 no-match-t20 NULL NULL +# Join order under straight_join with FULL JOIN +create table two (c int); +insert into two values (1),(2); +explain select * from two, (t10 full outer join t11 on t10.a=t11.a); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE two ALL NULL NULL NULL NULL 2 +1 SIMPLE t10 ALL NULL NULL NULL NULL 10 +1 SIMPLE t11 ref a a 5 test.t10.a 1 Using where +select * from two, (t10 full outer join t11 on 
t10.a=t11.a); +c a b a b +1 1 1 NULL NULL +1 10 10 10 10 +1 2 2 2 2 +1 3 3 NULL NULL +1 4 4 4 4 +1 5 5 NULL NULL +1 6 6 6 6 +1 7 7 NULL NULL +1 8 8 8 8 +1 9 9 NULL NULL +1 NULL NULL 12 12 +1 NULL NULL 14 14 +1 NULL NULL 16 16 +1 NULL NULL 18 18 +1 NULL NULL 20 20 +2 1 1 NULL NULL +2 10 10 10 10 +2 2 2 2 2 +2 3 3 NULL NULL +2 4 4 4 4 +2 5 5 NULL NULL +2 6 6 6 6 +2 7 7 NULL NULL +2 8 8 8 8 +2 9 9 NULL NULL +2 NULL NULL 12 12 +2 NULL NULL 14 14 +2 NULL NULL 16 16 +2 NULL NULL 18 18 +2 NULL NULL 20 20 +explain select * from (t10 full outer join t11 on t10.a=t11.a), two; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE two ALL NULL NULL NULL NULL 2 +1 SIMPLE t10 ALL NULL NULL NULL NULL 10 +1 SIMPLE t11 ref a a 5 test.t10.a 1 Using where +select * from (t10 full outer join t11 on t10.a=t11.a), two; +a b a b c +1 1 NULL NULL 1 +1 1 NULL NULL 2 +10 10 10 10 1 +10 10 10 10 2 +2 2 2 2 1 +2 2 2 2 2 +3 3 NULL NULL 1 +3 3 NULL NULL 2 +4 4 4 4 1 +4 4 4 4 2 +5 5 NULL NULL 1 +5 5 NULL NULL 2 +6 6 6 6 1 +6 6 6 6 2 +7 7 NULL NULL 1 +7 7 NULL NULL 2 +8 8 8 8 1 +8 8 8 8 2 +9 9 NULL NULL 1 +9 9 NULL NULL 2 +NULL NULL 12 12 1 +NULL NULL 12 12 2 +NULL NULL 14 14 1 +NULL NULL 14 14 2 +NULL NULL 16 16 1 +NULL NULL 16 16 2 +NULL NULL 18 18 1 +NULL NULL 18 18 2 +NULL NULL 20 20 1 +NULL NULL 20 20 2 +explain select straight_join * from two, (t10 full outer join t11 on t10.a=t11.a); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE two ALL NULL NULL NULL NULL 2 +1 SIMPLE t10 ALL NULL NULL NULL NULL 10 +1 SIMPLE t11 ref a a 5 test.t10.a 1 Using where +select straight_join * from two, (t10 full outer join t11 on t10.a=t11.a); +c a b a b +1 1 1 NULL NULL +1 10 10 10 10 +1 2 2 2 2 +1 3 3 NULL NULL +1 4 4 4 4 +1 5 5 NULL NULL +1 6 6 6 6 +1 7 7 NULL NULL +1 8 8 8 8 +1 9 9 NULL NULL +1 NULL NULL 12 12 +1 NULL NULL 14 14 +1 NULL NULL 16 16 +1 NULL NULL 18 18 +1 NULL NULL 20 20 +2 1 1 NULL NULL +2 10 10 10 10 +2 2 2 2 2 +2 3 3 NULL NULL +2 4 4 4 
4 +2 5 5 NULL NULL +2 6 6 6 6 +2 7 7 NULL NULL +2 8 8 8 8 +2 9 9 NULL NULL +2 NULL NULL 12 12 +2 NULL NULL 14 14 +2 NULL NULL 16 16 +2 NULL NULL 18 18 +2 NULL NULL 20 20 +explain select straight_join * from (t10 full outer join t11 on t10.a=t11.a), two; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10 ALL NULL NULL NULL NULL 10 +1 SIMPLE t11 ref a a 5 test.t10.a 1 Using where +1 SIMPLE two ALL NULL NULL NULL NULL 2 Using join buffer (flat, BNL join) +select straight_join * from (t10 full outer join t11 on t10.a=t11.a), two; +a b a b c +1 1 NULL NULL 1 +1 1 NULL NULL 2 +10 10 10 10 1 +10 10 10 10 2 +2 2 2 2 1 +2 2 2 2 2 +3 3 NULL NULL 1 +3 3 NULL NULL 2 +4 4 4 4 1 +4 4 4 4 2 +5 5 NULL NULL 1 +5 5 NULL NULL 2 +6 6 6 6 1 +6 6 6 6 2 +7 7 NULL NULL 1 +7 7 NULL NULL 2 +8 8 8 8 1 +8 8 8 8 2 +9 9 NULL NULL 1 +9 9 NULL NULL 2 +NULL NULL 12 12 1 +NULL NULL 12 12 2 +NULL NULL 14 14 1 +NULL NULL 14 14 2 +NULL NULL 16 16 1 +NULL NULL 16 16 2 +NULL NULL 18 18 1 +NULL NULL 18 18 2 +NULL NULL 20 20 1 +NULL NULL 20 20 2 +# FULL JOIN tables must be contiguous but no longer must appear +# at the start of the join order. 
+create table ten(a int primary key); +insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table one_k(a int primary key); +insert into one_k select a.a + b.a* 10 + c.a * 100 from ten a, ten b, ten c; +create table t1 ( +a int, +b int +); +create table t2 ( +a int, +b int +); +insert into t1 select a, a from one_k where a between 1 and 100; +insert into t2 select a, a from one_k where a between 95 and 195; +select * from t1 full outer join t2 on (t1.a=t2.a and t1.b>90 and t2.b<110); +a b a b +1 1 NULL NULL +10 10 NULL NULL +100 100 100 100 +11 11 NULL NULL +12 12 NULL NULL +13 13 NULL NULL +14 14 NULL NULL +15 15 NULL NULL +16 16 NULL NULL +17 17 NULL NULL +18 18 NULL NULL +19 19 NULL NULL +2 2 NULL NULL +20 20 NULL NULL +21 21 NULL NULL +22 22 NULL NULL +23 23 NULL NULL +24 24 NULL NULL +25 25 NULL NULL +26 26 NULL NULL +27 27 NULL NULL +28 28 NULL NULL +29 29 NULL NULL +3 3 NULL NULL +30 30 NULL NULL +31 31 NULL NULL +32 32 NULL NULL +33 33 NULL NULL +34 34 NULL NULL +35 35 NULL NULL +36 36 NULL NULL +37 37 NULL NULL +38 38 NULL NULL +39 39 NULL NULL +4 4 NULL NULL +40 40 NULL NULL +41 41 NULL NULL +42 42 NULL NULL +43 43 NULL NULL +44 44 NULL NULL +45 45 NULL NULL +46 46 NULL NULL +47 47 NULL NULL +48 48 NULL NULL +49 49 NULL NULL +5 5 NULL NULL +50 50 NULL NULL +51 51 NULL NULL +52 52 NULL NULL +53 53 NULL NULL +54 54 NULL NULL +55 55 NULL NULL +56 56 NULL NULL +57 57 NULL NULL +58 58 NULL NULL +59 59 NULL NULL +6 6 NULL NULL +60 60 NULL NULL +61 61 NULL NULL +62 62 NULL NULL +63 63 NULL NULL +64 64 NULL NULL +65 65 NULL NULL +66 66 NULL NULL +67 67 NULL NULL +68 68 NULL NULL +69 69 NULL NULL +7 7 NULL NULL +70 70 NULL NULL +71 71 NULL NULL +72 72 NULL NULL +73 73 NULL NULL +74 74 NULL NULL +75 75 NULL NULL +76 76 NULL NULL +77 77 NULL NULL +78 78 NULL NULL +79 79 NULL NULL +8 8 NULL NULL +80 80 NULL NULL +81 81 NULL NULL +82 82 NULL NULL +83 83 NULL NULL +84 84 NULL NULL +85 85 NULL NULL +86 86 NULL NULL +87 87 NULL NULL +88 88 NULL NULL 
+89 89 NULL NULL +9 9 NULL NULL +90 90 NULL NULL +91 91 NULL NULL +92 92 NULL NULL +93 93 NULL NULL +94 94 NULL NULL +95 95 95 95 +96 96 96 96 +97 97 97 97 +98 98 98 98 +99 99 99 99 +NULL NULL 101 101 +NULL NULL 102 102 +NULL NULL 103 103 +NULL NULL 104 104 +NULL NULL 105 105 +NULL NULL 106 106 +NULL NULL 107 107 +NULL NULL 108 108 +NULL NULL 109 109 +NULL NULL 110 110 +NULL NULL 111 111 +NULL NULL 112 112 +NULL NULL 113 113 +NULL NULL 114 114 +NULL NULL 115 115 +NULL NULL 116 116 +NULL NULL 117 117 +NULL NULL 118 118 +NULL NULL 119 119 +NULL NULL 120 120 +NULL NULL 121 121 +NULL NULL 122 122 +NULL NULL 123 123 +NULL NULL 124 124 +NULL NULL 125 125 +NULL NULL 126 126 +NULL NULL 127 127 +NULL NULL 128 128 +NULL NULL 129 129 +NULL NULL 130 130 +NULL NULL 131 131 +NULL NULL 132 132 +NULL NULL 133 133 +NULL NULL 134 134 +NULL NULL 135 135 +NULL NULL 136 136 +NULL NULL 137 137 +NULL NULL 138 138 +NULL NULL 139 139 +NULL NULL 140 140 +NULL NULL 141 141 +NULL NULL 142 142 +NULL NULL 143 143 +NULL NULL 144 144 +NULL NULL 145 145 +NULL NULL 146 146 +NULL NULL 147 147 +NULL NULL 148 148 +NULL NULL 149 149 +NULL NULL 150 150 +NULL NULL 151 151 +NULL NULL 152 152 +NULL NULL 153 153 +NULL NULL 154 154 +NULL NULL 155 155 +NULL NULL 156 156 +NULL NULL 157 157 +NULL NULL 158 158 +NULL NULL 159 159 +NULL NULL 160 160 +NULL NULL 161 161 +NULL NULL 162 162 +NULL NULL 163 163 +NULL NULL 164 164 +NULL NULL 165 165 +NULL NULL 166 166 +NULL NULL 167 167 +NULL NULL 168 168 +NULL NULL 169 169 +NULL NULL 170 170 +NULL NULL 171 171 +NULL NULL 172 172 +NULL NULL 173 173 +NULL NULL 174 174 +NULL NULL 175 175 +NULL NULL 176 176 +NULL NULL 177 177 +NULL NULL 178 178 +NULL NULL 179 179 +NULL NULL 180 180 +NULL NULL 181 181 +NULL NULL 182 182 +NULL NULL 183 183 +NULL NULL 184 184 +NULL NULL 185 185 +NULL NULL 186 186 +NULL NULL 187 187 +NULL NULL 188 188 +NULL NULL 189 189 +NULL NULL 190 190 +NULL NULL 191 191 +NULL NULL 192 192 +NULL NULL 193 193 +NULL NULL 194 194 +NULL NULL 195 195 +drop table 
t10, t11, t20, t21, two, ten, one_k, t1, t2; +# ======================================================== +# Section 16: Storage engines (MyISAM, Aria, mixed) +# +# The null-complement rescan must work regardless of +# underlying storage engine and across mixed-engine joins +# (different rowid formats in the weedout temp table). +# ======================================================== +# Both sides MyISAM. +create table t1 (a int, b varchar(10)) engine=MyISAM; +insert into t1 values (1,'a'), (2,'b'), (3,'c'); +create table t2 (a int, b varchar(10)) engine=MyISAM; +insert into t2 values (2,'x'), (3,'y'), (4,'z'); +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 full join t2 on t1.a = t2.a; +a1 b1 a2 b2 +1 a NULL NULL +2 b 2 x +3 c 3 y +NULL NULL 4 z +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 left join t2 on t1.a = t2.a +union +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 right join t2 on t1.a = t2.a; +a1 b1 a2 b2 +1 a NULL NULL +2 b 2 x +3 c 3 y +NULL NULL 4 z +select count(*) from t1 full join t2 on t1.a = t2.a; +count(*) +4 +select count(*) from (select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 left join t2 on t1.a = t2.a +union +select t1.a, t1.b, t2.a, t2.b +from t1 right join t2 on t1.a = t2.a) dt; +count(*) +4 +drop table t1, t2; +# Both sides Aria. 
+create table t1 (a int, b varchar(10)) engine=Aria; +insert into t1 values (1,'a'), (2,'b'), (3,'c'); +create table t2 (a int, b varchar(10)) engine=Aria; +insert into t2 values (2,'x'), (3,'y'), (4,'z'); +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 full join t2 on t1.a = t2.a; +a1 b1 a2 b2 +1 a NULL NULL +2 b 2 x +3 c 3 y +NULL NULL 4 z +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 left join t2 on t1.a = t2.a +union +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 right join t2 on t1.a = t2.a; +a1 b1 a2 b2 +1 a NULL NULL +2 b 2 x +3 c 3 y +NULL NULL 4 z +select count(*) from t1 full join t2 on t1.a = t2.a; +count(*) +4 +select count(*) from (select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 left join t2 on t1.a = t2.a +union +select t1.a, t1.b, t2.a, t2.b +from t1 right join t2 on t1.a = t2.a) dt; +count(*) +4 +drop table t1, t2; +# Mixed engines: InnoDB and MyISAM. +create table t1 (a int, b varchar(10)) engine=InnoDB; +insert into t1 values (1,'a'), (2,'b'), (3,'c'); +create table t2 (a int, b varchar(10)) engine=MyISAM; +insert into t2 values (2,'x'), (3,'y'), (4,'z'); +# InnoDB on left, MyISAM on right. +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 full join t2 on t1.a = t2.a; +a1 b1 a2 b2 +1 a NULL NULL +2 b 2 x +3 c 3 y +NULL NULL 4 z +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 left join t2 on t1.a = t2.a +union +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 right join t2 on t1.a = t2.a; +a1 b1 a2 b2 +1 a NULL NULL +2 b 2 x +3 c 3 y +NULL NULL 4 z +# MyISAM on left, InnoDB on right. 
+select t2.a as a1, t2.b as b1, t1.a as a2, t1.b as b2 +from t2 full join t1 on t2.a = t1.a; +a1 b1 a2 b2 +2 x 2 b +3 y 3 c +4 z NULL NULL +NULL NULL 1 a +select t2.a as a1, t2.b as b1, t1.a as a2, t1.b as b2 +from t2 left join t1 on t2.a = t1.a +union +select t2.a as a1, t2.b as b1, t1.a as a2, t1.b as b2 +from t2 right join t1 on t2.a = t1.a; +a1 b1 a2 b2 +2 x 2 b +3 y 3 c +4 z NULL NULL +NULL NULL 1 a +drop table t1, t2; +# Mixed engines: InnoDB and Aria. +create table t1 (a int, b varchar(10)) engine=InnoDB; +insert into t1 values (1,'a'), (2,'b'), (3,'c'); +create table t2 (a int, b varchar(10)) engine=Aria; +insert into t2 values (2,'x'), (3,'y'), (4,'z'); +# InnoDB on left, Aria on right. +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 full join t2 on t1.a = t2.a; +a1 b1 a2 b2 +1 a NULL NULL +2 b 2 x +3 c 3 y +NULL NULL 4 z +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 left join t2 on t1.a = t2.a +union +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 right join t2 on t1.a = t2.a; +a1 b1 a2 b2 +1 a NULL NULL +2 b 2 x +3 c 3 y +NULL NULL 4 z +# Aria on left, InnoDB on right. +select t2.a as a1, t2.b as b1, t1.a as a2, t1.b as b2 +from t2 full join t1 on t2.a = t1.a; +a1 b1 a2 b2 +2 x 2 b +3 y 3 c +4 z NULL NULL +NULL NULL 1 a +select t2.a as a1, t2.b as b1, t1.a as a2, t1.b as b2 +from t2 left join t1 on t2.a = t1.a +union +select t2.a as a1, t2.b as b1, t1.a as a2, t1.b as b2 +from t2 right join t1 on t2.a = t1.a; +a1 b1 a2 b2 +2 x 2 b +3 y 3 c +4 z NULL NULL +NULL NULL 1 a +drop table t1, t2; +# Three-way mixed engine FULL JOIN. +# No UNION companion: see the note above the chained-FULL-JOIN +# section — the LEFT/RIGHT permutation UNION over-approximates +# for chained FULL JOINs. 
+create table t1 (a int) engine=InnoDB; +insert into t1 values (1), (2), (3); +create table t2 (a int) engine=MyISAM; +insert into t2 values (2), (3), (4); +create table t3 (a int) engine=Aria; +insert into t3 values (3), (4), (5); +select t1.a as a1, t2.a as a2, t3.a as a3 +from t1 +full join t2 on t1.a = t2.a +full join t3 on t2.a = t3.a; +a1 a2 a3 +1 NULL NULL +2 2 NULL +3 3 3 +NULL 4 4 +NULL NULL 5 +drop table t1, t2, t3; +# Indexed mixed-engine FULL JOIN chain. +create table t1 (id int primary key, val varchar(10)) engine=InnoDB; +insert into t1 values (1,'a'), (2,'b'), (3,'c'); +create table t2 (id int primary key, val varchar(10)) engine=MyISAM; +insert into t2 values (2,'x'), (3,'y'), (4,'z'); +create table t3 (id int primary key, val varchar(10)) engine=Aria; +insert into t3 values (3,'p'), (4,'q'), (5,'r'); +# InnoDB PK full join MyISAM PK. +select t1.id as id1, t1.val as v1, t2.id as id2, t2.val as v2 +from t1 full join t2 on t1.id = t2.id; +id1 v1 id2 v2 +1 a NULL NULL +2 b 2 x +3 c 3 y +NULL NULL 4 z +select t1.id as id1, t1.val as v1, t2.id as id2, t2.val as v2 +from t1 left join t2 on t1.id = t2.id +union +select t1.id as id1, t1.val as v1, t2.id as id2, t2.val as v2 +from t1 right join t2 on t1.id = t2.id; +id1 v1 id2 v2 +1 a NULL NULL +2 b 2 x +3 c 3 y +NULL NULL 4 z +# MyISAM PK full join Aria PK. +select t2.id as id1, t2.val as v1, t3.id as id2, t3.val as v2 +from t2 full join t3 on t2.id = t3.id; +id1 v1 id2 v2 +2 x NULL NULL +3 y 3 p +4 z 4 q +NULL NULL 5 r +select t2.id as id1, t2.val as v1, t3.id as id2, t3.val as v2 +from t2 left join t3 on t2.id = t3.id +union +select t2.id as id1, t2.val as v1, t3.id as id2, t3.val as v2 +from t2 right join t3 on t2.id = t3.id; +id1 v1 id2 v2 +2 x NULL NULL +3 y 3 p +4 z 4 q +NULL NULL 5 r +# Three-way: InnoDB PK, MyISAM PK, Aria PK. +# No UNION companion: see the note above the chained-FULL-JOIN +# section — the LEFT/RIGHT permutation UNION over-approximates +# for chained FULL JOINs. 
+select t1.id as id1, t2.id as id2, t3.id as id3 +from t1 +full join t2 on t1.id = t2.id +full join t3 on t2.id = t3.id; +id1 id2 id3 +1 NULL NULL +2 2 NULL +3 3 3 +NULL 4 4 +NULL NULL 5 +# Indexed mixed-engine with WHERE filter. +select t1.id as id1, t2.id as id2 +from t1 full join t2 on t1.id = t2.id +where coalesce(t1.id, t2.id) between 2 and 3; +id1 id2 +2 2 +3 3 +select t1.id as id1, t2.id as id2 +from t1 left join t2 on t1.id = t2.id +where coalesce(t1.id, t2.id) between 2 and 3 +union +select t1.id as id1, t2.id as id2 +from t1 right join t2 on t1.id = t2.id +where coalesce(t1.id, t2.id) between 2 and 3; +id1 id2 +2 2 +3 3 +# Indexed mixed-engine with aggregate. +select count(*) from t1 full join t2 on t1.id = t2.id; +count(*) +4 +select count(*) from (select t1.id as id1, t1.val as v1, t2.id as id2, t2.val as v2 +from t1 left join t2 on t1.id = t2.id +union +select t1.id, t1.val, t2.id, t2.val +from t1 right join t2 on t1.id = t2.id) dt; +count(*) +4 +drop table t1, t2, t3; +# ======================================================== +# Section 17: Complex feature combinations +# +# Queries that stress FULL JOIN alongside other features at +# the same time: semi-joins with ranges, FULL-JOIN-in-FULL- +# JOIN inside a semi-join, CTEs combined with window +# functions, and HAVING over aggregates with FULL JOIN. +# ======================================================== +create table t3o (x int); +insert into t3o values (1), (2), (3), (4), (5), (6), (7); +create table t3l (k int, tag varchar(8)); +insert into t3l values (1,'L1'), (2,'L2'), (3,'L3'), (4,'L4'); +create table t3r (k int, tag varchar(8)); +insert into t3r values (3,'R3'), (4,'R4'), (5,'R5'), (6,'R6'); +create table t3m (k int, tag varchar(8)); +insert into t3m values (2,'M2'), (5,'M5'), (7,'M7'); +# 17.1 FULL JOIN inside a materialized semijoin. 
+# Exercises get_allowed_nj_tables() with emb_sjm_nest set: +# the FULL JOIN adjacency check must apply inside the SJM +# nest so that siblings cannot interleave between partners. +set @save_optimizer_switch= @@optimizer_switch; +set optimizer_switch='materialization=on,semijoin=on'; +select * from t3o +where t3o.x in ( +select coalesce(t3l.k, t3r.k) from t3l full join t3r on t3l.k = t3r.k +); +x +1 +2 +3 +4 +5 +6 +select * from t3o +where t3o.x in ( +select coalesce(t3l.k, t3r.k) from t3l left join t3r on t3l.k = t3r.k +union +select coalesce(t3l.k, t3r.k) from t3l right join t3r on t3l.k = t3r.k +); +x +1 +2 +3 +4 +5 +6 +# 17.2 FULL JOIN in semijoin with a range predicate on the +# FULL JOIN's coalesced key. +select * from t3o +where t3o.x in ( +select coalesce(t3l.k, t3r.k) c +from t3l full join t3r on t3l.k = t3r.k +where coalesce(t3l.k, t3r.k) between 2 and 5 +); +x +2 +3 +4 +5 +select * from t3o +where t3o.x in ( +select coalesce(t3l.k, t3r.k) c +from t3l left join t3r on t3l.k = t3r.k +where coalesce(t3l.k, t3r.k) between 2 and 5 +union +select coalesce(t3l.k, t3r.k) c +from t3l right join t3r on t3l.k = t3r.k +where coalesce(t3l.k, t3r.k) between 2 and 5 +); +x +2 +3 +4 +5 +# 17.3 Nested FULL JOINs (a FULL JOIN of a FULL JOIN) inside +# a semijoin. 
+select * from t3o +where t3o.x in ( +select coalesce(coalesce(t3l.k, t3r.k), t3m.k) +from (t3l full join t3r on t3l.k = t3r.k) +full join t3m on coalesce(t3l.k, t3r.k) = t3m.k +); +x +1 +2 +3 +4 +5 +6 +7 +select * from t3o +where t3o.x in ( +select coalesce(coalesce(t3l.k, t3r.k), t3m.k) +from (t3l left join t3r on t3l.k = t3r.k) +left join t3m on coalesce(t3l.k, t3r.k) = t3m.k +union +select coalesce(coalesce(t3l.k, t3r.k), t3m.k) +from (t3l right join t3r on t3l.k = t3r.k) +left join t3m on coalesce(t3l.k, t3r.k) = t3m.k +union +select coalesce(coalesce(t3l.k, t3r.k), t3m.k) +from (t3l left join t3r on t3l.k = t3r.k) +right join t3m on coalesce(t3l.k, t3r.k) = t3m.k +union +select coalesce(coalesce(t3l.k, t3r.k), t3m.k) +from (t3l right join t3r on t3l.k = t3r.k) +right join t3m on coalesce(t3l.k, t3r.k) = t3m.k +); +x +1 +2 +3 +4 +5 +6 +7 +set optimizer_switch= @save_optimizer_switch; +# 17.4 CTE that produces a FULL JOIN result, consumed by a +# window function in the outer query. +with fj as ( +select coalesce(t3l.k, t3r.k) as k, +t3l.tag as ltag, +t3r.tag as rtag +from t3l full join t3r on t3l.k = t3r.k +) +select k, ltag, rtag, +row_number() over (order by k) as rn, +count(*) over (order by k rows between unbounded preceding and current row) as running_cnt +from fj; +k ltag rtag rn running_cnt +1 L1 NULL 1 1 +2 L2 NULL 2 2 +3 L3 R3 3 3 +4 L4 R4 4 4 +5 NULL R5 5 5 +6 NULL R6 6 6 +with fj as ( +select coalesce(t3l.k, t3r.k) as k, t3l.tag as ltag, t3r.tag as rtag +from t3l left join t3r on t3l.k = t3r.k +union +select coalesce(t3l.k, t3r.k), t3l.tag, t3r.tag +from t3l right join t3r on t3l.k = t3r.k +) +select k, ltag, rtag, +row_number() over (order by k) as rn, +count(*) over (order by k rows between unbounded preceding and current row) as running_cnt +from fj; +k ltag rtag rn running_cnt +1 L1 NULL 1 1 +2 L2 NULL 2 2 +3 L3 R3 3 3 +4 L4 R4 4 4 +5 NULL R5 5 5 +6 NULL R6 6 6 +# 17.5 FULL JOIN + HAVING + ORDER BY + aggregate window. 
+select coalesce(t3l.k, t3r.k) as k, +count(*) as cnt, +rank() over (order by count(*) desc) as rk +from t3l full join t3r on t3l.k = t3r.k +group by coalesce(t3l.k, t3r.k) +having count(*) >= 1 +order by rk, k; +k cnt rk +1 1 1 +2 1 1 +3 1 1 +4 1 1 +5 1 1 +6 1 1 +select coalesce(t3lk, t3rk) as k, +count(*) as cnt, +rank() over (order by count(*) desc) as rk +from (select t3l.k as t3lk, t3l.tag as ltag, t3r.k as t3rk, t3r.tag as rtag +from t3l left join t3r on t3l.k = t3r.k +union +select t3l.k, t3l.tag, t3r.k, t3r.tag +from t3l right join t3r on t3l.k = t3r.k) dt +group by coalesce(t3lk, t3rk) +having count(*) >= 1 +order by rk, k; +k cnt rk +1 1 1 +2 1 1 +3 1 1 +4 1 1 +5 1 1 +6 1 1 +# 17.6 FULL JOIN + EXISTS subquery that itself contains a +# FULL JOIN, all filtered by a range condition. +select coalesce(t3l.k, t3r.k) as k, t3l.tag as ltag, t3r.tag as rtag +from t3l full join t3r on t3l.k = t3r.k +where coalesce(t3l.k, t3r.k) between 1 and 5 +and exists ( +select 1 from t3l l2 full join t3m on l2.k = t3m.k +where coalesce(l2.k, t3m.k) = coalesce(t3l.k, t3r.k) +); +k ltag rtag +1 L1 NULL +2 L2 NULL +3 L3 R3 +4 L4 R4 +5 NULL R5 +select coalesce(t3l.k, t3r.k) as k, t3l.tag as ltag, t3r.tag as rtag +from t3l full join t3r on t3l.k = t3r.k +where coalesce(t3l.k, t3r.k) between 1 and 5 +and coalesce(t3l.k, t3r.k) in ( +select coalesce(l2.k, t3m.k) from t3l l2 left join t3m on l2.k = t3m.k +union +select coalesce(l2.k, t3m.k) from t3l l2 right join t3m on l2.k = t3m.k +); +k ltag rtag +1 L1 NULL +2 L2 NULL +3 L3 R3 +4 L4 R4 +5 NULL R5 +# STRAIGHT_JOIN combined with FULL JOIN. Each FULL JOIN pair +# must stay contiguous in the join order so its null-complement +# rescan can fire at the right partner with the LEFT JOIN pass +# complete, but non FULL JOIN tables may appear before, after, +# or between distinct FULL JOIN pairs. STRAIGHT_JOIN cannot +# violate per-pair contiguity because the SQL grammar puts each +# pair's L and R syntactically adjacent in the FROM clause. 
+# FULL JOIN tables first. +explain +select straight_join coalesce(t3l.k, t3r.k) as k, t3o.x +from t3l full join t3r on t3l.k = t3r.k, t3o +where t3o.x between 3 and 4; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t3l ALL NULL NULL NULL NULL 4 +1 SIMPLE t3r ALL NULL NULL NULL NULL 4 Using where +1 SIMPLE t3o ALL NULL NULL NULL NULL 7 Using where; Using join buffer (flat, BNL join) +select straight_join coalesce(t3l.k, t3r.k) as k, t3o.x +from t3l full join t3r on t3l.k = t3r.k, t3o +where t3o.x between 3 and 4; +k x +1 3 +1 4 +2 3 +2 4 +3 3 +3 4 +4 3 +4 4 +5 3 +5 4 +6 3 +6 4 +select coalesce(t3l.k, t3r.k) as k, t3o.x +from t3l left join t3r on t3l.k = t3r.k, t3o +where t3o.x between 3 and 4 +union +select coalesce(t3l.k, t3r.k) as k, t3o.x +from t3l right join t3r on t3l.k = t3r.k, t3o +where t3o.x between 3 and 4; +k x +1 3 +1 4 +2 3 +2 4 +3 3 +3 4 +4 3 +4 4 +5 3 +5 4 +6 3 +6 4 +# Non FULL JOIN table first. +explain +select straight_join coalesce(t3l.k, t3r.k) as k, t3o.x +from t3o, t3l full join t3r on t3l.k = t3r.k +where t3o.x between 3 and 4; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t3o ALL NULL NULL NULL NULL 7 Using where +1 SIMPLE t3l ALL NULL NULL NULL NULL 4 +1 SIMPLE t3r ALL NULL NULL NULL NULL 4 Using where +select straight_join coalesce(t3l.k, t3r.k) as k, t3o.x +from t3o, t3l full join t3r on t3l.k = t3r.k +where t3o.x between 3 and 4; +k x +1 3 +1 4 +2 3 +2 4 +3 3 +3 4 +4 3 +4 4 +5 3 +5 4 +6 3 +6 4 +select coalesce(t3l.k, t3r.k) as k, t3o.x +from t3o, t3l left join t3r on t3l.k = t3r.k +where t3o.x between 3 and 4 +union +select coalesce(t3l.k, t3r.k) as k, t3o.x +from t3o, t3l right join t3r on t3l.k = t3r.k +where t3o.x between 3 and 4; +k x +1 3 +1 4 +2 3 +2 4 +3 3 +3 4 +4 3 +4 4 +5 3 +5 4 +6 3 +6 4 +# Two distinct FULL JOIN pairs with a non FULL JOIN table +# between them, under STRAIGHT_JOIN. 
Each pair is internally +# contiguous so STRAIGHT_JOIN is honored and the result matches +# the cross of the two pairs' LEFT/RIGHT UNION oracle. +create table t3n (k int); +insert into t3n values (1), (3), (5), (7); +explain +select straight_join +coalesce(t3l.k, t3r.k) as klr, t3o.x, +coalesce(t3m.k, t3n.k) as kmn +from (t3l full join t3r on t3l.k = t3r.k), +t3o, +(t3m full join t3n on t3m.k = t3n.k) +where t3o.x between 3 and 4; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t3l ALL NULL NULL NULL NULL 4 +1 SIMPLE t3r ALL NULL NULL NULL NULL 4 Using where +1 SIMPLE t3o ALL NULL NULL NULL NULL 7 Using where; Using join buffer (flat, BNL join) +1 SIMPLE t3m ALL NULL NULL NULL NULL 3 +1 SIMPLE t3n ALL NULL NULL NULL NULL 4 Using where +select straight_join +coalesce(t3l.k, t3r.k) as klr, t3o.x, +coalesce(t3m.k, t3n.k) as kmn +from (t3l full join t3r on t3l.k = t3r.k), +t3o, +(t3m full join t3n on t3m.k = t3n.k) +where t3o.x between 3 and 4; +klr x kmn +1 3 1 +1 3 2 +1 3 3 +1 3 5 +1 3 7 +1 4 1 +1 4 2 +1 4 3 +1 4 5 +1 4 7 +2 3 1 +2 3 2 +2 3 3 +2 3 5 +2 3 7 +2 4 1 +2 4 2 +2 4 3 +2 4 5 +2 4 7 +3 3 1 +3 3 2 +3 3 3 +3 3 5 +3 3 7 +3 4 1 +3 4 2 +3 4 3 +3 4 5 +3 4 7 +4 3 1 +4 3 2 +4 3 3 +4 3 5 +4 3 7 +4 4 1 +4 4 2 +4 4 3 +4 4 5 +4 4 7 +5 3 1 +5 3 2 +5 3 3 +5 3 5 +5 3 7 +5 4 1 +5 4 2 +5 4 3 +5 4 5 +5 4 7 +6 3 1 +6 3 2 +6 3 3 +6 3 5 +6 3 7 +6 4 1 +6 4 2 +6 4 3 +6 4 5 +6 4 7 +select lr.klr, t3o.x, mn.kmn +from (select coalesce(t3l.k, t3r.k) as klr +from t3l left join t3r on t3l.k = t3r.k +union +select coalesce(t3l.k, t3r.k) +from t3l right join t3r on t3l.k = t3r.k) lr, +t3o, +(select coalesce(t3m.k, t3n.k) as kmn +from t3m left join t3n on t3m.k = t3n.k +union +select coalesce(t3m.k, t3n.k) +from t3m right join t3n on t3m.k = t3n.k) mn +where t3o.x between 3 and 4; +klr x kmn +1 3 1 +1 3 2 +1 3 3 +1 3 5 +1 3 7 +1 4 1 +1 4 2 +1 4 3 +1 4 5 +1 4 7 +2 3 1 +2 3 2 +2 3 3 +2 3 5 +2 3 7 +2 4 1 +2 4 2 +2 4 3 +2 4 5 +2 4 7 +3 3 1 +3 3 2 +3 3 3 +3 3 5 +3 3 
7 +3 4 1 +3 4 2 +3 4 3 +3 4 5 +3 4 7 +4 3 1 +4 3 2 +4 3 3 +4 3 5 +4 3 7 +4 4 1 +4 4 2 +4 4 3 +4 4 5 +4 4 7 +5 3 1 +5 3 2 +5 3 3 +5 3 5 +5 3 7 +5 4 1 +5 4 2 +5 4 3 +5 4 5 +5 4 7 +6 3 1 +6 3 2 +6 3 3 +6 3 5 +6 3 7 +6 4 1 +6 4 2 +6 4 3 +6 4 5 +6 4 7 +drop table t3n; +drop table t3o, t3l, t3r, t3m; +# ======================================================== +# Section 18: Regressions +# +# Specific bugs that were fixed; kept as targeted cases so +# they don't silently regress. +# ======================================================== +# FULL JOIN with GROUP BY: previously crashed on the +# create_sort_index assertion (filesort_result != 0) when +# AGGR_OP::end_send was called twice. +create table t1 (grp char(1), val int); +insert into t1 values ('a',10), ('a',20), ('b',30), ('c',40); +create table t2 (grp char(1), val int); +insert into t2 values ('b',100), ('c',200), ('c',300), ('d',400); +select coalesce(t1.grp, t2.grp) as grp, +count(*) as cnt, +sum(t1.val) as s1, +sum(t2.val) as s2 +from t1 full join t2 on t1.grp = t2.grp +group by coalesce(t1.grp, t2.grp); +grp cnt s1 s2 +a 2 30 NULL +b 1 30 100 +c 2 80 500 +d 1 NULL 400 +select coalesce(dt.grp1, dt.grp2) as grp, +count(*) as cnt, +sum(dt.val1) as s1, +sum(dt.val2) as s2 +from (select t1.grp as grp1, t1.val as val1, t2.grp as grp2, t2.val as val2 +from t1 left join t2 on t1.grp = t2.grp +union +select t1.grp, t1.val, t2.grp, t2.val +from t1 right join t2 on t1.grp = t2.grp) dt +group by coalesce(dt.grp1, dt.grp2); +grp cnt s1 s2 +a 2 30 NULL +b 1 30 100 +c 2 80 500 +d 1 NULL 400 +drop table t1, t2; +# simplify_joins: do not flatten a FULL JOIN nest when +# it is not at the top level. +# +# simplify_joins moves ON into WHERE and clears on_expr on the +# nest, which would normally make the nest eligible for flattening. +# Flattening here would expose t1 and t2 at top level next to +# t3, and allow the optimizer to pick a plan that interleaves t3 between +# the FULL JOIN tables.
+create table t1 (a int); +insert into t1 values (2), (1), (7), (1), (2); +create table t2 (b int); +insert into t2 values (4),(5),(9), (4),(1),(7); +create table t3 (c int); +insert into t3 values (3),(1),(3),(9); +create index idx_a on t1(a); +select * from t3 inner join (t1 full join t2 on t1.a=t2.b) +on (t3.c=t1.a or t2.b=5); +c a b +1 1 1 +1 1 1 +1 NULL 5 +3 NULL 5 +3 NULL 5 +9 NULL 5 +select * from t3 inner join (t1 left join t2 on t1.a=t2.b) +on (t3.c=t1.a or t2.b=5) +union all +select * from t3 inner join (t1 right join t2 on t1.a=t2.b) +on (t3.c=t1.a or t2.b=5) +where t1.a is null; +c a b +1 1 1 +1 1 1 +1 NULL 5 +3 NULL 5 +3 NULL 5 +9 NULL 5 +drop table t1, t2, t3; +# simplify_joins: DO flatten an FJ-containing nest when it is the +# sole nest of its embedding. +# +# Allow the fj-nest to flatten `t1 FULL JOIN t2 ...` when it +# has no siblings, because adjacency is then trivially preserved. +create table t1 (a int, b int); +insert into t1 values (1,10), (2,20), (3,30); +create table t2 (a int, b int); +insert into t2 values (2,200), (3,300), (4,400); +create table t3 (a int, c varchar(10)); +insert into t3 values (1,'x'), (2,'y'), (4,'z'); +select dt.a1, dt.a2, t3.c +from (select t1.a as a1, t2.a as a2 from t1 full join t2 on t1.a=t2.a) dt +inner join t3 on coalesce(dt.a1, dt.a2) = t3.a; +a1 a2 c +1 NULL x +2 2 y +NULL 4 z +select dt.a1, dt.a2, t3.c +from (select t1.a as a1, t2.a as a2 from t1 left join t2 on t1.a=t2.a +union +select t1.a as a1, t2.a as a2 from t1 right join t2 on t1.a=t2.a) dt +inner join t3 on coalesce(dt.a1, dt.a2) = t3.a; +a1 a2 c +1 NULL x +2 2 y +NULL 4 z +drop table t1, t2, t3; +# simplify_joins: do not flatten a nest that carries JOIN_TYPE_FULL +# on itself. 
+create table t1 (a int); +insert into t1 values (1),(2),(3); +create table t2 (a int); +insert into t2 values (2),(3),(4); +create table t3 (a int); +insert into t3 values (3),(4),(5); +select t1.a as a1, t2.a as a2, t3.a as a3 +from (t1 join t2 on t1.a=t2.a) full join t3 on t2.a=t3.a; +a1 a2 a3 +2 2 NULL +3 3 3 +NULL NULL 4 +NULL NULL 5 +select t1.a as a1, t2.a as a2, t3.a as a3 +from (t1 join t2 on t1.a=t2.a) left join t3 on t2.a=t3.a +union all +select t1.a as a1, t2.a as a2, t3.a as a3 +from (t1 join t2 on t1.a=t2.a) right join t3 on t2.a=t3.a +where t2.a is null; +a1 a2 a3 +2 2 NULL +3 3 3 +NULL NULL 4 +NULL NULL 5 +drop table t1, t2, t3; +# INNER / CROSS JOIN of an outer table with a FULL JOIN must not +# interleave the outer table between the FULL JOIN tables. +create table t1 (a int); +insert into t1 values (2), (1), (7), (1), (2); +create table t2 (b int); +insert into t2 values (4),(5),(9), (4),(1),(7); +create table t3 (c int); +insert into t3 values (3),(1),(3),(9); +create index idx_a on t1(a); +select * from t3 inner join (t1 full join t2 on t1.a=t2.b) +on (t3.c=t1.a or t2.b=5); +c a b +1 1 1 +1 1 1 +1 NULL 5 +3 NULL 5 +3 NULL 5 +9 NULL 5 +select * from t3 cross join (t1 full join t2 on t1.a=t2.b); +c a b +1 1 1 +1 1 1 +1 2 NULL +1 2 NULL +1 7 7 +1 NULL 4 +1 NULL 4 +1 NULL 5 +1 NULL 9 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 2 NULL +3 2 NULL +3 2 NULL +3 2 NULL +3 7 7 +3 7 7 +3 NULL 4 +3 NULL 4 +3 NULL 4 +3 NULL 4 +3 NULL 5 +3 NULL 5 +3 NULL 9 +3 NULL 9 +9 1 1 +9 1 1 +9 2 NULL +9 2 NULL +9 7 7 +9 NULL 4 +9 NULL 4 +9 NULL 5 +9 NULL 9 +select * from t3 left join (t1 full join t2 on t1.a=t2.b) +on (t3.c=t1.a or t2.b=5) and t3.c != 9; +ERROR HY000: FULL JOIN is not allowed on the inner side of a LEFT or RIGHT JOIN +select * from t3 right join (t1 full join t2 on t1.a=t2.b) +on (t3.c=t1.a or t2.b=5) and t3.c != 9; +c a b +1 1 1 +1 1 1 +1 NULL 5 +3 NULL 5 +3 NULL 5 +NULL 2 NULL +NULL 2 NULL +NULL 7 7 +NULL NULL 4 +NULL NULL 4 +NULL NULL 9 +drop table t1, t2, t3; +# 
JOIN_ORDER hints around a FULL JOIN. The partners of each +# FULL JOIN must remain contiguous in the join order so the +# null-complement pass can fire at the right partner with the +# LEFT JOIN pass complete; outside tables may sit before or +# after the pair. A hint that names an outside table between +# the two partners is rejected with a conflict warning; a hint +# that names an outside table before (or after) the pair is +# honored. +create table t1 (a int); +insert into t1 values (2), (1), (7), (1), (2); +create table t2 (b int); +insert into t2 values (4), (5), (9), (4), (1), (7); +create table t3 (c int); +insert into t3 values (3), (1), (3), (9); +# No hint, base case, should match other results. +explain extended select * +from t3 inner join (t1 full join t2 on t1.a=t2.b) +on (t3.c=t1.a or t2.b=5); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t3 ALL NULL NULL NULL NULL 4 100.00 +1 SIMPLE t1 ALL NULL NULL NULL NULL 5 100.00 +1 SIMPLE t2 ALL NULL NULL NULL NULL 6 100.00 Using where +Warnings: +Note 1003 select `test`.`t3`.`c` AS `c`,`test`.`t1`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t3` join (`test`.`t1` full join `test`.`t2` on(`test`.`t1`.`a` = `test`.`t2`.`b`)) where `test`.`t1`.`a` = `test`.`t3`.`c` or `test`.`t2`.`b` = 5 +select * +from t3 inner join (t1 full join t2 on t1.a=t2.b) +on (t3.c=t1.a or t2.b=5); +c a b +1 1 1 +1 1 1 +1 NULL 5 +3 NULL 5 +3 NULL 5 +9 NULL 5 +# Outside table before the FULL JOIN pair, honored. 
+explain extended select /*+ join_order(t3,t1,t2) */ * +from t3 inner join (t1 full join t2 on t1.a=t2.b) +on (t3.c=t1.a or t2.b=5); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t3 ALL NULL NULL NULL NULL 4 100.00 +1 SIMPLE t1 ALL NULL NULL NULL NULL 5 100.00 +1 SIMPLE t2 ALL NULL NULL NULL NULL 6 100.00 Using where +Warnings: +Note 1003 select /*+ JOIN_ORDER(@`select#1` `t3`,`t1`,`t2`) */ `test`.`t3`.`c` AS `c`,`test`.`t1`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t3` join (`test`.`t1` full join `test`.`t2` on(`test`.`t1`.`a` = `test`.`t2`.`b`)) where `test`.`t1`.`a` = `test`.`t3`.`c` or `test`.`t2`.`b` = 5 +select /*+ join_order(t3,t1,t2) */ * +from t3 inner join (t1 full join t2 on t1.a=t2.b) +on (t3.c=t1.a or t2.b=5); +c a b +1 1 1 +1 1 1 +1 NULL 5 +3 NULL 5 +3 NULL 5 +9 NULL 5 +# Outside table after the FULL JOIN pair, honored. +explain extended select /*+ join_order(t1,t2,t3) */ * +from t3 inner join (t1 full join t2 on t1.a=t2.b) +on (t3.c=t1.a or t2.b=5); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 5 100.00 +1 SIMPLE t2 ALL NULL NULL NULL NULL 6 100.00 Using where +1 SIMPLE t3 ALL NULL NULL NULL NULL 4 100.00 Using where; Using join buffer (flat, BNL join) +Warnings: +Note 1003 select /*+ JOIN_ORDER(@`select#1` `t1`,`t2`,`t3`) */ `test`.`t3`.`c` AS `c`,`test`.`t1`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t3` join (`test`.`t1` full join `test`.`t2` on(`test`.`t1`.`a` = `test`.`t2`.`b`)) where `test`.`t3`.`c` = `test`.`t1`.`a` or `test`.`t2`.`b` = 5 +select /*+ join_order(t1,t2,t3) */ * +from t3 inner join (t1 full join t2 on t1.a=t2.b) +on (t3.c=t1.a or t2.b=5); +c a b +1 1 1 +1 1 1 +1 NULL 5 +3 NULL 5 +3 NULL 5 +9 NULL 5 +# Outside table between the two FULL JOIN partners, rejected. 
+explain extended select /*+ join_order(t1,t3,t2) */ * +from t3 inner join (t1 full join t2 on t1.a=t2.b) +on (t3.c=t1.a or t2.b=5); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t3 ALL NULL NULL NULL NULL 4 100.00 +1 SIMPLE t1 ALL NULL NULL NULL NULL 5 100.00 +1 SIMPLE t2 ALL NULL NULL NULL NULL 6 100.00 Using where +Warnings: +Warning 4219 Hint JOIN_ORDER(`t1`,`t3`,`t2`) is ignored as conflicting/duplicated +Note 1003 select `test`.`t3`.`c` AS `c`,`test`.`t1`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t3` join (`test`.`t1` full join `test`.`t2` on(`test`.`t1`.`a` = `test`.`t2`.`b`)) where `test`.`t1`.`a` = `test`.`t3`.`c` or `test`.`t2`.`b` = 5 +select /*+ join_order(t1,t3,t2) */ * +from t3 inner join (t1 full join t2 on t1.a=t2.b) +on (t3.c=t1.a or t2.b=5); +c a b +1 1 1 +1 1 1 +1 NULL 5 +3 NULL 5 +3 NULL 5 +9 NULL 5 +Warnings: +Warning 4219 Hint JOIN_ORDER(`t1`,`t3`,`t2`) is ignored as conflicting/duplicated +drop table t1, t2, t3; +# ((t1, t2) FULL JOIN t3) INNER JOIN t4 with the FULL JOIN on +# the left side of the INNER JOIN, the configuration phase 2 +# supports. The FULL JOIN block covers three tables; t4 sits +# outside and may go before or after the block but never +# between any two of t1, t2, t3. The FULL JOIN ON +# condition is a simple equi-join so the query translates +# to PostgreSQL; the INNER JOIN ON is disjunctive so the +# FULL JOIN is not eligible for the LEFT/RIGHT/INNER +# rewrite and survives into the join optimizer. +create table t1 (a int); +insert into t1 values (1), (2); +create table t2 (b int); +insert into t2 values (3), (4); +create table t3 (c int); +insert into t3 values (1), (5); +create table t4 (d int); +insert into t4 values (1), (5); +# No hint, base case. 
+explain extended select * +from ((t1, t2) full join t3 on t3.c=t1.a) inner join t4 +on (t4.d=t3.c or t4.d=5); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 2 100.00 +1 SIMPLE t2 ALL NULL NULL NULL NULL 2 100.00 +1 SIMPLE t3 ALL NULL NULL NULL NULL 2 100.00 Using where +1 SIMPLE t4 ALL NULL NULL NULL NULL 2 100.00 Using where; Using join buffer (flat, BNL join) +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t3`.`c` AS `c`,`test`.`t4`.`d` AS `d` from ((`test`.`t1` join `test`.`t2`) full join `test`.`t3` on(`test`.`t3`.`c` = `test`.`t1`.`a`)) join `test`.`t4` where `test`.`t4`.`d` = `test`.`t3`.`c` or `test`.`t4`.`d` = 5 +select * +from ((t1, t2) full join t3 on t3.c=t1.a) inner join t4 +on (t4.d=t3.c or t4.d=5); +a b c d +1 3 1 1 +1 3 1 5 +1 4 1 1 +1 4 1 5 +2 3 NULL 5 +2 4 NULL 5 +NULL NULL 5 5 +# Outside table before the FULL JOIN block, honored. +explain extended select /*+ join_order(t4,t1,t2,t3) */ * +from ((t1, t2) full join t3 on t3.c=t1.a) inner join t4 +on (t4.d=t3.c or t4.d=5); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t4 ALL NULL NULL NULL NULL 2 100.00 +1 SIMPLE t1 ALL NULL NULL NULL NULL 2 100.00 +1 SIMPLE t2 ALL NULL NULL NULL NULL 2 100.00 +1 SIMPLE t3 ALL NULL NULL NULL NULL 2 100.00 Using where +Warnings: +Note 1003 select /*+ JOIN_ORDER(@`select#1` `t4`,`t1`,`t2`,`t3`) */ `test`.`t1`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t3`.`c` AS `c`,`test`.`t4`.`d` AS `d` from ((`test`.`t1` join `test`.`t2`) full join `test`.`t3` on(`test`.`t3`.`c` = `test`.`t1`.`a`)) join `test`.`t4` where `test`.`t3`.`c` = `test`.`t4`.`d` or `test`.`t4`.`d` = 5 +select /*+ join_order(t4,t1,t2,t3) */ * +from ((t1, t2) full join t3 on t3.c=t1.a) inner join t4 +on (t4.d=t3.c or t4.d=5); +a b c d +1 3 1 1 +1 3 1 5 +1 4 1 1 +1 4 1 5 +2 3 NULL 5 +2 4 NULL 5 +NULL NULL 5 5 +# Outside table after the FULL JOIN block, honored. 
+explain extended select /*+ join_order(t1,t2,t3,t4) */ * +from ((t1, t2) full join t3 on t3.c=t1.a) inner join t4 +on (t4.d=t3.c or t4.d=5); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 2 100.00 +1 SIMPLE t2 ALL NULL NULL NULL NULL 2 100.00 +1 SIMPLE t3 ALL NULL NULL NULL NULL 2 100.00 Using where +1 SIMPLE t4 ALL NULL NULL NULL NULL 2 100.00 Using where; Using join buffer (flat, BNL join) +Warnings: +Note 1003 select /*+ JOIN_ORDER(@`select#1` `t1`,`t2`,`t3`,`t4`) */ `test`.`t1`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t3`.`c` AS `c`,`test`.`t4`.`d` AS `d` from ((`test`.`t1` join `test`.`t2`) full join `test`.`t3` on(`test`.`t3`.`c` = `test`.`t1`.`a`)) join `test`.`t4` where `test`.`t4`.`d` = `test`.`t3`.`c` or `test`.`t4`.`d` = 5 +select /*+ join_order(t1,t2,t3,t4) */ * +from ((t1, t2) full join t3 on t3.c=t1.a) inner join t4 +on (t4.d=t3.c or t4.d=5); +a b c d +1 3 1 1 +1 3 1 5 +1 4 1 1 +1 4 1 5 +2 3 NULL 5 +2 4 NULL 5 +NULL NULL 5 5 +# Swap t1 and t2 within the (t1, t2) nest while keeping +# the FULL JOIN block contiguous, honored. 
+explain extended select /*+ join_order(t4,t2,t1,t3) */ * +from ((t1, t2) full join t3 on t3.c=t1.a) inner join t4 +on (t4.d=t3.c or t4.d=5); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t4 ALL NULL NULL NULL NULL 2 100.00 +1 SIMPLE t2 ALL NULL NULL NULL NULL 2 100.00 +1 SIMPLE t1 ALL NULL NULL NULL NULL 2 100.00 +1 SIMPLE t3 ALL NULL NULL NULL NULL 2 100.00 Using where +Warnings: +Note 1003 select /*+ JOIN_ORDER(@`select#1` `t4`,`t2`,`t1`,`t3`) */ `test`.`t1`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t3`.`c` AS `c`,`test`.`t4`.`d` AS `d` from ((`test`.`t1` join `test`.`t2`) full join `test`.`t3` on(`test`.`t3`.`c` = `test`.`t1`.`a`)) join `test`.`t4` where `test`.`t3`.`c` = `test`.`t4`.`d` or `test`.`t4`.`d` = 5 +select /*+ join_order(t4,t2,t1,t3) */ * +from ((t1, t2) full join t3 on t3.c=t1.a) inner join t4 +on (t4.d=t3.c or t4.d=5); +a b c d +1 3 1 1 +1 3 1 5 +1 4 1 1 +1 4 1 5 +2 3 NULL 5 +2 4 NULL 5 +NULL NULL 5 5 +# Outside table sandwiched between two FULL JOIN partners +# of the (t1, t2) nest, rejected. 
+explain extended select /*+ join_order(t1,t4,t2,t3) */ * +from ((t1, t2) full join t3 on t3.c=t1.a) inner join t4 +on (t4.d=t3.c or t4.d=5); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 2 100.00 +1 SIMPLE t2 ALL NULL NULL NULL NULL 2 100.00 +1 SIMPLE t3 ALL NULL NULL NULL NULL 2 100.00 Using where +1 SIMPLE t4 ALL NULL NULL NULL NULL 2 100.00 Using where; Using join buffer (flat, BNL join) +Warnings: +Warning 4219 Hint JOIN_ORDER(`t1`,`t4`,`t2`,`t3`) is ignored as conflicting/duplicated +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t3`.`c` AS `c`,`test`.`t4`.`d` AS `d` from ((`test`.`t1` join `test`.`t2`) full join `test`.`t3` on(`test`.`t3`.`c` = `test`.`t1`.`a`)) join `test`.`t4` where `test`.`t4`.`d` = `test`.`t3`.`c` or `test`.`t4`.`d` = 5 +select /*+ join_order(t1,t4,t2,t3) */ * +from ((t1, t2) full join t3 on t3.c=t1.a) inner join t4 +on (t4.d=t3.c or t4.d=5); +a b c d +1 3 1 1 +1 3 1 5 +1 4 1 1 +1 4 1 5 +2 3 NULL 5 +2 4 NULL 5 +NULL NULL 5 5 +Warnings: +Warning 4219 Hint JOIN_ORDER(`t1`,`t4`,`t2`,`t3`) is ignored as conflicting/duplicated +# Outside table sandwiched between (t1, t2) and t3 +# across the FULL JOIN, rejected. 
+explain extended select /*+ join_order(t1,t2,t4,t3) */ * +from ((t1, t2) full join t3 on t3.c=t1.a) inner join t4 +on (t4.d=t3.c or t4.d=5); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 2 100.00 +1 SIMPLE t2 ALL NULL NULL NULL NULL 2 100.00 +1 SIMPLE t3 ALL NULL NULL NULL NULL 2 100.00 Using where +1 SIMPLE t4 ALL NULL NULL NULL NULL 2 100.00 Using where; Using join buffer (flat, BNL join) +Warnings: +Warning 4219 Hint JOIN_ORDER(`t1`,`t2`,`t4`,`t3`) is ignored as conflicting/duplicated +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t3`.`c` AS `c`,`test`.`t4`.`d` AS `d` from ((`test`.`t1` join `test`.`t2`) full join `test`.`t3` on(`test`.`t3`.`c` = `test`.`t1`.`a`)) join `test`.`t4` where `test`.`t4`.`d` = `test`.`t3`.`c` or `test`.`t4`.`d` = 5 +select /*+ join_order(t1,t2,t4,t3) */ * +from ((t1, t2) full join t3 on t3.c=t1.a) inner join t4 +on (t4.d=t3.c or t4.d=5); +a b c d +1 3 1 1 +1 3 1 5 +1 4 1 1 +1 4 1 5 +2 3 NULL 5 +2 4 NULL 5 +NULL NULL 5 5 +Warnings: +Warning 4219 Hint JOIN_ORDER(`t1`,`t2`,`t4`,`t3`) is ignored as conflicting/duplicated +drop table t1, t2, t3, t4; +# JOIN_PREFIX and JOIN_SUFFIX hints around a FULL JOIN, +# with the FULL JOIN on the left side of the INNER JOIN +# (the configuration phase 2 supports). JOIN_PREFIX forces +# the listed tables to lead the join order; JOIN_SUFFIX +# forces them to trail it. Both must respect the FULL JOIN +# contiguity invariant. A hint that ends up putting a +# non-FULL-JOIN table between two FULL JOIN tables, once the +# implicit prefix or suffix dependencies are added, is +# rejected. +create table t1 (a int); +insert into t1 values (2), (1), (7), (1), (2); +create table t2 (b int); +insert into t2 values (4), (5), (9), (4), (1), (7); +create table t3 (c int); +insert into t3 values (3), (1), (3), (9); +# JOIN_PREFIX with the outside table, honored (was rejected +# before the FULL JOIN check was relaxed). 
+explain extended select /*+ join_prefix(t3) */ * +from (t1 full join t2 on t1.a=t2.b) inner join t3 +on (t3.c=t1.a or t2.b=5); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t3 ALL NULL NULL NULL NULL 4 100.00 +1 SIMPLE t1 ALL NULL NULL NULL NULL 5 100.00 +1 SIMPLE t2 ALL NULL NULL NULL NULL 6 100.00 Using where +Warnings: +Note 1003 select /*+ JOIN_PREFIX(@`select#1` `t3`) */ `test`.`t1`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t3`.`c` AS `c` from (`test`.`t1` full join `test`.`t2` on(`test`.`t1`.`a` = `test`.`t2`.`b`)) join `test`.`t3` where `test`.`t1`.`a` = `test`.`t3`.`c` or `test`.`t2`.`b` = 5 +select /*+ join_prefix(t3) */ * +from (t1 full join t2 on t1.a=t2.b) inner join t3 +on (t3.c=t1.a or t2.b=5); +a b c +1 1 1 +1 1 1 +NULL 5 1 +NULL 5 3 +NULL 5 3 +NULL 5 9 +# JOIN_PREFIX with the FULL JOIN pair, honored. +explain extended select /*+ join_prefix(t1, t2) */ * +from (t1 full join t2 on t1.a=t2.b) inner join t3 +on (t3.c=t1.a or t2.b=5); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 5 100.00 +1 SIMPLE t2 ALL NULL NULL NULL NULL 6 100.00 Using where +1 SIMPLE t3 ALL NULL NULL NULL NULL 4 100.00 Using where; Using join buffer (flat, BNL join) +Warnings: +Note 1003 select /*+ JOIN_PREFIX(@`select#1` `t1`,`t2`) */ `test`.`t1`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t3`.`c` AS `c` from (`test`.`t1` full join `test`.`t2` on(`test`.`t1`.`a` = `test`.`t2`.`b`)) join `test`.`t3` where `test`.`t3`.`c` = `test`.`t1`.`a` or `test`.`t2`.`b` = 5 +select /*+ join_prefix(t1, t2) */ * +from (t1 full join t2 on t1.a=t2.b) inner join t3 +on (t3.c=t1.a or t2.b=5); +a b c +1 1 1 +1 1 1 +NULL 5 1 +NULL 5 3 +NULL 5 3 +NULL 5 9 +# JOIN_PREFIX naming one FULL JOIN partner and the outside +# table forces the other partner after both, splitting the +# FULL JOIN block, rejected. 
+explain extended select /*+ join_prefix(t1, t3) */ * +from (t1 full join t2 on t1.a=t2.b) inner join t3 +on (t3.c=t1.a or t2.b=5); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t3 ALL NULL NULL NULL NULL 4 100.00 +1 SIMPLE t1 ALL NULL NULL NULL NULL 5 100.00 +1 SIMPLE t2 ALL NULL NULL NULL NULL 6 100.00 Using where +Warnings: +Warning 4219 Hint JOIN_PREFIX(`t1`,`t3`) is ignored as conflicting/duplicated +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t3`.`c` AS `c` from (`test`.`t1` full join `test`.`t2` on(`test`.`t1`.`a` = `test`.`t2`.`b`)) join `test`.`t3` where `test`.`t1`.`a` = `test`.`t3`.`c` or `test`.`t2`.`b` = 5 +select /*+ join_prefix(t1, t3) */ * +from (t1 full join t2 on t1.a=t2.b) inner join t3 +on (t3.c=t1.a or t2.b=5); +a b c +1 1 1 +1 1 1 +NULL 5 1 +NULL 5 3 +NULL 5 3 +NULL 5 9 +Warnings: +Warning 4219 Hint JOIN_PREFIX(`t1`,`t3`) is ignored as conflicting/duplicated +# JOIN_SUFFIX with the outside table, honored. +explain extended select /*+ join_suffix(t3) */ * +from (t1 full join t2 on t1.a=t2.b) inner join t3 +on (t3.c=t1.a or t2.b=5); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 5 100.00 +1 SIMPLE t2 ALL NULL NULL NULL NULL 6 100.00 Using where +1 SIMPLE t3 ALL NULL NULL NULL NULL 4 100.00 Using where; Using join buffer (flat, BNL join) +Warnings: +Note 1003 select /*+ JOIN_SUFFIX(@`select#1` `t3`) */ `test`.`t1`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t3`.`c` AS `c` from (`test`.`t1` full join `test`.`t2` on(`test`.`t1`.`a` = `test`.`t2`.`b`)) join `test`.`t3` where `test`.`t3`.`c` = `test`.`t1`.`a` or `test`.`t2`.`b` = 5 +select /*+ join_suffix(t3) */ * +from (t1 full join t2 on t1.a=t2.b) inner join t3 +on (t3.c=t1.a or t2.b=5); +a b c +1 1 1 +1 1 1 +NULL 5 1 +NULL 5 3 +NULL 5 3 +NULL 5 9 +# JOIN_SUFFIX with the FULL JOIN pair, honored (was +# rejected before the FULL JOIN check was relaxed). 
+explain extended select /*+ join_suffix(t1, t2) */ * +from (t1 full join t2 on t1.a=t2.b) inner join t3 +on (t3.c=t1.a or t2.b=5); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t3 ALL NULL NULL NULL NULL 4 100.00 +1 SIMPLE t1 ALL NULL NULL NULL NULL 5 100.00 +1 SIMPLE t2 ALL NULL NULL NULL NULL 6 100.00 Using where +Warnings: +Note 1003 select /*+ JOIN_SUFFIX(@`select#1` `t1`,`t2`) */ `test`.`t1`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t3`.`c` AS `c` from (`test`.`t1` full join `test`.`t2` on(`test`.`t1`.`a` = `test`.`t2`.`b`)) join `test`.`t3` where `test`.`t1`.`a` = `test`.`t3`.`c` or `test`.`t2`.`b` = 5 +select /*+ join_suffix(t1, t2) */ * +from (t1 full join t2 on t1.a=t2.b) inner join t3 +on (t3.c=t1.a or t2.b=5); +a b c +1 1 1 +1 1 1 +NULL 5 1 +NULL 5 3 +NULL 5 3 +NULL 5 9 +# JOIN_SUFFIX naming the outside table and one FULL JOIN +# partner forces the other partner before both, splitting +# the FULL JOIN block, rejected. +explain extended select /*+ join_suffix(t3, t2) */ * +from (t1 full join t2 on t1.a=t2.b) inner join t3 +on (t3.c=t1.a or t2.b=5); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t3 ALL NULL NULL NULL NULL 4 100.00 +1 SIMPLE t1 ALL NULL NULL NULL NULL 5 100.00 +1 SIMPLE t2 ALL NULL NULL NULL NULL 6 100.00 Using where +Warnings: +Warning 4219 Hint JOIN_SUFFIX(`t3`,`t2`) is ignored as conflicting/duplicated +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t3`.`c` AS `c` from (`test`.`t1` full join `test`.`t2` on(`test`.`t1`.`a` = `test`.`t2`.`b`)) join `test`.`t3` where `test`.`t1`.`a` = `test`.`t3`.`c` or `test`.`t2`.`b` = 5 +select /*+ join_suffix(t3, t2) */ * +from (t1 full join t2 on t1.a=t2.b) inner join t3 +on (t3.c=t1.a or t2.b=5); +a b c +1 1 1 +1 1 1 +NULL 5 1 +NULL 5 3 +NULL 5 3 +NULL 5 9 +Warnings: +Warning 4219 Hint JOIN_SUFFIX(`t3`,`t2`) is ignored as conflicting/duplicated +drop table t1, t2, t3; +# Empty MyISAM FULL JOIN 
partner. MyISAM reports an exact 0 row +# count, so the table used to be promoted to a JT_SYSTEM const +# NULL row at optimize time. The synthesized NULL row drove the +# LEFT JOIN component to emit a bogus row of NULLs when the +# other partner was also empty. Earlier still, the duplicate +# filter setup crashed in this scenario. FULL JOIN tables +# bypass the const table optimization. +create table t1 (pk int) ENGINE=MyISAM; +create table t2 (pk int) ENGINE=InnoDB; +select t1.pk from t1 full join t2 on (true); +pk +select t1.pk from t1 left join t2 on (true) +union +select t1.pk from t1 right join t2 on (true); +pk +drop table t1, t2; +# Both partners empty MyISAM. +create table t1 (pk int) ENGINE=MyISAM; +create table t2 (pk int) ENGINE=MyISAM; +select * from t1 full join t2 on (true); +pk pk +select * from t1 left join t2 on (true) +union +select * from t1 right join t2 on (true); +pk pk +drop table t1, t2; +# Empty MyISAM on the left, InnoDB on the right with rows. The +# bug was masked here because the phantom left NULL row happened +# to produce the same rows the null complement pass would. +create table t1 (pk int) ENGINE=MyISAM; +create table t2 (pk int) ENGINE=InnoDB; +insert into t2 values (10), (20); +select * from t1 full join t2 on (true); +pk pk +NULL 10 +NULL 20 +select * from t1 left join t2 on (true) +union +select * from t1 right join t2 on (true); +pk pk +NULL 10 +NULL 20 +drop table t1, t2; +# Empty MyISAM on the right (the side that does not normally +# become const) and empty InnoDB on the left. +create table t1 (pk int) ENGINE=InnoDB; +create table t2 (pk int) ENGINE=MyISAM; +select * from t1 full join t2 on (true); +pk pk +select * from t1 left join t2 on (true) +union +select * from t1 right join t2 on (true); +pk pk +drop table t1, t2; +# MyISAM partners with one row each. Without the guards, the +# stats.records <= 1 path would also have promoted them. 
+create table t1 (a int) ENGINE=MyISAM; +create table t2 (b int) ENGINE=MyISAM; +insert into t1 values (1); +insert into t2 values (2); +select * from t1 full join t2 on t1.a = t2.b; +a b +1 NULL +NULL 2 +select * from t1 left join t2 on t1.a = t2.b +union +select * from t1 right join t2 on t1.a = t2.b; +a b +1 NULL +NULL 2 +drop table t1, t2; +# simplify_joins must not rewrite a FULL JOIN whose left side +# still contains an unrewritten FULL JOIN. Without the fix, +# rewrite_full_to_right put the inner FULL JOIN on the inner +# side of the resulting LEFT JOIN, a shape the optimizer could +# not plan, and tripped the found_tables > 0 assertion in +# best_extension_by_limited_search. +# +# The trigger is a WHERE predicate that rejects nulls on the +# right base table of the outermost FULL JOIN in a chained FULL +# JOIN. The fix keeps the FULL JOIN; the null complement pass +# runs and the WHERE filters any rows the rewrite would have +# eliminated. Per Section 6, the LEFT JOIN UNION RIGHT JOIN +# oracle is not valid for chained FULL JOINs, so the recorded +# result is the oracle. 
+create table t1 (a int); +create table t2 (a int); +create table t3 (a int); +create table t4 (a int); +insert into t1 values (1); +insert into t2 values (10); +insert into t3 values (100); +insert into t4 values (1000), (2000); +select t1.a, t2.a, t3.a, t4.a +from t1 left join t2 on (true) +full join t3 on (true) +full join t4 on (true) +where t4.a <= 3000; +a a a a +1 10 100 1000 +1 10 100 2000 +explain extended select t1.a, t2.a, t3.a, t4.a +from t1 left join t2 on (true) +full join t3 on (true) +full join t4 on (true) +where t4.a <= 3000; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 1 100.00 +1 SIMPLE t2 ALL NULL NULL NULL NULL 1 100.00 Using where +1 SIMPLE t3 ALL NULL NULL NULL NULL 1 100.00 Using where +1 SIMPLE t4 ALL NULL NULL NULL NULL 2 100.00 Using where +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t2`.`a` AS `a`,`test`.`t3`.`a` AS `a`,`test`.`t4`.`a` AS `a` from ((`test`.`t1` left join `test`.`t2` on(1)) full join `test`.`t3` on(1)) full join `test`.`t4` on(1) where `test`.`t4`.`a` <= 3000 +# IS NOT NULL drives the same rewrite path. 
+select t1.a, t2.a, t3.a, t4.a +from t1 left join t2 on (true) +full join t3 on (true) +full join t4 on (true) +where t4.a is not null; +a a a a +1 10 100 1000 +1 10 100 2000 +explain extended select t1.a, t2.a, t3.a, t4.a +from t1 left join t2 on (true) +full join t3 on (true) +full join t4 on (true) +where t4.a is not null; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 1 100.00 +1 SIMPLE t2 ALL NULL NULL NULL NULL 1 100.00 Using where +1 SIMPLE t3 ALL NULL NULL NULL NULL 1 100.00 Using where +1 SIMPLE t4 ALL NULL NULL NULL NULL 2 100.00 Using where +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t2`.`a` AS `a`,`test`.`t3`.`a` AS `a`,`test`.`t4`.`a` AS `a` from ((`test`.`t1` left join `test`.`t2` on(1)) full join `test`.`t3` on(1)) full join `test`.`t4` on(1) where `test`.`t4`.`a` is not null +# RIGHT JOIN canonicalizes to LEFT JOIN, same trigger applies. +select t1.a, t2.a, t3.a, t4.a +from t1 right join t2 on (true) +full join t3 on (true) +full join t4 on (true) +where t4.a <= 3000; +a a a a +1 10 100 1000 +1 10 100 2000 +explain extended select t1.a, t2.a, t3.a, t4.a +from t1 right join t2 on (true) +full join t3 on (true) +full join t4 on (true) +where t4.a <= 3000; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 1 100.00 +1 SIMPLE t1 ALL NULL NULL NULL NULL 1 100.00 Using where +1 SIMPLE t3 ALL NULL NULL NULL NULL 1 100.00 Using where +1 SIMPLE t4 ALL NULL NULL NULL NULL 2 100.00 Using where +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t2`.`a` AS `a`,`test`.`t3`.`a` AS `a`,`test`.`t4`.`a` AS `a` from ((`test`.`t2` left join `test`.`t1` on(1)) full join `test`.`t3` on(1)) full join `test`.`t4` on(1) where `test`.`t4`.`a` <= 3000 +# Reproducer matching the original ticket. All left tables are +# empty so only null complement rows for t4 reach the WHERE, +# and GROUP BY collapses them. 
+delete from t1; +delete from t2; +delete from t3; +select t1.a as field1 +from t1 left join t2 on (true) +full join t3 on (true) +full join t4 on (true) +where t4.a <= 3000 +group by field1; +field1 +NULL +explain extended select t1.a as field1 +from t1 left join t2 on (true) +full join t3 on (true) +full join t4 on (true) +where t4.a <= 3000 +group by field1; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 0 0.00 Using temporary; Using filesort +1 SIMPLE t2 ALL NULL NULL NULL NULL 0 0.00 Using where +1 SIMPLE t3 ALL NULL NULL NULL NULL 0 0.00 Using where +1 SIMPLE t4 ALL NULL NULL NULL NULL 2 100.00 Using where +Warnings: +Note 1003 select `test`.`t1`.`a` AS `field1` from ((`test`.`t1` left join `test`.`t2` on(1)) full join `test`.`t3` on(1)) full join `test`.`t4` on(1) where `test`.`t4`.`a` <= 3000 group by `test`.`t1`.`a` +# The guard must not block the normal FULL JOIN rewrite when +# the left side is a base table. A plain t1 FULL JOIN t4 with +# the same WHERE predicate still rewrites and the UNION oracle +# matches. 
+insert into t1 values (1); +select t1.a, t4.a from t1 full join t4 on (true) where t4.a <= 3000; +a a +1 1000 +1 2000 +explain extended +select t1.a, t4.a from t1 full join t4 on (true) where t4.a <= 3000; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t4 ALL NULL NULL NULL NULL 2 100.00 Using where +1 SIMPLE t1 ALL NULL NULL NULL NULL 1 100.00 Using where; Using join buffer (flat, BNL join) +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t4`.`a` AS `a` from `test`.`t4` left join `test`.`t1` on(1) where `test`.`t4`.`a` <= 3000 +select t1.a, t4.a from t1 left join t4 on (true) where t4.a <= 3000 +union +select t1.a, t4.a from t1 right join t4 on (true) where t4.a <= 3000; +a a +1 1000 +1 2000 +explain extended +select t1.a, t4.a from t1 left join t4 on (true) where t4.a <= 3000 +union +select t1.a, t4.a from t1 right join t4 on (true) where t4.a <= 3000; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY t1 system NULL NULL NULL NULL 1 100.00 +1 PRIMARY t4 ALL NULL NULL NULL NULL 2 100.00 Using where +2 UNION t4 ALL NULL NULL NULL NULL 2 100.00 Using where +2 UNION t1 ALL NULL NULL NULL NULL 1 100.00 Using where; Using join buffer (flat, BNL join) +NULL UNION RESULT ALL NULL NULL NULL NULL NULL NULL +Warnings: +Note 1003 /* select#1 */ select 1 AS `a`,`test`.`t4`.`a` AS `a` from `test`.`t4` where `test`.`t4`.`a` <= 3000 union /* select#2 */ select `test`.`t1`.`a` AS `a`,`test`.`t4`.`a` AS `a` from `test`.`t4` left join `test`.`t1` on(1) where `test`.`t4`.`a` <= 3000 +drop table t1, t2, t3, t4; +# Taller-tree variant of the chained FULL JOIN regression above. +# The guard at each level of rewrite_full_outer_joins runs after +# simplify_nested_join has descended into left_table, so a longer +# chain is handled recursively: each FULL JOIN whose left side +# still contains an unrewritten FULL JOIN stays as a FULL JOIN. 
+# With the null-rejecting WHERE only on the outermost right, no +# inner FULL JOIN can rewrite, so every FULL JOIN in the chain +# is preserved. +create table t1 (a int); +create table t2 (a int); +create table t3 (a int); +create table t4 (a int); +create table t5 (a int); +create table t6 (a int); +insert into t1 values (1); +insert into t2 values (10); +insert into t3 values (100); +insert into t4 values (1000); +insert into t5 values (10000); +insert into t6 values (100000), (200000); +select t1.a, t2.a, t3.a, t4.a, t5.a, t6.a +from t1 left join t2 on (true) +full join t3 on (true) +full join t4 on (true) +full join t5 on (true) +full join t6 on (true) +where t6.a <= 300000; +a a a a a a +1 10 100 1000 10000 100000 +1 10 100 1000 10000 200000 +explain extended select t1.a, t2.a, t3.a, t4.a, t5.a, t6.a +from t1 left join t2 on (true) +full join t3 on (true) +full join t4 on (true) +full join t5 on (true) +full join t6 on (true) +where t6.a <= 300000; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 1 100.00 +1 SIMPLE t2 ALL NULL NULL NULL NULL 1 100.00 Using where +1 SIMPLE t3 ALL NULL NULL NULL NULL 1 100.00 Using where +1 SIMPLE t4 ALL NULL NULL NULL NULL 1 100.00 Using where +1 SIMPLE t5 ALL NULL NULL NULL NULL 1 100.00 Using where +1 SIMPLE t6 ALL NULL NULL NULL NULL 2 100.00 Using where +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t2`.`a` AS `a`,`test`.`t3`.`a` AS `a`,`test`.`t4`.`a` AS `a`,`test`.`t5`.`a` AS `a`,`test`.`t6`.`a` AS `a` from ((((`test`.`t1` left join `test`.`t2` on(1)) full join `test`.`t3` on(1)) full join `test`.`t4` on(1)) full join `test`.`t5` on(1)) full join `test`.`t6` on(1) where `test`.`t6`.`a` <= 300000 +drop table t1, t2, t3, t4, t5, t6; +# Companion case: the guard must not fire pessimistically. 
When +# a null-rejecting WHERE predicate sits on a middle base table +# rather than the outermost right, the inner FULL JOIN rewrites +# (its left side is a base-table-only LEFT JOIN, not yet +# containing a FULL JOIN), and the outer FULL JOIN's left side +# no longer contains a FULL JOIN by the time the guard checks +# it, so the outer FULL JOIN also rewrites. Both FULL JOINs +# collapse to LEFT JOINs and the query is plannable without the +# null complement pass. +# +# Note: t3 is detected as a system const table in the recorded +# plan. MDEV-38508 (commit 7d6a036c3a1) skips constant table +# promotion for tables whose outer_join carries JOIN_TYPE_FULL, +# but that guard reads the current outer_join bits and +# simplify_joins clears JOIN_TYPE_FULL when it rewrites a FULL +# JOIN to a LEFT JOIN. Here both FULL JOINs are rewritten +# before make_join_statistics runs, so t3 is no longer an FJ +# partner at that point and the standard LEFT JOIN const-table +# path applies. This is safe: the FULL JOIN's null complement +# pass is what made const promotion dangerous in MDEV-38508, and +# there is no null complement pass after the rewrite; any +# predicate that drove the rewrite is null-rejecting on the +# rewritten table and so rejects the synthesized NULL row anyway. +create table t1 (a int); +create table t2 (a int); +create table t3 (a int); +create table t4 (a int); +insert into t1 values (1); +insert into t2 values (10); +insert into t3 values (100); +insert into t4 values (1000); +select t1.a, t2.a, t3.a, t4.a +from t1 left join t2 on (true) +full join t3 on (true) +full join t4 on (true) +where t3.a <= 1000; +a a a a +1 10 100 1000 +# Verify the simplified shape: a plain LEFT JOIN chain, no FULL. 
+explain extended select t1.a +from t1 left join t2 on (true) +full join t3 on (true) +full join t4 on (true) +where t3.a <= 1000; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t3 system NULL NULL NULL NULL 1 100.00 +1 SIMPLE t1 ALL NULL NULL NULL NULL 1 100.00 Using where +1 SIMPLE t2 ALL NULL NULL NULL NULL 1 100.00 Using where +1 SIMPLE t4 ALL NULL NULL NULL NULL 1 100.00 Using where; Using join buffer (flat, BNL join) +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a` from (`test`.`t1` left join `test`.`t2` on(1)) left join `test`.`t4` on(1) where 1 +drop table t1, t2, t3, t4; +# MDEV-39605: a merged derived table containing a FULL JOIN previously +# hit an assertion in table elimination. simplify_joins +# preserves the derived's nested join (no on_expr, not a semi-join) +# to keep the FULL JOIN tables adjacent in the parent join_list, but +# the table elimination iterator must descend into that nest +# without trying to eliminate it. +create table t1 (a int); +insert into t1 values (1), (2); +select * from t1, (select alias1.a from t1 as alias1 +full join t1 as alias2 on (true)) dt; +a a +1 1 +1 1 +1 2 +1 2 +2 1 +2 1 +2 2 +2 2 +select * from t1, (select alias1.a from t1 as alias1 +left join t1 as alias2 on (true) +union all +select alias1.a from t1 as alias1 +right join t1 as alias2 on (true) +where alias1.a is null) dt; +a a +1 1 +1 1 +1 2 +1 2 +2 1 +2 1 +2 2 +2 2 +drop table t1; +# When a constant table is a subquery, the duplicate filter +# allocation loop used to walk off the end of JOIN::join_tab list. 
+create table t1 (pk int not null, primary key (pk)); +create table t2 (pk int); +select t1.pk from (t1 full join t2 on (true)) +where exists (select t2.pk from (t1 right join t2 on (t2.pk = t1.pk))); +pk +insert into t1 values (1), (2); +insert into t2 values (1), (null); +select t1.pk from (t1 full join t2 on (true)) +where exists (select t2.pk from (t1 right join t2 on (t2.pk = t1.pk))); +pk +1 +1 +2 +2 +select t1.pk from (t1 left join t2 on (true)) +where exists (select t2.pk from (t1 right join t2 on (t2.pk = t1.pk))) +union +select t1.pk from (t1 right join t2 on (true)) +where exists (select t2.pk from (t1 right join t2 on (t2.pk = t1.pk))); +pk +1 +2 +drop table t1, t2; +# In a correlated subquery, the FULL JOIN null complement rescan +# on the right table used to disable the keyread on the +# index without re-enabling it. The next iteration of the +# subquery then hit an assertion because the keyread +# was expected to still be active. +create table t1 (pk int primary key); +create table t2 (pk int primary key, a int); +create table t3 (pk int primary key, a int, key idx(a)); +insert into t1 values (1), (2); +insert into t2 values (1, 10); +insert into t3 values (1, 100), (2, 200); +select (select min(t2.pk) from (t2 full join t3 on true) +where t3.pk = t1.pk or t2.a = t2.a) as f +from t1; +f +1 +1 +select (select min(dt.t2pk) from +(select t2.pk as t2pk, t2.a as t2a, t3.pk as t3pk +from t2 left join t3 on true +union +select t2.pk, t2.a, t3.pk from t2 right join t3 on true +where t2.pk is null) dt +where dt.t3pk = t1.pk or dt.t2a = dt.t2a) as f +from t1; +f +1 +1 +drop table t1, t2, t3; +# First, an inner FULL JOIN leaf that +# carries its own on_expr did not inherit dep_tables from the +# enclosing nests, so its dependency on the leftmost FULL JOIN +# table was lost and the optimizer would pick it as the first +# table. 
Second, compute_full_join_nest_tables iterated only +# FULL JOIN leaves and walked their embedding chain, missing +# any FULL JOIN table whose left side was a nest of inner +# joins (where the nest carries JOIN_TYPE_FULL but its leaves +# do not). These two issues led to an invalid FULL JOIN table +# order. +# +# The shape places a nested join on the right side of a FULL JOIN, +# which the post simplify_joins check now rejects, so the query no +# longer reaches the table ordering code. It is kept as a guard that +# the shape stays rejected rather than silently dropping rows. +create table t1 (pk int); +create table t2 (pk int); +create table t3 (pk int); +create table t4 (pk int); +create table t5 (pk int); +select t2.pk as field1 from t1 +full join t2 +full join t4 +full join t5 right join t3 on (true) +on (true) +full join t5 as alias2 on (true) +on (true) +on t1.pk = alias2.pk; +ERROR HY000: FULL JOIN is only supported with base tables on the right side; '(nest_last_join)' is not a base table +drop table t1, t2, t3, t4, t5; +# The FULL JOIN table can be a nest of inner joins, so in that +# case the nest carries JOIN_TYPE_FULL but its leaves do not. +# compute_full_join_nest_tables must still pick those leaves +# up. +create table t1 (a int); +create table t2 (a int); +create table t3 (a int); +select * from t1 inner join t2 full join t3 on t1.a=t3.a; +a a a +drop table t1, t2, t3; +# Both FULL JOIN tables carried the ON expression which broke +# dependency propagation during make_join_statistics. Rather +# than make changes there, just carry the ON expression on only +# one of the FULL JOIN tables, like the other join types do. +# +# This shape also places a nested join on the right side of a FULL +# JOIN and is now rejected after simplify_joins. Kept as a guard +# that the shape stays rejected. 
+create table t1 (pk int); +create table t2 (pk int); +create table t3 (pk int); +create table t4 (pk int); +insert into t1 values (1), (2); +select max(t3.pk) from t1 +full join t2 +full join t3 on t2.pk = t3.pk +full join t1 as alias5 +full join t4 on (true) +on t3.pk = alias5.pk +on t1.pk = alias5.pk; +ERROR HY000: FULL JOIN is only supported with base tables on the right side; '(nest_last_join)' is not a base table +drop table t1, t2, t3, t4; +# Inner FULL JOIN tables carrying a constant ON expression did not +# inherit the dep_tables of their enclosing FULL JOIN nest because +# make_join_statistics, for any table whose on_expr was set, took +# an early exit that skipped embedding dependency propagation. +create table t1 (pk int); +create table t2 (pk int); +create table t3 (pk int); +create table t4 (pk int); +insert into t1 values (1), (2); +select t1.pk from t1 +full join t2 +full outer join t3 on (true) +full join t4 on (true) +on t1.pk = t4.pk; +pk +1 +2 +drop table t1, t2, t3, t4; +# When an inner table of a FULL JOIN sat inside a nest that was +# itself the LEFT side of an enclosing FULL JOIN, +# make_outerjoin_info skipped that nest while building the outer +# join scope chain, leaving the inner table's first_upper +# unlinked. add_found_match_trig_cond walked off the broken +# chain and dereferenced NULL when make_join_select pushed an +# ON condition to that table. +# +# Separately, the outermost FULL JOIN's right operand was a +# nested join expression rather than a single base table (the +# parser places it there when the FULL JOIN chain's ONs are all +# trailing), and the null complement pass had no JOIN_TAB +# carrying FULL|RIGHT to attach an fj_dups filter to, so the +# unmatched rows from the right side were never emitted. 
+create table t1 (pk int); +create table t2 (pk int); +create table t3 (pk int); +create table t4 (pk int); +create table t5 (pk int); +insert into t1 values (1), (2); +insert into t2 values (1), (3); +insert into t3 values (1), (4); +insert into t4 values (1), (5); +insert into t5 values (1), (6); +select t1.pk +from t1 +full join t2 +full outer join t3 on (true) +full join t4 on t3.pk = t4.pk +left join t5 on t2.pk = t5.pk +on t1.pk = t4.pk; +pk +1 +1 +2 +NULL +NULL +NULL +drop table t1, t2, t3, t4, t5; +# When an IN subquery in the ON clause of a JOIN to a FULL JOIN +# nest was converted into a semijoin, the semijoin nest was inserted +# as a child of the FULL JOIN nest. Because the FULL JOIN nest cannot +# be flattened, the semijoin nest remained inside it. +# check_interleaving_with_nj walked through the semijoin nest +# transparently and incremented the FULL JOIN nest's counter once for +# every semijoin table instead of once for the whole semijoin nest, +# eventually overflowing past n_tables and triggering the assertion in +# greedy_search. +create table t1 (pk int); +create table t2 (pk int); +select table1.pk from t1 as table1 +straight_join (select alias1.* from t2 as alias1 +full join t1 as alias2 on (true)) as table2 +on (table1.pk in (select t2.pk from t1 left join t2 on (true))); +pk +drop table t1, t2; +# t1 is detected as a const table through the keyuse based const +# detection in make_join_statistics. The HAVING predicate field2 = 8 +# is field2 = t1.pk, and condition pushdown from HAVING moves it down +# as the equality t1.pk = 8. That binds every part of t1's primary +# key to a constant, so make_join_statistics reads the single matching +# row at optimization time and marks t1 const. That promotion path +# does not exclude FULL JOIN tables, so t1 folds to const even though +# it is the left side of a FULL JOIN. 
+# When the left side of a FULL JOIN has const table optimizations, +# then the right side table becomes the inner-most join tab and +# also its own null complement target. sub_select started PFS batch +# mode for the inner-most tab and then, before ending it, ran the +# null complement pass, which calls sub_select again on that same tab +# and started batch mode a second time, tripping the m_psi_batch_mode +# assertion. +create table t1 (pk int not null, primary key(pk)); +create table t2 (pk int); +insert into t1 values (8); +select t1.pk as field2 from t1 full join t2 on (true) +group by field2 having field2 = 8; +field2 +8 +select t1.pk as field2 from t1 full join t2 on (true) +group by field2 having t1.pk = 8; +field2 +8 +insert into t2 values (1), (8), (null); +select t1.pk as a, t2.pk as b from t1 full join t2 on (t1.pk = t2.pk); +a b +8 8 +NULL 1 +NULL NULL +select t1.pk as a, t2.pk as b from t1 left join t2 on (t1.pk = t2.pk) +union +select t1.pk as a, t2.pk as b from t1 right join t2 on (t1.pk = t2.pk); +a b +8 8 +NULL 1 +NULL NULL +drop table t1, t2; +# A surviving FULL JOIN must have the right side ordered after the left. +# When the ON expression does not tie the right side to the left (a +# constant predicate, or a predicate over the right side alone) the +# right side had no dependency forcing that order, so the optimizer +# could place it first, either by const folding a constant key lookup +# or by cost when it was the cheaper scan, dropping the right side +# unmatched rows or producing a cross product. +create table t1 (a int); +insert into t1 values (1), (2); +create table t2 (pk int primary key, b int); +insert into t2 values (5, 50), (6, 60), (7, 70); +# Constant lookup on the right side's primary key. 
+select * from t1 full join t2 on t2.pk = 5; +a pk b +1 5 50 +2 5 50 +NULL 6 60 +NULL 7 70 +select * from t1 left join t2 on t2.pk = 5 +union +select * from t1 right join t2 on t2.pk = 5; +a pk b +1 5 50 +2 5 50 +NULL 6 60 +NULL 7 70 +# Constant predicate over a non-indexed right side column, with the +# left side as the cheaper scan so it is ordered first. +select * from t1 full join t2 on t2.b = 50; +a pk b +1 5 50 +2 5 50 +NULL 6 60 +NULL 7 70 +select * from t1 left join t2 on t2.b = 50 +union +select * from t1 right join t2 on t2.b = 50; +a pk b +1 5 50 +2 5 50 +NULL 6 60 +NULL 7 70 +# Impossible ON over the right side's primary key. +select * from t1 full join t2 on t2.pk = 5 and t2.pk = 6; +a pk b +1 NULL NULL +2 NULL NULL +NULL 5 50 +NULL 6 60 +NULL 7 70 +select * from t1 left join t2 on t2.pk = 5 and t2.pk = 6 +union +select * from t1 right join t2 on t2.pk = 5 and t2.pk = 6; +a pk b +1 NULL NULL +2 NULL NULL +NULL 5 50 +NULL 6 60 +NULL 7 70 +drop table t1, t2; +# The right side is the cheaper scan, so the optimizer ordered it +# first by cost, producing a cross product. 
+create table t1 (a int); +insert into t1 values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10); +create table t2 (pk int primary key, b int); +insert into t2 values (5, 50), (6, 60), (7, 70); +select * from t1 full join t2 on t2.b = 50; +a pk b +1 5 50 +10 5 50 +2 5 50 +3 5 50 +4 5 50 +5 5 50 +6 5 50 +7 5 50 +8 5 50 +9 5 50 +NULL 6 60 +NULL 7 70 +select * from t1 left join t2 on t2.b = 50 +union +select * from t1 right join t2 on t2.b = 50; +a pk b +1 5 50 +10 5 50 +2 5 50 +3 5 50 +4 5 50 +5 5 50 +6 5 50 +7 5 50 +8 5 50 +9 5 50 +NULL 6 60 +NULL 7 70 +drop table t1, t2; +# ==================================================================== +# Section 19: Right side of FULL JOIN must be a base table +# ==================================================================== +# +# Derived tables, views, and subqueries on the right side of a FULL +# JOIN are not supported and must produce an error. +create table t1 (a int); +create table t2 (a int); +# Derived table on the right side of a simple FULL JOIN +select * from t1 full join (select * from t2) dt on t1.a = dt.a; +ERROR HY000: FULL JOIN is only supported with base tables on the right side; 'dt' is not a base table +# View on the right side of a simple FULL JOIN +create view v1 as select * from t2; +select * from t1 full join v1 on t1.a = v1.a; +ERROR HY000: FULL JOIN is only supported with base tables on the right side; 'v1' is not a base table +drop view v1; +# Derived table on the right side of a nested FULL JOIN +create table t3 (a int); +select * from t1 full join t2 on t1.a = t2.a +full join (select * from t3) dt on t2.a = dt.a; +ERROR HY000: FULL JOIN is only supported with base tables on the right side; 'dt' is not a base table +drop table t1, t2, t3; +# ==================================================================== +# Section 20: FULL JOIN not allowed on the inner side of a +# LEFT or RIGHT JOIN +# ==================================================================== +# +# The FULL JOIN 
null-complement pass emits right-unmatched rows at +# the end of its right partner's scan; it has no mechanism to pair +# those rows with each outer row of an enclosing LEFT/RIGHT JOIN +# when the FULL JOIN sits on that join's inner (null-complemented) +# side. Rather than silently return wrong results, reject such +# queries. +# +# MariaDB's convert_right_join rewrites every RIGHT JOIN into an +# equivalent LEFT JOIN at parse time, so the notion of "inner" is +# unambiguous in the post-conversion shape -- it is the side that +# gets null-complemented. The equivalences the check relies on: +# +# t3 LEFT JOIN (FJ) : (FJ) is inner -> rejected +# t3 RIGHT JOIN (FJ) : (FJ) LEFT JOIN t3 -> t3 is inner, +# FJ is outer +# -> allowed +# (FJ) LEFT JOIN t3 : t3 is inner -> allowed +# (FJ) RIGHT JOIN t3 : t3 LEFT JOIN (FJ) -> (FJ) is inner +# -> rejected +# +# INNER JOIN has no inner/outer distinction; a FULL JOIN on either +# side of an INNER JOIN is allowed. +create table t1 (a int); +insert into t1 values (1), (2); +create table t2 (b int); +insert into t2 values (1), (3); +create table t3 (c int); +insert into t3 values (1), (4); +# t3 LEFT JOIN (FJ) -- FJ is inner -- rejected. +select * from t3 left join (t1 full join t2 on t1.a = t2.b) +on t3.c = t1.a; +ERROR HY000: FULL JOIN is not allowed on the inner side of a LEFT or RIGHT JOIN +# t3 RIGHT JOIN (FJ) -- canonicalizes to (FJ) LEFT JOIN t3; +# t3 is inner, FJ is outer -- allowed. +select * from t3 right join (t1 full join t2 on t1.a = t2.b) +on t3.c = t1.a; +c a b +1 1 1 +NULL 2 NULL +NULL NULL 3 +# t3 INNER JOIN (FJ) -- allowed. +select * from t3 inner join (t1 full join t2 on t1.a = t2.b) +on t3.c = t1.a; +c a b +1 1 1 +# t3 , (FJ) (comma = inner) -- allowed. +select * from t3, (t1 full join t2 on t1.a = t2.b) +where t3.c = t1.a; +c a b +1 1 1 +# (FJ) LEFT JOIN t3 -- t3 is inner -- allowed. 
+select * from (t1 full join t2 on t1.a = t2.b) left join t3 +on t1.a = t3.c; +a b c +1 1 1 +2 NULL NULL +NULL 3 NULL +# (FJ) RIGHT JOIN t3 -- canonicalizes to t3 LEFT JOIN (FJ); +# FJ is inner -- rejected. +select * from (t1 full join t2 on t1.a = t2.b) right join t3 +on t1.a = t3.c; +ERROR HY000: FULL JOIN is not allowed on the inner side of a LEFT or RIGHT JOIN +# Derived table with FULL JOIN on the inner side -- rejected. +select dt.a1, dt.a2, t3.c +from (select t1.a as a1, t2.b as a2 from t1 full join t2 on t1.a=t2.b) dt +right join t3 on coalesce(dt.a1, dt.a2) = t3.c; +ERROR HY000: FULL JOIN is not allowed on the inner side of a LEFT or RIGHT JOIN +# FULL JOIN in a more deeply nested LEFT JOIN's inner side -- +# rejected. Ensures the check descends through intermediate +# INNER JOIN nesting. +select * from t3 left join ((t1 full join t2 on t1.a = t2.b) join t3 t3b) +on t3.c = t1.a; +ERROR HY000: FULL JOIN is not allowed on the inner side of a LEFT or RIGHT JOIN +# CREATE VIEW must reject the same offending queries at VIEW +# creation time, not just at execution. +create view v_bad_left as +select * from t3 left join (t1 full join t2 on t1.a = t2.b) +on t3.c = t1.a; +ERROR HY000: FULL JOIN is not allowed on the inner side of a LEFT or RIGHT JOIN +create view v_bad_fjright as +select * from (t1 full join t2 on t1.a = t2.b) right join t3 +on t1.a = t3.c; +ERROR HY000: FULL JOIN is not allowed on the inner side of a LEFT or RIGHT JOIN +# CREATE VIEW with allowed FULL JOIN shapes. 
+create view v_ok_left as +select * from (t1 full join t2 on t1.a = t2.b) left join t3 +on t1.a = t3.c; +create view v_ok_right as +select * from t3 right join (t1 full join t2 on t1.a = t2.b) +on t3.c = t1.a; +create view v_ok_inner as +select * from t3 inner join (t1 full join t2 on t1.a = t2.b) +on t3.c = t1.a; +drop view v_ok_left, v_ok_right, v_ok_inner; +drop table t1, t2, t3; +# End of 12.3 tests diff --git a/mysql-test/main/full_join.test b/mysql-test/main/full_join.test new file mode 100644 index 0000000000000..7fe491e6ad119 --- /dev/null +++ b/mysql-test/main/full_join.test @@ -0,0 +1,3082 @@ +--source include/have_aria.inc +--source include/have_innodb.inc +--source include/have_sequence.inc + +--echo # +--echo # MDEV-37932 / MDEV-39014: FULL [OUTER] JOIN +--echo # +--echo # Tests are grouped by feature. Within each group, a FULL JOIN +--echo # query is generally paired with an equivalent LEFT JOIN UNION +--echo # RIGHT JOIN formulation to verify correctness. +--echo # + +--echo # ======================================================== +--echo # Section 1: Parser and syntax acceptance +--echo # +--echo # FULL JOIN, FULL OUTER JOIN, NATURAL FULL [OUTER] JOIN, and +--echo # their appearance inside views, derived tables, UNIONs, and +--echo # CTEs. +--echo # ======================================================== + +create table t1 (a int); +insert into t1 (a) values (1),(2); +create table t2 (a int); +insert into t2 (a) values (1),(3); +create table t3 (a int); +insert into t3 (a) values (1),(4); + +--echo # Basic FULL [OUTER] JOIN syntax. 
+select * from t1 full join t2 on t1.a = t2.a; +select * from t1 left join t2 on t1.a = t2.a union select * from t1 right join t2 on t1.a = t2.a; +explain extended select * from t1 full join t2 on t1.a = t2.a; + +select * from t1 full outer join t2 on t1.a = t2.a; +select * from t1 left outer join t2 on t1.a = t2.a union select * from t1 right outer join t2 on t1.a = t2.a; +explain extended select * from t1 full outer join t2 on t1.a = t2.a; + +--echo # NATURAL FULL [OUTER] JOIN. +select * from t1 natural full outer join t2; +select * from t1 natural left outer join t2 union select * from t1 natural right outer join t2; +explain extended select * from t1 natural full outer join t2; + +select * from t1 natural full join t2; +select * from t1 natural left join t2 union select * from t1 natural right join t2; +explain extended select * from t1 natural full join t2; + +--echo # FULL JOIN inside a view. +create view v1 as select t1.a as t1a, t2.a as t2a from t1 full join t2 on t1.a = t2.a; +select * from v1; +select t1.a as t1a, t2.a as t2a from t1 left join t2 on t1.a = t2.a union select t1.a as t1a, t2.a as t2a from t1 right join t2 on t1.a = t2.a; +explain extended select * from v1; +drop view v1; + +create view v1 as select t1.a as t1a, t2.a as t2a from t1 full outer join t2 on t1.a = t2.a; +select * from v1; +select t1.a as t1a, t2.a as t2a from t1 left outer join t2 on t1.a = t2.a union select t1.a as t1a, t2.a as t2a from t1 right outer join t2 on t1.a = t2.a; +explain extended select * from v1; +drop view v1; + +create view v1 as select t1.a as t1a, t2.a as t2a from t1 natural full join t2; +select * from v1; +select t1.a as t1a, t2.a as t2a from t1 natural left join t2 union select t1.a as t1a, t2.a as t2a from t1 natural right join t2; +explain extended select * from v1; +drop view v1; + +create view v1 as select t1.a as t1a, t2.a as t2a from t1 natural full outer join t2; +select * from v1; +select t1.a as t1a, t2.a as t2a from t1 natural left outer join t2 
union select t1.a as t1a, t2.a as t2a from t1 natural right outer join t2; +explain extended select * from v1; +drop view v1; + +--echo # FULL JOIN inside a derived table combined with UNION. +select * from (select t1.a from t1 full join t2 on t1.a = t2.a union select * from t1) dt; +select * from (select t1.a from t1 left join t2 on t1.a = t2.a union select t1.a from t1 right join t2 on t1.a = t2.a union select * from t1) dt; + +select * from (select t1.a from t1 full outer join t2 on t1.a = t2.a union select * from t1) dt; +select * from (select t1.a from t1 left outer join t2 on t1.a = t2.a union select t1.a from t1 right outer join t2 on t1.a = t2.a union select * from t1) dt; + +select * from (select t1.a from t1 natural full join t2 union select * from t1) dt; +select * from (select t1.a from t1 natural left join t2 union select t1.a from t1 natural right join t2 union select * from t1) dt; +explain extended select * from (select t1.a from t1 natural full join t2 union select * from t1) dt; + +select * from (select t1.a from t1 natural full outer join t2 union select * from t1) dt; +select * from (select t1.a from t1 natural left outer join t2 union select t1.a from t1 natural right outer join t2 union select * from t1) dt; +explain extended select * from (select t1.a from t1 natural full outer join t2 union select * from t1) dt; + +--echo # FULL JOIN inside a CTE. +with cte as (select t1.a from t1 natural full join t2) select * from cte; +with cte as (select t1.a from t1 natural left join t2 union select t1.a from t1 natural right join t2) select * from cte; +explain extended with cte as (select t1.a from t1 natural full join t2) select * from cte; + +--echo # FULL JOIN referencing a missing table must error cleanly. 
+--error ER_NO_SUCH_TABLE +select * from t1, t2 full join t_not_exist on t2.c=t_not_exist.e and t_not_exist.f=t1.a; + +--error ER_NO_SUCH_TABLE +select * from t1, t2 full outer join t_not_exist on t2.c=t_not_exist.e and t_not_exist.f=t1.a; + +--error ER_NO_SUCH_TABLE +select * from t1, t2 natural full join t_not_exist; + +--error ER_NO_SUCH_TABLE +select * from t1, t2 natural full outer join t_not_exist; + +--echo # FULL JOIN where one operand is a derived table (on the left). +select * from (select * from t1) dt natural full join t2; +select * from (select * from t1) dt natural left join t2 union select * from (select * from t1) dt natural right join t2; + +select * from (select * from t2) du natural full join t1; +select * from (select * from t2) du natural left join t1 union select * from (select * from t2) du natural right join t1; + +--echo # FULL JOIN with a constant ON clause. +select * from t1 full join t2 on true; +select * from t1 left join t2 on true union select * from t1 right join t2 on true; + + +--echo # ======================================================== +--echo # Section 2: Basic FULL JOIN with nested joins on the left +--echo # +--echo # Each FULL JOIN query is followed by an equivalent +--echo # LEFT/RIGHT/UNION formulation; the two must match. 
+--echo # ======================================================== + +select * from t1 inner join t2 full join t3 on t1.a=t3.a; +select * from t1 inner join t2 left join t3 on t1.a=t3.a union select * from t1 inner join t2 right join t3 on t1.a=t3.a; + +select * from t1 inner join t2 on t1.a=t2.a full join t3 on t1.a=t3.a; +select * from t1 inner join t2 on t1.a=t2.a left join t3 on t1.a=t3.a union select * from t1 inner join t2 on t1.a=t2.a right join t3 on t1.a=t3.a; + +select * from t1 cross join t2 full join t3 on t1.a=t3.a; +select * from t1 cross join t2 left join t3 on t1.a=t3.a union select * from t1 cross join t2 right join t3 on t1.a=t3.a; + +select * from t1 cross join t2 on t1.a=t2.a full join t3 on t1.a=t3.a; +select * from t1 cross join t2 on t1.a=t2.a left join t3 on t1.a=t3.a union select * from t1 cross join t2 on t1.a=t2.a right join t3 on t1.a=t3.a; + +select * from (t1 left join t2 on t1.a=t2.a) full join t3 on t1.a=t3.a; +select * from (t1 left join t2 on t1.a=t2.a) left join t3 on t1.a=t3.a union select * from (t1 left join t2 on t1.a=t2.a) right join t3 on t1.a=t3.a; + +select * from (t1 right join t2 on t1.a=t2.a) full join t3 on t1.a=t3.a; +select * from (t1 right join t2 on t1.a=t2.a) left join t3 on t1.a=t3.a union select * from (t1 right join t2 on t1.a=t2.a) right join t3 on t1.a=t3.a; + +--echo # Nested NATURAL JOIN on the left of FULL JOIN. +select * from (t1 natural join t2) full join t3 on t1.a=t3.a; +select * from (t1 natural join t2) left join t3 on t1.a=t3.a union select * from (t1 natural join t2) right join t3 on t1.a=t3.a; + +--echo # Nested FULL JOIN on the left of FULL JOIN. +--echo # The inner FULL JOIN's unmatched right-side rows must appear +--echo # in the result even when the outer FULL JOIN condition does +--echo # not reference the inner right-side table. 
+--echo # Data: t1(1,2) t2(1,3) t3(1,4) +--sorted_result +select * from (t1 full join t2 on t1.a=t2.a) full join t3 on t1.a=t3.a; +--sorted_result +select * from (t1 left join t2 on t1.a=t2.a) left join t3 on t1.a=t3.a union select * from (t1 right join t2 on t1.a=t2.a) left join t3 on t1.a=t3.a union select * from (t1 right join t2 on t1.a=t2.a) right join t3 on t1.a=t3.a; + +--echo # Chained FULL JOINs with the second ON referencing the middle table. +--sorted_result +select * from t1 full join t2 on t1.a=t2.a full join t3 on t2.a=t3.a; +--sorted_result +select * from t1 left join t2 on t1.a=t2.a left join t3 on t2.a=t3.a union select * from t1 right join t2 on t1.a=t2.a left join t3 on t2.a=t3.a union select * from (t1 left join t2 on t1.a=t2.a) right join t3 on t2.a=t3.a union select * from (t1 right join t2 on t1.a=t2.a) right join t3 on t2.a=t3.a; + +--echo # Nested FULL JOIN with duplicate rows. +create table d1 (a int); +insert into d1 values (1),(1),(2); +create table d2 (a int); +insert into d2 values (1),(3),(3); +--sorted_result +select * from (d1 full join d2 on d1.a=d2.a) full join t3 on d1.a=t3.a; +--sorted_result +select * from (d1 left join d2 on d1.a=d2.a) left join t3 on d1.a=t3.a union all select * from (d1 right join d2 on d1.a=d2.a) left join t3 on d1.a=t3.a where d1.a is null union all select * from (d1 right join d2 on d1.a=d2.a) right join t3 on d1.a=t3.a where d1.a is null and d2.a is null; +drop table d1, d2; + +drop table t1, t2, t3; + + +--echo # ======================================================== +--echo # Section 3: FULL JOIN rewrites to LEFT, RIGHT, and INNER +--echo # +--echo # When a NULL-rejecting WHERE predicate selects one or both +--echo # sides, simplify_joins() rewrites the FULL JOIN accordingly. +--echo # The (re)written form must produce the same result as the +--echo # direct LEFT/RIGHT/INNER formulation. 
+--echo # ======================================================== + +create table t1 (pk int auto_increment, x int, y int, primary key (pk)); +create table t2 (pk int auto_increment, x int, y int, primary key (pk)); +insert into t1 (x, y) values (-5,-5),(-4,-4),(-3,-3),(-2,-2),(-1,-1),(0,0),(1,1),(2,2),(3,3),(4,4),(5,5); +insert into t2 (x, y) values (-5,25),(-4,16),(-3,9),(-2,4),(-1,1),(0,0),(1,1),(2,4),(3,9),(4,16),(5,25); + +--echo # FULL to RIGHT JOIN, these two queries should be equal: +select * from t1 full join t2 on t1.y = t2.y where t2.pk is not null; +select * from t1 right join t2 on t1.y = t2.y; + +--echo # FULL to RIGHT JOIN, these two queries should be equal: +select * from t1 full join t2 on t1.x >= 0 and t1.x <= 1 and t2.x >= 0 and t2.x <= 1 where t2.pk is not null; +select * from t1 right join t2 on t1.x >= 0 and t1.x <= 1 and t2.x >= 0 and t2.x <= 1; + +--echo # FULL to INNER JOIN, these two queries should be equal: +select * from t1 full join t2 on t1.x >= 0 and t1.x <= 1 and t2.x >= 0 and t2.x <= 1 where t1.pk is not null and t2.pk is not null; +select * from t1 inner join t2 on t1.x >= 0 and t1.x <= 1 and t2.x >= 0 and t2.x <= 1; + +--echo # FULL to LEFT JOIN, these two queries should be equal: +select * from t1 full join t2 on t1.x >= 0 and t1.x <= 1 and t2.x >= 0 and t2.x <= 1 where t1.pk is not null; +select * from t1 left join t2 on t1.x >= 0 and t1.x <= 1 and t2.x >= 0 and t2.x <= 1; + +--echo # FULL NATURAL to INNER JOIN, these two queries should be equal: +select * from t1 natural full join t2 where t1.pk is not null and t2.pk is not null; +select * from t1 inner join t2 on t1.x = t2.x and t1.y = t2.y; + +--echo # FULL NATURAL to LEFT JOIN, these two queries should be equal: +select * from t1 natural full join t2 where t1.pk is not null; +select * from t1 left join t2 on t2.pk = t1.pk and t2.x = t1.x and t2.y = t1.y; + +--echo # FULL NATURAL to RIGHT JOIN +select * from t1 natural full join t2 where t2.pk is not null; +select * from t1 
right join t2 on t1.pk = t2.pk and t1.x = t2.x and t1.y = t2.y; + +--sorted_result +select * from t1 full join t2 on t1.x >= 0 and t1.x <= 1 and t2.x >= 0 and t2.x <= 1; +--sorted_result +select * from t1 left join t2 on t1.x >= 0 and t1.x <= 1 and t2.x >= 0 and t2.x <= 1 union select * from t1 right join t2 on t1.x >= 0 and t1.x <= 1 and t2.x >= 0 and t2.x <= 1; + +--sorted_result +select * from t1 natural full join t2; +--sorted_result +select * from t1 natural left join t2 union select * from t1 natural right join t2; + +drop table t1, t2; + +--echo # Rewrites with nested joins. +create table t1 (v int); +insert into t1 (v) values (1); +create table t2 (v int); +insert into t2 (v) values (2); +create table t3 (v int); +insert into t3 (v) values (3); + +--echo # (FULL)FULL to (INNER)INNER JOIN +select * from t1 full join t2 on t1.v = t2.v full join t3 on t2.v = t3.v where t1.v is not null and t2.v is not null and t3.v is not null; +select * from t1 inner join t2 on t1.v = t2.v inner join t3 on t2.v = t3.v; + +--echo # (FULL)FULL to (RIGHT)LEFT JOIN +select * from t1 full join t2 on t1.v = t2.v full join t3 on t1.v = t3.v where t2.v is not null; +select * from t1 right join t2 on t1.v = t2.v left join t3 on t1.v = t3.v; + +--echo # (FULL)FULL to (LEFT)LEFT JOIN +select * from t1 full join t2 on t1.v = t2.v full join t3 on t1.v = t3.v where t1.v is not null; +select * from t1 left join t2 on t2.v = t1.v left join t3 on t3.v = t1.v; + +--echo # (FULL)LEFT to (LEFT)LEFT JOIN +select * from t1 full join t2 on t1.v = t2.v left join t3 on t2.v = t3.v where t1.v is not null; +select * from t1 left join t2 on t1.v = t2.v left join t3 on t2.v = t3.v; + +--echo # (FULL)LEFT to (RIGHT)LEFT JOIN +select * from t1 full join t2 on t1.v = t2.v left join t3 on t2.v = t3.v where t2.v is not null; +select * from t1 right join t2 on t1.v = t2.v left join t3 on t3.v = t2.v; + +--echo # (LEFT)FULL to (LEFT)RIGHT JOIN +select * from t1 left join t2 on t1.v = t2.v full join t3 on t2.v = 
t3.v where t3.v is not null; +select * from t1 left join t2 on t1.v = t2.v right join t3 on t2.v = t3.v; + +--echo # (LEFT)FULL to (LEFT)LEFT JOIN +insert into t1 (v) values (2),(3); +insert into t2 (v) values (1); +truncate t3; +insert into t3 (v) values (1); +select * from t1; +select * from t2; +select * from t3; +select * from t1 left join t2 on t1.v = t2.v full join t3 on t2.v = t3.v where t3.v = 1; +select * from t3 left join t1 on t1.v = 1 left join t2 on t2.v = 1; + +--echo # FULL to INNER, two variables. +select * from (select t1.v from t1 full join t2 on t1.v = t2.v where t1.v > 1 and t2.v > 1) as dt; +select t1.v from t2 inner join t1 where t2.v = t1.v and t1.v > 1 and t1.v > 1; + +--echo # FULL to INNER with a UNION. +select t1.v from t1 full join t2 on t1.v = t2.v where t1.v > 1 and t2.v > 1 union select * from t1; +select t1.v from t2 inner join t1 where t1.v = t2.v and t2.v > 1 and t2.v > 1 union select * from t1; + +drop table t1, t2, t3; + + +--echo # ======================================================== +--echo # Section 4: NATURAL FULL JOIN and COALESCE +--echo # +--echo # Common columns surface as COALESCE expressions rather than +--echo # plain fields. 
+--echo # ======================================================== + +create table t1 (a int, b int); +create table t2 (a int, b int); +create table t3 (a int, b int); +insert into t1 (a,b) values (1,1),(2,2); +insert into t2 (a,b) values (1,1),(3,3); +insert into t3 (a,b) values (3,3),(4,4); + +select * from t1 natural full join t2 where + t1.a is not null and t1.b is not null and + t2.a is not null and t2.b is not null; + +select * from t1 natural left join t2 where + t1.a is not null and t1.b is not null and + t2.a is not null and t2.b is not null +union +select * from t1 natural right join t2 where + t1.a is not null and t1.b is not null and + t2.a is not null and t2.b is not null; + +explain extended +select * from t1 natural full join t2 where + t1.a is not null and t1.b is not null and + t2.a is not null and t2.b is not null; + +select coalesce(t1.a, t2.a) AS a, coalesce(t1.b, t2.b) AS b from + t2 join t1 where + t2.a = t1.a and t2.b = t1.b and + t1.a is not null and t1.b is not null; + + +select * from t1 natural full join t2 where t1.a is not null; + +select * from t1 natural left join t2 where t1.a is not null +union +select * from t1 natural right join t2 where t1.a is not null; + +explain extended select * from t1 natural full join t2 where t1.a is not null; + +select coalesce(t1.a, t2.a) AS a, coalesce(t1.b, t2.b) AS b from + t1 left join t2 on t2.a = t1.a and t2.b = t1.b where t1.a is not null; + + +select * from t1 natural full join t2 where t2.a is not null; + +select * from t1 natural left join t2 where t2.a is not null +union +select * from t1 natural right join t2 where t2.a is not null; + +explain extended select * from t1 natural full join t2 where t2.a is not null; + +select coalesce(t1.a, t2.a) AS a, coalesce(t1.b, t2.b) AS b from + t2 left join t1 on t1.a = t2.a and t1.b = t2.b where t2.a is not null; + +select * from (t1 natural join t2) right join t2 t3 on t1.a=t3.a; + +# Disabling VIEW protocol here because duplicate column names are +# 
replaced by generating unique names that have 'My_exp_' prefixes +# in VIEW protocol mode. That changes the result file output. +--disable_view_protocol +# `(t1 natural full join t2) right join t3` rewritten as +# `t3 LEFT JOIN (t1 natural full join t2)`, putting the FULL JOIN on +# the inner side of the outer join -- rejected. +--error ER_FULL_JOIN_NOT_ALLOWED_IN_OUTER_JOIN +select * from (t1 natural full join t2) right join t2 t3 on t1.a=t3.a; +--enable_view_protocol + +select t1.a from t1 natural full join (t2 natural join t3) where + t1.a is not null; + +select t1.a from t1 natural left join (t2 natural join t3) where t1.a is not null +union +select t1.a from t1 natural right join (t2 natural join t3) where t1.a is not null; + +explain extended select t1.a from + t1 natural full join (t2 natural join t3) where t1.a is not null; + +select t1.a AS a from t1 left join (t2 join t3) on + t2.a = t1.a and t3.a = t1.a and t2.b = t1.b and t3.b = t1.b where + t1.a is not null; + + +explain extended select *, avg(t2.a) from t1 natural full join t2 group by t1.a; + + +select *, avg(t2.a) from t1 natural full join t2 where t1.a is not null group by t1.a; + +--echo # UNION equivalent of the aggregate above. 
+select dt.a, dt.b, avg(dt.t2a) as `avg(t2.a)` from ( + select t1.a as a, t1.b as b, t2.a as t2a from t1 natural left join t2 + union + select t1.a, t1.b, t2.a from t1 natural right join t2) dt +where dt.a is not null group by dt.a; + +explain extended select *, avg(t2.a) from t1 natural full join t2 where + t1.a is not null group by t1.a; + +select coalesce(t1.a, t2.a) AS a, coalesce(t1.b, t2.b) AS b, + avg(t2.a) AS avg_t2_a from + t1 left join t2 on t2.a = t1.a and t2.b = t1.b where + t1.a is not null group by t1.a; + +--echo # -------------------------------------------------------- +--echo # 4b: unqualified references see the COALESCE +--echo # +--echo # An unqualified name of a NATURAL FULL JOIN common column +--echo # resolves to COALESCE(left_col, right_col) wherever it +--echo # appears -- WHERE, HAVING, GROUP BY, ORDER BY, USING, +--echo # views, prepared statements, and outer queries over a +--echo # derived table. Qualified names like t1.a stay raw. +--echo # -------------------------------------------------------- + +drop table t1, t2, t3; +create table t1 (a int, b int); +create table t2 (a int, c int); +insert into t1 (a,b) values (1,10),(2,20); +insert into t2 (a,c) values (2,200),(3,300); + +# Right-only row: a = 3 lives only in t2. The WHERE must see the +# COALESCE, otherwise the right-only (3, NULL, 300) row is filtered +# out because raw t1.a is NULL for it. +select * from t1 natural full join t2 where a = 3; +select coalesce(t1.a, t2.a) AS a, t1.b, t2.c from + t1 left join t2 on t1.a = t2.a where coalesce(t1.a, t2.a) = 3 +union +select coalesce(t1.a, t2.a) AS a, t1.b, t2.c from + t1 right join t2 on t1.a = t2.a where coalesce(t1.a, t2.a) = 3; + +# Left-only row: a = 1 lives only in t1. 
+select * from t1 natural full join t2 where a = 1; +select coalesce(t1.a, t2.a) AS a, t1.b, t2.c from + t1 left join t2 on t1.a = t2.a where coalesce(t1.a, t2.a) = 1 +union +select coalesce(t1.a, t2.a) AS a, t1.b, t2.c from + t1 right join t2 on t1.a = t2.a where coalesce(t1.a, t2.a) = 1; + +# Matched row. +select * from t1 natural full join t2 where a = 2; + +# Qualified t2.a stays raw. Raw t2.a is NULL only on the left-only +# row (a = 1), so IS NULL filters down to that row. +select * from t1 natural full join t2 where t2.a is null; + +# GROUP BY and HAVING also see the COALESCE. Without the fix the +# right-only and left-only rows would be grouped under raw t1.a = NULL. +--sorted_result +select a, count(*) from t1 natural full join t2 group by a; +select a, count(*) from t1 natural full join t2 group by a having a = 3; + +# ORDER BY of the unqualified column. +select a from t1 natural full join t2 order by a; + +# FULL JOIN ... USING(col) goes through the same join_columns path. +select * from t1 full join t2 using(a) where a = 3; + +# Derived table over a NATURAL FULL JOIN, then outer WHERE references +# the coalesced column by its propagated name. +select * from (select * from t1 natural full join t2) dt where a = 3; + +# Subquery in WHERE -- the outer's a still resolves to the COALESCE. +select * from t1 natural full join t2 where a in (select 3); + +# View over a NATURAL FULL JOIN: queries against the view see the +# coalesced column. +create view v1 as select * from t1 natural full join t2; +select * from v1 where a = 3; +drop view v1; + +# Prepared statement: execute twice to confirm the substitution is +# applied on every execute, not just on prepare. +prepare stmt from 'select * from t1 natural full join t2 where a = ?'; +set @v = 3; +execute stmt using @v; +set @v = 1; +execute stmt using @v; +set @v = 2; +execute stmt using @v; +deallocate prepare stmt; + +# Stored procedure: parameter binds the coalesced column. 
+delimiter |; +create procedure sp_find_a(in p int) + select * from t1 natural full join t2 where a = p| +delimiter ;| +call sp_find_a(3); +call sp_find_a(1); +drop procedure sp_find_a; + +# NATURAL LEFT / RIGHT JOIN do not coalesce -- unqualified a stays +# bound to t1.a (LEFT) or t2.a (RIGHT) as before. +select * from t1 natural left join t2 where a = 1; +select * from t1 natural right join t2 where a = 3; + +# (NFJ) joined to a third table. Qualified t1.a still raw on the +# left side of the inner join. +create table t3 (a int, d int); +insert into t3 (a,d) values (1,1000),(3,3000); +# Disabling VIEW protocol here because the coalesced a and t3.a are +# both named a, so view creation renames one with a 'My_exp_' prefix, +# changing the result file output. +--disable_view_protocol +select * from (t1 natural full join t2) join t3 on t1.a = t3.a + order by t3.a; +--enable_view_protocol + +drop table t1, t2, t3; + +--echo # -------------------------------------------------------- +--echo # 4c: chained NATURAL FULL JOIN nests the COALESCE +--echo # +--echo # (t1 NFJ t2) NFJ t3 is left-associative, so the common +--echo # column of the inner join is COALESCE(t1.a, t2.a) and the +--echo # outer join coalesces that with t3.a, giving +--echo # COALESCE(COALESCE(t1.a, t2.a), t3.a). The inner COALESCE +--echo # must drive both the output column and the synthesized +--echo # outer join equality, otherwise a row that came only from +--echo # t2 carries a NULL a into the outer join and fails to match +--echo # an equal t3 row. 
+--echo # -------------------------------------------------------- + +create table t1 (a int, b int); +create table t2 (a int, c int); +create table t3 (a int, d int); +create table t4 (a int, e int); +insert into t1 (a,b) values (1,10),(2,20); +insert into t2 (a,c) values (2,200),(5,500),(6,600); +insert into t3 (a,d) values (5,5000),(4,4000); +insert into t4 (a,e) values (6,60000),(7,70000); + +--echo # No UNION companion: the LEFT/RIGHT permutation UNION form +--echo # over-approximates for chained joins (see the note above +--echo # Section 6). The recorded result is the oracle. + +--echo # a=5 lives in t2 and t3; the inner COALESCE carries 5 from +--echo # t2 into the outer equality so the two rows match. a=6 is +--echo # t2-only with no t3 match and must surface with a=6, not +--echo # NULL. a=1 is t1-only, a=4 is t3-only. +--sorted_result +select a, b, c, d from (t1 natural full join t2) natural full join t3; + +--echo # The unqualified a in WHERE sees the nested COALESCE. +--sorted_result +select a, b, c, d from (t1 natural full join t2) natural full join t3 + where a = 6; + +--echo # Three levels deep: a=6 originates in t2, survives the t3 +--echo # level as the nested COALESCE, and matches t4 at the third +--echo # level. a=7 is t4-only. +--sorted_result +select a, b, c, d, e from + ((t1 natural full join t2) natural full join t3) natural full join t4; + +drop table t1, t2, t3, t4; + +--echo # -------------------------------------------------------- +--echo # 4d: node aliasing of the shared COALESCE +--echo # +--echo # The COALESCE built for a common column is shared by every +--echo # reference to that column. The oracle for each query is the +--echo # explicit FULL JOIN with COALESCE(t1.col, t2.col) written out, +--echo # so the two row sets match when the unqualified name resolves +--echo # to that COALESCE. 
+--echo # -------------------------------------------------------- + +create table t1 (a int, b int); +create table t2 (a int, c int); +insert into t1 (a,b) values (1,10),(2,20); +insert into t2 (a,c) values (2,200),(3,300); + +--echo # An explicit alias must name only its own reference. The +--echo # COALESCE is shared, so renaming it in place would change the +--echo # column name for resolution and would make a second reference +--echo # by the original name fail. The original name and an aliased +--echo # reference appear together here with distinct names. +--sorted_result +select a, a as a2 from t1 natural full join t2; +--sorted_result +select coalesce(t1.a, t2.a) as a, coalesce(t1.a, t2.a) as a2 + from t1 full join t2 on t1.a = t2.a; + +--echo # Aliased reference first, then the original name in WHERE. +--sorted_result +select a as a2 from t1 natural full join t2 where a >= 2; +--sorted_result +select coalesce(t1.a, t2.a) as a2 from t1 full join t2 on t1.a = t2.a + where coalesce(t1.a, t2.a) >= 2; + +--echo # GROUP BY, COUNT, and SUM over the coalesced column. +--sorted_result +select a, count(*) as cnt, sum(a) as s from t1 natural full join t2 + group by a; +--sorted_result +select coalesce(t1.a, t2.a) as a, count(*) as cnt, + sum(coalesce(t1.a, t2.a)) as s + from t1 full join t2 on t1.a = t2.a group by coalesce(t1.a, t2.a); + +--echo # Aggregates directly over the coalesced column, no GROUP BY. +select count(distinct a) as cd, min(a) as mn, max(a) as mx + from t1 natural full join t2; +select count(distinct coalesce(t1.a, t2.a)) as cd, + min(coalesce(t1.a, t2.a)) as mn, max(coalesce(t1.a, t2.a)) as mx + from t1 full join t2 on t1.a = t2.a; + +--echo # HAVING over the coalesced column. 
+--sorted_result +select a, count(*) as cnt from t1 natural full join t2 + group by a having a >= 2; +--sorted_result +select * from (select coalesce(t1.a, t2.a) as a, count(*) as cnt + from t1 full join t2 on t1.a = t2.a group by coalesce(t1.a, t2.a)) dt + where a >= 2; + +--echo # Window functions over the coalesced column. +select a, row_number() over (order by a) as rn, count(*) over () as tot + from t1 natural full join t2 order by a; +select coalesce(t1.a, t2.a) as a, + row_number() over (order by coalesce(t1.a, t2.a)) as rn, + count(*) over () as tot + from t1 full join t2 on t1.a = t2.a order by coalesce(t1.a, t2.a); + +--echo # DISTINCT over the coalesced column. +--sorted_result +select distinct a from t1 natural full join t2; +--sorted_result +select distinct coalesce(t1.a, t2.a) as a + from t1 full join t2 on t1.a = t2.a; + +--echo # The same column in SELECT, WHERE, and ORDER BY at once. +select a, a + 0 as a2 from t1 natural full join t2 + where a is not null order by a desc; +select coalesce(t1.a, t2.a) as a, coalesce(t1.a, t2.a) + 0 as a2 + from t1 full join t2 on t1.a = t2.a + where coalesce(t1.a, t2.a) is not null + order by coalesce(t1.a, t2.a) desc; + +drop table t1, t2; + +--echo # -------------------------------------------------------- +--echo # 4e: multi-column NATURAL FULL JOIN +--echo # +--echo # Two common columns each get their own COALESCE. Unqualified +--echo # references to either resolve to its COALESCE, including under +--echo # aliases. 
+--echo # -------------------------------------------------------- + +create table t1 (a int, b int, x int); +create table t2 (a int, b int, y int); +insert into t1 values (1,1,10),(2,2,20); +insert into t2 values (2,2,200),(3,3,300); + +--sorted_result +select a, b, x, y from t1 natural full join t2; +--sorted_result +select coalesce(t1.a, t2.a) as a, coalesce(t1.b, t2.b) as b, x, y + from t1 full join t2 on t1.a = t2.a and t1.b = t2.b; + +--echo # Aliased references to both common columns, with the original +--echo # names used again in WHERE. +--sorted_result +select a as ka, b as kb from t1 natural full join t2 + where a >= 2 and b >= 2; +--sorted_result +select coalesce(t1.a, t2.a) as ka, coalesce(t1.b, t2.b) as kb + from t1 full join t2 on t1.a = t2.a and t1.b = t2.b + where coalesce(t1.a, t2.a) >= 2 and coalesce(t1.b, t2.b) >= 2; + +drop table t1, t2; + + +--echo # ======================================================== +--echo # Section 5: NULL handling +--echo # +--echo # NULL = NULL is false, so rows whose join key is NULL never +--echo # match and must surface from their side unmatched. The +--echo # NULL-safe <=> operator matches NULL to NULL. +--echo # ======================================================== + +create table t1 (a int, b int); +insert into t1 values (NULL, NULL), (1, 10), (NULL, NULL); +create table t2 (a int, b int); +insert into t2 values (NULL, NULL), (2, 20), (NULL, NULL); + +--echo # Both sides have all-NULL rows; no match possible. +--sorted_result +select * from t1 full join t2 on t1.a = t2.a; +--echo # The UNION formulation eliminates duplicate all-NULL rows; this +--echo # is expected to differ. PostgreSQL agrees with the FULL JOIN +--echo # result above. +--sorted_result +select * from t1 left join t2 on t1.a = t2.a union select * from t1 right join t2 on t1.a = t2.a; + +--echo # IS NULL in the ON clause — all-NULL rows now match. 
+--sorted_result +select * from t1 full join t2 on t1.a is null and t2.a is null; +--sorted_result +select * from t1 left join t2 on t1.a is null and t2.a is null union select * from t1 right join t2 on t1.a is null and t2.a is null; + +--echo # NULL-safe equality operator (<=>). +--sorted_result +select * from t1 full join t2 on t1.a <=> t2.a; +--sorted_result +select * from t1 left join t2 on t1.a <=> t2.a union select * from t1 right join t2 on t1.a <=> t2.a; + +--echo # Table with only all-NULL rows on one side. +create table t3 (a int, b int); +insert into t3 values (NULL, NULL), (NULL, NULL); +--sorted_result +select * from t1 full join t3 on t1.a = t3.a; +--sorted_result +select * from t1 left join t3 on t1.a = t3.a union select * from t1 right join t3 on t1.a = t3.a; + +drop table t1, t2, t3; + + +--echo # ======================================================== +--echo # Section 6: Deeply nested FULL JOINs +--echo # ======================================================== + +create table t1 (a int); +insert into t1 values (1), (2); +create table t2 (a int); +insert into t2 values (2), (3); +create table t3 (a int); +insert into t3 values (3), (4); +create table t4 (a int); +insert into t4 values (1), (4); +create table t5 (a int); +insert into t5 values (2), (5); + +--echo # The LEFT/RIGHT-permutation UNION form is not a valid oracle +--echo # for chained FULL JOINs: it over-approximates by emitting a +--echo # right-side null-complement row for C rows that were already +--echo # matched against the inner FJ's own null-complement row. Per +--echo # SQL:2016 §7.10, (A FJ B) FJ C is left-associative and treats +--echo # R1 = (A FJ B) as a single relation; a C row matched against +--echo # any R1 row (including a null-complement row) is matched, and +--echo # must not appear again as unmatched. Therefore the chained +--echo # cases below have no UNION companion; the recorded result is +--echo # the oracle. + +--echo # Three-level nested FULL JOINs. 
+--sorted_result +select * from t1 + full join t2 on t1.a = t2.a + full join t3 on t2.a = t3.a; + +--echo # Four-level chained FULL JOINs. +--sorted_result +select * from t1 + full join t2 on t1.a = t2.a + full join t3 on t2.a = t3.a + full join t4 on t3.a = t4.a; + +--echo # Mixed FULL and INNER joins, deeply nested. +--sorted_result +select * from t1 + inner join t2 on t1.a = t2.a + full join t3 on t2.a = t3.a + full join t4 on t3.a = t4.a; + +drop table t1, t2, t3, t4, t5; + + +--echo # ======================================================== +--echo # Section 7: Mixed data types +--echo # ======================================================== + +create table t1 ( + id int, + str_val varchar(20), + dec_val decimal(10,2), + dt_val date +); +insert into t1 values + (1, 'hello', 10.50, '2024-01-01'), + (2, 'world', 20.75, '2024-06-15'), + (3, NULL, NULL, NULL); + +create table t2 ( + id int, + str_val varchar(20), + dec_val decimal(10,2), + dt_val date +); +insert into t2 values + (2, 'WORLD', 20.75, '2024-06-15'), + (4, 'test', 99.99, '2025-12-31'), + (NULL, NULL, NULL, NULL); + +--echo # FULL JOIN on integer column with mixed-type rows. +--sorted_result +select * from t1 full join t2 on t1.id = t2.id; +--sorted_result +select * from t1 left join t2 on t1.id = t2.id union select * from t1 right join t2 on t1.id = t2.id; + +--echo # FULL JOIN on varchar column (case-sensitive match depends on collation). +--sorted_result +select t1.id as id1, t1.str_val as sv1, t2.id as id2, t2.str_val as sv2 +from t1 full join t2 on t1.str_val = t2.str_val; +--sorted_result +select t1.id as id1, t1.str_val as sv1, t2.id as id2, t2.str_val as sv2 +from t1 left join t2 on t1.str_val = t2.str_val +union +select t1.id as id1, t1.str_val as sv1, t2.id as id2, t2.str_val as sv2 +from t1 right join t2 on t1.str_val = t2.str_val; + +--echo # FULL JOIN on decimal column. 
+--sorted_result +select t1.id as id1, t1.dec_val as d1, t2.id as id2, t2.dec_val as d2 +from t1 full join t2 on t1.dec_val = t2.dec_val; +--sorted_result +select t1.id as id1, t1.dec_val as d1, t2.id as id2, t2.dec_val as d2 +from t1 left join t2 on t1.dec_val = t2.dec_val +union +select t1.id as id1, t1.dec_val as d1, t2.id as id2, t2.dec_val as d2 +from t1 right join t2 on t1.dec_val = t2.dec_val; + +--echo # FULL JOIN on date column. +--sorted_result +select t1.id as id1, t1.dt_val as dt1, t2.id as id2, t2.dt_val as dt2 +from t1 full join t2 on t1.dt_val = t2.dt_val; +--sorted_result +select t1.id as id1, t1.dt_val as dt1, t2.id as id2, t2.dt_val as dt2 +from t1 left join t2 on t1.dt_val = t2.dt_val +union +select t1.id as id1, t1.dt_val as dt1, t2.id as id2, t2.dt_val as dt2 +from t1 right join t2 on t1.dt_val = t2.dt_val; + +--echo # FULL JOIN with cross-type comparison (int vs decimal). +create table t3 (a int); +insert into t3 values (1), (2), (3); +create table t4 (a decimal(5,1)); +insert into t4 values (1.0), (2.5), (3.0); +--sorted_result +select * from t3 full join t4 on t3.a = t4.a; +--sorted_result +select * from t3 left join t4 on t3.a = t4.a union select * from t3 right join t4 on t3.a = t4.a; + +--echo # FULL JOIN with cross-type comparison (int vs varchar). +create table t5 (a varchar(10)); +insert into t5 values ('1'), ('2'), ('four'); +--sorted_result +select * from t3 full join t5 on t3.a = t5.a; +--sorted_result +select * from t3 left join t5 on t3.a = t5.a union select * from t3 right join t5 on t3.a = t5.a; + +--echo # FULL JOIN on multiple mixed-type columns simultaneously. 
+--sorted_result +select * from t1 full join t2 + on t1.id = t2.id and t1.dec_val = t2.dec_val; +--sorted_result +select * from t1 left join t2 + on t1.id = t2.id and t1.dec_val = t2.dec_val +union +select * from t1 right join t2 + on t1.id = t2.id and t1.dec_val = t2.dec_val; + +drop table t1, t2, t3, t4, t5; + + +--echo # ======================================================== +--echo # Section 8: Aggregates with FULL JOIN +--echo # ======================================================== + +create table t1 (grp char(1), val int); +insert into t1 values ('a',10), ('a',20), ('b',30), ('c',40); +create table t2 (grp char(1), val int); +insert into t2 values ('b',100), ('c',200), ('c',300), ('d',400); + +--echo # COUNT and SUM over a FULL JOIN. +--sorted_result +select coalesce(t1.grp, t2.grp) as grp, + count(*) as cnt, + sum(t1.val) as s1, + sum(t2.val) as s2 +from t1 full join t2 on t1.grp = t2.grp +group by coalesce(t1.grp, t2.grp); +--sorted_result +select coalesce(dt.grp1, dt.grp2) as grp, + count(*) as cnt, + sum(dt.val1) as s1, + sum(dt.val2) as s2 +from (select t1.grp as grp1, t1.val as val1, t2.grp as grp2, t2.val as val2 + from t1 left join t2 on t1.grp = t2.grp + union + select t1.grp, t1.val, t2.grp, t2.val + from t1 right join t2 on t1.grp = t2.grp) dt +group by coalesce(dt.grp1, dt.grp2); + +--echo # AVG and MAX over a FULL JOIN. +--sorted_result +select coalesce(t1.grp, t2.grp) as grp, + avg(t1.val) as avg1, + max(t2.val) as max2 +from t1 full join t2 on t1.grp = t2.grp +group by coalesce(t1.grp, t2.grp); +--sorted_result +select coalesce(dt.grp1, dt.grp2) as grp, + avg(dt.val1) as avg1, + max(dt.val2) as max2 +from (select t1.grp as grp1, t1.val as val1, t2.grp as grp2, t2.val as val2 + from t1 left join t2 on t1.grp = t2.grp + union + select t1.grp, t1.val, t2.grp, t2.val + from t1 right join t2 on t1.grp = t2.grp) dt +group by coalesce(dt.grp1, dt.grp2); + +--echo # HAVING clause with FULL JOIN aggregate. 
+--sorted_result +select coalesce(t1.grp, t2.grp) as grp, + count(*) as cnt +from t1 full join t2 on t1.grp = t2.grp +group by coalesce(t1.grp, t2.grp) +having count(*) > 1; +--sorted_result +select coalesce(t1.grp, t2.grp) as grp, + count(*) as cnt from t1 +left join t2 on t1.grp = t2.grp group by +coalesce(t1.grp, t2.grp) having count(*) > 1 +union +select coalesce(t1.grp, t2.grp) as grp, + count(*) as cnt from t1 +right join t2 on t1.grp = t2.grp group by +coalesce(t1.grp, t2.grp) having count(*) > 1; + +--echo # COUNT(*) with no GROUP BY — total row count of the FULL JOIN. +select count(*) from t1 full join t2 on t1.grp = t2.grp; +select count(*) from (select t1.grp as g1, t1.val as v1, t2.grp as g2, t2.val as v2 + from t1 left join t2 on t1.grp = t2.grp + union + select t1.grp, t1.val, t2.grp, t2.val + from t1 right join t2 on t1.grp = t2.grp) dt; + +--echo # GROUP_CONCAT over a FULL JOIN. +--sorted_result +select coalesce(t1.grp, t2.grp) as grp, + group_concat(t1.val order by t1.val) as vals1, + group_concat(t2.val order by t2.val) as vals2 +from t1 full join t2 on t1.grp = t2.grp +group by coalesce(t1.grp, t2.grp); +--sorted_result +select coalesce(dt.grp1, dt.grp2) as grp, + group_concat(dt.val1 order by dt.val1) as vals1, + group_concat(dt.val2 order by dt.val2) as vals2 +from (select t1.grp as grp1, t1.val as val1, t2.grp as grp2, t2.val as val2 + from t1 left join t2 on t1.grp = t2.grp + union + select t1.grp, t1.val, t2.grp, t2.val + from t1 right join t2 on t1.grp = t2.grp) dt +group by coalesce(dt.grp1, dt.grp2); + +drop table t1, t2; + + +--echo # ======================================================== +--echo # Section 9: Window functions with FULL JOIN +--echo # +--echo # Adapted from main.win: ROW_NUMBER, RANK, DENSE_RANK, +--echo # LEAD/LAG, and aggregate windows (SUM/COUNT with frames) +--echo # applied to FULL JOIN result sets. 
+--echo # ======================================================== + +create table t1 (a int, grp int, val int); +insert into t1 values (1,10,100), (2,10,200), (3,20,300), (4,20,400); +create table t2 (a int, grp int, val int); +insert into t2 values (3,20,3000), (4,20,4000), (5,30,5000), (6,30,6000); +# a=1,2 exist only in t1, a=3,4 in both tables, a=5,6 only in t2. + +--echo # ROW_NUMBER() over a FULL JOIN ordered by the coalesced key. +select coalesce(t1.a, t2.a) as a, + row_number() over (order by coalesce(t1.a, t2.a)) as rn +from t1 full join t2 on t1.a = t2.a +order by a; + +--echo # Equivalent: row-number over LEFT UNION RIGHT. +select a, row_number() over (order by a) as rn +from (select coalesce(t1.a, t2.a) as a + from t1 left join t2 on t1.a = t2.a + union + select coalesce(t1.a, t2.a) + from t1 right join t2 on t1.a = t2.a) u +order by a; + +--echo # RANK() with PARTITION BY over FULL JOIN. +--sorted_result +select coalesce(t1.grp, t2.grp) as grp, + coalesce(t1.val, 0) + coalesce(t2.val, 0) as v, + rank() over (partition by coalesce(t1.grp, t2.grp) + order by coalesce(t1.val, 0) + coalesce(t2.val, 0)) as rk +from t1 full join t2 on t1.a = t2.a; +--sorted_result +select coalesce(t1grp, t2grp) as grp, + coalesce(t1val, 0) + coalesce(t2val, 0) as v, + rank() over (partition by coalesce(t1grp, t2grp) + order by coalesce(t1val, 0) + coalesce(t2val, 0)) as rk +from (select t1.a as t1a, t1.grp as t1grp, t1.val as t1val, + t2.a as t2a, t2.grp as t2grp, t2.val as t2val + from t1 left join t2 on t1.a = t2.a + union + select t1.a, t1.grp, t1.val, t2.a, t2.grp, t2.val + from t1 right join t2 on t1.a = t2.a) u; + +--echo # DENSE_RANK() over FULL JOIN. 
+--sorted_result +select coalesce(t1.grp, t2.grp) as grp, + dense_rank() over (order by coalesce(t1.grp, t2.grp)) as dr +from t1 full join t2 on t1.a = t2.a; +--sorted_result +select coalesce(t1grp, t2grp) as grp, + dense_rank() over (order by coalesce(t1grp, t2grp)) as dr +from (select t1.a as t1a, t1.grp as t1grp, t1.val as t1val, + t2.a as t2a, t2.grp as t2grp, t2.val as t2val + from t1 left join t2 on t1.a = t2.a + union + select t1.a, t1.grp, t1.val, t2.a, t2.grp, t2.val + from t1 right join t2 on t1.a = t2.a) u; + +--echo # LEAD() and LAG() over FULL JOIN. +select coalesce(t1.a, t2.a) as a, + lag(coalesce(t1.a, t2.a)) + over (order by coalesce(t1.a, t2.a)) as prev_a, + lead(coalesce(t1.a, t2.a)) + over (order by coalesce(t1.a, t2.a)) as next_a +from t1 full join t2 on t1.a = t2.a +order by a; +select coalesce(t1a, t2a) as a, + lag(coalesce(t1a, t2a)) + over (order by coalesce(t1a, t2a)) as prev_a, + lead(coalesce(t1a, t2a)) + over (order by coalesce(t1a, t2a)) as next_a +from (select t1.a as t1a, t1.grp as t1grp, t1.val as t1val, + t2.a as t2a, t2.grp as t2grp, t2.val as t2val + from t1 left join t2 on t1.a = t2.a + union + select t1.a, t1.grp, t1.val, t2.a, t2.grp, t2.val + from t1 right join t2 on t1.a = t2.a) u +order by a; + +--echo # SUM() window with rows-BETWEEN frame over FULL JOIN. 
+# The final ORDER BY a fixes the row order for both SUM() window queries, so --sorted_result is not used. +select coalesce(t1.a, t2.a) as a, + coalesce(t1.val, 0) + coalesce(t2.val, 0) as v, + sum(coalesce(t1.val, 0) + coalesce(t2.val, 0)) + over (order by coalesce(t1.a, t2.a) + rows between 1 preceding and 1 following) as window_sum +from t1 full join t2 on t1.a = t2.a +order by a; +select coalesce(t1a, t2a) as a, + coalesce(t1val, 0) + coalesce(t2val, 0) as v, + sum(coalesce(t1val, 0) + coalesce(t2val, 0)) + over (order by coalesce(t1a, t2a) + rows between 1 preceding and 1 following) as window_sum +from (select t1.a as t1a, t1.grp as t1grp, t1.val as t1val, + t2.a as t2a, t2.grp as t2grp, t2.val as t2val + from t1 left join t2 on t1.a = t2.a + union + select t1.a, t1.grp, t1.val, t2.a, t2.grp, t2.val + from t1 right join t2 on t1.a = t2.a) u +order by a; + +--echo # COUNT() window partitioned by group, ordered within group. +--sorted_result +select coalesce(t1.grp, t2.grp) as grp, + coalesce(t1.a, t2.a) as a, + count(*) over (partition by coalesce(t1.grp, t2.grp) + order by coalesce(t1.a, t2.a) + rows between unbounded preceding and current row) as cnt +from t1 full join t2 on t1.a = t2.a; +--sorted_result +select coalesce(t1grp, t2grp) as grp, + coalesce(t1a, t2a) as a, + count(*) over (partition by coalesce(t1grp, t2grp) + order by coalesce(t1a, t2a) + rows between unbounded preceding and current row) as cnt +from (select t1.a as t1a, t1.grp as t1grp, t1.val as t1val, + t2.a as t2a, t2.grp as t2grp, t2.val as t2val + from t1 left join t2 on t1.a = t2.a + union + select t1.a, t1.grp, t1.val, t2.a, t2.grp, t2.val + from t1 right join t2 on t1.a = t2.a) u; + +--echo # Window function combined with GROUP BY on the FULL JOIN. +--echo # Exercises the AGGR_OP::end_send path after the null-complement +--echo # pass completes (this previously asserted in create_sort_index). 
+--sorted_result +select coalesce(t1.grp, t2.grp) as grp, + sum(coalesce(t1.val, 0) + coalesce(t2.val, 0)) as s, + rank() over (order by sum(coalesce(t1.val, 0) + coalesce(t2.val, 0))) as rk +from t1 full join t2 on t1.a = t2.a +group by coalesce(t1.grp, t2.grp); +--sorted_result +select coalesce(t1grp, t2grp) as grp, + sum(coalesce(t1val, 0) + coalesce(t2val, 0)) as s, + rank() over (order by sum(coalesce(t1val, 0) + coalesce(t2val, 0))) as rk +from (select t1.a as t1a, t1.grp as t1grp, t1.val as t1val, + t2.a as t2a, t2.grp as t2grp, t2.val as t2val + from t1 left join t2 on t1.a = t2.a + union + select t1.a, t1.grp, t1.val, t2.a, t2.grp, t2.val + from t1 right join t2 on t1.a = t2.a) u +group by coalesce(t1grp, t2grp); + +drop table t1, t2; + + +--echo # ======================================================== +--echo # Section 10: CTEs +--echo # ======================================================== + +create table t1 (id int, val varchar(10)); +insert into t1 values (1,'a'), (2,'b'), (3,'c'); +create table t2 (id int, val varchar(10)); +insert into t2 values (2,'x'), (3,'y'), (4,'z'); + +--echo # Simple CTE wrapping a FULL JOIN. +--sorted_result +with fj as ( + select t1.id as id1, t1.val as v1, t2.id as id2, t2.val as v2 + from t1 full join t2 on t1.id = t2.id +) +select * from fj; +--sorted_result +select * from t1 left join t2 on t1.id = t2.id union select * from t1 right join t2 on t1.id = t2.id; + +--echo # CTE on the left side of a FULL JOIN. 
+--sorted_result +with vals as (select id, val from t1 where id <= 2) +select v1.id as id1, v1.val as val1, t2.id as id2, t2.val as val2 +from vals v1 full join t2 on v1.id = t2.id; +--sorted_result +with vals as (select id, val from t1 where id <= 2) +select v1.id as id1, v1.val as val1, t2.id as id2, t2.val as val2 +from vals v1 left join t2 on v1.id = t2.id +union +select v1.id as id1, v1.val as val1, t2.id as id2, t2.val as val2 +from vals v1 right join t2 on v1.id = t2.id; + +--echo # Recursive CTE used in a FULL JOIN. +--sorted_result +with recursive seq as ( + select 1 as n + union all + select n + 1 from seq where n < 4 +) +select s1.n as n1, s2.id as n2 +from seq s1 full join t2 s2 on s1.n = s2.id; +--sorted_result +with recursive seq as ( + select 1 as n + union all + select n + 1 from seq where n < 4 +) +select s1.n as n1, s2.id as n2 +from seq s1 left join t2 s2 on s1.n = s2.id +union +select s1.n as n1, s2.id as n2 +from seq s1 right join t2 s2 on s1.n = s2.id; + +--echo # CTE on the left side of a FULL JOIN with filtering. +--sorted_result +with left_cte as (select * from t1 where id in (1,2)) +select l.id as lid, l.val as lval, t2.id as rid, t2.val as rval +from left_cte l full join t2 on l.id = t2.id; +--sorted_result +with left_cte as (select * from t1 where id in (1,2)) +select l.id as lid, l.val as lval, t2.id as rid, t2.val as rval +from left_cte l left join t2 on l.id = t2.id +union +select l.id as lid, l.val as lval, t2.id as rid, t2.val as rval +from left_cte l right join t2 on l.id = t2.id; + +drop table t1, t2; + + +--echo # ======================================================== +--echo # Section 11: Views over FULL JOIN +--echo # ======================================================== + +create table t1 (a int, b int); +insert into t1 values (1,10), (2,20), (3,30); +create table t2 (a int, b int); +insert into t2 values (2,200), (3,300), (4,400); + +--echo # Simple view over a FULL JOIN. 
+# v_full is reused by the filtering and FULL/LEFT/RIGHT JOIN cases that follow and is dropped together with v_full_filtered at the end of the section. +create view v_full as + select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 + from t1 full join t2 on t1.a = t2.a; +--sorted_result +select * from v_full; +--sorted_result +select * from t1 left join t2 on t1.a = t2.a union select * from t1 right join t2 on t1.a = t2.a; + +--echo # Query the view with additional filtering. +--sorted_result +select * from v_full where a1 is not null and a2 is not null; +--sorted_result +select * from t1 inner join t2 on t1.a = t2.a; + +--echo # View joined with another table via FULL JOIN. +create table t3 (a int, c varchar(10)); +insert into t3 values (1,'x'), (2,'y'), (4,'z'); +--sorted_result +select v_full.a1, v_full.a2, t3.c +from v_full full join t3 on coalesce(v_full.a1, v_full.a2) = t3.a; +--echo # The LEFT JOIN is allowed (v_full is the outer side, so the +--echo # FULL JOIN inside v_full is on the outer side of the LEFT JOIN). +--echo # The RIGHT JOIN is equivalent to +--echo # `t3 LEFT JOIN v_full`, which puts v_full (the FULL JOIN) on the +--echo # inner side -- rejected. +--sorted_result +select v_full.a1, v_full.a2, t3.c +from v_full left join t3 on coalesce(v_full.a1, v_full.a2) = t3.a; +--error ER_FULL_JOIN_NOT_ALLOWED_IN_OUTER_JOIN +select v_full.a1, v_full.a2, t3.c +from v_full right join t3 on coalesce(v_full.a1, v_full.a2) = t3.a; + +--echo # View that filters the FULL JOIN result. 
+# WHERE t1.a IS NOT NULL removes the right-only rows of the FULL JOIN, hence the plain LEFT JOIN companion below. +create view v_full_filtered as + select t1.a as a1, t2.a as a2 + from t1 full join t2 on t1.a = t2.a + where t1.a is not null; +--sorted_result +select * from v_full_filtered; +--sorted_result +select t1.a as a1, t2.a as a2 from t1 left join t2 on t1.a = t2.a; + +drop view v_full, v_full_filtered; +drop table t1, t2, t3; + + +--echo # ======================================================== +--echo # Section 12: Prepared statements +--echo # ======================================================== + +create table t1 (a int, b varchar(10)); +insert into t1 values (1,'one'), (2,'two'), (3,'three'); +create table t2 (a int, b varchar(10)); +insert into t2 values (2,'TWO'), (3,'THREE'), (4,'FOUR'); + +--echo # Basic prepared statement with FULL JOIN. +prepare stmt1 from + 'select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 + from t1 full join t2 on t1.a = t2.a'; +--sorted_result +execute stmt1; +--sorted_result +select * from t1 left join t2 on t1.a = t2.a union select * from t1 right join t2 on t1.a = t2.a; + +--echo # Re-execute to verify PS re-execution stability. +--sorted_result +execute stmt1; +deallocate prepare stmt1; + +--echo # Parameter in the ON clause. +prepare stmt2 from + 'select t1.a as a1, t2.a as a2 + from t1 full join t2 on t1.a = t2.a and t1.a > ?'; +set @threshold = 1; +--sorted_result +execute stmt2 using @threshold; +--sorted_result +select t1.a as a1, t2.a as a2 +from t1 left join t2 on t1.a = t2.a and t1.a > 1 +union +select t1.a as a1, t2.a as a2 +from t1 right join t2 on t1.a = t2.a and t1.a > 1; + +--echo # Re-execute with a different parameter value. +set @threshold = 2; +--sorted_result +execute stmt2 using @threshold; +--sorted_result +select t1.a as a1, t2.a as a2 +from t1 left join t2 on t1.a = t2.a and t1.a > 2 +union +select t1.a as a1, t2.a as a2 +from t1 right join t2 on t1.a = t2.a and t1.a > 2; +deallocate prepare stmt2; + +--echo # Parameter in the WHERE clause. 
+# NOTE(review): stmt3 is executed twice with the same @minval and, unlike stmt1 and stmt2, has no LEFT/RIGHT UNION companion query. +prepare stmt3 from + 'select t1.a, t2.a + from t1 full join t2 on t1.a = t2.a + where t1.a is not null or t2.a > ?'; +set @minval = 3; +--sorted_result +execute stmt3 using @minval; +--sorted_result +execute stmt3 using @minval; +deallocate prepare stmt3; + +drop table t1, t2; + + +--echo # ======================================================== +--echo # Section 13: Stored procedures +--echo # ======================================================== + +create table t1 (a int, b varchar(20)); +insert into t1 values (1,'alpha'), (2,'beta'), (3,'gamma'); +create table t2 (a int, b varchar(20)); +insert into t2 values (2,'BETA'), (4,'DELTA'), (5,'EPSILON'); + +delimiter |; + +--echo # SP that performs a FULL JOIN. +create procedure sp_full_join() +begin + select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 + from t1 full join t2 on t1.a = t2.a; +end| + +--echo # SP with a FULL JOIN and a parameter. +create procedure sp_full_join_param(in min_a int) +begin + select t1.a as a1, t2.a as a2 + from t1 full join t2 on t1.a = t2.a + where coalesce(t1.a, t2.a) >= min_a; +end| + +--echo # SP using FULL JOIN with INSERT ... SELECT. +create procedure sp_full_join_insert() +begin + create temporary table t3 (a1 int, b1 varchar(20), a2 int, b2 varchar(20)); + insert into t3 + select t1.a, t1.b, t2.a, t2.b + from t1 full join t2 on t1.a = t2.a; + select * from t3; + drop temporary table t3; +end| + +delimiter ;| + +--sorted_result +call sp_full_join(); +--sorted_result +select * from t1 left join t2 on t1.a = t2.a union select * from t1 right join t2 on t1.a = t2.a; + +--sorted_result +call sp_full_join_param(3); +--sorted_result +select t1.a as a1, t2.a as a2 +from t1 left join t2 on t1.a = t2.a +where coalesce(t1.a, t2.a) >= 3 +union +select t1.a as a1, t2.a as a2 +from t1 right join t2 on t1.a = t2.a +where coalesce(t1.a, t2.a) >= 3; + +--echo # Call the SPs twice to test re-execution. 
+--sorted_result +call sp_full_join(); +--sorted_result +call sp_full_join_param(1); + +--sorted_result +call sp_full_join_insert(); + +drop procedure sp_full_join; +drop procedure sp_full_join_param; +drop procedure sp_full_join_insert; +drop table t1, t2; + + +--echo # ======================================================== +--echo # Section 14: Subqueries and semijoins with FULL JOIN +--echo # ======================================================== + +create table t1 (a int, b int); +insert into t1 values (1,10), (2,20), (3,30); +create table t2 (a int, b int); +insert into t2 values (2,200), (3,300), (4,400); +create table t3 (a int); +insert into t3 values (1), (3), (5); + +--echo # IN subquery (semijoin) on the result of a FULL JOIN. +--sorted_result +select * from t1 full join t2 on t1.a = t2.a +where coalesce(t1.a, t2.a) in (select a from t3); +--sorted_result +select * from t1 left join t2 on t1.a = t2.a +where coalesce(t1.a, t2.a) in (select a from t3) +union +select * from t1 right join t2 on t1.a = t2.a +where coalesce(t1.a, t2.a) in (select a from t3); + +--echo # EXISTS subquery filtering a FULL JOIN. +--sorted_result +select * from t1 full join t2 on t1.a = t2.a +where exists (select 1 from t3 where t3.a = coalesce(t1.a, t2.a)); +--sorted_result +select * from t1 left join t2 on t1.a = t2.a +where exists (select 1 from t3 where t3.a = coalesce(t1.a, t2.a)) +union +select * from t1 right join t2 on t1.a = t2.a +where exists (select 1 from t3 where t3.a = coalesce(t1.a, t2.a)); + +--echo # NOT IN (anti-semijoin) with a FULL JOIN. +--sorted_result +select * from t1 full join t2 on t1.a = t2.a +where coalesce(t1.a, t2.a) not in (select a from t3); +--sorted_result +select * from t1 left join t2 on t1.a = t2.a +where coalesce(t1.a, t2.a) not in (select a from t3) +union +select * from t1 right join t2 on t1.a = t2.a +where coalesce(t1.a, t2.a) not in (select a from t3); + +--echo # FULL JOIN inside a subquery used as a semijoin predicate. 
+# Row order here depends on the semijoin strategy the optimizer picks, so sort the output like every other multi-row select without ORDER BY in this file. +--sorted_result +select * from t3 +where t3.a in ( + select coalesce(t1.a, t2.a) + from t1 full join t2 on t1.a = t2.a +); +--sorted_result +select * from t3 +where t3.a in ( + select coalesce(t1.a, t2.a) + from t1 left join t2 on t1.a = t2.a + union + select coalesce(t1.a, t2.a) + from t1 right join t2 on t1.a = t2.a +); + +--echo # Correlated subquery with FULL JOIN. +--sorted_result +select * from t3 +where exists ( + select 1 from t1 full join t2 on t1.a = t2.a + where coalesce(t1.a, t2.a) = t3.a +); +--sorted_result +select * from t3 +where exists ( + select 1 from t1 left join t2 on t1.a = t2.a + where coalesce(t1.a, t2.a) = t3.a + union + select 1 from t1 right join t2 on t1.a = t2.a + where coalesce(t1.a, t2.a) = t3.a +); + +drop table t1, t2, t3; + + +--echo # ======================================================== +--echo # Section 15: Indexed access (PK, secondary, composite, unique) +--echo # +--echo # Exercises JT_EQ_REF / JT_REF access paths on the right +--echo # side of a FULL JOIN, including NULLable unique keys. +--echo # ======================================================== + +--echo # Primary key join (JT_EQ_REF). +create table t1 (id int primary key, val varchar(10)); +insert into t1 values (1,'a'), (2,'b'), (3,'c'); +create table t2 (id int primary key, val varchar(10)); +insert into t2 values (2,'x'), (3,'y'), (4,'z'); + +--sorted_result +select t1.id as id1, t1.val as v1, t2.id as id2, t2.val as v2 +from t1 full join t2 on t1.id = t2.id; +--sorted_result +select t1.id as id1, t1.val as v1, t2.id as id2, t2.val as v2 +from t1 left join t2 on t1.id = t2.id +union +select t1.id as id1, t1.val as v1, t2.id as id2, t2.val as v2 +from t1 right join t2 on t1.id = t2.id; + +--echo # PK join with WHERE filter. 
+--sorted_result +select t1.id as id1, t2.id as id2 +from t1 full join t2 on t1.id = t2.id +where coalesce(t1.id, t2.id) > 2; +--sorted_result +select t1.id as id1, t2.id as id2 +from t1 left join t2 on t1.id = t2.id +where coalesce(t1.id, t2.id) > 2 +union +select t1.id as id1, t2.id as id2 +from t1 right join t2 on t1.id = t2.id +where coalesce(t1.id, t2.id) > 2; + +--echo # PK join with aggregate. +select count(*) from t1 full join t2 on t1.id = t2.id; +select count(*) from (select t1.id as id1, t1.val as v1, t2.id as id2, t2.val as v2 + from t1 left join t2 on t1.id = t2.id + union + select t1.id, t1.val, t2.id, t2.val + from t1 right join t2 on t1.id = t2.id) dt; + +drop table t1, t2; + +--echo # Secondary index join (JT_REF) with duplicates. +create table t1 (id int, grp int, val varchar(10), key(grp)); +insert into t1 values (1,10,'a'), (2,20,'b'), (3,20,'c'), (4,30,'d'); +create table t2 (id int, grp int, val varchar(10), key(grp)); +insert into t2 values (5,20,'x'), (6,30,'y'), (7,30,'z'), (8,40,'w'); + +--sorted_result +select t1.id as id1, t1.grp as g1, t2.id as id2, t2.grp as g2 +from t1 full join t2 on t1.grp = t2.grp; +--sorted_result +select t1.id as id1, t1.grp as g1, t2.id as id2, t2.grp as g2 +from t1 left join t2 on t1.grp = t2.grp +union +select t1.id as id1, t1.grp as g1, t2.id as id2, t2.grp as g2 +from t1 right join t2 on t1.grp = t2.grp; + +select count(*) from t1 full join t2 on t1.grp = t2.grp; +select count(*) from (select t1.id as id1, t1.grp as g1, t1.val as v1, + t2.id as id2, t2.grp as g2, t2.val as v2 + from t1 left join t2 on t1.grp = t2.grp + union + select t1.id, t1.grp, t1.val, t2.id, t2.grp, t2.val + from t1 right join t2 on t1.grp = t2.grp) dt; + +drop table t1, t2; + +--echo # Composite index join. 
+create table t1 (a int, b int, val varchar(10), primary key(a, b)); +insert into t1 values (1,1,'p'), (1,2,'q'), (2,1,'r'); +create table t2 (a int, b int, val varchar(10), primary key(a, b)); +insert into t2 values (1,2,'s'), (2,1,'t'), (2,2,'u'); + +--sorted_result +select t1.a as a1, t1.b as b1, t1.val as v1, + t2.a as a2, t2.b as b2, t2.val as v2 +from t1 full join t2 on t1.a = t2.a and t1.b = t2.b; +--sorted_result +select t1.a as a1, t1.b as b1, t1.val as v1, + t2.a as a2, t2.b as b2, t2.val as v2 +from t1 left join t2 on t1.a = t2.a and t1.b = t2.b +union +select t1.a as a1, t1.b as b1, t1.val as v1, + t2.a as a2, t2.b as b2, t2.val as v2 +from t1 right join t2 on t1.a = t2.a and t1.b = t2.b; + +--echo # Composite index: join on partial key (first column only). +--sorted_result +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 full join t2 on t1.a = t2.a; +--sorted_result +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 left join t2 on t1.a = t2.a +union +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 right join t2 on t1.a = t2.a; + +drop table t1, t2; + +--echo # UNIQUE index with NULLable column. +create table t1 (id int auto_increment primary key, val int, unique key(val)); +insert into t1 (val) values (1), (NULL), (3), (NULL); +create table t2 (id int auto_increment primary key, val int, unique key(val)); +insert into t2 (val) values (2), (3), (NULL), (NULL); + +# Select the ids together with val: NULL never equals NULL, so the FULL JOIN yields four rows whose val columns are both NULL, and a UNION over val alone would collapse them into one row and no longer match the FULL JOIN output. +--sorted_result +select t1.id as id1, t1.val as v1, t2.id as id2, t2.val as v2 +from t1 full join t2 on t1.val = t2.val; +--sorted_result +select t1.id as id1, t1.val as v1, t2.id as id2, t2.val as v2 +from t1 left join t2 on t1.val = t2.val +union +select t1.id as id1, t1.val as v1, t2.id as id2, t2.val as v2 +from t1 right join t2 on t1.val = t2.val; + +drop table t1, t2; + +--echo # Vector index on the right side of FULL JOIN. 
The vector index +--echo # does not affect FULL JOIN semantics, but the right table now +--echo # carries an hlindex object; this confirms the rowid weedout +--echo # and null-complement pass still work when it does. Three rows +--echo # are expected: one left-only (1, NULL, NULL), one match +--echo # (2, 2, [1,0]), one right-only (NULL, 3, [2,0]). +create table t1 (a int); +insert into t1 values (1), (2); +create table t2 (id int, v vector(2) not null, vector(v)); +insert into t2 values (2, vec_fromtext('[1,0]')), + (3, vec_fromtext('[2,0]')); +# NOTE(review): the queries below join on id only; no statement here orders or filters by vector distance. + +--sorted_result +select t1.a as a1, t2.id as id2, vec_totext(t2.v) as v2 +from t1 full join t2 on t1.a = t2.id; +--sorted_result +select t1.a as a1, t2.id as id2, vec_totext(t2.v) as v2 +from t1 left join t2 on t1.a = t2.id +union +select t1.a as a1, t2.id as id2, vec_totext(t2.v) as v2 +from t1 right join t2 on t1.a = t2.id; + +drop table t1, t2; + +--echo # Vector indexes on both sides of FULL JOIN. Three rows are +--echo # expected: one left-only (1, [1,0], NULL, NULL), one match +--echo # (2, [2,0], 2, [3,0]), one right-only (NULL, NULL, 3, [4,0]). +create table t1 (id int, v vector(2) not null, vector(v)); +insert into t1 values (1, vec_fromtext('[1,0]')), + (2, vec_fromtext('[2,0]')); +create table t2 (id int, v vector(2) not null, vector(v)); +insert into t2 values (2, vec_fromtext('[3,0]')), + (3, vec_fromtext('[4,0]')); + +--sorted_result +select t1.id as id1, vec_totext(t1.v) as v1, + t2.id as id2, vec_totext(t2.v) as v2 +from t1 full join t2 on t1.id = t2.id; +--sorted_result +select t1.id as id1, vec_totext(t1.v) as v1, + t2.id as id2, vec_totext(t2.v) as v2 +from t1 left join t2 on t1.id = t2.id +union +select t1.id as id1, vec_totext(t1.v) as v1, + t2.id as id2, vec_totext(t2.v) as v2 +from t1 right join t2 on t1.id = t2.id; + +drop table t1, t2; + +--echo # There were rows missing because not all null-complements were +--echo # generated. 
+# NOTE(review): seq_1_to_10 presumably relies on the Sequence engine being enabled by the test's header (not visible in this chunk) -- confirm. +create table t10 (a int, b int, index(a)); +create table t11 (a int, b int, index(a)); +insert into t10 select seq, seq from seq_1_to_10; +insert into t11 select seq*2, seq*2 from seq_1_to_10; +create table t20 (a varchar(100), b varchar(100), index(a)); +create table t21 (a varchar(100), b varchar(100), index(a)); +insert into t20 values('match','match'), ('no-match-t20', 'no-match-t20'); +insert into t21 values('match','match'), ('no-match-t21', 'no-match-t21'); +--sorted_result +select * from (t10 full outer join t11 on t10.a=t11.a) , (t20 full outer join t21 on t20.a=t21.a); +--echo # Join order under straight_join with FULL JOIN +create table two (c int); +insert into two values (1),(2); +explain select * from two, (t10 full outer join t11 on t10.a=t11.a); +--sorted_result +select * from two, (t10 full outer join t11 on t10.a=t11.a); +explain select * from (t10 full outer join t11 on t10.a=t11.a), two; +--sorted_result +select * from (t10 full outer join t11 on t10.a=t11.a), two; +explain select straight_join * from two, (t10 full outer join t11 on t10.a=t11.a); +--sorted_result +select straight_join * from two, (t10 full outer join t11 on t10.a=t11.a); +explain select straight_join * from (t10 full outer join t11 on t10.a=t11.a), two; +--sorted_result +select straight_join * from (t10 full outer join t11 on t10.a=t11.a), two; + +--echo # FULL JOIN tables must be contiguous but no longer must appear +--echo # at the start of the join order. 
+create table ten(a int primary key); +insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); + +create table one_k(a int primary key); +insert into one_k select a.a + b.a* 10 + c.a * 100 from ten a, ten b, ten c; + +create table t1 ( + a int, + b int +); +create table t2 ( + a int, + b int +); +insert into t1 select a, a from one_k where a between 1 and 100; +insert into t2 select a, a from one_k where a between 95 and 195; +# t1 holds a=1..100 and t2 holds a=95..195 (b = a), so only a=95..100 satisfy the whole ON condition; all other rows are null-complemented. +--sorted_result +select * from t1 full outer join t2 on (t1.a=t2.a and t1.b>90 and t2.b<110); + +drop table t10, t11, t20, t21, two, ten, one_k, t1, t2; + +--echo # ======================================================== +--echo # Section 16: Storage engines (MyISAM, Aria, mixed) +--echo # +--echo # The null-complement rescan must work regardless of +--echo # underlying storage engine and across mixed-engine joins +--echo # (different rowid formats in the weedout temp table). +--echo # ======================================================== + +--echo # Both sides MyISAM. +create table t1 (a int, b varchar(10)) engine=MyISAM; +insert into t1 values (1,'a'), (2,'b'), (3,'c'); +create table t2 (a int, b varchar(10)) engine=MyISAM; +insert into t2 values (2,'x'), (3,'y'), (4,'z'); + +--sorted_result +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 full join t2 on t1.a = t2.a; +--sorted_result +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 left join t2 on t1.a = t2.a +union +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 right join t2 on t1.a = t2.a; + +select count(*) from t1 full join t2 on t1.a = t2.a; +select count(*) from (select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 + from t1 left join t2 on t1.a = t2.a + union + select t1.a, t1.b, t2.a, t2.b + from t1 right join t2 on t1.a = t2.a) dt; + +drop table t1, t2; + +--echo # Both sides Aria. 
+create table t1 (a int, b varchar(10)) engine=Aria; +insert into t1 values (1,'a'), (2,'b'), (3,'c'); +create table t2 (a int, b varchar(10)) engine=Aria; +insert into t2 values (2,'x'), (3,'y'), (4,'z'); + +--sorted_result +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 full join t2 on t1.a = t2.a; +--sorted_result +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 left join t2 on t1.a = t2.a +union +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 right join t2 on t1.a = t2.a; + +select count(*) from t1 full join t2 on t1.a = t2.a; +select count(*) from (select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 + from t1 left join t2 on t1.a = t2.a + union + select t1.a, t1.b, t2.a, t2.b + from t1 right join t2 on t1.a = t2.a) dt; + +drop table t1, t2; + +# NOTE(review): engine=InnoDB below presumably needs InnoDB to be enabled by the test's header (not visible in this chunk) -- confirm. +--echo # Mixed engines: InnoDB and MyISAM. +create table t1 (a int, b varchar(10)) engine=InnoDB; +insert into t1 values (1,'a'), (2,'b'), (3,'c'); +create table t2 (a int, b varchar(10)) engine=MyISAM; +insert into t2 values (2,'x'), (3,'y'), (4,'z'); + +--echo # InnoDB on left, MyISAM on right. +--sorted_result +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 full join t2 on t1.a = t2.a; +--sorted_result +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 left join t2 on t1.a = t2.a +union +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 right join t2 on t1.a = t2.a; + +--echo # MyISAM on left, InnoDB on right. +--sorted_result +select t2.a as a1, t2.b as b1, t1.a as a2, t1.b as b2 +from t2 full join t1 on t2.a = t1.a; +--sorted_result +select t2.a as a1, t2.b as b1, t1.a as a2, t1.b as b2 +from t2 left join t1 on t2.a = t1.a +union +select t2.a as a1, t2.b as b1, t1.a as a2, t1.b as b2 +from t2 right join t1 on t2.a = t1.a; + +drop table t1, t2; + +--echo # Mixed engines: InnoDB and Aria. 
+create table t1 (a int, b varchar(10)) engine=InnoDB; +insert into t1 values (1,'a'), (2,'b'), (3,'c'); +create table t2 (a int, b varchar(10)) engine=Aria; +insert into t2 values (2,'x'), (3,'y'), (4,'z'); + +--echo # InnoDB on left, Aria on right. +--sorted_result +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 full join t2 on t1.a = t2.a; +--sorted_result +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 left join t2 on t1.a = t2.a +union +select t1.a as a1, t1.b as b1, t2.a as a2, t2.b as b2 +from t1 right join t2 on t1.a = t2.a; + +--echo # Aria on left, InnoDB on right. +--sorted_result +select t2.a as a1, t2.b as b1, t1.a as a2, t1.b as b2 +from t2 full join t1 on t2.a = t1.a; +--sorted_result +select t2.a as a1, t2.b as b1, t1.a as a2, t1.b as b2 +from t2 left join t1 on t2.a = t1.a +union +select t2.a as a1, t2.b as b1, t1.a as a2, t1.b as b2 +from t2 right join t1 on t2.a = t1.a; + +drop table t1, t2; + +--echo # Three-way mixed engine FULL JOIN. +--echo # No UNION companion: see the note above the chained-FULL-JOIN +--echo # section — the LEFT/RIGHT permutation UNION over-approximates +--echo # for chained FULL JOINs. +create table t1 (a int) engine=InnoDB; +insert into t1 values (1), (2), (3); +create table t2 (a int) engine=MyISAM; +insert into t2 values (2), (3), (4); +create table t3 (a int) engine=Aria; +insert into t3 values (3), (4), (5); +# t1 = {1,2,3}, t2 = {2,3,4}, t3 = {3,4,5}: only a=3 is present in all three tables. + +--sorted_result +select t1.a as a1, t2.a as a2, t3.a as a3 +from t1 + full join t2 on t1.a = t2.a + full join t3 on t2.a = t3.a; + +drop table t1, t2, t3; + +--echo # Indexed mixed-engine FULL JOIN chain. 
+# Each table has a primary key on id; the ids are 1..3 (InnoDB), 2..4 (MyISAM) and 3..5 (Aria). +create table t1 (id int primary key, val varchar(10)) engine=InnoDB; +insert into t1 values (1,'a'), (2,'b'), (3,'c'); +create table t2 (id int primary key, val varchar(10)) engine=MyISAM; +insert into t2 values (2,'x'), (3,'y'), (4,'z'); +create table t3 (id int primary key, val varchar(10)) engine=Aria; +insert into t3 values (3,'p'), (4,'q'), (5,'r'); + +--echo # InnoDB PK full join MyISAM PK. +--sorted_result +select t1.id as id1, t1.val as v1, t2.id as id2, t2.val as v2 +from t1 full join t2 on t1.id = t2.id; +--sorted_result +select t1.id as id1, t1.val as v1, t2.id as id2, t2.val as v2 +from t1 left join t2 on t1.id = t2.id +union +select t1.id as id1, t1.val as v1, t2.id as id2, t2.val as v2 +from t1 right join t2 on t1.id = t2.id; + +--echo # MyISAM PK full join Aria PK. +--sorted_result +select t2.id as id1, t2.val as v1, t3.id as id2, t3.val as v2 +from t2 full join t3 on t2.id = t3.id; +--sorted_result +select t2.id as id1, t2.val as v1, t3.id as id2, t3.val as v2 +from t2 left join t3 on t2.id = t3.id +union +select t2.id as id1, t2.val as v1, t3.id as id2, t3.val as v2 +from t2 right join t3 on t2.id = t3.id; + +--echo # Three-way: InnoDB PK, MyISAM PK, Aria PK. +--echo # No UNION companion: see the note above the chained-FULL-JOIN +--echo # section — the LEFT/RIGHT permutation UNION over-approximates +--echo # for chained FULL JOINs. +--sorted_result +select t1.id as id1, t2.id as id2, t3.id as id3 +from t1 + full join t2 on t1.id = t2.id + full join t3 on t2.id = t3.id; + +--echo # Indexed mixed-engine with WHERE filter. 
+--sorted_result +select t1.id as id1, t2.id as id2 +from t1 full join t2 on t1.id = t2.id +where coalesce(t1.id, t2.id) between 2 and 3; +--sorted_result +select t1.id as id1, t2.id as id2 +from t1 left join t2 on t1.id = t2.id +where coalesce(t1.id, t2.id) between 2 and 3 +union +select t1.id as id1, t2.id as id2 +from t1 right join t2 on t1.id = t2.id +where coalesce(t1.id, t2.id) between 2 and 3; + +--echo # Indexed mixed-engine with aggregate. +select count(*) from t1 full join t2 on t1.id = t2.id; +select count(*) from (select t1.id as id1, t1.val as v1, t2.id as id2, t2.val as v2 + from t1 left join t2 on t1.id = t2.id + union + select t1.id, t1.val, t2.id, t2.val + from t1 right join t2 on t1.id = t2.id) dt; + +drop table t1, t2, t3; + + +--echo # ======================================================== +--echo # Section 17: Complex feature combinations +--echo # +--echo # Queries that stress FULL JOIN alongside other features at +--echo # the same time: semi-joins with ranges, FULL-JOIN-in-FULL- +--echo # JOIN inside a semi-join, CTEs combined with window +--echo # functions, and HAVING over aggregates with FULL JOIN. +--echo # ======================================================== + +create table t3o (x int); +insert into t3o values (1), (2), (3), (4), (5), (6), (7); +create table t3l (k int, tag varchar(8)); +insert into t3l values (1,'L1'), (2,'L2'), (3,'L3'), (4,'L4'); +create table t3r (k int, tag varchar(8)); +insert into t3r values (3,'R3'), (4,'R4'), (5,'R5'), (6,'R6'); +create table t3m (k int, tag varchar(8)); +insert into t3m values (2,'M2'), (5,'M5'), (7,'M7'); + +--echo # 17.1 FULL JOIN inside a materialized semijoin. +--echo # Exercises get_allowed_nj_tables() with emb_sjm_nest set: +--echo # the FULL JOIN adjacency check must apply inside the SJM +--echo # nest so that siblings cannot interleave between partners. 
+set @save_optimizer_switch= @@optimizer_switch; +set optimizer_switch='materialization=on,semijoin=on'; + +--sorted_result +select * from t3o +where t3o.x in ( + select coalesce(t3l.k, t3r.k) from t3l full join t3r on t3l.k = t3r.k +); +--sorted_result +select * from t3o +where t3o.x in ( + select coalesce(t3l.k, t3r.k) from t3l left join t3r on t3l.k = t3r.k + union + select coalesce(t3l.k, t3r.k) from t3l right join t3r on t3l.k = t3r.k +); + +--echo # 17.2 FULL JOIN in semijoin with a range predicate on the +--echo # FULL JOIN's coalesced key. +--sorted_result +select * from t3o +where t3o.x in ( + select coalesce(t3l.k, t3r.k) c + from t3l full join t3r on t3l.k = t3r.k + where coalesce(t3l.k, t3r.k) between 2 and 5 +); +--sorted_result +select * from t3o +where t3o.x in ( + select coalesce(t3l.k, t3r.k) c + from t3l left join t3r on t3l.k = t3r.k + where coalesce(t3l.k, t3r.k) between 2 and 5 + union + select coalesce(t3l.k, t3r.k) c + from t3l right join t3r on t3l.k = t3r.k + where coalesce(t3l.k, t3r.k) between 2 and 5 +); + +--echo # 17.3 Nested FULL JOINs (a FULL JOIN of a FULL JOIN) inside +--echo # a semijoin. 
+--sorted_result +select * from t3o +where t3o.x in ( + select coalesce(coalesce(t3l.k, t3r.k), t3m.k) + from (t3l full join t3r on t3l.k = t3r.k) + full join t3m on coalesce(t3l.k, t3r.k) = t3m.k +); +--sorted_result +select * from t3o +where t3o.x in ( + select coalesce(coalesce(t3l.k, t3r.k), t3m.k) + from (t3l left join t3r on t3l.k = t3r.k) + left join t3m on coalesce(t3l.k, t3r.k) = t3m.k + union + select coalesce(coalesce(t3l.k, t3r.k), t3m.k) + from (t3l right join t3r on t3l.k = t3r.k) + left join t3m on coalesce(t3l.k, t3r.k) = t3m.k + union + select coalesce(coalesce(t3l.k, t3r.k), t3m.k) + from (t3l left join t3r on t3l.k = t3r.k) + right join t3m on coalesce(t3l.k, t3r.k) = t3m.k + union + select coalesce(coalesce(t3l.k, t3r.k), t3m.k) + from (t3l right join t3r on t3l.k = t3r.k) + right join t3m on coalesce(t3l.k, t3r.k) = t3m.k +); + +set optimizer_switch= @save_optimizer_switch; + +--echo # 17.4 CTE that produces a FULL JOIN result, consumed by a +--echo # window function in the outer query. +--sorted_result +with fj as ( + select coalesce(t3l.k, t3r.k) as k, + t3l.tag as ltag, + t3r.tag as rtag + from t3l full join t3r on t3l.k = t3r.k +) +select k, ltag, rtag, + row_number() over (order by k) as rn, + count(*) over (order by k rows between unbounded preceding and current row) as running_cnt +from fj; +--sorted_result +with fj as ( + select coalesce(t3l.k, t3r.k) as k, t3l.tag as ltag, t3r.tag as rtag + from t3l left join t3r on t3l.k = t3r.k + union + select coalesce(t3l.k, t3r.k), t3l.tag, t3r.tag + from t3l right join t3r on t3l.k = t3r.k +) +select k, ltag, rtag, + row_number() over (order by k) as rn, + count(*) over (order by k rows between unbounded preceding and current row) as running_cnt +from fj; + +--echo # 17.5 FULL JOIN + HAVING + ORDER BY + aggregate window. 
+--sorted_result +select coalesce(t3l.k, t3r.k) as k, + count(*) as cnt, + rank() over (order by count(*) desc) as rk +from t3l full join t3r on t3l.k = t3r.k +group by coalesce(t3l.k, t3r.k) +having count(*) >= 1 +order by rk, k; +--sorted_result +select coalesce(t3lk, t3rk) as k, + count(*) as cnt, + rank() over (order by count(*) desc) as rk +from (select t3l.k as t3lk, t3l.tag as ltag, t3r.k as t3rk, t3r.tag as rtag + from t3l left join t3r on t3l.k = t3r.k + union + select t3l.k, t3l.tag, t3r.k, t3r.tag + from t3l right join t3r on t3l.k = t3r.k) dt +group by coalesce(t3lk, t3rk) +having count(*) >= 1 +order by rk, k; + +--echo # 17.6 FULL JOIN + EXISTS subquery that itself contains a +--echo # FULL JOIN, all filtered by a range condition. +--sorted_result +select coalesce(t3l.k, t3r.k) as k, t3l.tag as ltag, t3r.tag as rtag +from t3l full join t3r on t3l.k = t3r.k +where coalesce(t3l.k, t3r.k) between 1 and 5 + and exists ( + select 1 from t3l l2 full join t3m on l2.k = t3m.k + where coalesce(l2.k, t3m.k) = coalesce(t3l.k, t3r.k) + ); +--sorted_result +select coalesce(t3l.k, t3r.k) as k, t3l.tag as ltag, t3r.tag as rtag +from t3l full join t3r on t3l.k = t3r.k +where coalesce(t3l.k, t3r.k) between 1 and 5 + and coalesce(t3l.k, t3r.k) in ( + select coalesce(l2.k, t3m.k) from t3l l2 left join t3m on l2.k = t3m.k + union + select coalesce(l2.k, t3m.k) from t3l l2 right join t3m on l2.k = t3m.k + ); + +--echo # STRAIGHT_JOIN combined with FULL JOIN. Each FULL JOIN pair +--echo # must stay contiguous in the join order so its null-complement +--echo # rescan can fire at the right partner with the LEFT JOIN pass +--echo # complete, but non FULL JOIN tables may appear before, after, +--echo # or between distinct FULL JOIN pairs. STRAIGHT_JOIN cannot +--echo # violate per-pair contiguity because the SQL grammar puts each +--echo # pair's L and R syntactically adjacent in the FROM clause. + +--echo # FULL JOIN tables first. 
+explain +select straight_join coalesce(t3l.k, t3r.k) as k, t3o.x +from t3l full join t3r on t3l.k = t3r.k, t3o +where t3o.x between 3 and 4; +--sorted_result +select straight_join coalesce(t3l.k, t3r.k) as k, t3o.x +from t3l full join t3r on t3l.k = t3r.k, t3o +where t3o.x between 3 and 4; +--sorted_result +select coalesce(t3l.k, t3r.k) as k, t3o.x +from t3l left join t3r on t3l.k = t3r.k, t3o +where t3o.x between 3 and 4 +union +select coalesce(t3l.k, t3r.k) as k, t3o.x +from t3l right join t3r on t3l.k = t3r.k, t3o +where t3o.x between 3 and 4; + +--echo # Non FULL JOIN table first. +explain +select straight_join coalesce(t3l.k, t3r.k) as k, t3o.x +from t3o, t3l full join t3r on t3l.k = t3r.k +where t3o.x between 3 and 4; +--sorted_result +select straight_join coalesce(t3l.k, t3r.k) as k, t3o.x +from t3o, t3l full join t3r on t3l.k = t3r.k +where t3o.x between 3 and 4; +--sorted_result +select coalesce(t3l.k, t3r.k) as k, t3o.x +from t3o, t3l left join t3r on t3l.k = t3r.k +where t3o.x between 3 and 4 +union +select coalesce(t3l.k, t3r.k) as k, t3o.x +from t3o, t3l right join t3r on t3l.k = t3r.k +where t3o.x between 3 and 4; + +--echo # Two distinct FULL JOIN pairs with a non FULL JOIN table +--echo # between them, under STRAIGHT_JOIN. Each pair is internally +--echo # contiguous so STRAIGHT_JOIN is honored and the result matches +--echo # the cross of the two pairs' LEFT/RIGHT UNION oracle. 
+create table t3n (k int); +insert into t3n values (1), (3), (5), (7); +explain +select straight_join + coalesce(t3l.k, t3r.k) as klr, t3o.x, + coalesce(t3m.k, t3n.k) as kmn +from (t3l full join t3r on t3l.k = t3r.k), + t3o, + (t3m full join t3n on t3m.k = t3n.k) +where t3o.x between 3 and 4; +--sorted_result +select straight_join + coalesce(t3l.k, t3r.k) as klr, t3o.x, + coalesce(t3m.k, t3n.k) as kmn +from (t3l full join t3r on t3l.k = t3r.k), + t3o, + (t3m full join t3n on t3m.k = t3n.k) +where t3o.x between 3 and 4; +--sorted_result +select lr.klr, t3o.x, mn.kmn +from (select coalesce(t3l.k, t3r.k) as klr + from t3l left join t3r on t3l.k = t3r.k + union + select coalesce(t3l.k, t3r.k) + from t3l right join t3r on t3l.k = t3r.k) lr, + t3o, + (select coalesce(t3m.k, t3n.k) as kmn + from t3m left join t3n on t3m.k = t3n.k + union + select coalesce(t3m.k, t3n.k) + from t3m right join t3n on t3m.k = t3n.k) mn +where t3o.x between 3 and 4; +drop table t3n; + +drop table t3o, t3l, t3r, t3m; + + +--echo # ======================================================== +--echo # Section 18: Regressions +--echo # +--echo # Specific bugs that were fixed; kept as targeted cases so +--echo # they don't silently regress. +--echo # ======================================================== + +--echo # FULL JOIN with GROUP BY: previously crashed on the +--echo # create_sort_index assertion (filesort_result != 0) when +--echo # AGGR_OP::end_send was called twice. 
+create table t1 (grp char(1), val int); +insert into t1 values ('a',10), ('a',20), ('b',30), ('c',40); +create table t2 (grp char(1), val int); +insert into t2 values ('b',100), ('c',200), ('c',300), ('d',400); +--sorted_result +select coalesce(t1.grp, t2.grp) as grp, + count(*) as cnt, + sum(t1.val) as s1, + sum(t2.val) as s2 +from t1 full join t2 on t1.grp = t2.grp +group by coalesce(t1.grp, t2.grp); +--sorted_result +select coalesce(dt.grp1, dt.grp2) as grp, + count(*) as cnt, + sum(dt.val1) as s1, + sum(dt.val2) as s2 +from (select t1.grp as grp1, t1.val as val1, t2.grp as grp2, t2.val as val2 + from t1 left join t2 on t1.grp = t2.grp + union + select t1.grp, t1.val, t2.grp, t2.val + from t1 right join t2 on t1.grp = t2.grp) dt +group by coalesce(dt.grp1, dt.grp2); +drop table t1, t2; + +--echo # simplify_joins: do not flatten an FULL JOIN nest when +--echo # it is not at the top level. +--echo # +--echo # simplify_joins moves ON into WHERE and clears on_expr on the +--echo # nest, which would normally make the nest eligible for flattening. +--echo # Flattening here would expose t1 and t2 at top level next to +--echo # t3, allow the optimizer pick a plan that interleaves t3 between +--echo # the FULL JOIN tables. +create table t1 (a int); +insert into t1 values (2), (1), (7), (1), (2); +create table t2 (b int); +insert into t2 values (4),(5),(9), (4),(1),(7); +create table t3 (c int); +insert into t3 values (3),(1),(3),(9); +create index idx_a on t1(a); +--sorted_result +select * from t3 inner join (t1 full join t2 on t1.a=t2.b) + on (t3.c=t1.a or t2.b=5); +--sorted_result +select * from t3 inner join (t1 left join t2 on t1.a=t2.b) + on (t3.c=t1.a or t2.b=5) +union all +select * from t3 inner join (t1 right join t2 on t1.a=t2.b) + on (t3.c=t1.a or t2.b=5) +where t1.a is null; +drop table t1, t2, t3; + +--echo # simplify_joins: DO flatten an FJ-containing nest when it is the +--echo # sole nest of its embedding. 
+--echo # +--echo # Allow the fj-nest to flatten `t1 FULL JOIN t2 ...` when it +--echo # has no siblings, because adjacency is then trivially preserved. +create table t1 (a int, b int); +insert into t1 values (1,10), (2,20), (3,30); +create table t2 (a int, b int); +insert into t2 values (2,200), (3,300), (4,400); +create table t3 (a int, c varchar(10)); +insert into t3 values (1,'x'), (2,'y'), (4,'z'); +--sorted_result +select dt.a1, dt.a2, t3.c +from (select t1.a as a1, t2.a as a2 from t1 full join t2 on t1.a=t2.a) dt + inner join t3 on coalesce(dt.a1, dt.a2) = t3.a; +--sorted_result +select dt.a1, dt.a2, t3.c +from (select t1.a as a1, t2.a as a2 from t1 left join t2 on t1.a=t2.a + union + select t1.a as a1, t2.a as a2 from t1 right join t2 on t1.a=t2.a) dt + inner join t3 on coalesce(dt.a1, dt.a2) = t3.a; +drop table t1, t2, t3; + +--echo # simplify_joins: do not flatten a nest that carries JOIN_TYPE_FULL +--echo # on itself. +create table t1 (a int); +insert into t1 values (1),(2),(3); +create table t2 (a int); +insert into t2 values (2),(3),(4); +create table t3 (a int); +insert into t3 values (3),(4),(5); +--sorted_result +select t1.a as a1, t2.a as a2, t3.a as a3 +from (t1 join t2 on t1.a=t2.a) full join t3 on t2.a=t3.a; +--sorted_result +select t1.a as a1, t2.a as a2, t3.a as a3 +from (t1 join t2 on t1.a=t2.a) left join t3 on t2.a=t3.a +union all +select t1.a as a1, t2.a as a2, t3.a as a3 +from (t1 join t2 on t1.a=t2.a) right join t3 on t2.a=t3.a +where t2.a is null; +drop table t1, t2, t3; + +--echo # INNER / CROSS JOIN of an outer table with a FULL JOIN must not +--echo # interleave the outer table between the FULL JOIN tables. 
+create table t1 (a int); +insert into t1 values (2), (1), (7), (1), (2); +create table t2 (b int); +insert into t2 values (4),(5),(9), (4),(1),(7); +create table t3 (c int); +insert into t3 values (3),(1),(3),(9); +create index idx_a on t1(a); + +--sorted_result +select * from t3 inner join (t1 full join t2 on t1.a=t2.b) + on (t3.c=t1.a or t2.b=5); +--sorted_result +select * from t3 cross join (t1 full join t2 on t1.a=t2.b); + +# t3 LEFT JOIN (FJ): the FULL JOIN is the inner side of the outer join; rejected. +--error ER_FULL_JOIN_NOT_ALLOWED_IN_OUTER_JOIN +select * from t3 left join (t1 full join t2 on t1.a=t2.b) + on (t3.c=t1.a or t2.b=5) and t3.c != 9; +# t3 RIGHT JOIN (FJ) is canonicalized to (FJ) LEFT JOIN t3, making t3 the +# inner side and leaving the FULL JOIN on the outer side; allowed. +--sorted_result +select * from t3 right join (t1 full join t2 on t1.a=t2.b) + on (t3.c=t1.a or t2.b=5) and t3.c != 9; + +drop table t1, t2, t3; + +--echo # JOIN_ORDER hints around a FULL JOIN. The partners of each +--echo # FULL JOIN must remain contiguous in the join order so the +--echo # null-complement pass can fire at the right partner with the +--echo # LEFT JOIN pass complete; outside tables may sit before or +--echo # after the pair. A hint that names an outside table between +--echo # the two partners is rejected with a conflict warning; a hint +--echo # that names an outside table before (or after) the pair is +--echo # honored. +create table t1 (a int); +insert into t1 values (2), (1), (7), (1), (2); +create table t2 (b int); +insert into t2 values (4), (5), (9), (4), (1), (7); +create table t3 (c int); +insert into t3 values (3), (1), (3), (9); +--echo # No hint, base case, should match other results. +explain extended select * +from t3 inner join (t1 full join t2 on t1.a=t2.b) + on (t3.c=t1.a or t2.b=5); +--sorted_result +select * +from t3 inner join (t1 full join t2 on t1.a=t2.b) + on (t3.c=t1.a or t2.b=5); + +--echo # Outside table before the FULL JOIN pair, honored. 
+explain extended select /*+ join_order(t3,t1,t2) */ * +from t3 inner join (t1 full join t2 on t1.a=t2.b) + on (t3.c=t1.a or t2.b=5); +--sorted_result +select /*+ join_order(t3,t1,t2) */ * +from t3 inner join (t1 full join t2 on t1.a=t2.b) + on (t3.c=t1.a or t2.b=5); + +--echo # Outside table after the FULL JOIN pair, honored. +explain extended select /*+ join_order(t1,t2,t3) */ * +from t3 inner join (t1 full join t2 on t1.a=t2.b) + on (t3.c=t1.a or t2.b=5); +--sorted_result +select /*+ join_order(t1,t2,t3) */ * +from t3 inner join (t1 full join t2 on t1.a=t2.b) + on (t3.c=t1.a or t2.b=5); + +--echo # Outside table between the two FULL JOIN partners, rejected. +explain extended select /*+ join_order(t1,t3,t2) */ * +from t3 inner join (t1 full join t2 on t1.a=t2.b) + on (t3.c=t1.a or t2.b=5); +--sorted_result +select /*+ join_order(t1,t3,t2) */ * +from t3 inner join (t1 full join t2 on t1.a=t2.b) + on (t3.c=t1.a or t2.b=5); +drop table t1, t2, t3; + +--echo # ((t1, t2) FULL JOIN t3) INNER JOIN t4 with the FULL JOIN on +--echo # the left side of the INNER JOIN, the configuration phase 2 +--echo # supports. The FULL JOIN block covers three tables; t4 sits +--echo # outside and may go before or after the block but never +--echo # between any two of t1, t2, t3. The FULL JOIN ON +--echo # condition is a simple equi-join so the query translates +--echo # to PostgreSQL; the INNER JOIN ON is disjunctive so the +--echo # FULL JOIN is not eligible for the LEFT/RIGHT/INNER +--echo # rewrite and survives into the join optimizer. +create table t1 (a int); +insert into t1 values (1), (2); +create table t2 (b int); +insert into t2 values (3), (4); +create table t3 (c int); +insert into t3 values (1), (5); +create table t4 (d int); +insert into t4 values (1), (5); + +--echo # No hint, base case. 
+explain extended select * +from ((t1, t2) full join t3 on t3.c=t1.a) inner join t4 + on (t4.d=t3.c or t4.d=5); +--sorted_result +select * +from ((t1, t2) full join t3 on t3.c=t1.a) inner join t4 + on (t4.d=t3.c or t4.d=5); + +--echo # Outside table before the FULL JOIN block, honored. +explain extended select /*+ join_order(t4,t1,t2,t3) */ * +from ((t1, t2) full join t3 on t3.c=t1.a) inner join t4 + on (t4.d=t3.c or t4.d=5); +--sorted_result +select /*+ join_order(t4,t1,t2,t3) */ * +from ((t1, t2) full join t3 on t3.c=t1.a) inner join t4 + on (t4.d=t3.c or t4.d=5); + +--echo # Outside table after the FULL JOIN block, honored. +explain extended select /*+ join_order(t1,t2,t3,t4) */ * +from ((t1, t2) full join t3 on t3.c=t1.a) inner join t4 + on (t4.d=t3.c or t4.d=5); +--sorted_result +select /*+ join_order(t1,t2,t3,t4) */ * +from ((t1, t2) full join t3 on t3.c=t1.a) inner join t4 + on (t4.d=t3.c or t4.d=5); + +--echo # Swap t1 and t2 within the (t1, t2) nest while keeping +--echo # the FULL JOIN block contiguous, honored. +explain extended select /*+ join_order(t4,t2,t1,t3) */ * +from ((t1, t2) full join t3 on t3.c=t1.a) inner join t4 + on (t4.d=t3.c or t4.d=5); +--sorted_result +select /*+ join_order(t4,t2,t1,t3) */ * +from ((t1, t2) full join t3 on t3.c=t1.a) inner join t4 + on (t4.d=t3.c or t4.d=5); + +--echo # Outside table sandwiched between two FULL JOIN partners +--echo # of the (t1, t2) nest, rejected. +explain extended select /*+ join_order(t1,t4,t2,t3) */ * +from ((t1, t2) full join t3 on t3.c=t1.a) inner join t4 + on (t4.d=t3.c or t4.d=5); +--sorted_result +select /*+ join_order(t1,t4,t2,t3) */ * +from ((t1, t2) full join t3 on t3.c=t1.a) inner join t4 + on (t4.d=t3.c or t4.d=5); + +--echo # Outside table sandwiched between (t1, t2) and t3 +--echo # across the FULL JOIN, rejected. 
+explain extended select /*+ join_order(t1,t2,t4,t3) */ * +from ((t1, t2) full join t3 on t3.c=t1.a) inner join t4 + on (t4.d=t3.c or t4.d=5); +--sorted_result +select /*+ join_order(t1,t2,t4,t3) */ * +from ((t1, t2) full join t3 on t3.c=t1.a) inner join t4 + on (t4.d=t3.c or t4.d=5); +drop table t1, t2, t3, t4; + +--echo # JOIN_PREFIX and JOIN_SUFFIX hints around a FULL JOIN, +--echo # with the FULL JOIN on the left side of the INNER JOIN +--echo # (the configuration phase 2 supports). JOIN_PREFIX forces +--echo # the listed tables to lead the join order; JOIN_SUFFIX +--echo # forces them to trail it. Both must respect the FULL JOIN +--echo # contiguity invariant. A hint that ends up putting a +--echo # non-FULL-JOIN table between two FULL JOIN tables, once the +--echo # implicit prefix or suffix dependencies are added, is +--echo # rejected. +create table t1 (a int); +insert into t1 values (2), (1), (7), (1), (2); +create table t2 (b int); +insert into t2 values (4), (5), (9), (4), (1), (7); +create table t3 (c int); +insert into t3 values (3), (1), (3), (9); + +--echo # JOIN_PREFIX with the outside table, honored (was rejected +--echo # before the FULL JOIN check was relaxed). +explain extended select /*+ join_prefix(t3) */ * +from (t1 full join t2 on t1.a=t2.b) inner join t3 + on (t3.c=t1.a or t2.b=5); +--sorted_result +select /*+ join_prefix(t3) */ * +from (t1 full join t2 on t1.a=t2.b) inner join t3 + on (t3.c=t1.a or t2.b=5); + +--echo # JOIN_PREFIX with the FULL JOIN pair, honored. +explain extended select /*+ join_prefix(t1, t2) */ * +from (t1 full join t2 on t1.a=t2.b) inner join t3 + on (t3.c=t1.a or t2.b=5); +--sorted_result +select /*+ join_prefix(t1, t2) */ * +from (t1 full join t2 on t1.a=t2.b) inner join t3 + on (t3.c=t1.a or t2.b=5); + +--echo # JOIN_PREFIX naming one FULL JOIN partner and the outside +--echo # table forces the other partner after both, splitting the +--echo # FULL JOIN block, rejected. 
+explain extended select /*+ join_prefix(t1, t3) */ * +from (t1 full join t2 on t1.a=t2.b) inner join t3 + on (t3.c=t1.a or t2.b=5); +--sorted_result +select /*+ join_prefix(t1, t3) */ * +from (t1 full join t2 on t1.a=t2.b) inner join t3 + on (t3.c=t1.a or t2.b=5); + +--echo # JOIN_SUFFIX with the outside table, honored. +explain extended select /*+ join_suffix(t3) */ * +from (t1 full join t2 on t1.a=t2.b) inner join t3 + on (t3.c=t1.a or t2.b=5); +--sorted_result +select /*+ join_suffix(t3) */ * +from (t1 full join t2 on t1.a=t2.b) inner join t3 + on (t3.c=t1.a or t2.b=5); + +--echo # JOIN_SUFFIX with the FULL JOIN pair, honored (was +--echo # rejected before the FULL JOIN check was relaxed). +explain extended select /*+ join_suffix(t1, t2) */ * +from (t1 full join t2 on t1.a=t2.b) inner join t3 + on (t3.c=t1.a or t2.b=5); +--sorted_result +select /*+ join_suffix(t1, t2) */ * +from (t1 full join t2 on t1.a=t2.b) inner join t3 + on (t3.c=t1.a or t2.b=5); + +--echo # JOIN_SUFFIX naming the outside table and one FULL JOIN +--echo # partner forces the other partner before both, splitting +--echo # the FULL JOIN block, rejected. +explain extended select /*+ join_suffix(t3, t2) */ * +from (t1 full join t2 on t1.a=t2.b) inner join t3 + on (t3.c=t1.a or t2.b=5); +--sorted_result +select /*+ join_suffix(t3, t2) */ * +from (t1 full join t2 on t1.a=t2.b) inner join t3 + on (t3.c=t1.a or t2.b=5); +drop table t1, t2, t3; + +--echo # Empty MyISAM FULL JOIN partner. MyISAM reports an exact 0 row +--echo # count, so the table used to be promoted to a JT_SYSTEM const +--echo # NULL row at optimize time. The synthesized NULL row drove the +--echo # LEFT JOIN component to emit a bogus row of NULLs when the +--echo # other partner was also empty. Earlier still, the duplicate +--echo # filter setup crashed in this scenario. FULL JOIN tables +--echo # bypass the const table optimization. 
+create table t1 (pk int) ENGINE=MyISAM; +create table t2 (pk int) ENGINE=InnoDB; +--sorted_result +select t1.pk from t1 full join t2 on (true); +--sorted_result +select t1.pk from t1 left join t2 on (true) +union +select t1.pk from t1 right join t2 on (true); +drop table t1, t2; + +--echo # Both partners empty MyISAM. +create table t1 (pk int) ENGINE=MyISAM; +create table t2 (pk int) ENGINE=MyISAM; +--sorted_result +select * from t1 full join t2 on (true); +--sorted_result +select * from t1 left join t2 on (true) +union +select * from t1 right join t2 on (true); +drop table t1, t2; + +--echo # Empty MyISAM on the left, InnoDB on the right with rows. The +--echo # bug was masked here because the phantom left NULL row happened +--echo # to produce the same rows the null complement pass would. +create table t1 (pk int) ENGINE=MyISAM; +create table t2 (pk int) ENGINE=InnoDB; +insert into t2 values (10), (20); +--sorted_result +select * from t1 full join t2 on (true); +--sorted_result +select * from t1 left join t2 on (true) +union +select * from t1 right join t2 on (true); +drop table t1, t2; + +--echo # Empty MyISAM on the right (the side that does not normally +--echo # become const) and empty InnoDB on the left. +create table t1 (pk int) ENGINE=InnoDB; +create table t2 (pk int) ENGINE=MyISAM; +--sorted_result +select * from t1 full join t2 on (true); +--sorted_result +select * from t1 left join t2 on (true) +union +select * from t1 right join t2 on (true); +drop table t1, t2; + +--echo # MyISAM partners with one row each. Without the guards, the +--echo # stats.records <= 1 path would also have promoted them. 
+create table t1 (a int) ENGINE=MyISAM; +create table t2 (b int) ENGINE=MyISAM; +insert into t1 values (1); +insert into t2 values (2); +--sorted_result +select * from t1 full join t2 on t1.a = t2.b; +--sorted_result +select * from t1 left join t2 on t1.a = t2.b +union +select * from t1 right join t2 on t1.a = t2.b; +drop table t1, t2; + +--echo # simplify_joins must not rewrite a FULL JOIN whose left side +--echo # still contains an unrewritten FULL JOIN. Without the fix, +--echo # rewrite_full_to_right put the inner FULL JOIN on the inner +--echo # side of the resulting LEFT JOIN, a shape the optimizer could +--echo # not plan, and tripped the found_tables > 0 assertion in +--echo # best_extension_by_limited_search. +--echo # +--echo # The trigger is a WHERE predicate that rejects nulls on the +--echo # right base table of the outermost FULL JOIN in a chained FULL +--echo # JOIN. The fix keeps the FULL JOIN; the null complement pass +--echo # runs and the WHERE filters any rows the rewrite would have +--echo # eliminated. Per Section 6, the LEFT JOIN UNION RIGHT JOIN +--echo # oracle is not valid for chained FULL JOINs, so the recorded +--echo # result is the oracle. +create table t1 (a int); +create table t2 (a int); +create table t3 (a int); +create table t4 (a int); +insert into t1 values (1); +insert into t2 values (10); +insert into t3 values (100); +insert into t4 values (1000), (2000); +--sorted_result +select t1.a, t2.a, t3.a, t4.a +from t1 left join t2 on (true) + full join t3 on (true) + full join t4 on (true) +where t4.a <= 3000; +explain extended select t1.a, t2.a, t3.a, t4.a +from t1 left join t2 on (true) + full join t3 on (true) + full join t4 on (true) +where t4.a <= 3000; +--echo # IS NOT NULL drives the same rewrite path. 
+--sorted_result +select t1.a, t2.a, t3.a, t4.a +from t1 left join t2 on (true) + full join t3 on (true) + full join t4 on (true) +where t4.a is not null; +explain extended select t1.a, t2.a, t3.a, t4.a +from t1 left join t2 on (true) + full join t3 on (true) + full join t4 on (true) +where t4.a is not null; +--echo # RIGHT JOIN canonicalizes to LEFT JOIN, same trigger applies. +--sorted_result +select t1.a, t2.a, t3.a, t4.a +from t1 right join t2 on (true) + full join t3 on (true) + full join t4 on (true) +where t4.a <= 3000; +explain extended select t1.a, t2.a, t3.a, t4.a +from t1 right join t2 on (true) + full join t3 on (true) + full join t4 on (true) +where t4.a <= 3000; +--echo # Reproducer matching the original ticket. All left tables are +--echo # empty so only null complement rows for t4 reach the WHERE, +--echo # and GROUP BY collapses them. +delete from t1; +delete from t2; +delete from t3; +--sorted_result +select t1.a as field1 +from t1 left join t2 on (true) + full join t3 on (true) + full join t4 on (true) +where t4.a <= 3000 +group by field1; +explain extended select t1.a as field1 +from t1 left join t2 on (true) + full join t3 on (true) + full join t4 on (true) +where t4.a <= 3000 +group by field1; +--echo # The guard must not block the normal FULL JOIN rewrite when +--echo # the left side is a base table. A plain t1 FULL JOIN t4 with +--echo # the same WHERE predicate still rewrites and the UNION oracle +--echo # matches. 
+insert into t1 values (1); +--sorted_result +select t1.a, t4.a from t1 full join t4 on (true) where t4.a <= 3000; +explain extended +select t1.a, t4.a from t1 full join t4 on (true) where t4.a <= 3000; +--sorted_result +select t1.a, t4.a from t1 left join t4 on (true) where t4.a <= 3000 +union +select t1.a, t4.a from t1 right join t4 on (true) where t4.a <= 3000; +explain extended +select t1.a, t4.a from t1 left join t4 on (true) where t4.a <= 3000 +union +select t1.a, t4.a from t1 right join t4 on (true) where t4.a <= 3000; +drop table t1, t2, t3, t4; + +--echo # Taller-tree variant of the chained FULL JOIN regression above. +--echo # The guard at each level of rewrite_full_outer_joins runs after +--echo # simplify_nested_join has descended into left_table, so a longer +--echo # chain is handled recursively: each FULL JOIN whose left side +--echo # still contains an unrewritten FULL JOIN stays as a FULL JOIN. +--echo # With the null-rejecting WHERE only on the outermost right, no +--echo # inner FULL JOIN can rewrite, so every FULL JOIN in the chain +--echo # is preserved. +create table t1 (a int); +create table t2 (a int); +create table t3 (a int); +create table t4 (a int); +create table t5 (a int); +create table t6 (a int); +insert into t1 values (1); +insert into t2 values (10); +insert into t3 values (100); +insert into t4 values (1000); +insert into t5 values (10000); +insert into t6 values (100000), (200000); +--sorted_result +select t1.a, t2.a, t3.a, t4.a, t5.a, t6.a +from t1 left join t2 on (true) + full join t3 on (true) + full join t4 on (true) + full join t5 on (true) + full join t6 on (true) +where t6.a <= 300000; +explain extended select t1.a, t2.a, t3.a, t4.a, t5.a, t6.a +from t1 left join t2 on (true) + full join t3 on (true) + full join t4 on (true) + full join t5 on (true) + full join t6 on (true) +where t6.a <= 300000; +drop table t1, t2, t3, t4, t5, t6; + +--echo # Companion case: the guard must not fire pessimistically. 
When +--echo # a null-rejecting WHERE predicate sits on a middle base table +--echo # rather than the outermost right, the inner FULL JOIN rewrites +--echo # (its left side is a base-table-only LEFT JOIN, not yet +--echo # containing a FULL JOIN), and the outer FULL JOIN's left side +--echo # no longer contains a FULL JOIN by the time the guard checks +--echo # it, so the outer FULL JOIN also rewrites. Both FULL JOINs +--echo # collapse to LEFT JOINs and the query is plannable without the +--echo # null complement pass. +--echo # +--echo # Note: t3 is detected as a system const table in the recorded +--echo # plan. MDEV-38508 (commit 7d6a036c3a1) skips constant table +--echo # promotion for tables whose outer_join carries JOIN_TYPE_FULL, +--echo # but that guard reads the current outer_join bits and +--echo # simplify_joins clears JOIN_TYPE_FULL when it rewrites a FULL +--echo # JOIN to a LEFT JOIN. Here both FULL JOINs are rewritten +--echo # before make_join_statistics runs, so t3 is no longer an FJ +--echo # partner at that point and the standard LEFT JOIN const-table +--echo # path applies. This is safe: the FULL JOIN's null complement +--echo # pass is what made const promotion dangerous in MDEV-38508, and +--echo # there is no null complement pass after the rewrite; any +--echo # predicate that drove the rewrite is null-rejecting on the +--echo # rewritten table and so rejects the synthesized NULL row anyway. +create table t1 (a int); +create table t2 (a int); +create table t3 (a int); +create table t4 (a int); +insert into t1 values (1); +insert into t2 values (10); +insert into t3 values (100); +insert into t4 values (1000); +--sorted_result +select t1.a, t2.a, t3.a, t4.a +from t1 left join t2 on (true) + full join t3 on (true) + full join t4 on (true) +where t3.a <= 1000; +--echo # Verify the simplified shape: a plain LEFT JOIN chain, no FULL. 
+explain extended select t1.a +from t1 left join t2 on (true) + full join t3 on (true) + full join t4 on (true) +where t3.a <= 1000; +drop table t1, t2, t3, t4; + +--echo # MDEV-39605: a merged derived table containing a FULL JOIN previously +--echo # hit an assertion in table elimination. simplify_joins +--echo # preserves the derived's nested join (no on_expr, not a semi-join) +--echo # to keep the FULL JOIN tables adjacent in the parent join_list, but +--echo # the table elimination iterator must descend into that nest +--echo # without trying to eliminate it. +create table t1 (a int); +insert into t1 values (1), (2); +--sorted_result +select * from t1, (select alias1.a from t1 as alias1 + full join t1 as alias2 on (true)) dt; +--sorted_result +select * from t1, (select alias1.a from t1 as alias1 + left join t1 as alias2 on (true) + union all + select alias1.a from t1 as alias1 + right join t1 as alias2 on (true) + where alias1.a is null) dt; +drop table t1; + +--echo # When a constant table is a subquery, the duplicate filter +--echo # allocation loop used to walk off the end of JOIN::join_tab list. +create table t1 (pk int not null, primary key (pk)); +create table t2 (pk int); +select t1.pk from (t1 full join t2 on (true)) +where exists (select t2.pk from (t1 right join t2 on (t2.pk = t1.pk))); +insert into t1 values (1), (2); +insert into t2 values (1), (null); +--sorted_result +select t1.pk from (t1 full join t2 on (true)) +where exists (select t2.pk from (t1 right join t2 on (t2.pk = t1.pk))); +--sorted_result +select t1.pk from (t1 left join t2 on (true)) +where exists (select t2.pk from (t1 right join t2 on (t2.pk = t1.pk))) +union +select t1.pk from (t1 right join t2 on (true)) +where exists (select t2.pk from (t1 right join t2 on (t2.pk = t1.pk))); +drop table t1, t2; + +--echo # In a correlated subquery, the FULL JOIN null complement rescan +--echo # on the right table used to disable the keyread on the +--echo # index without re-enabling it. 
The next iteration of the +--echo # subquery then hit an assertion because the keyread +--echo # was expected to still be active. +create table t1 (pk int primary key); +create table t2 (pk int primary key, a int); +create table t3 (pk int primary key, a int, key idx(a)); +insert into t1 values (1), (2); +insert into t2 values (1, 10); +insert into t3 values (1, 100), (2, 200); +--sorted_result +select (select min(t2.pk) from (t2 full join t3 on true) + where t3.pk = t1.pk or t2.a = t2.a) as f +from t1; +--sorted_result +select (select min(dt.t2pk) from + (select t2.pk as t2pk, t2.a as t2a, t3.pk as t3pk + from t2 left join t3 on true + union + select t2.pk, t2.a, t3.pk from t2 right join t3 on true + where t2.pk is null) dt + where dt.t3pk = t1.pk or dt.t2a = dt.t2a) as f +from t1; +drop table t1, t2, t3; + +--echo # First, an inner FULL JOIN leaf that +--echo # carries its own on_expr did not inherit dep_tables from the +--echo # enclosing nests, so its dependency on the leftmost FULL JOIN +--echo # table was lost and the optimizer would pick it as the first +--echo # table. Second, compute_full_join_nest_tables iterated only +--echo # FULL JOIN leaves and walked their embedding chain, missing +--echo # any FULL JOIN table whose left side was a nest of inner +--echo # joins (where the nest carries JOIN_TYPE_FULL but its leaves +--echo # do not). These two issues led to an invalid FULL JOIN table +--echo # order. +--echo # +--echo # The shape places a nested join on the right side of a FULL JOIN, +--echo # which the post simplify_joins check now rejects, so the query no +--echo # longer reaches the table ordering code. It is kept as a guard that +--echo # the shape stays rejected rather than silently dropping rows. 
+create table t1 (pk int); +create table t2 (pk int); +create table t3 (pk int); +create table t4 (pk int); +create table t5 (pk int); +--error ER_FULL_JOIN_BASE_TABLES_ONLY +select t2.pk as field1 from t1 + full join t2 + full join t4 + full join t5 right join t3 on (true) + on (true) + full join t5 as alias2 on (true) + on (true) + on t1.pk = alias2.pk; +drop table t1, t2, t3, t4, t5; + +--echo # The FULL JOIN table can be a nest of inner joins, so in that +--echo # case the nest carries JOIN_TYPE_FULL but its leaves do not. +--echo # compute_full_join_nest_tables must still pick those leaves +--echo # up. +create table t1 (a int); +create table t2 (a int); +create table t3 (a int); +select * from t1 inner join t2 full join t3 on t1.a=t3.a; +drop table t1, t2, t3; + +--echo # Both FULL JOIN tables carried the ON expression which broke +--echo # dependency propagation during make_join_statistics. Rather +--echo # than make changes there, just carry the ON expression on only +--echo # one of the FULL JOIN tables, like the other join types do. +--echo # +--echo # This shape also places a nested join on the right side of a FULL +--echo # JOIN and is now rejected after simplify_joins. Kept as a guard +--echo # that the shape stays rejected. +create table t1 (pk int); +create table t2 (pk int); +create table t3 (pk int); +create table t4 (pk int); +insert into t1 values (1), (2); +--error ER_FULL_JOIN_BASE_TABLES_ONLY +select max(t3.pk) from t1 + full join t2 + full join t3 on t2.pk = t3.pk + full join t1 as alias5 + full join t4 on (true) + on t3.pk = alias5.pk + on t1.pk = alias5.pk; +drop table t1, t2, t3, t4; + +--echo # Inner FULL JOIN tables carrying a constant ON expression did not +--echo # inherit the dep_tables of their enclosing FULL JOIN nest because +--echo # make_join_statistics, for any table whose on_expr was set, took +--echo # an early exit that skipped embedding dependency propagation. 
+create table t1 (pk int); +create table t2 (pk int); +create table t3 (pk int); +create table t4 (pk int); +insert into t1 values (1), (2); +select t1.pk from t1 + full join t2 + full outer join t3 on (true) + full join t4 on (true) + on t1.pk = t4.pk; +drop table t1, t2, t3, t4; + +--echo # When an inner table of a FULL JOIN sat inside a nest that was +--echo # itself the LEFT side of an enclosing FULL JOIN, +--echo # make_outerjoin_info skipped that nest while building the outer +--echo # join scope chain, leaving the inner table's first_upper +--echo # unlinked. add_found_match_trig_cond walked off the broken +--echo # chain and dereferenced NULL when make_join_select pushed an +--echo # ON condition to that table. +--echo # +--echo # Separately, the outermost FULL JOIN's right operand was a +--echo # nested join expression rather than a single base table (the +--echo # parser places it there when the FULL JOIN chain's ONs are all +--echo # trailing), and the null complement pass had no JOIN_TAB +--echo # carrying FULL|RIGHT to attach an fj_dups filter to, so the +--echo # unmatched rows from the right side were never emitted. +create table t1 (pk int); +create table t2 (pk int); +create table t3 (pk int); +create table t4 (pk int); +create table t5 (pk int); +insert into t1 values (1), (2); +insert into t2 values (1), (3); +insert into t3 values (1), (4); +insert into t4 values (1), (5); +insert into t5 values (1), (6); +--sorted_result +select t1.pk + from t1 + full join t2 + full outer join t3 on (true) + full join t4 on t3.pk = t4.pk + left join t5 on t2.pk = t5.pk + on t1.pk = t4.pk; +drop table t1, t2, t3, t4, t5; + +--echo # When an IN subquery in the ON clause of a JOIN to a FULL JOIN +--echo # nest was converted into a semijoin, the semijoin nest was inserted +--echo # as a child of the FULL JOIN nest. Because the FULL JOIN nest cannot +--echo # be flattened, the semijoin nest remained inside it. 
+--echo # check_interleaving_with_nj walked through the semijoin nest +--echo # transparently and incremented the FULL JOIN nest's counter once for +--echo # every semijoin table instead of once for the whole semijoin nest, +--echo # eventually overflowing past n_tables and triggering the assertion in +--echo # greedy_search. +create table t1 (pk int); +create table t2 (pk int); +select table1.pk from t1 as table1 + straight_join (select alias1.* from t2 as alias1 + full join t1 as alias2 on (true)) as table2 + on (table1.pk in (select t2.pk from t1 left join t2 on (true))); +drop table t1, t2; + +--echo # t1 is detected as a const table through the keyuse based const +--echo # detection in make_join_statistics. The HAVING predicate field2 = 8 +--echo # is field2 = t1.pk, and condition pushdown from HAVING moves it down +--echo # as the equality t1.pk = 8. That binds every part of t1's primary +--echo # key to a constant, so make_join_statistics reads the single matching +--echo # row at optimization time and marks t1 const. That promotion path +--echo # does not exclude FULL JOIN tables, so t1 folds to const even though +--echo # it is the left side of a FULL JOIN. +--echo # When the left side of a FULL JOIN has const table optimizations, +--echo # then the right side table becomes the inner-most join tab and +--echo # also its own null complement target. sub_select started PFS batch +--echo # mode for the inner-most tab and then, before ending it, ran the +--echo # null complement pass, which calls sub_select again on that same tab +--echo # and started batch mode a second time, tripping the m_psi_batch_mode +--echo # assertion. 
+create table t1 (pk int not null, primary key(pk)); +create table t2 (pk int); +insert into t1 values (8); +select t1.pk as field2 from t1 full join t2 on (true) + group by field2 having field2 = 8; +select t1.pk as field2 from t1 full join t2 on (true) + group by field2 having t1.pk = 8; +insert into t2 values (1), (8), (null); +--sorted_result +select t1.pk as a, t2.pk as b from t1 full join t2 on (t1.pk = t2.pk); +--sorted_result +select t1.pk as a, t2.pk as b from t1 left join t2 on (t1.pk = t2.pk) +union +select t1.pk as a, t2.pk as b from t1 right join t2 on (t1.pk = t2.pk); +drop table t1, t2; + +--echo # A surviving FULL JOIN must have the right side ordered after the left. +--echo # When the ON expression does not tie the right side to the left (a +--echo # constant predicate, or a predicate over the right side alone) the +--echo # right side had no dependency forcing that order, so the optimizer +--echo # could place it first, either by const folding a constant key lookup +--echo # or by cost when it was the cheaper scan, dropping the right side +--echo # unmatched rows or producing a cross product. +create table t1 (a int); +insert into t1 values (1), (2); +create table t2 (pk int primary key, b int); +insert into t2 values (5, 50), (6, 60), (7, 70); +--echo # Constant lookup on the right side's primary key. +--sorted_result +select * from t1 full join t2 on t2.pk = 5; +--sorted_result +select * from t1 left join t2 on t2.pk = 5 +union +select * from t1 right join t2 on t2.pk = 5; +--echo # Constant predicate over a non-indexed right side column, with the +--echo # left side as the cheaper scan so it is ordered first. +--sorted_result +select * from t1 full join t2 on t2.b = 50; +--sorted_result +select * from t1 left join t2 on t2.b = 50 +union +select * from t1 right join t2 on t2.b = 50; +--echo # Impossible ON over the right side's primary key. 
+--sorted_result +select * from t1 full join t2 on t2.pk = 5 and t2.pk = 6; +--sorted_result +select * from t1 left join t2 on t2.pk = 5 and t2.pk = 6 +union +select * from t1 right join t2 on t2.pk = 5 and t2.pk = 6; +drop table t1, t2; +--echo # The right side is the cheaper scan, so the optimizer ordered it +--echo # first by cost, producing a cross product. +create table t1 (a int); +insert into t1 values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10); +create table t2 (pk int primary key, b int); +insert into t2 values (5, 50), (6, 60), (7, 70); +--sorted_result +select * from t1 full join t2 on t2.b = 50; +--sorted_result +select * from t1 left join t2 on t2.b = 50 +union +select * from t1 right join t2 on t2.b = 50; +drop table t1, t2; + +--echo # ==================================================================== +--echo # Section 19: Right side of FULL JOIN must be a base table +--echo # ==================================================================== +--echo # +--echo # Derived tables, views, and subqueries on the right side of a FULL +--echo # JOIN are not supported and must produce an error. 
+ +create table t1 (a int); +create table t2 (a int); + +--echo # Derived table on the right side of a simple FULL JOIN +--error ER_FULL_JOIN_BASE_TABLES_ONLY +select * from t1 full join (select * from t2) dt on t1.a = dt.a; + +--echo # View on the right side of a simple FULL JOIN +create view v1 as select * from t2; +--error ER_FULL_JOIN_BASE_TABLES_ONLY +select * from t1 full join v1 on t1.a = v1.a; +drop view v1; + +--echo # Derived table on the right side of a nested FULL JOIN +create table t3 (a int); +--error ER_FULL_JOIN_BASE_TABLES_ONLY +select * from t1 full join t2 on t1.a = t2.a + full join (select * from t3) dt on t2.a = dt.a; +drop table t1, t2, t3; + +--echo # ==================================================================== +--echo # Section 20: FULL JOIN not allowed on the inner side of a +--echo # LEFT or RIGHT JOIN +--echo # ==================================================================== +--echo # +--echo # The FULL JOIN null-complement pass emits right-unmatched rows at +--echo # the end of its right partner's scan; it has no mechanism to pair +--echo # those rows with each outer row of an enclosing LEFT/RIGHT JOIN +--echo # when the FULL JOIN sits on that join's inner (null-complemented) +--echo # side. Rather than silently return wrong results, reject such +--echo # queries. +--echo # +--echo # MariaDB's convert_right_join rewrites every RIGHT JOIN into an +--echo # equivalent LEFT JOIN at parse time, so the notion of "inner" is +--echo # unambiguous in the post-conversion shape -- it is the side that +--echo # gets null-complemented. 
The equivalences the check relies on: +--echo # +--echo # t3 LEFT JOIN (FJ) : (FJ) is inner -> rejected +--echo # t3 RIGHT JOIN (FJ) : (FJ) LEFT JOIN t3 -> t3 is inner, +--echo # FJ is outer +--echo # -> allowed +--echo # (FJ) LEFT JOIN t3 : t3 is inner -> allowed +--echo # (FJ) RIGHT JOIN t3 : t3 LEFT JOIN (FJ) -> (FJ) is inner +--echo # -> rejected +--echo # +--echo # INNER JOIN has no inner/outer distinction; a FULL JOIN on either +--echo # side of an INNER JOIN is allowed. + +create table t1 (a int); +insert into t1 values (1), (2); +create table t2 (b int); +insert into t2 values (1), (3); +create table t3 (c int); +insert into t3 values (1), (4); + +--echo # t3 LEFT JOIN (FJ) -- FJ is inner -- rejected. +--error ER_FULL_JOIN_NOT_ALLOWED_IN_OUTER_JOIN +select * from t3 left join (t1 full join t2 on t1.a = t2.b) + on t3.c = t1.a; + +--echo # t3 RIGHT JOIN (FJ) -- canonicalizes to (FJ) LEFT JOIN t3; +--echo # t3 is inner, FJ is outer -- allowed. +select * from t3 right join (t1 full join t2 on t1.a = t2.b) + on t3.c = t1.a; + +--echo # t3 INNER JOIN (FJ) -- allowed. +select * from t3 inner join (t1 full join t2 on t1.a = t2.b) + on t3.c = t1.a; + +--echo # t3 , (FJ) (comma = inner) -- allowed. +select * from t3, (t1 full join t2 on t1.a = t2.b) + where t3.c = t1.a; + +--echo # (FJ) LEFT JOIN t3 -- t3 is inner -- allowed. +select * from (t1 full join t2 on t1.a = t2.b) left join t3 + on t1.a = t3.c; + +--echo # (FJ) RIGHT JOIN t3 -- canonicalizes to t3 LEFT JOIN (FJ); +--echo # FJ is inner -- rejected. +--error ER_FULL_JOIN_NOT_ALLOWED_IN_OUTER_JOIN +select * from (t1 full join t2 on t1.a = t2.b) right join t3 + on t1.a = t3.c; + +--echo # Derived table with FULL JOIN on the inner side -- rejected. 
+--error ER_FULL_JOIN_NOT_ALLOWED_IN_OUTER_JOIN +select dt.a1, dt.a2, t3.c +from (select t1.a as a1, t2.b as a2 from t1 full join t2 on t1.a=t2.b) dt + right join t3 on coalesce(dt.a1, dt.a2) = t3.c; + +--echo # FULL JOIN in a more deeply nested LEFT JOIN's inner side -- +--echo # rejected. Ensures the check descends through intermediate +--echo # INNER JOIN nesting. +--error ER_FULL_JOIN_NOT_ALLOWED_IN_OUTER_JOIN +select * from t3 left join ((t1 full join t2 on t1.a = t2.b) join t3 t3b) + on t3.c = t1.a; + +--echo # CREATE VIEW must reject the same offending queries at VIEW +--echo # creation time, not just at execution. +--error ER_FULL_JOIN_NOT_ALLOWED_IN_OUTER_JOIN +create view v_bad_left as + select * from t3 left join (t1 full join t2 on t1.a = t2.b) + on t3.c = t1.a; +--error ER_FULL_JOIN_NOT_ALLOWED_IN_OUTER_JOIN +create view v_bad_fjright as + select * from (t1 full join t2 on t1.a = t2.b) right join t3 + on t1.a = t3.c; + +--echo # CREATE VIEW with allowed FULL JOIN shapes. +create view v_ok_left as + select * from (t1 full join t2 on t1.a = t2.b) left join t3 + on t1.a = t3.c; +create view v_ok_right as + select * from t3 right join (t1 full join t2 on t1.a = t2.b) + on t3.c = t1.a; +create view v_ok_inner as + select * from t3 inner join (t1 full join t2 on t1.a = t2.b) + on t3.c = t1.a; +drop view v_ok_left, v_ok_right, v_ok_inner; + +drop table t1, t2, t3; + +--echo # End of 12.3 tests diff --git a/mysql-test/main/func_json.result b/mysql-test/main/func_json.result index 99f3e3da1e1c9..5a3c106499fbe 100644 --- a/mysql-test/main/func_json.result +++ b/mysql-test/main/func_json.result @@ -5556,9 +5556,9 @@ json_schema_valid('{"enum":[0]}', sformat('"{:#>200}"','0')) # # MDEV-36808 json_array_intersect incorrect results after returning NULL in table scan # -CREATE TABLE t1 (full text, overlap text); +CREATE TABLE t1 (`full` text, overlap text); INSERT INTO t1 VALUES ('["2"]', '["2"]'), ('["2"]', '["0"]'), ('["2"]', '["2"]'); -SELECT full, overlap, 
json_array_intersect(full, overlap) as jai from t1; +SELECT `full`, overlap, json_array_intersect(`full`, overlap) as jai from t1; full overlap jai ["2"] ["2"] ["2"] ["2"] ["0"] NULL diff --git a/mysql-test/main/func_json.test b/mysql-test/main/func_json.test index 8399e4d2a04c2..880f8026db292 100644 --- a/mysql-test/main/func_json.test +++ b/mysql-test/main/func_json.test @@ -4327,10 +4327,10 @@ select json_schema_valid('{"enum":[0]}', sformat('"{:#>200}"','0')); --echo # MDEV-36808 json_array_intersect incorrect results after returning NULL in table scan --echo # -CREATE TABLE t1 (full text, overlap text); +CREATE TABLE t1 (`full` text, overlap text); INSERT INTO t1 VALUES ('["2"]', '["2"]'), ('["2"]', '["0"]'), ('["2"]', '["2"]'); -SELECT full, overlap, json_array_intersect(full, overlap) as jai from t1; +SELECT `full`, overlap, json_array_intersect(`full`, overlap) as jai from t1; DROP TABLE t1; diff --git a/mysql-test/main/natural_full_join_grants.result b/mysql-test/main/natural_full_join_grants.result new file mode 100644 index 0000000000000..ec554acfd5755 --- /dev/null +++ b/mysql-test/main/natural_full_join_grants.result @@ -0,0 +1,170 @@ +# +# NATURAL FULL JOIN column privileges +# +# The unqualified common column resolves to COALESCE of the operand +# columns, so reading it requires SELECT on every underlying column +# the COALESCE reads, including each leaf of a chained join. A user +# holding only one side is denied. The same enforcement covers +# views; the column grant required is on the view, not on its base +# table. +# +# A database other than test is used because the default test and +# test_% databases carry a wildcard grant that would mask the +# column level grants under test here. 
+# +create database mysqltest_nfj; +use mysqltest_nfj; +create table t1 (a int, b int); +create table t2 (a int, c int); +create table t3 (a int, d int); +insert into t1 values (1,10),(2,20); +insert into t2 values (2,200),(3,300); +insert into t3 values (2,2000),(9,9000); +create user nfj_user@localhost; +grant select (a,b) on mysqltest_nfj.t1 to nfj_user@localhost; +grant select (c) on mysqltest_nfj.t2 to nfj_user@localhost; +grant select (d) on mysqltest_nfj.t3 to nfj_user@localhost; +connect nfj_con, localhost, nfj_user,, mysqltest_nfj; +# nfj_user lacks SELECT on t2.a, so the coalesced a is denied in +# the SELECT list and in WHERE. +select a from t1 natural full join t2; +ERROR 42000: SELECT command denied to user 'nfj_user'@'localhost' for column 'a' in table 't2' +select b from t1 natural full join t2 where a = 3; +ERROR 42000: SELECT command denied to user 'nfj_user'@'localhost' for column 'a' in table 't2' +# A qualified reference to a granted column still works. +select t1.b from t1 natural full join t2 where t1.a = 2; +b +20 +connection default; +grant select (a) on mysqltest_nfj.t2 to nfj_user@localhost; +connection nfj_con; +# With SELECT on both t1.a and t2.a the coalesced read works. +select a from t1 natural full join t2; +a +1 +2 +3 +# Chained join: t1.a and t2.a are granted but t3.a is not, so the +# denial is on t3.a -- the walk reached the deepest leaf. +select a from (t1 natural full join t2) natural full join t3; +ERROR 42000: SELECT command denied to user 'nfj_user'@'localhost' for column 'a' in table 't3' +connection default; +grant select (a) on mysqltest_nfj.t3 to nfj_user@localhost; +connection nfj_con; +# Every underlying column granted now, so the chained read works. +select a from (t1 natural full join t2) natural full join t3; +a +1 +2 +3 +9 +connection default; +disconnect nfj_con; +# The same check covers views, and across view kinds. 
A view on the +# right side of a FULL JOIN is rejected as a non-base table, so the +# executable shape is a view on the left. Reading the coalesced +# column then requires the column grant on the view, however the view +# is materialized. +# +# v_merge mergeable view +# v_tmp temptable (materialized) view +# v_expr view whose common column is a computed expression +# v_over view defined over another view +# v_union view whose body is a UNION +create view v_merge as select a, b from t1; +create algorithm=temptable view v_tmp as select a, b from t1; +create view v_expr as select a + 0 as a, b from t1; +create view v_over as select a, b from v_merge; +create view v_union as select a, b from t1 union select a, b from t1; +# A view on the right side is rejected (run as the table owner). +select a from t2 natural full join v_merge; +ERROR HY000: FULL JOIN is only supported with base tables on the right side; 'v_merge' is not a base table +create user nfj_vuser@localhost; +grant select (a,c) on mysqltest_nfj.t2 to nfj_vuser@localhost; +grant select (b) on mysqltest_nfj.v_merge to nfj_vuser@localhost; +grant select (b) on mysqltest_nfj.v_tmp to nfj_vuser@localhost; +grant select (b) on mysqltest_nfj.v_expr to nfj_vuser@localhost; +grant select (b) on mysqltest_nfj.v_over to nfj_vuser@localhost; +grant select (b) on mysqltest_nfj.v_union to nfj_vuser@localhost; +connect nfj_vcon, localhost, nfj_vuser,, mysqltest_nfj; +# nfj_vuser holds the base t2.a but not the view's a column, so the +# coalesced a is denied on the view for every view kind. 
+select a from v_merge natural full join t2; +ERROR 42000: SELECT command denied to user 'nfj_vuser'@'localhost' for column 'a' in table 'v_merge' +select a from v_tmp natural full join t2; +ERROR 42000: SELECT command denied to user 'nfj_vuser'@'localhost' for column 'a' in table 'v_tmp' +select a from v_expr natural full join t2; +ERROR 42000: SELECT command denied to user 'nfj_vuser'@'localhost' for column 'a' in table 'v_expr' +select a from v_over natural full join t2; +ERROR 42000: SELECT command denied to user 'nfj_vuser'@'localhost' for column 'a' in table 'v_over' +select a from v_union natural full join t2; +ERROR 42000: SELECT command denied to user 'nfj_vuser'@'localhost' for column 'a' in table 'v_union' +connection default; +grant select (a) on mysqltest_nfj.v_merge to nfj_vuser@localhost; +grant select (a) on mysqltest_nfj.v_tmp to nfj_vuser@localhost; +grant select (a) on mysqltest_nfj.v_expr to nfj_vuser@localhost; +grant select (a) on mysqltest_nfj.v_over to nfj_vuser@localhost; +grant select (a) on mysqltest_nfj.v_union to nfj_vuser@localhost; +connection nfj_vcon; +# With the view's a column granted the coalesced read works for +# every view kind. +select a from v_merge natural full join t2; +a +1 +2 +3 +select a from v_tmp natural full join t2; +a +1 +2 +3 +select a from v_expr natural full join t2; +a +1 +2 +3 +select a from v_over natural full join t2; +a +1 +2 +3 +select a from v_union natural full join t2; +a +1 +2 +3 +connection default; +disconnect nfj_vcon; +drop user nfj_vuser@localhost; +drop view v_merge, v_tmp, v_expr, v_over, v_union; +# +# CREATE VIEW with SELECT * over a NATURAL FULL JOIN runs the "any +# privilege" column check while expanding the *. A user that holds +# the common column but lacks a side column the * expands to is +# denied, and the error names the table of the missing column. This +# check applies only during view creation, not on a plain SELECT. 
+# +create user nfj_star@localhost; +grant select (a) on mysqltest_nfj.t1 to nfj_star@localhost; +grant select (a,c) on mysqltest_nfj.t2 to nfj_star@localhost; +grant create view on mysqltest_nfj.* to nfj_star@localhost; +connect nfj_starcon, localhost, nfj_star,, mysqltest_nfj; +# nfj_star lacks SELECT on t1.b, a column the * expands to. +create view v_star as select * from t1 natural full join t2; +ERROR 42000: ANY command denied to user 'nfj_star'@'localhost' for table `mysqltest_nfj`.`t1` +connection default; +grant select (b) on mysqltest_nfj.t1 to nfj_star@localhost; +connection nfj_starcon; +# With t1.b granted the view is created. +create view v_star as select * from t1 natural full join t2; +connection default; +select * from v_star; +a b c +1 10 NULL +2 20 200 +3 NULL 300 +disconnect nfj_starcon; +drop view v_star; +drop user nfj_star@localhost; +drop user nfj_user@localhost; +drop database mysqltest_nfj; diff --git a/mysql-test/main/natural_full_join_grants.test b/mysql-test/main/natural_full_join_grants.test new file mode 100644 index 0000000000000..64bc47acd83de --- /dev/null +++ b/mysql-test/main/natural_full_join_grants.test @@ -0,0 +1,170 @@ +# Uses GRANT, which the embedded server does not enforce. +--source include/not_embedded.inc + +--echo # +--echo # NATURAL FULL JOIN column privileges +--echo # +--echo # The unqualified common column resolves to COALESCE of the operand +--echo # columns, so reading it requires SELECT on every underlying column +--echo # the COALESCE reads, including each leaf of a chained join. A user +--echo # holding only one side is denied. The same enforcement covers +--echo # views; the column grant required is on the view, not on its base +--echo # table. +--echo # +--echo # A database other than test is used because the default test and +--echo # test_% databases carry a wildcard grant that would mask the +--echo # column level grants under test here. 
+--echo # + +create database mysqltest_nfj; +use mysqltest_nfj; +create table t1 (a int, b int); +create table t2 (a int, c int); +create table t3 (a int, d int); +insert into t1 values (1,10),(2,20); +insert into t2 values (2,200),(3,300); +insert into t3 values (2,2000),(9,9000); +create user nfj_user@localhost; +grant select (a,b) on mysqltest_nfj.t1 to nfj_user@localhost; +grant select (c) on mysqltest_nfj.t2 to nfj_user@localhost; +grant select (d) on mysqltest_nfj.t3 to nfj_user@localhost; + +connect (nfj_con, localhost, nfj_user,, mysqltest_nfj); + +--echo # nfj_user lacks SELECT on t2.a, so the coalesced a is denied in +--echo # the SELECT list and in WHERE. +--error ER_COLUMNACCESS_DENIED_ERROR +select a from t1 natural full join t2; +--error ER_COLUMNACCESS_DENIED_ERROR +select b from t1 natural full join t2 where a = 3; + +--echo # A qualified reference to a granted column still works. +select t1.b from t1 natural full join t2 where t1.a = 2; + +connection default; +grant select (a) on mysqltest_nfj.t2 to nfj_user@localhost; +connection nfj_con; + +--echo # With SELECT on both t1.a and t2.a the coalesced read works. +--sorted_result +select a from t1 natural full join t2; + +--echo # Chained join: t1.a and t2.a are granted but t3.a is not, so the +--echo # denial is on t3.a -- the walk reached the deepest leaf. +--error ER_COLUMNACCESS_DENIED_ERROR +select a from (t1 natural full join t2) natural full join t3; + +connection default; +grant select (a) on mysqltest_nfj.t3 to nfj_user@localhost; +connection nfj_con; + +--echo # Every underlying column granted now, so the chained read works. +--sorted_result +select a from (t1 natural full join t2) natural full join t3; + +connection default; +disconnect nfj_con; + +--echo # The same check covers views, and across view kinds. A view on the +--echo # right side of a FULL JOIN is rejected as a non-base table, so the +--echo # executable shape is a view on the left. 
Reading the coalesced +--echo # column then requires the column grant on the view, however the view +--echo # is materialized. +--echo # +--echo # v_merge mergeable view +--echo # v_tmp temptable (materialized) view +--echo # v_expr view whose common column is a computed expression +--echo # v_over view defined over another view +--echo # v_union view whose body is a UNION +create view v_merge as select a, b from t1; +create algorithm=temptable view v_tmp as select a, b from t1; +create view v_expr as select a + 0 as a, b from t1; +create view v_over as select a, b from v_merge; +create view v_union as select a, b from t1 union select a, b from t1; + +--echo # A view on the right side is rejected (run as the table owner). +--error ER_FULL_JOIN_BASE_TABLES_ONLY +select a from t2 natural full join v_merge; + +create user nfj_vuser@localhost; +grant select (a,c) on mysqltest_nfj.t2 to nfj_vuser@localhost; +grant select (b) on mysqltest_nfj.v_merge to nfj_vuser@localhost; +grant select (b) on mysqltest_nfj.v_tmp to nfj_vuser@localhost; +grant select (b) on mysqltest_nfj.v_expr to nfj_vuser@localhost; +grant select (b) on mysqltest_nfj.v_over to nfj_vuser@localhost; +grant select (b) on mysqltest_nfj.v_union to nfj_vuser@localhost; +connect (nfj_vcon, localhost, nfj_vuser,, mysqltest_nfj); + +--echo # nfj_vuser holds the base t2.a but not the view's a column, so the +--echo # coalesced a is denied on the view for every view kind. 
+--error ER_COLUMNACCESS_DENIED_ERROR +select a from v_merge natural full join t2; +--error ER_COLUMNACCESS_DENIED_ERROR +select a from v_tmp natural full join t2; +--error ER_COLUMNACCESS_DENIED_ERROR +select a from v_expr natural full join t2; +--error ER_COLUMNACCESS_DENIED_ERROR +select a from v_over natural full join t2; +--error ER_COLUMNACCESS_DENIED_ERROR +select a from v_union natural full join t2; + +connection default; +grant select (a) on mysqltest_nfj.v_merge to nfj_vuser@localhost; +grant select (a) on mysqltest_nfj.v_tmp to nfj_vuser@localhost; +grant select (a) on mysqltest_nfj.v_expr to nfj_vuser@localhost; +grant select (a) on mysqltest_nfj.v_over to nfj_vuser@localhost; +grant select (a) on mysqltest_nfj.v_union to nfj_vuser@localhost; +connection nfj_vcon; + +--echo # With the view's a column granted the coalesced read works for +--echo # every view kind. +--sorted_result +select a from v_merge natural full join t2; +--sorted_result +select a from v_tmp natural full join t2; +--sorted_result +select a from v_expr natural full join t2; +--sorted_result +select a from v_over natural full join t2; +--sorted_result +select a from v_union natural full join t2; + +connection default; +disconnect nfj_vcon; +drop user nfj_vuser@localhost; +drop view v_merge, v_tmp, v_expr, v_over, v_union; + +--echo # +--echo # CREATE VIEW with SELECT * over a NATURAL FULL JOIN runs the "any +--echo # privilege" column check while expanding the *. A user that holds +--echo # the common column but lacks a side column the * expands to is +--echo # denied, and the error names the table of the missing column. This +--echo # check applies only during view creation, not on a plain SELECT. 
+--echo # +create user nfj_star@localhost; +grant select (a) on mysqltest_nfj.t1 to nfj_star@localhost; +grant select (a,c) on mysqltest_nfj.t2 to nfj_star@localhost; +grant create view on mysqltest_nfj.* to nfj_star@localhost; +connect (nfj_starcon, localhost, nfj_star,, mysqltest_nfj); + +--echo # nfj_star lacks SELECT on t1.b, a column the * expands to. +--error ER_TABLEACCESS_DENIED_ERROR +create view v_star as select * from t1 natural full join t2; + +connection default; +grant select (b) on mysqltest_nfj.t1 to nfj_star@localhost; +connection nfj_starcon; + +--echo # With t1.b granted the view is created. +create view v_star as select * from t1 natural full join t2; + +connection default; +--sorted_result +select * from v_star; + +disconnect nfj_starcon; +drop view v_star; +drop user nfj_star@localhost; + +drop user nfj_user@localhost; +drop database mysqltest_nfj; diff --git a/mysql-test/main/table_elim.result b/mysql-test/main/table_elim.result index 2677146d9bba8..92e7af9af27b0 100644 --- a/mysql-test/main/table_elim.result +++ b/mysql-test/main/table_elim.result @@ -1112,3 +1112,25 @@ DROP TABLE t1, t2; # # End of 10.11 tests # +# +# MDEV-38136 Prevent elimination of tables in a FULL OUTER JOIN +# +create table t1 (a int); +insert into t1 values (0),(1),(2),(3); +create table t2 (a int primary key, b int) +as select a, a as b from t1 where a in (1,2); +create table t3 (a int primary key, b int) +as select a, a as b from t1 where a in (1,3); +# These will not be eliminated because contains a FULL OUTER JOIN. 
+explain extended select t1.a from t1 full join t2 on t2.a=t1.a; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 100.00 +1 SIMPLE t2 eq_ref PRIMARY PRIMARY 4 test.t1.a 1 100.00 Using where; Using index +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` full join `test`.`t2` on(`test`.`t2`.`a` = `test`.`t1`.`a` and `test`.`t1`.`a` is not null) where 1 +explain extended select t1.a from t1 full join (t2 full join t3 on t2.b=t3.b) on t2.a=t1.a and t3.a=t1.a; +ERROR HY000: FULL JOIN is only supported with base tables on the right side; '(nest_last_join)' is not a base table +explain extended select t1.a from t1 full join (t2 left join t3 on t2.b=t3.b) on t2.a=t1.a and t3.a=t1.a; +ERROR HY000: FULL JOIN is only supported with base tables on the right side; '(nest_last_join)' is not a base table +drop table t1, t2, t3; +# End of 12.3 tests diff --git a/mysql-test/main/table_elim.test b/mysql-test/main/table_elim.test index 4158d2ca5ac8c..4183543b28732 100644 --- a/mysql-test/main/table_elim.test +++ b/mysql-test/main/table_elim.test @@ -844,3 +844,26 @@ DROP TABLE t1, t2; --echo # --echo # End of 10.11 tests --echo # + +--echo # +--echo # MDEV-38136 Prevent elimination of tables in a FULL OUTER JOIN +--echo # +create table t1 (a int); +insert into t1 values (0),(1),(2),(3); + +create table t2 (a int primary key, b int) + as select a, a as b from t1 where a in (1,2); + +create table t3 (a int primary key, b int) + as select a, a as b from t1 where a in (1,3); + +--echo # These will not be eliminated because contains a FULL OUTER JOIN. 
+explain extended select t1.a from t1 full join t2 on t2.a=t1.a; +--error ER_FULL_JOIN_BASE_TABLES_ONLY +explain extended select t1.a from t1 full join (t2 full join t3 on t2.b=t3.b) on t2.a=t1.a and t3.a=t1.a; +--error ER_FULL_JOIN_BASE_TABLES_ONLY +explain extended select t1.a from t1 full join (t2 left join t3 on t2.b=t3.b) on t2.a=t1.a and t3.a=t1.a; + +drop table t1, t2, t3; + +--echo # End of 12.3 tests diff --git a/sql/item.cc b/sql/item.cc index e7cca88d9e7d9..05820d817c859 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -6517,7 +6517,8 @@ bool Item_field::fix_fields(THD *thd, Item **reference) thd->lex->use_only_table_context ? REPORT_ALL_ERRORS : IGNORE_EXCEPT_NON_UNIQUE, - !any_privileges, + !any_privileges && + !synthesized_join_operand, TRUE)) == not_found_field) { diff --git a/sql/item.h b/sql/item.h index d47087ff619d4..05d1ba65142e7 100644 --- a/sql/item.h +++ b/sql/item.h @@ -3807,6 +3807,22 @@ class Item_field :public Item_ident, /* field need any privileges (for VIEW creation) */ bool any_privileges; + /* + True when this Item_field is an operand of the equijoin condition + synthesized for a NATURAL or USING join (see + natural_join_eq_operand). The user never wrote this column + reference, so resolving it must not run the column level privilege + check. Non-PS execution never reaches the check because the + operand is created already fixed and setup_conds skips it. PS + execution clears the bound field on cleanup and resolves the + operand by name on every execute, which would otherwise run the + check and deny a column the statement only joins on. The + privilege on the join column is still enforced through the user's + own reference to it, and for a NATURAL FULL JOIN through + check_coalesce_column_grants. 
+ */ + bool synthesized_join_operand= false; + private: /* Indicates whether this Item_field refers to a regular or some kind of @@ -6696,6 +6712,27 @@ class Item_direct_view_ref :public Item_direct_ref if (!view->is_inner_table_of_outer_join() || !(null_ref_table= view->get_real_join_table())) null_ref_table= NO_NULL_TABLE; + + if (view->is_inner_table_of_outer_join() && + view->contains_full_join()) + { + /* + For a derived table containing a FULL JOIN, every column + reference into the merged table shares the same null_ref_table, + leading to a missing null-complement result from the right + side of the FULL JOIN. Prefer the field's underlying actual + table for null_ref_table instead of the derived table's + leftmost real table. + */ + Item *real= ref ? (*ref)->real_item() : nullptr; + if (real && real->type() == FIELD_ITEM && + ((Item_field*) real)->field && + ((Item_field*) real)->field->table) + null_ref_table= ((Item_field*) real)->field->table; + else if (TABLE *t= view->get_real_join_table()) + null_ref_table= t; + } + if (null_ref_table && null_ref_table != NO_NULL_TABLE) set_maybe_null(); } diff --git a/sql/opt_hints.cc b/sql/opt_hints.cc index 7d92f3c0adbe6..8216aac49e6ec 100644 --- a/sql/opt_hints.cc +++ b/sql/opt_hints.cc @@ -1513,6 +1513,48 @@ bool Opt_hints_qb::set_join_hint_deps(JOIN *join, nullptr, nullptr, nullptr, hint); return true; } + + /* + The runtime keeps every FULL JOIN nest table contiguous in the + join order (see sql_select.cc, the function + restrict_to_unplaced_fj_tables). Tables outside the FULL JOIN + block may sit before or after it, but never between two FULL + JOIN tables. This invariant applies uniformly to JOIN_ORDER, + JOIN_PREFIX, and JOIN_SUFFIX hints because the differences + between them live entirely in the dependencies that + get_other_dep adds above; by the time we get here those extra + dependencies have already been merged in. 
No total order can + satisfy both the hint and that invariant when, after + propagate_dependencies has formed the transitive closure, some + non-FULL-JOIN table T is a successor of one FULL JOIN table + and a predecessor of another, because that forces T to sit + between two FULL JOIN tables. Detect this by intersecting the + predecessors of any FULL JOIN table with the successors of any + FULL JOIN table; if the intersection is non-empty the hint is + unachievable, so ignore it with a warning. + */ + if (join->full_join_nest_tables) + { + table_map fj_preds= 0; + table_map fj_succs= 0; + for (uint i= 0; i < join->table_count; i++) + { + const JOIN_TAB *tab= &join->join_tab[i]; + const table_map tab_map= tab->table->map; + if (tab_map & join->full_join_nest_tables) + fj_preds|= tab->dependent; + else if (tab->dependent & join->full_join_nest_tables) + fj_succs|= tab_map; + } + fj_preds&= ~(join->full_join_nest_tables | join->const_table_map); + if (fj_preds & fj_succs) + { + join->restore_table_dependencies(orig_dep_array); + print_warn(join->thd, ER_WARN_CONFLICTING_HINT, hint->hint_type, + true, nullptr, nullptr, nullptr, hint); + return true; + } + } return false; } diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 63fd9333c9803..883eddce8f321 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -1216,6 +1216,11 @@ SQL_SELECT *make_select(TABLE *head, table_map const_tables, *error=0; + /* + If there's no condition at all then NULLs could end up in + the result set. However, we can disallow that with + allow_null_cond == false. + */ if (!conds && !allow_null_cond) DBUG_RETURN(0); if (!(select= new (head->in_use->mem_root) SQL_SELECT)) diff --git a/sql/opt_table_elimination.cc b/sql/opt_table_elimination.cc index 812c206540f17..b73cbe7a47154 100644 --- a/sql/opt_table_elimination.cc +++ b/sql/opt_table_elimination.cc @@ -825,7 +825,8 @@ eliminate_tables_for_list(JOIN *join, List *join_list, if (tbl->nested_join) { /* This is "... 
LEFT JOIN (join_nest) ON cond" */ - if (eliminate_tables_for_list(join, + if (!(tbl->outer_join & JOIN_TYPE_FULL) && + eliminate_tables_for_list(join, &tbl->nested_join->join_list, tbl->nested_join->used_tables, tbl->on_expr, @@ -840,7 +841,8 @@ eliminate_tables_for_list(JOIN *join, List *join_list, else { /* This is "... LEFT JOIN tbl ON cond" */ - if (!(tbl->table->map & outside_used_tables) && + if (!(tbl->outer_join & JOIN_TYPE_FULL) && + !(tbl->table->map & outside_used_tables) && check_func_dependency(join, tbl->table->map, NULL, tbl, tbl->on_expr)) { @@ -851,13 +853,31 @@ eliminate_tables_for_list(JOIN *join, List *join_list, } tables_used_on_left |= tbl->on_expr->used_tables(); } - else + else if (tbl->sj_on_expr) { - DBUG_ASSERT(!tbl->nested_join || tbl->sj_on_expr); //psergey-todo: is the following really correct or we'll need to descend - //down all ON clauses: ? - if (tbl->sj_on_expr) - tables_used_on_left |= tbl->sj_on_expr->used_tables(); + //down all ON clauses: ? + tables_used_on_left |= tbl->sj_on_expr->used_tables(); + } + else if (tbl->nested_join) + { + /* + simplify_joins preserves a nested join with neither on_expr + nor sj_on_expr when the nest contains FULL JOIN tables. + Flattening that nest would let the optimizer interleave + outside tables between FULL JOIN tables, which the + null-complement algorithm cannot handle. Recurse into the + nest to attempt elimination of any inner outer joins, but pass + on_expr=NULL so the nest itself is never considered for + elimination. 
+ */ + table_map outside_used_tables= tables_used_elsewhere | + tables_used_on_left; + eliminate_tables_for_list(join, &tbl->nested_join->join_list, + tbl->nested_join->used_tables, NULL, + outside_used_tables, + trace_eliminate_tables); + all_eliminated= FALSE; } } @@ -2105,7 +2125,8 @@ void Dep_analysis_context::dbug_print_deps() char buf[128]; String str(buf, sizeof(buf), &my_charset_bin); str.length(0); - eq_mod->expr->print(&str, QT_ORDINARY); + if (eq_mod->expr) + eq_mod->expr->print(&str, QT_ORDINARY); if (eq_mod->field) { fprintf(DBUG_FILE, " equality%ld: %s -> %s.%s\n", diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt index ef4fd75fc205b..9e9d05081e714 100644 --- a/sql/share/errmsg-utf8.txt +++ b/sql/share/errmsg-utf8.txt @@ -12406,3 +12406,7 @@ ER_WARN_QB_NAME_PATH_VIEW_NOT_FOUND eng "Hint %s is ignored. `%s` required at element #%u of the path is not found in the target query block." ER_WARN_QB_NAME_PATH_NOT_SUPPORTED_INSIDE_VIEW eng "Hint %s is ignored. QB_NAME hints with path are not supported inside view definitions." +ER_FULL_JOIN_BASE_TABLES_ONLY + eng "FULL JOIN is only supported with base tables on the right side; '%s' is not a base table" +ER_FULL_JOIN_NOT_ALLOWED_IN_OUTER_JOIN + eng "FULL JOIN is not allowed on the inner side of a LEFT or RIGHT JOIN" diff --git a/sql/sql_base.cc b/sql/sql_base.cc index d1d876985f81a..bb74336742501 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -6535,6 +6535,56 @@ find_field_in_natural_join(THD *thd, TABLE_LIST *table_ref, *ref= item; found_field= (Field*) view_ref_found; } + else if (nj_col->natural_full_join_field) + { + /* + NATURAL FULL JOIN common column over base tables. Route the + unqualified reference to the COALESCE(left_col, right_col) item + built by coalesce_natural_full_join so WHERE, HAVING, and outer + ON clauses see the same merged value the SELECT list shows. + + Return the natural-join's own TABLE_LIST as the actual_table. 
Its + table pointer is NULL (nested join), so the prepared-statement + cache shortcut in find_field_in_tables falls back to + find_field_in_table_ref, which re-enters this routine and applies + the COALESCE substitution on every execute. The privilege check + in find_field_in_table_ref is skipped for nested-join actual_tables; + privileges on the underlying base-table columns are checked when + the Item_field children of the COALESCE are fixed. + */ + Item *item= nj_col->natural_full_join_field; + if (item->fix_fields_if_needed(thd, + (Item **) &nj_col->natural_full_join_field)) + DBUG_RETURN(NULL); + item= nj_col->natural_full_join_field; + if (*ref && (*ref)->is_explicit_name()) + { + /* + The reference carries its own alias. The COALESCE on + natural_full_join_field is shared by every reference to this column + and by Natural_join_column::name, which derives the column's name + from it. Renaming that shared item would change the column's name + for name resolution, so a later reference to the column by its + original name would not resolve, and other references would show the + alias. Build a fresh COALESCE over the same two operands and name + that one instead, leaving the shared item untouched. + */ + Item_func_coalesce *shared= nj_col->natural_full_join_field; + DBUG_ASSERT(shared->argument_count() == 2); + item= new (thd->mem_root) Item_func_coalesce(thd, + shared->arguments()[0], + shared->arguments()[1]); + if (!item || item->fix_fields_if_needed(thd, &item)) + DBUG_RETURN(nullptr); + item->set_name(thd, (*ref)->name); + } + if (register_tree_change) + thd->change_item_tree(ref, item); + else + *ref= item; + *actual_table= table_ref; + DBUG_RETURN((Field*) view_ref_found); + } else { /* This is a base table. 
*/ @@ -6559,7 +6609,7 @@ find_field_in_natural_join(THD *thd, TABLE_LIST *table_ref, } *actual_table= nj_col->table_ref; - + DBUG_RETURN(found_field); } @@ -6637,6 +6687,50 @@ find_field_in_table(THD *thd, TABLE *table, const Lex_ident_column &name, } +#ifndef NO_EMBEDDED_ACCESS_CHECKS +/* + Check column-level privileges on every column read by the COALESCE that + stands in for a NATURAL FULL JOIN common column. + + An unqualified reference to a NATURAL FULL JOIN common column resolves to + COALESCE(left_col, right_col), nested for a chain, so the value it returns + depends on each underlying column. Each one must satisfy the same column + grant a written reference would. check_column_grant_in_table_ref cannot run + on the natural join's own TABLE_LIST (it has no base table), so route each + leaf Item_field through it using the table reference the field belongs to. + That table reference is a base table or a view, and + check_column_grant_in_table_ref handles both, so view column grants are + enforced the same way. Nested COALESCEs are walked so a chained join checks + every leaf. + + Returns true when access to any underlying column is denied. +*/ + +static bool check_coalesce_column_grants(THD *thd, Item *item) +{ + if (!item) + return false; + if (item->type() == Item::FIELD_ITEM) + { + Field *field= ((Item_field*) item)->field; + if (field && field->table && field->table->pos_in_table_list) + return check_column_grant_in_table_ref(thd, + field->table->pos_in_table_list, + field->field_name, field); + return false; + } + if (item->type() == Item::FUNC_ITEM) + { + Item_func *func= (Item_func*) item; + for (uint i= 0; i < func->argument_count(); i++) + if (check_coalesce_column_grants(thd, func->arguments()[i])) + return true; + } + return false; +} +#endif /* NO_EMBEDDED_ACCESS_CHECKS */ + + /* Find field in a table reference. 
@@ -6808,10 +6902,18 @@ find_field_in_table_ref(THD *thd, TABLE_LIST *table_list, if (fld) { #ifndef NO_EMBEDDED_ACCESS_CHECKS - /* Check if there are sufficient access rights to the found field. */ + /* + Check if there are sufficient access rights to the found field. + When actual_table is a nested-join TABLE_LIST the field is a NATURAL + FULL JOIN COALESCE substitution; check_column_grant_in_table_ref cannot + run on it because its table pointer is NULL, so check the column grant + on each base-table column the COALESCE reads instead. + */ if (check_privileges && !table_list->is_derived() && - check_column_grant_in_table_ref(thd, *actual_table, name, fld)) + ((*actual_table)->nested_join ? + check_coalesce_column_grants(thd, *ref) : + check_column_grant_in_table_ref(thd, *actual_table, name, fld))) fld= WRONG_GRANT; else #endif @@ -7478,6 +7580,54 @@ set_new_item_local_context(THD *thd, Item_ident *item, TABLE_LIST *table_ref) } +/* + Build one operand of the synthesized equijoin condition for a common + column of a NATURAL or USING join. + + For an ordinary common column the operand is a freshly created + Item_ident hooked to a name resolution context that resolves it + within nj_col's table. + + When nj_col carries a natural_full_join_field the operand of the + enclosing join is itself a NATURAL FULL JOIN, so the column's value + is the COALESCE built for that join. Return that COALESCE directly + so the equality matches on the coalesced value instead of the left + side "raw" column, which is NULL on rows that came only from the + right side of the join. +*/ + +static Item *natural_join_eq_operand(THD *thd, Natural_join_column *nj_col) +{ + if (nj_col->natural_full_join_field) + return nj_col->natural_full_join_field; + + Item *item= nj_col->create_item(thd); + if (!item) + return nullptr; + + /* + With no_wrap_view_item == 0 create_item returns a sub-class of Item_ident. 
+ */ + DBUG_ASSERT(!thd->lex->current_select->no_wrap_view_item); + DBUG_ASSERT(item->type() == Item::FIELD_ITEM || + item->type() == Item::REF_ITEM); + + if (set_new_item_local_context(thd, (Item_ident*) item, nj_col->table_ref)) + return nullptr; + + /* + A base table operand is the table's own Item_field. Mark it so its + column grant is not checked when the condition is resolved, keeping + prepared statement execution consistent with conventional execution. + See Item_field::synthesized_join_operand. + */ + if (item->type() == Item::FIELD_ITEM) + ((Item_field*) item)->synthesized_join_operand= true; + + return item; +} + + /* Find and mark the common columns of two table references. @@ -7640,48 +7790,20 @@ mark_common_columns(THD *thd, TABLE_LIST *table_ref_1, TABLE_LIST *table_ref_2, */ /* - Create non-fixed fully qualified field and let fix_fields to - resolve it. + Build the two operands of the synthesized equijoin condition. An + operand whose table reference is a NATURAL FULL JOIN keeps its + natural_full_join_field, so the equality is on the inner COALESCE and a + chain such as (t1 natural full join t2) natural full join t3 matches on + the coalesced value rather than the "raw" left side column. */ - Item *item_1= nj_col_1->create_item(thd); - Item *item_2= nj_col_2->create_item(thd); - Item_ident *item_ident_1, *item_ident_2; + Item *item_1= natural_join_eq_operand(thd, nj_col_1); + Item *item_2= natural_join_eq_operand(thd, nj_col_2); Item_func_eq *eq_cond; if (!item_1 || !item_2) goto err; // out of memory - /* - The following assert checks that the two created items are of - type Item_ident. - */ - DBUG_ASSERT(!thd->lex->current_select->no_wrap_view_item); - /* - In the case of no_wrap_view_item == 0, the created items must be - of sub-classes of Item_ident. 
- DBUG_ASSERT(item_1->type() == Item::FIELD_ITEM || - item_1->type() == Item::REF_ITEM); - DBUG_ASSERT(item_2->type() == Item::FIELD_ITEM || - item_2->type() == Item::REF_ITEM); - - /* - We need to cast item_1,2 to Item_ident, because we need to hook name - resolution contexts specific to each item. - */ - item_ident_1= (Item_ident*) item_1; - item_ident_2= (Item_ident*) item_2; - /* - Create and hook special name resolution contexts to each item in the - new join condition . We need this to both speed-up subsequent name - resolution of these items, and to enable proper name resolution of - the items during the execute phase of PS. - */ - if (set_new_item_local_context(thd, item_ident_1, nj_col_1->table_ref) || - set_new_item_local_context(thd, item_ident_2, nj_col_2->table_ref)) - goto err; - - if (!(eq_cond= new (thd->mem_root) Item_func_eq(thd, item_ident_1, item_ident_2))) + if (!(eq_cond= new (thd->mem_root) Item_func_eq(thd, item_1, item_2))) goto err; /* Out of memory. */ /* @@ -7727,6 +7849,67 @@ mark_common_columns(THD *thd, TABLE_LIST *table_ref_1, TABLE_LIST *table_ref_2, } +/* + For some pair of tables (t1, t2) such that + t1 NATURAL FULL JOIN t2 + generate a set of output columns + COALESCE(t1.x_1, t2.y_1), ..., COALESCE(t1.x_n, t2.y_n) + such that NULL results won't appear in the NATURAL FULL JOIN. + + @param thd the current thread + @param left_join_columns common columns originating in t1 + @param right_join_columns common columns originating in t2 + */ +void coalesce_natural_full_join(THD *thd, + List *left_join_columns, + List *right_join_columns) +{ + /* + It's a NATURAL JOIN so the number of columns from the left table better + match the number from the right table. + */ + DBUG_ASSERT(left_join_columns->elements == right_join_columns->elements); + + /* + Walk the left table and right table columns in lock-step, creating a + new COALESCE() over each pair of columns. 
The calling function relies + on the state of left_join_columns, so set the COALESCE() item instance + on members of that list. + */ + List_iterator left(*left_join_columns); + List_iterator right(*right_join_columns); + Natural_join_column *left_col= left++; + Natural_join_column *right_col= right++; + while (!left.at_end() && !right.at_end()) + { + /* + When an operand is itself a NATURAL FULL JOIN its common column is the + COALESCE built for that join. Nest it so a chain such as + (t1 natural full join t2) natural full join t3 + yields + COALESCE(COALESCE(t1.x, t2.x), t3.x) + */ + Item *left_field= left_col->natural_full_join_field ? + (Item*) left_col->natural_full_join_field : + left_col->get_item(); + Item *right_field= right_col->natural_full_join_field ? + (Item*) right_col->natural_full_join_field : + right_col->get_item(); + Item_func_coalesce *coal= new (thd->mem_root) Item_func_coalesce + (thd, left_field, right_field); + DBUG_ASSERT(coal); + + // Makes the field `COALESCE(left, right) AS left`. + coal->set_name(thd, left_field->name); + + // Save the result into the set of left_join_columns. + left_col->natural_full_join_field= coal; + + left_col= left++; + right_col= right++; + } +} + /* Materialize and store the row type of NATURAL/USING join. @@ -7775,7 +7958,8 @@ store_natural_using_join_columns(THD *thd, TABLE_LIST *natural_using_join, Query_arena *arena, backup; bool result= TRUE; List *non_join_columns; - List *join_columns; + List *left_join_columns; + List *right_join_columns; DBUG_ENTER("store_natural_using_join_columns"); DBUG_ASSERT(!natural_using_join->join_columns); @@ -7783,7 +7967,8 @@ store_natural_using_join_columns(THD *thd, TABLE_LIST *natural_using_join, arena= thd->activate_stmt_arena_if_needed(&backup); if (!(non_join_columns= new List) || - !(join_columns= new List)) + !(left_join_columns= new List) || + !(right_join_columns= new List)) goto err; /* Append the columns of the first join operand. 
*/ @@ -7792,7 +7977,7 @@ store_natural_using_join_columns(THD *thd, TABLE_LIST *natural_using_join, nj_col_1= it_1.get_natural_column_ref(); if (nj_col_1->is_common) { - join_columns->push_back(nj_col_1, thd->mem_root); + left_join_columns->push_back(nj_col_1, thd->mem_root); /* Reset the common columns for the next call to mark_common_columns. */ nj_col_1->is_common= FALSE; } @@ -7812,7 +7997,7 @@ store_natural_using_join_columns(THD *thd, TABLE_LIST *natural_using_join, while ((using_field_name= using_fields_it++)) { List_iterator_fast - it(*join_columns); + it(*left_join_columns); Natural_join_column *common_field; for (;;) @@ -7839,14 +8024,24 @@ store_natural_using_join_columns(THD *thd, TABLE_LIST *natural_using_join, non_join_columns->push_back(nj_col_2, thd->mem_root); else { + right_join_columns->push_back(nj_col_2, thd->mem_root); + /* Reset the common columns for the next call to mark_common_columns. */ nj_col_2->is_common= FALSE; } } + /* + If this is a NATURAL FULL JOIN, then create a COALESCE() for each pair of + columns from the two joined tables. + */ + if ((table_ref_1->outer_join & JOIN_TYPE_FULL) && + (table_ref_2->outer_join & JOIN_TYPE_FULL)) + coalesce_natural_full_join(thd, left_join_columns, right_join_columns); + if (non_join_columns->elements > 0) - join_columns->append(non_join_columns); - natural_using_join->join_columns= join_columns; + left_join_columns->append(non_join_columns); + natural_using_join->join_columns= left_join_columns; natural_using_join->is_join_columns_complete= TRUE; result= FALSE; @@ -7937,7 +8132,8 @@ store_top_level_join_columns(THD *thd, TABLE_LIST *table_ref, swapped in the first loop. */ if (same_level_left_neighbor && - cur_table_ref->outer_join & JOIN_TYPE_RIGHT) + (cur_table_ref->outer_join & JOIN_TYPE_RIGHT) && + !(cur_table_ref->outer_join & JOIN_TYPE_FULL)) { /* This can happen only for JOIN ... ON. 
*/ DBUG_ASSERT(table_ref->nested_join->join_list.elements == 2); @@ -8898,23 +9094,38 @@ insert_fields(THD *thd, Name_resolution_context *context, { DBUG_ASSERT((tables->field_translation == NULL && table) || tables->is_natural_join); - DBUG_ASSERT(item->type() == Item::FIELD_ITEM); - Item_field *fld= (Item_field*) item; - const char *field_db_name= field_iterator.get_db_name().str; - const char *field_table_name= field_iterator.get_table_name().str; - - if (!tables->schema_table && - !(fld->have_privileges= - (get_column_grant(thd, field_iterator.grant(), - field_db_name, - field_table_name, fld->field_name) & - VIEW_ANY_ACL))) + if (item->type() == Item::FIELD_ITEM) { - my_error(ER_TABLEACCESS_DENIED_ERROR, MYF(0), "ANY", - thd->security_ctx->priv_user, - thd->security_ctx->host_or_ip, - field_db_name, field_table_name); - DBUG_RETURN(TRUE); + Item_field *fld= (Item_field*) item; + const char *field_db_name= field_iterator.get_db_name().str; + const char *field_table_name= field_iterator.get_table_name().str; + + if (!tables->schema_table && + !(fld->have_privileges= + (get_column_grant(thd, field_iterator.grant(), + field_db_name, + field_table_name, fld->field_name) & + VIEW_ANY_ACL))) + { + my_error(ER_TABLEACCESS_DENIED_ERROR, MYF(0), "ANY", + thd->security_ctx->priv_user, + thd->security_ctx->host_or_ip, + field_db_name, field_table_name); + DBUG_RETURN(TRUE); + } + } + else + { + /* + For NATURAL FULL JOIN, common columns are represented + as COALESCE expressions rather than plain Item_field. The + per-column privilege check is skipped because the underlying + fields already had their privileges verified during name + resolution. Assert that this is indeed a FULL JOIN context. 
+ */ + DBUG_ASSERT(tables->is_natural_join && tables->nested_join && + (tables->nested_join->join_list.head()->outer_join & + JOIN_TYPE_FULL)); } } #endif diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index beda9a6b03fc3..6e5045dcc532f 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -1346,6 +1346,7 @@ void LEX::start(THD *thd_arg) memset(&trg_chistics, 0, sizeof(trg_chistics)); selects_for_hint_resolution.empty(); + full_join_count= 0; DBUG_VOID_RETURN; } @@ -4955,11 +4956,26 @@ static void fix_prepare_info_in_table_list(THD *thd, TABLE_LIST *tbl) { for (; tbl; tbl= tbl->next_local) { - if (tbl->on_expr && !tbl->prep_on_expr) + /* + Walk up the nested JOINs so that upper-level ON expressions also + get saved into their respective prep_on_expr's. + + We must do this to support PS: + prepare st from + 'select ... from t1 full join t2 on t1.a = t2.a'; + */ + TABLE_LIST *embedding= tbl; + do { - thd->check_and_register_item_tree(&tbl->prep_on_expr, &tbl->on_expr); - tbl->on_expr= tbl->on_expr->copy_andor_structure(thd); + if (embedding->on_expr && !embedding->prep_on_expr) + { + thd->check_and_register_item_tree(&embedding->prep_on_expr, + &embedding->on_expr); + embedding->on_expr= embedding->on_expr->copy_andor_structure(thd); + } } + while ((embedding= embedding->embedding)); + if (tbl->is_view_or_derived() && tbl->is_merged_derived()) { SELECT_LEX *sel= tbl->get_single_select(); diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 2e61fc1feb550..4ad54d46b4e1c 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -3531,6 +3531,9 @@ struct LEX: public Query_tables_list uint8 context_analysis_only; uint8 lex_options; // see OPTION_LEX_* + /* Zero by default, this counts the number of FULL JOINs in the query. 
*/ + uint16 full_join_count; + Alter_info alter_info; Lex_prepared_stmt prepared_stmt; /* diff --git a/sql/sql_list.h b/sql/sql_list.h index 77fd69402e9bf..c53c8da283d66 100644 --- a/sql/sql_list.h +++ b/sql/sql_list.h @@ -432,6 +432,10 @@ class base_list_iterator { return (*el)->info; } + inline void *peek_ref() + { + return &(*el)->info; + } inline void *next_fast(void) { list_node *tmp; @@ -613,6 +617,34 @@ template class List_iterator :public base_list_iterator inline void remove() { base_list_iterator::remove(); } inline void after(T *a) { base_list_iterator::after(a); } inline T** ref(void) { return (T**) base_list_iterator::ref(); } + inline T** peek_ref() { return (T**) base_list_iterator::peek_ref(); } + + /* + Swap the current element with the next one in the list. + + If this iterator points to no element or to the last element, then this + method does nothing and returns nullptr. + + If this iter points to B in the following list + A, B, C, D, ... + then after this method returns, the list will be + A, C, B, D, ... + and this method returns C. This iter will point to the same location + in the list after this method returns as it did before, but the element at + that location will be C instead of B. + + Other iterators pointing to the same list remain valid and will see the + updated list order. 
+ */ + T* swap_next() + { + if (!ref() || !*ref() || !peek()) + return nullptr; + T* next= peek(); + T* cur= replace(next); + *peek_ref()= cur; + return next; + } }; diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index bcd6de564f550..b16d5d225fa9a 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -8642,7 +8642,8 @@ bool st_select_lex::add_cross_joined_table(TABLE_LIST *left_op, left_op->first_leaf_for_name_resolution(); } - if (!(tbl->outer_join & JOIN_TYPE_RIGHT)) + if (!(tbl->outer_join & JOIN_TYPE_RIGHT) || + (tbl->outer_join & JOIN_TYPE_FULL)) { pair_tbl= tbl; tbl= li++; @@ -8659,6 +8660,15 @@ bool st_select_lex::add_cross_joined_table(TABLE_LIST *left_op, cj_nest->on_expr= tbl->on_expr; cj_nest->embedding= tbl->embedding; cj_nest->join_list= jl; + /* + Transfer FULL JOIN state to the new nest: tbl is being replaced + by cj_nest in the join tree, so the partner pointer must follow + (and the partner's back-pointer must be retargeted). + */ + cj_nest->foj_partner= tbl->foj_partner; + cj_nest->on_context= tbl->on_context; + if (cj_nest->foj_partner) + cj_nest->foj_partner->foj_partner= cj_nest; cj_nest->alias.str= "(nest_last_join)"; cj_nest->alias.length= sizeof("(nest_last_join)")-1; li.replace(cj_nest); @@ -8684,6 +8694,8 @@ bool st_select_lex::add_cross_joined_table(TABLE_LIST *left_op, tbl->on_expr= 0; tbl->straight= straight_fl; tbl->natural_join= 0; + tbl->foj_partner= 0; + tbl->on_context= 0; tbl->embedding= cj_nest; tbl->join_list= cjl; @@ -8737,12 +8749,10 @@ bool st_select_lex::add_cross_joined_table(TABLE_LIST *left_op, TABLE_LIST *st_select_lex::convert_right_join() { - TABLE_LIST *tab2= join_list->pop(); - TABLE_LIST *tab1= join_list->pop(); DBUG_ENTER("convert_right_join"); - - join_list->push_front(tab2, parent_lex->thd->mem_root); - join_list->push_front(tab1, parent_lex->thd->mem_root); + List_iterator li(*join_list); + li++; // points iterator at first element and returns it + TABLE_LIST* tab1= li.swap_next(); tab1->outer_join|= 
JOIN_TYPE_RIGHT; DBUG_RETURN(tab1); @@ -9007,9 +9017,7 @@ Item *normalize_cond(THD *thd, Item *cond) /** - Add an ON condition to the second operand of a JOIN ... ON. - - Add an ON condition to the right operand of a JOIN ... ON clause. + Add an ON condition to the second (right) operand of a JOIN ... ON. @param b the second operand of a JOIN ... ON @param expr the condition to be added to the ON clause @@ -9037,6 +9045,7 @@ void add_join_on(THD *thd, TABLE_LIST *b, Item *expr) b->on_expr= new (thd->mem_root) Item_cond_and(thd, b->on_expr,expr); } b->on_expr->top_level_item(); + b->on_expr->base_flags|= item_base_t::IS_COND; } } diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 9c2a99676d798..d4083ce58dc2a 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -201,7 +201,9 @@ static COND* substitute_for_best_equal_field(THD *thd, JOIN_TAB *context_tab, void *table_join_idx, bool do_substitution); static COND *simplify_joins(JOIN *join, List *join_list, - COND *conds, bool top, bool in_sj); + COND *conds, bool in_sj); +bool check_full_join_base_tables(List *join_list); +static bool check_full_join_after_simplify(List *join_list); static bool check_interleaving_with_nj(JOIN_TAB *next); static void restore_prev_nj_state(JOIN_TAB *last); static uint reset_nj_counters(JOIN *join, List *join_list); @@ -452,6 +454,134 @@ bool dbug_user_var_equals_str(THD *thd, const char *name, const char* value) } #endif /* DBUG_OFF */ + +/* + Duplicate Row Filter for FULL JOINs. + + During the first (LEFT JOIN) pass of a FULL JOIN, the filter records + the rowids of right-side rows that were matched. During the second + (null-complement) pass, the filter is consulted to skip rows that + were already emitted, so that only unmatched right-side rows produce + NULL-complemented output. + + Saved rowids are consulted at the end of each 'outer' JOIN_TAB's + execution to generate null-complements for the partial join (aka + join prefix). 
+ + Internally this reuses the semi-join weedout infrastructure + (SJ_TMP_TABLE). +*/ +class full_join_duplicate_filter : public Sql_alloc +{ + // Weedout temp table that stores seen rowids. + SJ_TMP_TABLE tbl; + +public: + /* + Allocate and populate the weedout temp table for the right side of + a FULL JOIN. Builds an SJ_TMP_TABLE whose record is the right + table's rowid. Returns true on error. + */ + bool init(THD *thd, JOIN_TAB *right_tab) + { + DBUG_ASSERT(thd); + DBUG_ASSERT(right_tab); + + tbl.tmp_table= NULL; + tbl.is_degenerate= false; + tbl.have_degenerate_row= false; + tbl.next_flush_table= nullptr; + + if (!(tbl.tabs= thd->alloc(1))) + { + my_error(ER_OUT_OF_RESOURCES, MYF(ME_FATAL)); + return true; + } + + uint jt_rowid_offset= 0; + uint jt_null_bits= 0; + + tbl.tabs[0].join_tab= right_tab; + tbl.tabs[0].rowid_offset= jt_rowid_offset; + jt_rowid_offset+= right_tab->table->file->ref_length; + if (right_tab->table->maybe_null) + { + tbl.tabs[0].null_byte= jt_null_bits / 8; + tbl.tabs[0].null_bit= jt_null_bits++; + } + + tbl.tabs_end= tbl.tabs + 1; + tbl.rowid_len= jt_rowid_offset; + tbl.null_bits= jt_null_bits; + tbl.null_bytes= (jt_null_bits + 7) / 8; + + right_tab->table->prepare_for_position(); + right_tab->keep_current_rowid= TRUE; + + if (tbl.create_sj_weedout_tmp_table(thd)) + return true; + return false; + } + + /* + Record the current right-side rowid during the first (LEFT JOIN) + pass. Duplicate-key errors are silently ignored because, during + the first pass, we only need to remember that the rowid was seen at + least once. Returns 0 on success, 1 on error. + */ + int remember_rowids(THD *thd) + { + DBUG_ASSERT(thd); + int res= tbl.sj_weedout_check_row(thd); + if (res == -1) + return 1; + return 0; + } + + /* + Check whether the current right-side rowid was already emitted. 
+ Called during the second (null-complement) pass: if the rowid is + already in the temp table, sets *is_duplicate so the caller can + skip emitting a NULL-complemented row for a right-side row that + was already matched. Returns 0 on success, 1 on error. + */ + int check_rowids(THD *thd, bool *is_duplicate) + { + DBUG_ASSERT(thd); + DBUG_ASSERT(is_duplicate); + int res= tbl.sj_weedout_check_row(thd); + if (res == -1) + return 1; + *is_duplicate= (res == 1); + return 0; + } + + /* + Delete all recorded rows but keep the temp table allocated + so it can be reused. + */ + void reset() + { + tbl.sj_weedout_delete_rows(); + } + + /* + Delete all recorded rows and free the weedout temp table. Must + be called after FULL JOIN execution is complete. + */ + void cleanup(THD *thd) + { + tbl.sj_weedout_delete_rows(); + if (tbl.tmp_table) + { + tbl.tmp_table->file->ha_index_or_rnd_end(); + free_tmp_table(thd, tbl.tmp_table); + tbl.tmp_table= NULL; + } + } +}; + + /* Intialize POSITION structure. */ @@ -2340,6 +2470,9 @@ JOIN::optimize_inner() } #endif + if (check_full_join_base_tables(join_list)) + DBUG_RETURN(1); + SELECT_LEX *sel= select_lex; if (sel->first_cond_optimization) { @@ -2359,7 +2492,22 @@ JOIN::optimize_inner() sel->first_cond_optimization= 0; /* Convert all outer joins to inner joins if possible */ - conds= simplify_joins(this, join_list, conds, TRUE, FALSE); + conds= simplify_joins(this, join_list, conds, FALSE); + + /* + simplify_joins may rewrite a FULL JOIN to a one-sided join or swap its + operands, so shapes that depend on the post-rewrite structure are + checked here, after the existing pre-rewrite check_full_join_base_tables + call. check_full_join_after_simplify rejects a surviving FULL JOIN whose + right operand is still a nested join, which the swap could not reduce to + a base table. 
+ */ + if (check_full_join_after_simplify(join_list)) + { + if (arena) + thd->restore_active_arena(arena, &backup); + DBUG_RETURN(1); + } add_table_function_dependencies(join_list, table_map(-1), &error); @@ -5602,30 +5750,58 @@ void mark_join_nest_as_const(JOIN *join, @detail Figure out which condition we can use: - - For INNER JOIN, we use the WHERE, - - "t1 LEFT JOIN t2 ON ..." uses t2's ON expression + - For INNER JOIN, we use the WHERE. + - "t1 LEFT JOIN t2 ON ..." uses t2's ON expression. - "t1 LEFT JOIN (...) ON ..." uses the join nest's ON expression. + - "t1 FULL OUTER JOIN t2 ON ..." uses the ON expression. + - "t1 FULL OUTER (...) ON ..." uses the join nest's ON expression. */ static Item **get_sargable_cond(JOIN *join, TABLE *table) { - Item **retval; - if (table->pos_in_table_list->on_expr) + Item **retval= nullptr; + TABLE_LIST *sql_table= table->pos_in_table_list; + + if (sql_table->outer_join & JOIN_TYPE_FULL) + { + /* + 1. FULL OUTER JOIN requires an ON condition, so someone must have it + 2. Disregard the WHERE clause at this point, using only the ON + condition because we don't want to range analysis to + accidentally turn the FULL JOIN into an INNER JOIN. + 3. The ON condition holds for both tables so if we don't find it + associated with one table, then look it on the partner table. + */ + if (sql_table->on_expr) + return &sql_table->on_expr; + + TABLE_LIST *foj_partner= sql_table->foj_partner; + DBUG_ASSERT(foj_partner->outer_join & JOIN_TYPE_FULL); + if (foj_partner->on_expr) + return &foj_partner->on_expr; + + /* + We cannot end up here, otherwise the ON condition for the FULL + OUTER JOIN was lost. + */ + DBUG_ASSERT(false); + } + else if (sql_table->on_expr) { /* This is an inner table from a single-table LEFT JOIN, "t1 LEFT JOIN t2 ON cond". Use the condition cond. 
*/ - retval= &table->pos_in_table_list->on_expr; + retval= &sql_table->on_expr; } - else if (table->pos_in_table_list->embedding && - !table->pos_in_table_list->embedding->sj_on_expr) + else if (sql_table->embedding && + !sql_table->embedding->sj_on_expr) { /* This is the inner side of a multi-table outer join. Use the - appropriate ON expression. + ON expression from the nested join containing the table. */ - retval= &(table->pos_in_table_list->embedding->on_expr); + retval= &(sql_table->embedding->on_expr); } else { @@ -5658,7 +5834,16 @@ make_join_statistics(JOIN *join, List &tables_list, TABLE **table_vector; JOIN_TAB *stat,*stat_end,*s,**stat_ref, **stat_vector; KEYUSE *keyuse,*start_keyuse; + + /* + outer_join here does not have the same meaning as TABLE_LIST::outer_join. + Here, outer_join is the union of all table numbers representing tables + that participate in this join. TABLE_LIST::outer_join marks how a + TABLE_LIST participates in a particular JOIN (as a right table, left table, + as part of a FULL JOIN, etc). + */ table_map outer_join=0; + table_map no_rows_const_tables= 0; SARGABLE_PARAM *sargables= 0; List_iterator ti(tables_list); @@ -5760,7 +5945,14 @@ make_join_statistics(JOIN *join, List &tables_list, if (*s->on_expr_ref) { /* s is the only inner table of an outer join */ - if (!table->is_filled_at_execution() && + /* + Const-folding site A: an empty inner table of an outer join. It + can only contribute null rows, so it is read once as a const + table. FULL JOIN tables are excluded because their unmatched + rows still need the null-complement pass. 
+ */ + if (!(tables->outer_join & JOIN_TYPE_FULL) && + !table->is_filled_at_execution() && ((!table->file->stats.records && (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT)) || all_partitions_pruned_away) && !embedding) @@ -5773,7 +5965,18 @@ make_join_statistics(JOIN *join, List &tables_list, outer_join|= table->map; s->embedding_map= 0; for (;embedding; embedding= embedding->embedding) + { s->embedding_map|= embedding->nested_join->nj_map; + /* + A FULL JOIN with a constant ON expression does not encode + the outer table dependency in the on_expr because the + constant references no tables. The enclosing nest's + dep_tables, however, still carries that dependency, so + propagate it here. + */ + if (embedding->contains_full_join() && !embedding->sj_on_expr) + s->dependent|= embedding->dep_tables; + } continue; } if (embedding) @@ -5803,13 +6006,20 @@ make_join_statistics(JOIN *join, List &tables_list, if (inside_an_outer_join) continue; } + /* + Const-folding site B: a table with at most one row, a system table + or an exact zero or one row count, that no other table depends on. + Its single row is read once as a const table. FULL JOIN tables are + excluded. 
+ */ if (!table->is_filled_at_execution() && (table->s->system || (table->file->stats.records <= 1 && (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT)) || all_partitions_pruned_away) && !s->dependent && - !table->fulltext_searched && !join->no_const_tables) + !table->fulltext_searched && !join->no_const_tables && + !(tables->outer_join & JOIN_TYPE_FULL)) { set_position(join,const_count++,s,(KEYUSE*) 0); no_rows_const_tables |= table->map; @@ -5872,6 +6082,7 @@ make_join_statistics(JOIN *join, List &tables_list, join->const_table_map= no_rows_const_tables; join->const_tables= const_count; eliminate_tables(join); + join->const_table_map &= ~no_rows_const_tables; const_count= join->const_tables; found_const_table_map= join->const_table_map; @@ -5919,13 +6130,18 @@ make_join_statistics(JOIN *join, List &tables_list, if (table->is_filled_at_execution()) continue; - /* - If equi-join condition by a key is null rejecting and after a - substitution of a const table the key value happens to be null - then we can state that there are no matches for this equi-join. - */ + /* + Const-folding site C: a null-rejecting equijoin key on the + single inner table of an outer join. If (1) an equijoin + condition by a key is null rejecting and (2) after a + substitution of a const table the key value happens to be + null, then we can state that there are no matches for this + equijoin, so the inner table is null-complemented and read as + a const table. FULL JOIN tables are excluded. + */ if ((keyuse= s->keyuse) && *s->on_expr_ref && !s->embedding_map && - !(table->map & join->eliminated_tables)) + !(table->map & join->eliminated_tables) && + !(s->tab_list->outer_join & JOIN_TYPE_FULL)) { /* When performing an outer join operation if there are no matching rows @@ -5960,11 +6176,17 @@ make_join_statistics(JOIN *join, List &tables_list, // All dep. 
must be constants if (s->dependent & ~(found_const_table_map)) continue; + /* + Const-folding site D: a dependent table whose dependencies are all + const and which has at most one row. It is read once as a system + const table. FULL JOIN tables are excluded. + */ if (table->file->stats.records <= 1L && (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) && !table->pos_in_table_list->embedding && - !((outer_join & table->map) && - (*s->on_expr_ref)->is_expensive())) + !((outer_join & table->map) && + (*s->on_expr_ref)->is_expensive()) && + !(s->tab_list->outer_join & JOIN_TYPE_FULL)) { // system table int tmp= 0; s->type= JT_SYSTEM; @@ -6037,8 +6259,13 @@ make_join_statistics(JOIN *join, List &tables_list, base_eq_part.intersect(base_part); /* - We can read the const record if we are using a full unique key and - if the table is not an unopened to be materialized table/view. + Const-folding site E: a full unique or primary key bound + entirely to constants. We can read the const record if we + are using a full unique key and if the table is not an + unopened to be materialized table/view. This is the keyuse + based path. Unlike sites A through D it does not exclude + FULL JOIN tables, so a FULL JOIN side with a constant unique + key lookup can be const-folded here. */ if ((table->actual_key_flags(keyinfo) & HA_NOSAME) && (!s->table->pos_in_table_list->is_materialized_derived() || @@ -6278,10 +6505,25 @@ make_join_statistics(JOIN *join, List &tables_list, records= 0; } } + /* + A FULL JOIN table's unmatched rows are produced by the null + complement pass, so it must be read in full even when its ON + expression is impossible. Clear the impossible range state so it + is not folded to a const or null-complemented empty table below, + and so make_join_select does not abort the whole join as empty. + The left side of the FULL JOIN still produces its rows through the + ordinary scan, and the right side's rows come from the rescan. 
+ */ + if (impossible_range && (s->tab_list->outer_join & JOIN_TYPE_FULL)) + { + s->table->reginfo.impossible_range= 0; + records= s->table->stat_records(); + impossible_range= FALSE; + } if (impossible_range) { /* - Impossible WHERE or ON expression + Const-folding site F: an impossible WHERE or ON expression. In case of ON, we mark that the we match one empty NULL row. In case of WHERE, don't set found_const_table_map to get the caller to abort with a zero row result. @@ -14228,6 +14470,17 @@ make_outerjoin_info(JOIN *join) if (tbl->outer_join & (JOIN_TYPE_LEFT | JOIN_TYPE_RIGHT)) { + /* + Skip the LEFT side of a FULL JOIN. Its null-complementing is + handled by the fj_dups mechanism, not by the standard nested + loop outer join machinery. Setting up an outer join scope + here would cause the ON condition to be pushed into + this table's select_cond, filtering out rows before they reach + the right side and preventing null-complement generation. + */ + if ((tbl->outer_join & JOIN_TYPE_FULL) && + (tbl->outer_join & JOIN_TYPE_LEFT)) + goto skip_outer_join_setup; /* Table tab is the only one inner table for outer join. (Like table t4 for the table reference t3 LEFT JOIN t4 ON t3.a=t4.a @@ -14241,7 +14494,8 @@ make_outerjoin_info(JOIN *join) } else if (!embedding) tab->table->reginfo.not_exists_optimize= 0; - + +skip_outer_join_setup: for ( ; embedding ; embedding= embedding->embedding) { if (embedding->is_active_sjm()) @@ -14258,6 +14512,13 @@ make_outerjoin_info(JOIN *join) tab->table->reginfo.not_exists_optimize= 0; continue; } + /* + Again, skip the LEFT side of a FULL JOIN (see above comment + for details). 
+ */ + if ((embedding->outer_join & JOIN_TYPE_FULL) && + (embedding->outer_join & JOIN_TYPE_LEFT)) + continue; NESTED_JOIN *nested_join= embedding->nested_join; if (!nested_join->counter) { @@ -14271,7 +14532,21 @@ make_outerjoin_info(JOIN *join) if (embedding->embedding) tab->first_upper= embedding->embedding->nested_join->first_nested; } - if (!tab->first_inner) + else if (tab->first_inner == tab && !tab->first_upper) + { + /* + tab is the first inner of its own outer join scope (set in + the initial block above for a RIGHT|FULL leaf), and its + immediate embedding was a "skipped" nest (the left side of + an enclosing FULL JOIN, or a flat nest with no outer join), + so its first_upper was not linked to a valid enclosing + scope. This iteration has walked up to the nearest + enclosing outer join scope, so point first_upper at its + first_nested. + */ + tab->first_upper= nested_join->first_nested; + } + if (!tab->first_inner) tab->first_inner= nested_join->first_nested; if (++nested_join->counter < nested_join->n_tables) break; @@ -14351,6 +14626,125 @@ bool build_tmp_join_prefix_cond(JOIN *join, JOIN_TAB *last_tab, Item **ret) } +/* + Push parts of an outer-join ON expression to tables that appear + after the inner tables in the execution order. + + The caller (make_join_select) has a loop that walks the inner tables + of each outer join, extracting conjuncts from the ON expression + and pushing them to each inner table's select_cond. But that loop + only visits tables from [start_from...last_tab], building used_tables2 + as it goes. If the ON expression references a table that the + optimizer placed after last_tab, no conjunct is ever extracted for + it, and the condition is silently lost. + + This shape arises with FULL JOIN. 
For example, + + (A FULL JOIN B ON A.x = B.x) RIGHT JOIN C ON B.x = C.x + + can be rewritten as + + C LEFT JOIN (A, B) ON B.x = C.x + + and the constraint that keeps the FULL JOIN tables A and B contiguous + can place the tables in the order A, B, C. The inner scope of the + LEFT JOIN is {A, B} with last_tab = B, but the ON expression + "B.x = C.x" references C, which comes after B. The main loop never + reaches C, so "B.x = C.x" would be lost without this function. + + This function picks up where the main loop left off. It scans + tables after last_tab and, for each one referenced by on_expr, + extracts the relevant conjuncts, wraps them in the same trigcond + guards used for outer join conditions, and attaches them to that + table's select_cond. + + The scan below is currently unreachable in phase 2 but is kept as + defensive scaffolding for phase 3, which lifts the restriction. + + @param used_tables Cumulative bitmap of tables visited so far + (updated in place so the caller sees the new + tables we covered). + @return true on error. +*/ +static bool +push_on_expr_to_later_outer_tables(THD *thd, JOIN *join, JOIN_TAB *last_tab, + JOIN_TAB *first_inner_tab, Item *on_expr, + table_map *used_tables) +{ + /* + Every table referenced by on_expr is already in used_tables; + nothing left to push. + */ + if (!(on_expr->used_tables() & ~(*used_tables))) + return false; + + JOIN_TAB *end_tab= join->join_tab + join->top_join_tab_count; + for (JOIN_TAB *outer_tab= last_tab + 1; outer_tab < end_tab; outer_tab++) + { + if (!outer_tab->table) + continue; + + /* + Always add the table to used_tables even if on_expr doesn't + reference it. make_cond_for_table needs used_tables to include + all tables up to and including the one being extracted for. 
+ */ + table_map current_map= outer_tab->table->map; + *used_tables|= current_map; + if (!(on_expr->used_tables() & current_map)) + continue; + + /* + Extract the conjuncts from on_expr that can be evaluated once + this table's row is available. + */ + COND *tmp_cond= make_cond_for_table(thd, on_expr, *used_tables, + current_map, -1, + FALSE, FALSE); + if (thd->is_error()) + return true; + if (!tmp_cond) + continue; + + /* + Wrap in the two "standard" outer-join trigcond guards: + + trigcond(found, ) -- disabled until a match is found + trigcond(not_null_compl, ...) -- disabled during null-complement + + This makes the pushed condition behave identically to conditions + pushed to inner tables by the main loop. + */ + DBUG_ASSERT(tmp_cond->fixed()); + if (!(tmp_cond= add_found_match_trig_cond(thd, first_inner_tab, + tmp_cond, 0))) + return true; + + tmp_cond= new (thd->mem_root) + Item_func_trig_cond(thd, tmp_cond, &first_inner_tab->not_null_compl); + if (!tmp_cond) + return true; + tmp_cond->quick_fix_field(); + + /* AND the wrapped condition into this table's select_cond. */ + DBUG_ASSERT(!outer_tab->select_cond || outer_tab->select_cond->fixed()); + outer_tab->select_cond= + !outer_tab->select_cond ? 
tmp_cond : + new (thd->mem_root) + Item_cond_and(thd, outer_tab->select_cond, tmp_cond); + if (!outer_tab->select_cond) + return true; + + outer_tab->select_cond->quick_fix_field(); + outer_tab->select_cond->update_used_tables(); + if (outer_tab->select) + outer_tab->select->cond= outer_tab->select_cond; + } + + return false; +} + + static bool make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) { @@ -14960,33 +15354,33 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) join_tab != end_with; join_tab++) { - if (*join_tab->on_expr_ref) + if (!*join_tab->on_expr_ref || !join_tab->first_inner) + continue; + + JOIN_TAB *cond_tab= join_tab->first_inner; + COND *tmp_cond= make_cond_for_table(thd, *join_tab->on_expr_ref, + join->const_table_map, + (table_map) 0, -1, FALSE, FALSE); + if (!tmp_cond) { - JOIN_TAB *cond_tab= join_tab->first_inner; - COND *tmp_cond= make_cond_for_table(thd, *join_tab->on_expr_ref, - join->const_table_map, - (table_map) 0, -1, FALSE, FALSE); - if (!tmp_cond) - { - if (!thd->is_error()) - continue; - DBUG_RETURN(1); - } - tmp_cond= new (thd->mem_root) Item_func_trig_cond(thd, tmp_cond, - &cond_tab->not_null_compl); - if (!tmp_cond) - DBUG_RETURN(1); - tmp_cond->quick_fix_field(); - cond_tab->select_cond= !cond_tab->select_cond ? tmp_cond : - new (thd->mem_root) Item_cond_and(thd, cond_tab->select_cond, - tmp_cond); - if (!cond_tab->select_cond) - DBUG_RETURN(1); - cond_tab->select_cond->quick_fix_field(); - cond_tab->select_cond->update_used_tables(); - if (cond_tab->select) - cond_tab->select->cond= cond_tab->select_cond; - } + if (!thd->is_error()) + continue; + DBUG_RETURN(1); + } + tmp_cond= new (thd->mem_root) Item_func_trig_cond(thd, tmp_cond, + &cond_tab->not_null_compl); + if (!tmp_cond) + DBUG_RETURN(1); + tmp_cond->quick_fix_field(); + cond_tab->select_cond= !cond_tab->select_cond ? 
tmp_cond : + new (thd->mem_root) Item_cond_and(thd, cond_tab->select_cond, + tmp_cond); + if (!cond_tab->select_cond) + DBUG_RETURN(1); + cond_tab->select_cond->quick_fix_field(); + cond_tab->select_cond->update_used_tables(); + if (cond_tab->select) + cond_tab->select->cond= cond_tab->select_cond; } @@ -15121,7 +15515,12 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) cond_tab->select->cond= cond_tab->select_cond; } } - first_inner_tab= first_inner_tab->first_upper; + + if (push_on_expr_to_later_outer_tables(thd, join, last_tab, + first_inner_tab, on_expr, + &used_tables2)) + DBUG_RETURN(1); + first_inner_tab= first_inner_tab->first_upper; } if (!tab->bush_children) i++; @@ -15863,6 +16262,29 @@ end_sj_materialize(JOIN *join, JOIN_TAB *join_tab, bool end_of_records) #endif */ + +/* + Return true if the given TABLE_LIST is a FULL JOIN operand or is + embedded (at any depth) within a FULL JOIN nest. Join caching is + disabled for such tables because the null-complement rescan requires a + plain sequential scan of the right side (rather than one that reads + from a buffered join cache). +*/ +static inline bool is_in_full_join_scope(TABLE_LIST *tl) +{ + if (tl->outer_join & JOIN_TYPE_FULL) + return true; + for (TABLE_LIST *embedding= tl->embedding; + embedding; + embedding= embedding->embedding) + { + if (embedding->outer_join & JOIN_TYPE_FULL) + return true; + } + return false; +} + + static uint check_join_cache_usage(JOIN_TAB *tab, ulonglong options, @@ -15891,6 +16313,9 @@ uint check_join_cache_usage(JOIN_TAB *tab, BKA_HINT_ENUM, false); join->return_tab= 0; + if (is_in_full_join_scope(tab->tab_list)) + goto no_join_cache; + if (tab->no_forced_join_cache || (hint_disables_bnl && no_bka_cache)) goto no_join_cache; @@ -19781,130 +20206,602 @@ propagate_cond_constants(THD *thd, I_List *save_list, } } + /** - Simplify joins replacing outer joins by inner joins whenever it's - possible. 
+ Convenience function to wrap a recursive call to simplify_joins in the case + of a nested join, which requires updates to the NESTED_JOIN structure. - The function, during a retrieval of join_list, eliminates those - outer joins that can be converted into inner join, possibly nested. - It also moves the on expressions for the converted outer joins - and from inner joins to conds. - The function also calculates some attributes for nested joins: - - used_tables - - not_null_tables - - dep_tables. - - on_expr_dep_tables - The first two attributes are used to test whether an outer join can - be substituted for an inner join. The third attribute represents the - relation 'to be dependent on' for tables. If table t2 is dependent - on table t1, then in any evaluated execution plan table access to - table t2 must precede access to table t2. This relation is used also - to check whether the query contains invalid cross-references. - The forth attribute is an auxiliary one and is used to calculate - dep_tables. - As the attribute dep_tables qualifies possible orders of tables in the - execution plan, the dependencies required by the straight join - modifiers are reflected in this attribute as well. - The function also removes all braces that can be removed from the join - expression without changing its meaning. + @param join reference to the query info + @param table currently visited TABLE_LIST entry in the join_list + @param conds conditions to add on expressions for converted joins + @param top true <=> conds is the where condition + @param in_sj TRUE <=> processing semi-join nest's children + @param used_tables OUT: used_tables of the nested join + @param not_null_tables OUT: not_null_tables of the nested join - @note - An outer join can be replaced by an inner join if the where condition - or the on expression for an embedding nested join contains a conjunctive - predicate rejecting null values for some attribute of the inner tables. 
+ @return the new condition on success, nullptr otherwise +*/ - E.g. in the query: - @code - SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a WHERE t2.b < 5 - @endcode - the predicate t2.b < 5 rejects nulls. - The query is converted first to: - @code - SELECT * FROM t1 INNER JOIN t2 ON t2.a=t1.a WHERE t2.b < 5 - @endcode - then to the equivalent form: - @code - SELECT * FROM t1, t2 ON t2.a=t1.a WHERE t2.b < 5 AND t2.a=t1.a - @endcode +static COND *simplify_nested_join(JOIN *join, TABLE_LIST *table, + COND *conds, bool in_sj, + table_map *used_tables, + table_map *not_null_tables) +{ + DBUG_ASSERT(used_tables); + DBUG_ASSERT(not_null_tables); + NESTED_JOIN *nested_join= table->nested_join; + DBUG_ASSERT(nested_join); + nested_join->used_tables= (table_map) 0; + nested_join->not_null_tables=(table_map) 0; + conds= simplify_joins(join, &nested_join->join_list, conds, + in_sj || table->sj_on_expr); + if (!conds && join->thd->is_error()) + return nullptr; + *used_tables= nested_join->used_tables; + *not_null_tables= nested_join->not_null_tables; + /* The following two might become unequal after table elimination: */ + nested_join->n_tables= nested_join->join_list.elements; + return conds; +} - Similarly the following query: - @code - SELECT * from t1 LEFT JOIN (t2, t3) ON t2.a=t1.a t3.b=t1.b - WHERE t2.c < 5 - @endcode - is converted to: - @code - SELECT * FROM t1, (t2, t3) WHERE t2.c < 5 AND t2.a=t1.a t3.b=t1.b - @endcode +/** + Rewrite a FULL JOIN to a LEFT JOIN by mutating the + left and right table state to make them appear as though + the user wrote the FULL JOIN as a LEFT JOIN originally. 
- One conversion might trigger another: - @code - SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a - LEFT JOIN t3 ON t3.b=t2.b - WHERE t3 IS NOT NULL => - SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a, t3 - WHERE t3 IS NOT NULL AND t3.b=t2.b => - SELECT * FROM t1, t2, t3 - WHERE t3 IS NOT NULL AND t3.b=t2.b AND t2.a=t1.a - @endcode + @param left_table table t1 in t1 FULL JOIN t2 + @param right_table table t2 in t1 FULL JOIN t2 +*/ - The function removes all unnecessary braces from the expression - produced by the conversions. - E.g. - @code - SELECT * FROM t1, (t2, t3) WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b - @endcode - finally is converted to: - @code - SELECT * FROM t1, t2, t3 WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b +static void rewrite_full_to_left(TABLE_LIST *left_table, + TABLE_LIST *right_table) +{ + // Grammar does not mark the left table at all + left_table->outer_join= 0; - @endcode + /* + Clear FULL JOIN flag and do as the grammar does by marking + the right table as JOIN_TYPE_LEFT. + */ + right_table->outer_join= JOIN_TYPE_LEFT; + /* + The right table must have an ON clause. NATURAL JOINs get + this not from the grammar but they're built before simplify_joins + is called. + */ + DBUG_ASSERT(right_table->on_expr); +} - It also will remove braces from the following queries: - @code - SELECT * from (t1 LEFT JOIN t2 ON t2.a=t1.a) LEFT JOIN t3 ON t3.b=t2.b - SELECT * from (t1, (t2,t3)) WHERE t1.a=t2.a AND t2.b=t3.b. - @endcode - The benefit of this simplification procedure is that it might return - a query for which the optimizer can evaluate execution plan with more - join orders. With a left join operation the optimizer does not +/** + Swap the left and right operands of a FULL JOIN that survives + simplify_joins. + + FULL JOIN is symmetric on its operands, so swapping does not + change query semantics. The swap is needed because the null + complement pass keys off a JOIN_TAB carrying JOIN_TYPE_FULL | + JOIN_TYPE_RIGHT. 
alloc_full_join_duplicate_filters attaches the + fj_dups filter to that JOIN_TAB, and the rescan reads its rowid. + + This is a temporary limitation given that, at this point in time, + we don't support anything but base tables on the right side of + a FULL JOIN. + + When the parser places a nested join expression as the + right operand and a single base table as the left operand, + the FULL|RIGHT bits land on the nest, so no + fj_dups is allocated and the null complement pass never + fires. Swapping puts the leaf on the right where the filter can + be attached and the rescan can run. + + @param left_table table t1 in t1 FULL JOIN t2 + @param right_table table t2 in t1 FULL JOIN t2 +*/ + +static void swap_full_join_sides(TABLE_LIST *left_table, + TABLE_LIST *right_table) +{ + DBUG_ASSERT(test_all_bits(left_table->outer_join, + JOIN_TYPE_FULL | JOIN_TYPE_LEFT)); + DBUG_ASSERT(test_all_bits(right_table->outer_join, + JOIN_TYPE_FULL | JOIN_TYPE_RIGHT)); + DBUG_ASSERT(right_table->on_expr); + + /* + Swap the LEFT|RIGHT roles, keeping the FULL bit (and any other + bits, e.g., JOIN_TYPE_NATURAL) intact on both sides. + */ + left_table->outer_join= (left_table->outer_join & ~JOIN_TYPE_LEFT) + | JOIN_TYPE_RIGHT; + right_table->outer_join= (right_table->outer_join & ~JOIN_TYPE_RIGHT) + | JOIN_TYPE_LEFT; + + /* + The parser attaches the ON clause to the right operand of a FULL + JOIN. After the swap the new right operand (was left) carries it. + */ + left_table->on_expr= right_table->on_expr; + right_table->on_expr= nullptr; + + left_table->prep_on_expr= right_table->prep_on_expr; + right_table->prep_on_expr= nullptr; + + left_table->on_context= right_table->on_context; + right_table->on_context= nullptr; +} + + +/** + Rewrite a FULL JOIN to a RIGHT JOIN by mutating the + left and right table state to make them appear as though + the user wrote the FULL JOIN as a RIGHT JOIN originally. 
+ + It's important to keep in mind that this function does its + work updating the tables to prepare them to be swapped in + the join order. Had the user written the query as a RIGHT + JOIN, it would've then been converted to a LEFT JOIN by + convert_right_join. The caller will swap them in the join + list, so we prepare them in place, then once they're swapped + they will have the correct respective state. + + Consequently, in this method, we change the right_table with + the understanding that it will swap places with the left_table + very shortly (similarly with respect to the right_table). + + @param left_table table t1 in t1 FULL JOIN t2 + @param right_table table t2 in t1 FULL JOIN t2 +*/ + +static void rewrite_full_to_right(TABLE_LIST *left_table, + TABLE_LIST *right_table) +{ + // Grammar does not mark the right table at all. + right_table->outer_join= 0; + + /* + Clear FULL JOIN flag and do as convert_right_join does which + has the effect of marking the left table as JOIN_TYPE_RIGHT. + */ + left_table->outer_join= JOIN_TYPE_RIGHT; + + /* + The right table must have an ON clause. NATURAL JOINs get + this from setup_natural_join_row_types(). + + The ON clause is moved from the right table to the left one + because, again, the tables will be swapped in the join list + to imitate the convert_right_join operation that would've been + done had the user written this query as a RIGHT JOIN instead + of a FULL JOIN. + */ + DBUG_ASSERT(right_table->on_expr); + left_table->on_expr= right_table->on_expr; + right_table->on_expr= nullptr; + + /* + Update prep_on_expr to match the post-rewrite state so that + reinit_before_use() restores the correct ON expressions for + prepared statement re-execution. The ON expression moved from + the right table to the left, so prep_on_expr must follow. + */ + left_table->prep_on_expr= right_table->prep_on_expr; + right_table->prep_on_expr= nullptr; + + /* + Prepare the right table to become the left table by + clearing its context. 
The left table retains the context + set by the grammar. + */ + right_table->on_context= nullptr; +} + + +/** + Attempt to rewrite [NATURAL] FULL JOIN to LEFT, RIGHT, or INNER JOIN, + depending on the WHERE clause and whether it rejects NULLs. For example, + the following queries are equivalent: + + SELECT * FROM t1 FULL JOIN t2 ON t1.v = t2.v WHERE t1.v IS NOT NULL; + SELECT * FROM t1 LEFT JOIN t2 ON t1.v = t2.v; + + The rewritten query, be it a LEFT or RIGHT JOIN, may yet again be + rewritten to an INNER JOIN if the WHERE clause permits. + + These parameters are the same as in simplify_joins: + @param join reference to the query info + @param join_list list representation of the join to be converted + @param conds WHERE expressions. Will be AND'ed with ON expressions + if rewrite happens. + @param top true <=> conds is the where condition + @param in_sj TRUE <=> processing semi-join nest's children + + The following parameters are IN/OUT parameters and are mutated by + this function: + @param table_ptr the current TABLE_LIST from the join list + @param li_ptr the iterator into the join list + @param used_tables_ptr used_tables from simplify_joins + @param not_null_tables_ptr not_null_tables from simplify_joins + + @return + - The new condition, if success + - nullptr, otherwise +*/ + +static COND *rewrite_full_outer_joins(JOIN *join, + COND *conds, + bool in_sj, + TABLE_LIST **right_table, + List_iterator *li, + table_map *used_tables, + table_map *not_null_tables) +{ + DBUG_ENTER("rewrite_full_outer_joins"); + + /* + The join_list enumerates the tables from t_n, ..., t_0 so we always + see the right table first. If, on this call to rewrite_full_outer_joins, + the current table is left member of the JOIN (e.g., left_member FULL JOIN + ...) it means we couldn't rewrite the FULL JOIN as a LEFT, RIGHT, or + INNER JOIN, so emit an error (unless we're in an EXPLAIN EXTENDED, permit + that). 
+ */ + if ((*right_table)->outer_join & JOIN_TYPE_LEFT) + DBUG_RETURN(conds); + + /* + Must always see the right table before the left. Down below, we deal + with the left table at the same time as the right, so we'll never get + to this point with a single table remaining in the join_list. If + there's a right table remaining then there will be a left one, too. + */ + DBUG_ASSERT((*right_table)->outer_join & JOIN_TYPE_RIGHT); + + /* + If the left table is a nested join, then recursively rewrite any + FULL JOINs within it. Otherwise continue to attempt to rewrite + in the base case. + */ + TABLE_LIST *left_table= li->peek(); + table_map left_used_tables= 0; + table_map left_not_null_tables= 0; + DBUG_ASSERT(test_all_bits(left_table->outer_join, + JOIN_TYPE_FULL | JOIN_TYPE_LEFT)); + if (left_table->nested_join) + { + conds= simplify_nested_join(join, left_table, conds, in_sj, + &left_used_tables, &left_not_null_tables); + if (!conds && join->thd->is_error()) + DBUG_RETURN(nullptr); + } + else + { + left_used_tables= left_table->get_map(); + left_not_null_tables= *not_null_tables; + } + + /* + When left_table is a nested join with an unrewritten FULL JOIN + inside it, and rewriting that FULL JOIN would result in a query + with a FULL JOIN on the inner side of another join, reject the + rewrite. In phase 2, we cannot support FULL JOIN on the inner + side of another JOIN (support coming in phase 3). + */ + if (left_table->contains_full_join()) + { + *not_null_tables= 0; + DBUG_RETURN(conds); + } + + /* + If the right hand table is not NULL under the WHERE clause then we can + rewrite it as a RIGHT JOIN, mutating the data structures to make it + appear as though the user wrote the query as a RIGHT JOIN originally. + */ + if (*used_tables & *not_null_tables) + { + /* + RIGHT JOINs don't actually exist in MariaDB! This will do what + the grammar does and convert_right_join together do when given a + RIGHT JOIN. 
+ */ + rewrite_full_to_right(left_table, *right_table); + + // This will be reflected to the caller, too. + *used_tables= left_used_tables; + + /* + Swap myself with the left as though we did convert_right_join(). + Then we will have effectively done the following transformation: + FULL -> RIGHT -> LEFT. + Again, RIGHT JOINs don't actually exist in MariaDB! + */ + *right_table= li->swap_next(); + --join->thd->lex->full_join_count; + } + else + { + /* + If the left table, be it a nested join or not, rejects nulls for + the WHERE condition, then rewrite. + */ + if (left_used_tables & *not_null_tables) + { + rewrite_full_to_left(left_table, *right_table); + --join->thd->lex->full_join_count; + } + else if (!left_table->nested_join && (*right_table)->nested_join && + (*right_table)->contains_full_join()) + { + /* + The FULL JOIN survives simplification with a leaf on the left + and a nested join on the right, so the FULL|RIGHT bits sit on + a nest, which is never a JOIN_TAB, and the null complement + pass has no JOIN_TAB to attach an fj_dups filter to. Swap so + the leaf carries those bits; see swap_full_join_sides. + */ + swap_full_join_sides(left_table, *right_table); + *used_tables= left_used_tables; + *right_table= li->swap_next(); + } + *not_null_tables= left_not_null_tables; + // else the FULL JOIN cannot be rewritten, pass it along. + } + + DBUG_RETURN(conds); +} + + +/** + FULL JOIN feature caveats: + + (1) The right side of a FULL JOIN must be a base table; violations + trigger ER_FULL_JOIN_BASE_TABLES_ONLY. + + (2) A FULL JOIN may not appear on the inner side of an enclosing + LEFT or RIGHT JOIN; violations trigger + ER_FULL_JOIN_NOT_ALLOWED_IN_OUTER_JOIN. FULL JOIN is allowed + on the right side of an INNER JOIN and on the outer side of + any outer join (including another FULL JOIN). 
The restriction + exists because the current FULL JOIN null-complement pass + cannot correctly pair its right-unmatched rows with each outer + row of an enclosing LEFT/RIGHT JOIN when the FULL JOIN sits on + that join's inner side. + + MariaDB converts every RIGHT JOIN into the equivalent LEFT + JOIN via convert_right_join at parse time, so "inner of an + outer join" is unambiguous, it's the table that carries either + JOIN_TYPE_LEFT or JOIN_TYPE_RIGHT (and no JOIN_TYPE_FULL, + which would identify it as a FULL JOIN table). +*/ + +bool +check_full_join_base_tables(List *join_list) +{ + TABLE_LIST *table; + List_iterator li(*join_list); + + while ((table= li++)) + { + /* + (1) The right partner of a FULL JOIN must be a base table. + */ + if ((table->outer_join & JOIN_TYPE_FULL) && + (table->outer_join & JOIN_TYPE_RIGHT)) + { + if (table->derived || table->is_view()) + { + my_error(ER_FULL_JOIN_BASE_TABLES_ONLY, MYF(0), table->alias.str); + return true; + } + } + + /* + (2) The inner of any outer join must not contain a FULL JOIN. + The inner is identified by JOIN_TYPE_LEFT or JOIN_TYPE_RIGHT + with JOIN_TYPE_FULL absent. + */ + if ((table->outer_join & (JOIN_TYPE_LEFT | JOIN_TYPE_RIGHT)) && + !(table->outer_join & JOIN_TYPE_FULL) && + table->contains_full_join()) + { + my_error(ER_FULL_JOIN_NOT_ALLOWED_IN_OUTER_JOIN, MYF(0)); + return true; + } + + if (table->nested_join && + check_full_join_base_tables(&table->nested_join->join_list)) + return true; // already set thd error state + } + + return false; +} + + +/** + Re-check FULL JOIN shapes after simplify_joins has run. + + check_full_join_base_tables runs before simplify_joins and rejects the + shapes that are apparent from the parse tree. simplify_joins then rewrites + FULL JOINs to one-sided joins where the WHERE clause allows, and + swap_full_join_sides moves a base table onto the right side of a surviving + FULL JOIN when the parser placed a nested join there. 
One disallowed shape + is only visible after that work: + + A surviving FULL JOIN whose right operand is still a nested join. The + null complement pass keys off a JOIN_TAB carrying JOIN_TYPE_FULL | + JOIN_TYPE_RIGHT; a nested join has no such JOIN_TAB, so the right + unmatched rows would be dropped. The swap leaves a nested join on the + right when it cannot move a base table there, e.g., both operands are + nested joins, the right operand is an inner-join nest, or the right + operand's own FULL JOIN was rewritten away before the swap check ran. + + Reject these with ER_FULL_JOIN_BASE_TABLES_ONLY rather than return wrong + results. + + @return true if a disallowed shape was found, with the error already raised +*/ + +static bool +check_full_join_after_simplify(List *join_list) +{ + TABLE_LIST *table; + List_iterator li(*join_list); + + while ((table= li++)) + { + if ((table->outer_join & JOIN_TYPE_FULL) && + (table->outer_join & JOIN_TYPE_RIGHT) && + table->nested_join) + { + my_error(ER_FULL_JOIN_BASE_TABLES_ONLY, MYF(0), + table->alias.str ? table->alias.str : "(nested join)"); + return true; + } + + if (table->nested_join && + check_full_join_after_simplify(&table->nested_join->join_list)) + return true; // already set thd error state + } + + return false; +} + + +/** + Simplify joins replacing outer joins by inner joins whenever it's + possible. + + The function, during a retrieval of join_list, eliminates those + OUTER JOINs that can be converted into INNER JOIN, possibly nested. + It also moves the ON expressions for the converted OUTER JOINs + and from INNER JOINs to conds. + The function also calculates some attributes for nested joins: + - used_tables + - not_null_tables + - dep_tables. + - on_expr_dep_tables + used_tables and not_null_tables are used to test whether an outer join can + be substituted for an INNER JOIN. dep_tables represents the + relation 'to be dependent on' for tables. 
If table t2 is dependent + on table t1, then in any evaluated execution plan table access to + table t1 must precede access to table t2. This relation is used also + to check whether the query contains invalid cross-references. + on_expr_dep_tables is an auxiliary one and is used to calculate + dep_tables. + As the attribute dep_tables qualifies possible orders of tables in the + execution plan, the dependencies required by the STRAIGHT JOIN + modifiers are reflected in this attribute as well. + The function also removes all braces that can be removed from the join + expression without changing its meaning. + + @note + An OUTER JOIN can be replaced by an INNER JOIN if the WHERE condition + or the ON expression for an embedding nested join contains a conjunctive + predicate rejecting NULL values for some attribute of the inner tables. + + E.g. in the query: + @code + SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a WHERE t2.b < 5 + @endcode + the predicate t2.b < 5 rejects nulls. + The query is converted first to: + @code + SELECT * FROM t1 INNER JOIN t2 ON t2.a=t1.a WHERE t2.b < 5 + @endcode + then to the equivalent form: + @code + SELECT * FROM t1, t2 WHERE t2.b < 5 AND t2.a=t1.a + @endcode + + + Similarly the following query: + @code + SELECT * from t1 LEFT JOIN (t2, t3) ON t2.a=t1.a AND t3.b=t1.b + WHERE t2.c < 5 + @endcode + is converted to: + @code + SELECT * FROM t1, (t2, t3) WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b + @endcode + + One conversion might trigger another: + @code + SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a + LEFT JOIN t3 ON t3.b=t2.b + WHERE t3 IS NOT NULL => + SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a, t3 + WHERE t3 IS NOT NULL AND t3.b=t2.b => + SELECT * FROM t1, t2, t3 + WHERE t3 IS NOT NULL AND t3.b=t2.b AND t2.a=t1.a + @endcode + + The function removes all unnecessary braces from the expression + produced by the conversions. + E.g.
+ @code + SELECT * FROM t1, (t2, t3) WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b + @endcode + finally is converted to: + @code + SELECT * FROM t1, t2, t3 WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b + @endcode + + + It also will remove braces from the following queries: + @code + SELECT * from (t1 LEFT JOIN t2 ON t2.a=t1.a) LEFT JOIN t3 ON t3.b=t2.b + SELECT * from (t1, (t2,t3)) WHERE t1.a=t2.a AND t2.b=t3.b. + @endcode + + + Here's an example where the converted OUTER JOIN has its ON + conditions migrated to the ON condition for the INNER JOIN. + @code + SELECT * FROM t1 LEFT JOIN (t2 LEFT JOIN t3 ON t3.a=t2.a) ON t3.a=t1.a; + @endcode + becomes + @code + SELECT * FROM t1 LEFT JOIN (t2 INNER JOIN t3) ON t3.a=t2.a AND t3.a=t1.a; + @endcode + + + The benefit of this simplification procedure is that it might return + a query for which the optimizer can evaluate execution plan with more + join orders. With a LEFT JOIN operation the optimizer does not consider any plan where one of the inner tables is before some of outer tables. IMPLEMENTATION The function is implemented by a recursive procedure. On the recursive - ascent all attributes are calculated, all outer joins that can be + ascent all attributes are calculated, all OUTER JOINs that can be converted are replaced and then all unnecessary braces are removed. As join list contains join tables in the reverse order sequential elimination of outer joins does not require extra recursive calls. SEMI-JOIN NOTES - Remove all semi-joins that have are within another semi-join (i.e. have + Remove all semi-joins that are within another semi-join (i.e.
have an "ancestor" semi-join nest) EXAMPLES Here is an example of a join query with invalid cross references: @code - SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t3.a LEFT JOIN t3 ON t3.b=t1.b + SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t3.a LEFT JOIN t3 ON t3.b=t1.b @endcode @param join reference to the query info @param join_list list representation of the join to be converted @param conds conditions to add on expressions for converted joins - @param top true <=> conds is the where condition @param in_sj TRUE <=> processing semi-join nest's children @return - The new condition, if success - - 0, otherwise + - nullptr otherwise */ static COND * -simplify_joins(JOIN *join, List *join_list, COND *conds, bool top, - bool in_sj) +simplify_joins(JOIN *join, List *join_list, COND *conds, bool in_sj) { TABLE_LIST *table; NESTED_JOIN *nested_join; @@ -19913,12 +20810,46 @@ simplify_joins(JOIN *join, List *join_list, COND *conds, bool top, bool straight_join= MY_TEST(join->select_options & SELECT_STRAIGHT_JOIN); DBUG_ENTER("simplify_joins"); - /* + /* Try to simplify join operations from join_list. - The most outer join operation is checked for conversion first. + The most outer join operation is checked for conversion first. */ while ((table= li++)) { + /* + The join list is in reverse, so we see the right side of a FULL + JOIN before the left and attempted the rewrite on that earlier + iteration. If the FULL JOIN was rewritten, JOIN_TYPE_FULL is + cleared on both sides. If it is still set, then no rewrite + occurred and this is the LEFT side of a FULL JOIN. + + So we still need to propagate this side's bits into the embedding's + used_tables bitmap so downstream machinery knows that this table + participates in the nest. + */ + if ((table->outer_join & JOIN_TYPE_FULL) && + (table->outer_join & JOIN_TYPE_LEFT)) + { + table_map left_used_tables= + table->nested_join ? 
table->nested_join->used_tables + : table->get_map(); + if (table->embedding) + table->embedding->nested_join->used_tables |= left_used_tables; + + /* + When the ON expression ties the right side to the left + (e.g., t2.a = t1.a) the right side already depends on the left. + When it does not (a constant predicate, or a predicate over the + right side alone) the right side has no such dependency and the + optimizer may place it first, which breaks the rescan. Make the + right table depend on this left one, like how a LEFT JOIN inner + table depends on its outer table. + */ + if (table->foj_partner) + table->foj_partner->dep_tables|= left_used_tables; + continue; + } + table_map used_tables; table_map not_null_tables= (table_map) 0; @@ -19931,16 +20862,18 @@ simplify_joins(JOIN *join, List *join_list, COND *conds, bool top, if (table->on_expr) { Item *expr= table->on_expr; - /* - If an on expression E is attached to the table, + /* + If an on expression E is attached to the table, check all null rejected predicates in this expression. If such a predicate over an attribute belonging to an inner table of an embedded outer join is found, the outer join is converted to an inner join and - the corresponding on expression is added to E. - */ + the corresponding on expression is added to E. 
+ */ expr= simplify_joins(join, &nested_join->join_list, - expr, FALSE, in_sj || table->sj_on_expr); + expr, in_sj || table->sj_on_expr); + if (!expr && join->thd->is_error()) + DBUG_RETURN(nullptr); if (!table->prep_on_expr || expr != table->on_expr) { @@ -19950,14 +20883,10 @@ simplify_joins(JOIN *join, List *join_list, COND *conds, bool top, table->prep_on_expr= expr->copy_andor_structure(join->thd); } } - nested_join->used_tables= (table_map) 0; - nested_join->not_null_tables=(table_map) 0; - conds= simplify_joins(join, &nested_join->join_list, conds, top, - in_sj || table->sj_on_expr); - used_tables= nested_join->used_tables; - not_null_tables= nested_join->not_null_tables; - /* The following two might become unequal after table elimination: */ - nested_join->n_tables= nested_join->join_list.elements; + conds= simplify_nested_join(join, table, conds, in_sj, + &used_tables, ¬_null_tables); + if (!conds && join->thd->is_error()) + DBUG_RETURN(nullptr); } else { @@ -19967,7 +20896,18 @@ simplify_joins(JOIN *join, List *join_list, COND *conds, bool top, if (conds) not_null_tables= conds->not_null_tables(); } - + + /* + Attempt to rewrite any FULL JOINs as LEFT or RIGHT JOINs. Any subsequent + JOINs that could be further rewritten to INNER JOINs are done below. 
+ */ + if (table->outer_join & JOIN_TYPE_FULL) + conds= rewrite_full_outer_joins(join, conds, in_sj, &table, + &li, &used_tables, + ¬_null_tables); + if (!conds && join->thd->is_error()) + DBUG_RETURN(nullptr); + if (table->embedding) { table->embedding->nested_join->used_tables|= used_tables; @@ -20169,7 +21109,7 @@ simplify_joins(JOIN *join, List *join_list, COND *conds, bool top, continue; join->select_lex->sj_nests.push_back(table, join->thd->mem_root); - /* + /* Also, walk through semi-join children and mark those that are now top-level */ @@ -20181,11 +21121,21 @@ simplify_joins(JOIN *join, List *join_list, COND *conds, bool top, tbl->table->maybe_null= FALSE; } } - else if (nested_join && !table->on_expr) + else if (nested_join && !table->on_expr && + !(table->outer_join & JOIN_TYPE_FULL) && + (!table->contains_full_join() || + join_list->elements <= 1)) { + /* + In general, perform flattening when the nest isn't a FULL JOIN + and doesn't contain a FULL JOIN. Exception: the top-level has + no sibling tables that could get interleaved into a FULL JOIN + nest (such as a FULL JOIN of two base tables). + */ + TABLE_LIST *tbl; List_iterator it(nested_join->join_list); - List repl_list; + List repl_list; while ((tbl= it++)) { tbl->embedding= table->embedding; @@ -20198,7 +21148,7 @@ simplify_joins(JOIN *join, List *join_list, COND *conds, bool top, li.replace(repl_list); } } - DBUG_RETURN(conds); + DBUG_RETURN(conds); } @@ -20407,36 +21357,51 @@ static bool check_interleaving_with_nj(JOIN_TAB *next_tab) TABLE_LIST *next_emb= next_tab->table->pos_in_table_list->embedding; /* Do update counters for "pairs of brackets" that we've left (marked as - X,Y,Z in the above picture) + X,Y,Z in the above picture). + + Walk the embedding chain. A regular nest counts its own children, but a + semi-join nest acts as a single child of its parent. So when walking up + through a sj-nest we still bump its counter, but only propagate to its + parent when the sj-nest is fully placed. 
Without this, a sj-nest + parked inside a non-flattened outer nest (such as a FULL JOIN nest) + would have its parent's counter incremented once per sj-inner table + instead of once for the whole sj-nest. */ for (;next_emb && next_emb != join->emb_sjm_nest; next_emb= next_emb->embedding) { + NESTED_JOIN *nest= next_emb->nested_join; + ++nest->counter; + DBUG_ASSERT(nest->counter <= nest->n_tables); + if (!next_emb->sj_on_expr) { - next_emb->nested_join->counter++; - DBUG_ASSERT(next_emb->nested_join->counter <= next_emb->nested_join->n_tables); - if (next_emb->nested_join->counter == 1) + if (nest->counter == 1) { /* next_emb is the first table inside a nested join we've "entered". In the picture above, we're looking at the 'X' bracket. Don't exit yet as X bracket might have Y pair bracket. */ - join->cur_embedding_map |= next_emb->nested_join->nj_map; + join->cur_embedding_map |= nest->nj_map; } - - DBUG_ASSERT(next_emb->nested_join->n_tables >= - next_emb->nested_join->counter); - if (next_emb->nested_join->n_tables != - next_emb->nested_join->counter) + if (nest->n_tables != nest->counter) break; /* We're currently at Y or Z-bracket as depicted in the above picture. Mark that we've left it and continue walking up the brackets hierarchy. */ - join->cur_embedding_map &= ~next_emb->nested_join->nj_map; + join->cur_embedding_map &= ~nest->nj_map; + } + else + { + /* + Semi-join nests have nj_map==0 so cur_embedding_map is unaffected. + Walk up to the parent only once the sj-nest is fully placed. + */ + if (nest->n_tables != nest->counter) + break; } } return FALSE; @@ -20495,29 +21460,127 @@ static bool check_interleaving_with_nj(JOIN_TAB *next_tab) partial join order. 
*/ -static void restore_prev_nj_state(JOIN_TAB *last) -{ - TABLE_LIST *last_emb= last->table->pos_in_table_list->embedding; - JOIN *join= last->join; - for (;last_emb != NULL && last_emb != join->emb_sjm_nest; - last_emb= last_emb->embedding) - { - if (!last_emb->sj_on_expr) - { - NESTED_JOIN *nest= last_emb->nested_join; - DBUG_ASSERT(nest->counter > 0); - - bool was_fully_covered= nest->is_fully_covered(); - - join->cur_embedding_map|= nest->nj_map; +static void restore_prev_nj_state(JOIN_TAB *last) +{ + TABLE_LIST *last_emb= last->table->pos_in_table_list->embedding; + JOIN *join= last->join; + for (;last_emb != NULL && last_emb != join->emb_sjm_nest; + last_emb= last_emb->embedding) + { + NESTED_JOIN *nest= last_emb->nested_join; + DBUG_ASSERT(nest->counter > 0); + + bool was_fully_covered= nest->is_fully_covered(); + + if (!last_emb->sj_on_expr) + { + join->cur_embedding_map|= nest->nj_map; + + if (--nest->counter == 0) + join->cur_embedding_map&= ~nest->nj_map; + } + else + { + /* + Mirror check_interleaving_with_nj: sj-nests track their own counter + and propagate up to the parent only when transitioning out of the + fully-covered state. + */ + --nest->counter; + } + + if (!was_fully_covered) + break; + } +} + + +/* + Walk the join tree and collect, into full_join_nest_tables, every + table that participates in any FULL JOIN. The optimizer must keep + those tables adjacent in the join order because the FULL JOIN + null-complement algorithm requires it. + + A FULL JOIN table can be an actual table or a nested join. We + recognize either by the JOIN_TYPE_FULL flag, then OR in all of the + tables it covers. 
+*/ + +static void +collect_full_join_tables(JOIN *join, List *lst) +{ + TABLE_LIST *tl= nullptr; + List_iterator it(*lst); + + while ((tl= it++)) + { + if (tl->outer_join & JOIN_TYPE_FULL) + { + if (tl->nested_join) + join->full_join_nest_tables|= tl->nested_join->used_tables; + else if (tl->table) + join->full_join_nest_tables|= tl->table->map; + } + + if (tl->nested_join) + collect_full_join_tables(join, &tl->nested_join->join_list); + } +} + + +static void +compute_full_join_nest_tables(JOIN *join, SELECT_LEX *lex) +{ + join->full_join_nest_tables= 0; + if (!join->thd->lex->full_join_count) + return; + + collect_full_join_tables(join, &lex->top_join_list); +} + + +/* + Keep the FULL JOIN block contiguous in the join order. Once any + FULL JOIN table has been placed, every subsequent table must also be + a FULL JOIN table until all FULL JOIN tables are placed. Other + tables may appear before or after the block of FULL JOIN tables. + + Returns the set of remaining FULL JOIN tables, otherwise 0 (no + restriction). +*/ + +static table_map +restrict_to_unplaced_fj_tables(JOIN *join, uint idx, table_map pool) +{ + // Nothing to place. + if (!join->full_join_nest_tables) + return 0; + + /* + Const tables come first in the join order, skip those as there + cannot be FULL JOIN tables that are constant (const table + optimization for FULL JOIN tables disabled). + */ + table_map placed_fj= 0; + for (uint i= join->const_tables; i < idx; i++) + placed_fj|= join->positions[i].table->table->map & + join->full_join_nest_tables; + + // Haven't entered the FULL JOIN block yet, no restriction. + if (!placed_fj) + return 0; + + // Already finished the FULL JOIN block, no restriction. + table_map remaining_fj= join->full_join_nest_tables & ~placed_fj; + if (!remaining_fj) + return 0; + + // Inside the block, only the remaining FULL JOIN tables are allowed. 
+ table_map remaining= 0; + for (uint i= idx; i < join->table_count; i++) + remaining|= join->best_ref[i]->table->map; - if (--nest->counter == 0) - join->cur_embedding_map&= ~nest->nj_map; - - if (!was_fully_covered) - break; - } - } + return remaining_fj & remaining & pool; } @@ -20569,33 +21632,33 @@ void JOIN::calc_allowed_top_level_tables(SELECT_LEX *lex) embedding= embedding->embedding; } - // Ok we are in the parent nested outer join nest. - if (!embedding) - { - allowed_top_level_tables |= map; - continue; - } - embedding->nested_join->direct_children_map |= map; - - // Walk to grand-parent join nest. - embedding= embedding->embedding; - - // Walk out of any semi-join nests - while (embedding && !embedding->on_expr) + /* + Walk through all upper join nests, adding this table to each + nest's direct_children_map. This must traverse the entire chain + so that deeply nested FULL JOINs are handled correctly. + The original code only walked two levels (parent and + grandparent), which caused an assertion failure when four or + more tables were chained with FULL JOINs. 
+ */ + while (true) { - DBUG_ASSERT(embedding->sj_on_expr); + if (!embedding) + { + allowed_top_level_tables |= map; + break; + } embedding->nested_join->direct_children_map |= map; embedding= embedding->embedding; + // Walk out of any semi-join nests + while (embedding && !embedding->on_expr) + { + embedding->nested_join->direct_children_map |= map; + embedding= embedding->embedding; + } } - - if (embedding) - { - DBUG_ASSERT(embedding->on_expr); // Impossible, see above - embedding->nested_join->direct_children_map |= map; - } - else - allowed_top_level_tables |= map; } + + compute_full_join_nest_tables(this, lex); DBUG_VOID_RETURN; } @@ -20625,10 +21688,25 @@ table_map JOIN::get_allowed_nj_tables(uint idx) } } } - // Return bitmap of tables not in any join nest - if (emb_sjm_nest) - return emb_sjm_nest->nested_join->direct_children_map; - return allowed_top_level_tables; + + /* + Select the set of tables the optimizer may pick from next. When + placing tables inside a materialized semijoin, that set is the + SJM nest's direct children. Otherwise it is the set of top-level + tables. + */ + const table_map pool= emb_sjm_nest + ? emb_sjm_nest->nested_join->direct_children_map + : allowed_top_level_tables; + + /* + If there are FULL JOIN tables present, then this function yields a + table_map keeping the FULL JOIN tables contiguous in the join order. + */ + if (table_map fj_only= restrict_to_unplaced_fj_tables(this, idx, pool)) + return fj_only; + + return pool; } @@ -24096,6 +25174,254 @@ Next_select_func setup_end_select_func(JOIN *join) -1 if error should be sent */ +/* + Helper function called by find_left_most_join_tab exclusively, + see that function's block comment for context before reading + this function. + + Test whether the TABLE_LIST dart is the same as target or + appears anywhere underneath it. +*/ +static bool table_on_full_join_left_side(TABLE_LIST *target, + TABLE_LIST *dart) +{ + // We found it. 
+ if (target == dart) + return true; + + /* + If we didn't find it and target isn't a nested join, then + whatever candidate we last tested has to be it (caller saved + the last candidate). + */ + if (!target->nested_join) + return false; + + /* + Walk the join nest looking for the table that will correspond + to the left-most JOIN_TAB in the join order. + */ + List_iterator li(target->nested_join->join_list); + TABLE_LIST *child; + while ((child= li++)) + { + // Obviously we need to recurse on the tables in the join nest. + if (table_on_full_join_left_side(child, dart)) + return true; // found it + } + + // Ultimately didn't find it. + return false; +} + + +/* + Locate the left-most JOIN_TAB corresponding to the given right_tab. + Because full_join_nest_tables forces all tables of a FULL JOIN nest + to be placed contiguously, the FULL JOIN's left side tables are in a + contiguous range immediately to the left of right_tab. Walk + backward from right_tab-1, collecting tabs in the left side, but + stopping at the first tab outside it. The last collected tab is the + left-most JOIN_TAB. + + One might ask "why not just look at foj_partner" but that ignores + the case when the left side is a join nest (or nest of nest, etc) + wherein there may be many tables before we get to the left-most + JOIN_TAB in the join order. +*/ +static JOIN_TAB *find_left_most_join_tab(JOIN *join, JOIN_TAB *right_tab) +{ + DBUG_ASSERT(right_tab->tab_list->outer_join & + (JOIN_TYPE_FULL|JOIN_TYPE_RIGHT)); + + TABLE_LIST *left_side= right_tab->tab_list->foj_partner; + DBUG_ASSERT(left_side); + + /* + right_tab lives in either join->join_tab or, when the FULL JOIN is + inside a materialized semijoin, in the bush's JOIN_TAB_RANGE. + */ + JOIN_TAB *join_tab= nullptr; + int stopping_point= 0; + if (right_tab->bush_root_tab) + { + // Setup walk from right_tab back to the start of the bush children. 
+ join_tab= right_tab->bush_root_tab->bush_children->start; + stopping_point= 0; + } + else + { + // Setup walk from right_tab back to the start of the joined tables. + join_tab= join->join_tab; + stopping_point= static_cast(join->const_tables); + } + const int starting_point= static_cast(right_tab - join_tab); + DBUG_ASSERT(starting_point >= 0); + JOIN_TAB *leftmost_jt= nullptr; + + /* + Each JOIN_TAB preceding right_tab is a candidate left-most + JOIN_TAB, so walk them starting from the first JOIN_TAB to the + left of right_tab and going backwards. + */ + for (int i= starting_point - 1; i >= stopping_point; --i) + { + /* + tab_list isn't a list, it's just the TABLE_LIST associated with + the i'th JOIN_TAB. Check to see if it is in the left side of + the FULL JOIN which would mean that we (might) have found the + left-most JOIN_TAB for the current FULL JOIN (but we will keep + looking until we're sure). This will return false when we've + walked past the left-most JOIN_TAB. + */ + if (!table_on_full_join_left_side(left_side, join_tab[i].tab_list)) + break; + + /* + join_tab[i] is the current candidate for left-most, but keep going + until we exhaust candidates, which happens when we break (above). + */ + leftmost_jt= &join_tab[i]; + } + + return leftmost_jt; +} + + +/* + Allocate a full_join_duplicate_filter for each right side FULL JOIN + table in the toplevel JOIN_TAB range [start_tab, start_tab+count). + + The filter records right side rowids matched during the LEFT JOIN + pass so the null-complement rescan can skip them. Only base tables + are supported on the right side of a FULL JOIN, but a query may + contain multiple (possibly nested) FULL JOINs, so each right side + JOIN_TAB gets its own filter. + + After allocating the filters, link each FULL JOIN right JOIN_TAB + into its corresponding left-most JOIN_TAB's fj_first_target list. 
+ Append at the tail so chained FULL JOINs land in inside-out order: + the inner FULL JOIN's right JOIN_TAB runs its rescan before the + outer FULL JOIN's right JOIN_TAB, so the inner rescan's forwarded + rows can update the outer fj_dups filter through the normal forward + chain before the outer rescan reads it. + + Returns true on allocation failure (error already reported). +*/ + +static bool alloc_full_join_duplicate_filters(JOIN *join, JOIN_TAB *start_tab, + uint count) +{ + // No FULL JOINs in this query, do nothing. + if (!join->thd->lex->full_join_count) + return false; + + // First, initialize all pointers to NULL... + for (uint i= 0; i < count; ++i) + { + start_tab[i].fj_dups= nullptr; + start_tab[i].fj_first_target= nullptr; + start_tab[i].fj_next_target= nullptr; + } + + // ...then, setup the duplicate filters. + for (uint i= 0; i < count; ++i) + { + /* + Descend into a particular bush_child (most likely a materialized + semijoin) so its FULL JOIN tables get their own fj_dups filters + (well, so at least the right sides of any FULL JOINs get them, see + down below). + */ + if (start_tab[i].bush_children) + { + JOIN_TAB *bush_start= start_tab[i].bush_children->start; + uint bush_count= (uint)(start_tab[i].bush_children->end - bush_start); + if (alloc_full_join_duplicate_filters(join, bush_start, bush_count)) + return true; + } + + /* + Right side of FULL JOINs only beyond this point. All the + bookkeeping stuff goes on the right side of the FULL JOIN. + */ + if (!(start_tab[i].tab_list->outer_join & JOIN_TYPE_FULL) || + !(start_tab[i].tab_list->outer_join & JOIN_TYPE_RIGHT)) + continue; + + /* + If we're allocating a filter, then it's for a FULL JOIN and there + must be at least two tables in the JOIN. 
+ */ + DBUG_ASSERT(count >= 2); + full_join_duplicate_filter *fj_dups= new full_join_duplicate_filter; + if (!fj_dups || fj_dups->init(join->thd, &start_tab[i])) + return true; + start_tab[i].fj_dups= fj_dups; + + /* + Link this JOIN_TAB (which must be on the right side of a FULL + JOIN) into the target list of the corresponding left-most + JOIN_TAB. The rescan that emits null-complement rows from the + right side of this FULL JOIN will fire at the end of that left + JOIN_TAB's sub_select call. + + Append at the tail of the list rather than at the head. + The enclosing loop walks JOIN_TABs in order, so for a + chained FULL JOIN like (A FJ B) FJ C the inner JOIN_TAB B lands + on A's list before the outer JOIN_TAB C. Order + matters because the inner rescan's emitted rows must reach + the outer FULL JOIN's fj_dups filter through next_select before + the outer rescan reads that filter. If we prepended, the + outer rescan would run first and emit already matched + right side rows again as unmatched. + */ + JOIN_TAB *leftmost_jt= find_left_most_join_tab(join, &start_tab[i]); + if (!leftmost_jt) + leftmost_jt= &start_tab[i]; + DBUG_ASSERT(leftmost_jt); + JOIN_TAB **slot= &leftmost_jt->fj_first_target; + while (*slot) // walk to the end of the linked list... + slot= &(*slot)->fj_next_target; + *slot= &start_tab[i]; // ...and stick start_tab[i] at the end. + } + return false; +} + + +/* + Release the temp tables backing each FULL JOIN duplicate filter + allocated by alloc_full_join_duplicate_filters. +*/ + +static void free_full_join_duplicate_filters(JOIN *join, JOIN_TAB *start_tab, + uint count) +{ + if (!join->thd->lex->full_join_count) + return; + + for (uint i= 0; i < count; ++i) + { + /* + Mirror alloc's descent into a materialized semijoin so filters + set up inside the bush are released, too.
+ */ + if (start_tab[i].bush_children) + { + JOIN_TAB *bush_start= start_tab[i].bush_children->start; + uint bush_count= (uint)(start_tab[i].bush_children->end - bush_start); + free_full_join_duplicate_filters(join, bush_start, bush_count); + } + + if (!(start_tab[i].tab_list->outer_join & JOIN_TYPE_FULL) || + start_tab[i].fj_dups == nullptr) + continue; + start_tab[i].fj_dups->cleanup(join->thd); + start_tab[i].fj_dups= nullptr; + } +} + + static int do_select(JOIN *join, Procedure *procedure) { @@ -24222,14 +25548,21 @@ do_select(JOIN *join, Procedure *procedure) join->join_tab[top_level_tables-1].cached_pfs_batch_update= join->join_tab[top_level_tables-1].pfs_batch_update(); - JOIN_TAB *join_tab= join->join_tab + + JOIN_TAB *start_tab= join->join_tab + (join->tables_list ? join->const_tables : 0); + + if (alloc_full_join_duplicate_filters(join, join->join_tab, + top_level_tables)) + DBUG_RETURN(-1); + if (join->outer_ref_cond && !join->outer_ref_cond->val_bool()) error= NESTED_LOOP_NO_MORE_ROWS; else - error= join->first_select(join,join_tab,0); + error= join->first_select(join,start_tab,0); if (error >= NESTED_LOOP_OK && likely(join->thd->killed != ABORT_QUERY)) - error= join->first_select(join,join_tab,1); + error= join->first_select(join,start_tab,1); + + free_full_join_duplicate_filters(join, join->join_tab, top_level_tables); } join->thd->limit_found_rows= join->send_records - join->duplicate_rows; @@ -24396,7 +25729,7 @@ sub_select_postjoin_aggr(JOIN *join, JOIN_TAB *join_tab, bool end_of_records) { rc= aggr->end_send(); if (rc >= NESTED_LOOP_OK) - rc= sub_select(join, join_tab, end_of_records); + rc= sub_select(join, join_tab, true); DBUG_RETURN(rc); } @@ -24619,6 +25952,123 @@ sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool end_of_records) return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS. 
*/ +/* + Rescan the right table of a FULL JOIN to emit null-complemented + rows for the right-side rows that were not matched during the first + (LEFT JOIN) pass. + + The rescan is forced to a plain sequential scan (not the original + JT_REF / JT_EQ_REF access method, which would look up keys derived + from the now-nullified left side and return zero rows). The pushed + SQL_SELECT and on_precond are cleared for the rescan and restored + afterwards so that subsequent executions (prepared statement + re-execution, correlated subquery iterations) see the originals. +*/ + +static enum_nested_loop_state +run_fj_null_complement_pass(JOIN *join, JOIN_TAB *join_tab) +{ + join_tab->writing_null_complements= true; + Item *saved_on_precond= join_tab->on_precond; + join_tab->on_precond= nullptr; + + /* + Restart reading from the right table as a full scan. The keyread + state must be saved and restored because a correlated subquery + will expect the keyread to be active during a later read. + */ + const int saved_keyread= join_tab->table->file->ha_end_active_keyread(); + if (join_tab->type == JT_FT) + join_tab->table->file->ha_ft_end(); + else + join_tab->table->file->ha_index_or_rnd_end(); + + // Save-off important state before restarting the full scan. + READ_RECORD saved_read_record= join_tab->read_record; + READ_RECORD::Setup_func saved_read_first= join_tab->read_first_record; + SQL_SELECT *saved_select= join_tab->select; + join_tab->read_first_record= join_init_read_record; + join_tab->select= nullptr; + + // full scan of right table and null-complement generation + enum_nested_loop_state nls= sub_select(join, join_tab, 0); + + // restore the saved-off state. 
+ join_tab->read_first_record= saved_read_first; + join_tab->read_record= saved_read_record; + join_tab->select= saved_select; + join_tab->writing_null_complements= false; + join_tab->on_precond= saved_on_precond; + /* + join_init_read_record (via join_tab->read_first_record above) + started an RND scan and we must end it before restoring the keyread + state that we saved near the start of this function. + + TODO: probably this should be a scope_exit in case we ever have to + return early, before getting to this point. + */ + if (join_tab->table->file->inited) + join_tab->table->file->ha_index_or_rnd_end(); + join_tab->table->file->ha_restart_keyread(saved_keyread); + + if (nls == NESTED_LOOP_NO_MORE_ROWS) + nls= NESTED_LOOP_OK; + return nls; +} + + +static void +reset_fj_duplicate_filters(JOIN_TAB *join_tab) +{ + /* + If this tab is the left-most JOIN_TAB for one or more FULL JOIN + right side JOIN_TABs, then reset their duplicate filters so that + each fresh iteration of this tab accumulates a clean set of + matched right side rowids. The matching null-complement rescans + fire at the end of sub_select for this JOIN_TAB, below. + + Skip when this tab is itself in the middle of a null-complement + rescan (writing_null_complements is true). That path is + entered from run_fj_null_complement_pass and is not a fresh + outer scope iteration; resetting fj_dups here would wipe out + the matches the outer scan accumulated. + */ + if (!join_tab->writing_null_complements) + { + for (JOIN_TAB *target= join_tab->fj_first_target; + target; + target= target->fj_next_target) + { + target->fj_dups->reset(); + } + } +} + + +static enum_nested_loop_state +run_fj_null_complement_passes(JOIN *join, JOIN_TAB *join_tab) +{ + /* + At the end of this JOIN_TAB's scan, run the FULL JOIN null- + complement rescan for each right side tab whose left-most JOIN_TAB + is this tab. 
+ */ + enum_nested_loop_state rc= NESTED_LOOP_OK; + if (!join_tab->writing_null_complements) + { + for (JOIN_TAB *target= join_tab->fj_first_target; + target; + target= target->fj_next_target) + { + rc= run_fj_null_complement_pass(join, target); + if (rc != NESTED_LOOP_OK) + break; + } + } + return rc; +} + + enum_nested_loop_state sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) { @@ -24652,6 +26102,9 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) (*join_tab->next_select)(join,join_tab+1,end_of_records); DBUG_RETURN(nls); } + + reset_fj_duplicate_filters(join_tab); + join_tab->tracker->r_scans++; rc= NESTED_LOOP_OK; @@ -24758,7 +26211,15 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) if (rc == NESTED_LOOP_NO_MORE_ROWS) { - if (join_tab->last_inner && !join_tab->found) + /* + Skip the standard outer-join null complement when we are doing a + FULL JOIN null-complement rescan of the right table. During + that rescan the evaluate_join_record() early-exit path handles + unmatched rows directly, and the normal "no match found" path + must not fire because join_tab->found is not being maintained. + */ + if (join_tab->last_inner && !join_tab->found && + !join_tab->writing_null_complements) { rc= evaluate_null_complemented_join_record(join, join_tab); if (rc == NESTED_LOOP_NO_MORE_ROWS) @@ -24768,12 +26229,118 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) rc= NESTED_LOOP_OK; } + /* + When the left side of a FULL JOIN has const table optimizations, + then the right side table becomes the inner-most JOIN_TAB and also + its own null complement target. So we must end the batch mode + before running the null-complement pass. That pass will cause + sub_select to be called again on that same JOIN_TAB which will + start batch mode a separate time for the separate table scan. 
+ */ if (join_tab->cached_pfs_batch_update) join_tab->table->file->end_psi_batch_mode(); + if (rc == NESTED_LOOP_OK) + rc= run_fj_null_complement_passes(join, join_tab); + DBUG_RETURN(rc); } + +/* + Recursively mark all base tables within a TABLE_LIST as null rows. + Handles both single tables and nested joins (where table is NULL + but nested_join contains child TABLE_LISTs). +*/ +static void mark_table_list_as_null_row(TABLE_LIST *tl) +{ + if (tl->table) + mark_as_null_row(tl->table); + else if (tl->nested_join) + { + List_iterator<TABLE_LIST> li(tl->nested_join->join_list); + TABLE_LIST *child; + while ((child= li++)) + mark_table_list_as_null_row(child); + } +} + + +/* Reverse of mark_table_list_as_null_row: restore real row data. */ +static void unmark_table_list_as_null_row(TABLE_LIST *tl) +{ + if (tl->table) + unmark_as_null_row(tl->table); + else if (tl->nested_join) + { + List_iterator<TABLE_LIST> li(tl->nested_join->join_list); + TABLE_LIST *child; + while ((child= li++)) + unmark_table_list_as_null_row(child); + } +} + + +/* + Handle a single row read during a FULL JOIN null-complement rescan. + + Called from evaluate_join_record when writing_null_complements is + set and the current table has an fj_dups filter (i.e. it is the + right side of a FULL JOIN). The steps are: + + 1. Skip rows whose rowid was already recorded during the first + (LEFT JOIN) pass. + 2. Null-complement the FULL JOIN partner side. + 3. Apply WHERE (only) to the null-complemented row. select_cond + has the structure + trigcond(found, WHERE) AND trigcond(not_null_compl, ON) + so setting found=1 activates WHERE while not_null_compl=0 + disables ON (which returns TRUE when its trigcond is off). + Restore both flags after evaluation. + 4. Forward the row through the remaining join tabs. + 5. Unmark the partner side before returning. 
+*/ + +static enum_nested_loop_state +evaluate_fj_null_complement_row(JOIN *join, JOIN_TAB *join_tab, + COND *select_cond) +{ + bool is_dup= false; + if (join_tab->fj_dups->check_rowids(join->thd, &is_dup)) + return NESTED_LOOP_ERROR; + if (is_dup) + return NESTED_LOOP_OK; + + mark_table_list_as_null_row(join_tab->tab_list->foj_partner); + + if (select_cond) + { + bool saved_found= join_tab->found; + bool saved_nnc= join_tab->not_null_compl; + join_tab->found= 1; + join_tab->not_null_compl= 0; + bool where_ok= select_cond->val_bool(); + join_tab->found= saved_found; + join_tab->not_null_compl= saved_nnc; + if (!where_ok) + { + unmark_table_list_as_null_row(join_tab->tab_list->foj_partner); + return NESTED_LOOP_OK; + } + } + + enum_nested_loop_state rc= + (*join_tab->next_select)(join, join_tab+1, false); + join->thd->get_stmt_da()->inc_current_row_for_warning(); + + unmark_table_list_as_null_row(join_tab->tab_list->foj_partner); + + if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS) + return rc; + return NESTED_LOOP_OK; +} + + /** @brief Process one row of the nested loop join. @@ -24813,6 +26380,10 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab, DBUG_RETURN(NESTED_LOOP_KILLED); /* purecov: inspected */ } + // FULL JOIN null-complement generation. + if (join_tab->writing_null_complements && join_tab->fj_dups) + DBUG_RETURN(evaluate_fj_null_complement_row(join, join_tab, select_cond)); + join_tab->tracker->r_rows++; if (select_cond) @@ -24945,6 +26516,18 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab, DBUG_PRINT("counts", ("examined_rows: %llu found: %d", (ulonglong) join->thd->m_examined_row_count, (int) found)); + /* + For FULL JOIN: reaching this point means the ON condition matched + (because when 'found' is still 0, the WHERE trigcond is disabled). + Remember the right-side rowid so the null-complement pass skips + it, even if the WHERE later rejects the row and clears found. 
+ */ + if (join_tab->fj_dups && !join_tab->writing_null_complements) + { + if (join_tab->fj_dups->remember_rowids(join->thd)) + DBUG_RETURN(NESTED_LOOP_ERROR); + } + if (found) { enum enum_nested_loop_state rc; @@ -26809,7 +28392,7 @@ make_cond_for_table_from_pred(THD *thd, Item *root_cond, Item *cond, { table_map rand_table_bit= (table_map) RAND_TABLE_BIT; - if (used_table && !(cond->used_tables() & used_table)) + if (!cond || (used_table && !(cond->used_tables() & used_table))) return (COND*) 0; // Already checked if (cond->type() == Item::COND_ITEM) @@ -31952,8 +33535,11 @@ static void print_table_array(THD *thd, continue; } - /* JOIN_TYPE_OUTER is just a marker unrelated to real join */ - if (curr->outer_join & (JOIN_TYPE_LEFT|JOIN_TYPE_RIGHT)) + if (curr->outer_join & JOIN_TYPE_FULL) + { + str->append(STRING_WITH_LEN(" full join ")); + } + else if (curr->outer_join & (JOIN_TYPE_LEFT|JOIN_TYPE_RIGHT)) { /* MySQL converts right to left joins */ str->append(STRING_WITH_LEN(" left join ")); @@ -31964,8 +33550,9 @@ static void print_table_array(THD *thd, str->append(STRING_WITH_LEN(" semi join ")); else str->append(STRING_WITH_LEN(" join ")); - + curr->print(thd, eliminated_tables, str, query_type); + if (curr->on_expr) { str->append(STRING_WITH_LEN(" on(")); diff --git a/sql/sql_select.h b/sql/sql_select.h index 2507a871a6005..f485539b2ed76 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -249,10 +249,51 @@ class AGGR_OP; class Filesort; struct SplM_plan_info; class SplM_opt_info; +class full_join_duplicate_filter; typedef struct st_join_table { - TABLE *table; - TABLE_LIST *tab_list; + /* + Non-NULL only on the right side of a FULL JOIN. Tracks right-side + rowids matched during the LEFT JOIN pass so the null-complement + rescan can skip them. + */ + full_join_duplicate_filter *fj_dups; + + /* + True when sending null-complemented rows to the result sink when + evaluating the right side of a FULL JOIN. 
+ */ + bool writing_null_complements{false}; + + /* + Linked list of FULL JOIN right side JOIN_TABs whose left-side + null-complement generation ends at this tab. At the end of + sub_select(this, 0), each target's null-complement rescan runs so + the rescan participates in the enclosing nested loop and + cross-products with any outer scope tables. This will be NULL on + tabs that are not the left-most JOIN_TAB of any FULL JOIN's left + side. Ordered inside-out for chained FULL JOINs so that an inner + FJ's rescan runs before an outer FJ's rescan and can update the + outer fj_dups filter through the normal forward chain. + + To visit this linked list, the first element is on fj_first_target + and subsequent elements are on fj_next_target (see function + alloc_full_join_duplicate_filters). + */ + struct st_join_table *fj_first_target; + + /* + Next pointer for the fj_first_target list. Not NULL only on FULL + JOIN right side JOIN_TABs that share a left-most JOIN_TAB with + another FULL JOIN right side JOIN_TAB (like in the case of chained + FULL JOINs). Put another way, one FULL JOIN's left-most JOIN_TAB + may be another's right JOIN_TAB. + */ + struct st_join_table *fj_next_target; + + TABLE *table; /**< pointer to table cursor */ + TABLE_LIST *tab_list; /**< pointer to query table, e.g. `t1` */ + KEYUSE *keyuse; /**< pointer to first used key */ KEY *hj_key; /**< descriptor of the used best hash join key not supported by any index */ @@ -1193,6 +1234,7 @@ class AGGR_OP :public Sql_alloc {}; enum_nested_loop_state put_record() { return put_record(false); }; + /* Send the result of operation further (to a next operation/client) This function is called after all records were put into tmp table. @@ -1414,6 +1456,12 @@ class JOIN :public Sql_alloc table_map eq_ref_tables; table_map allowed_top_level_tables; + + /* + Tables in nests that contain FULL JOINs, along with their nest siblings. 
+ */ + table_map full_join_nest_tables; + ha_rows send_records,found_records, accepted_rows; /* diff --git a/sql/sql_view.cc b/sql/sql_view.cc index 2a65e860e1a4e..08ca1f8519568 100644 --- a/sql/sql_view.cc +++ b/sql/sql_view.cc @@ -41,6 +41,8 @@ const LEX_CSTRING view_type= { STRING_WITH_LEN("VIEW") }; +extern bool check_full_join_base_tables(List<TABLE_LIST> *); + static int mysql_register_view(THD *thd, DDL_LOG_STATE *ddl_log_state, TABLE_LIST *view, enum_view_create_mode mode, char *backup_file_name); @@ -568,6 +570,20 @@ bool mysql_create_view(THD *thd, TABLE_LIST *views, goto err; } + /* + Reject VIEW definitions that put a FULL JOIN on the right side of a + LEFT or RIGHT JOIN. CREATE VIEW only prepares the body (doesn't optimize), + so the same check that runs in JOIN::optimize_inner doesn't run here. + */ + for (sl= select_lex; sl; sl= sl->next_select()) + { + if (check_full_join_base_tables(&sl->top_join_list)) + { + res= TRUE; + goto err; + } + } + /* view list (list of view fields names) */ if (lex->view_list.elements) { @@ -1685,7 +1701,8 @@ bool mysql_make_view(THD *thd, TABLE_SHARE *share, TABLE_LIST *view_table_alias, parent_query_lex->set_stmt_unsafe_flags(view_query_lex->get_stmt_unsafe_flags()); view_is_mergeable= (view_table_alias->algorithm != VIEW_ALGORITHM_TMPTABLE && - view_query_lex->can_be_merged()); + view_query_lex->can_be_merged() && + !view_table_alias->contains_full_join()); if (view_is_mergeable) { @@ -1918,6 +1935,7 @@ bool mysql_make_view(THD *thd, TABLE_SHARE *share, TABLE_LIST *view_table_alias, DBUG_ASSERT(view_query_lex == thd->lex); thd->lex= parent_query_lex; // Needed for prepare_security result= !view_table_alias->prelocking_placeholder && view_table_alias->prepare_security(thd); + parent_query_lex->full_join_count+= view_query_lex->full_join_count; lex_end(view_query_lex); end: diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index df4395395dcde..aec9b16d81037 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -1213,8 +1213,8 @@ bool 
my_yyoverflow(short **a, YYSTYPE **b, size_t *yystacksize); %token ST_COLLECT_SYM /* A dummy token to force the priority of table_ref production in a join. */ %left CONDITIONLESS_JOIN -%left JOIN_SYM INNER_SYM STRAIGHT_JOIN CROSS LEFT RIGHT ON_SYM USING - +%left JOIN_SYM INNER_SYM STRAIGHT_JOIN CROSS LEFT RIGHT ON_SYM USING FULL + %left SET_VAR %left OR_SYM OR2_SYM %left XOR @@ -12599,8 +12599,79 @@ join_table: if (unlikely(!($$= lex->current_select->convert_right_join()))) MYSQL_YYABORT; } - ; + /* FULL OUTER JOIN variants */ + | table_ref FULL opt_outer JOIN_SYM table_ref + ON + { + MYSQL_YYABORT_UNLESS($1 && $5); + + Select->add_joined_table($1); + $1->outer_join|= (JOIN_TYPE_LEFT | + JOIN_TYPE_FULL); + $1->foj_partner= $5; + + Select->add_joined_table($5); + $5->outer_join|= (JOIN_TYPE_RIGHT | + JOIN_TYPE_FULL); + $5->foj_partner= $1; + + /* Change the current name resolution context to a local context. */ + if (unlikely(push_new_name_resolution_context(thd, $1, $5))) + MYSQL_YYABORT; + Select->parsing_place= IN_ON; + } + expr + { + /* + Attach the ON expression to the right side only, the same + way LEFT JOIN does. Code that needs the ON for the left + side reaches it through foj_partner. 
+ */ + add_join_on(thd, $5, $8); + $5->on_context= Lex->pop_context(); + Select->parsing_place= NO_MATTER; + $$= $1; + ++Lex->full_join_count; + } + | table_ref FULL opt_outer JOIN_SYM table_factor + { + MYSQL_YYABORT_UNLESS($1 && $5); + Select->add_joined_table($1); + $1->outer_join|= (JOIN_TYPE_LEFT | + JOIN_TYPE_FULL); + $1->foj_partner= $5; + + Select->add_joined_table($5); + $5->outer_join|= (JOIN_TYPE_RIGHT | + JOIN_TYPE_FULL); + $5->foj_partner= $1; + } + USING '(' using_list ')' + { + add_join_natural($1,$5,$9,Select); + ++Lex->full_join_count; + } + | table_ref NATURAL FULL opt_outer JOIN_SYM table_factor + { + MYSQL_YYABORT_UNLESS($1 && $6); + + Select->add_joined_table($1); + $1->outer_join|= (JOIN_TYPE_LEFT | + JOIN_TYPE_FULL | + JOIN_TYPE_NATURAL); + $1->foj_partner= $6; + + Select->add_joined_table($6); + $6->outer_join|= (JOIN_TYPE_RIGHT | + JOIN_TYPE_FULL | + JOIN_TYPE_NATURAL); + $6->foj_partner= $1; + + add_join_natural($6,$1,NULL,Select); + ++Lex->full_join_count; + } + ; inner_join: /* $$ set if using STRAIGHT_JOIN, false otherwise */ JOIN_SYM { $$ = 0; } @@ -16976,7 +17047,6 @@ keyword_func_sp_var_and_label: | FILE_SYM | FIRST_SYM | FOUND_SYM - | FULL | GENERAL | GENERATED_SYM | GRANTS @@ -17310,6 +17380,7 @@ reserved_keyword_udt_not_param_type: | FIRST_VALUE_SYM | FOREIGN | FROM + | FULL | FULLTEXT_SYM | GOTO_ORACLE_SYM | GRANT @@ -18057,6 +18128,7 @@ set_expr_or_default: set_expr_misc: ON { $$= new (thd->mem_root) Item_string_sys(thd, "ON", 2); } | ALL { $$= new (thd->mem_root) Item_string_sys(thd, "ALL", 3); } + | FULL { $$= new (thd->mem_root) Item_string_sys(thd, "FULL", 4); } | BINARY { $$= new (thd->mem_root) Item_string_sys(thd, "binary", 6); } ; diff --git a/sql/table.cc b/sql/table.cc index fb25f4ddaf1bf..4d3905258d275 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -6831,7 +6831,7 @@ bool TABLE_LIST::set_insert_values(MEM_ROOT *mem_root) RETURN TRUE if a leaf, FALSE otherwise. 
*/ -bool TABLE_LIST::is_leaf_for_name_resolution() +bool TABLE_LIST::is_leaf_for_name_resolution() const { return (is_merged_derived() || is_natural_join || is_join_columns_complete || !nested_join); @@ -6861,13 +6861,13 @@ bool TABLE_LIST::is_leaf_for_name_resolution() else return 'this' */ -TABLE_LIST *TABLE_LIST::first_leaf_for_name_resolution() +TABLE_LIST *TABLE_LIST::first_leaf_for_name_resolution() const { TABLE_LIST *UNINIT_VAR(cur_table_ref); NESTED_JOIN *cur_nested_join; if (is_leaf_for_name_resolution()) - return this; + return const_cast<TABLE_LIST *>(this); DBUG_ASSERT(nested_join); for (cur_nested_join= nested_join; @@ -6882,7 +6882,8 @@ TABLE_LIST *TABLE_LIST::first_leaf_for_name_resolution() already at the front of the list. Otherwise the first operand is in the end of the list of join operands. */ - if (!(cur_table_ref->outer_join & JOIN_TYPE_RIGHT)) + if (!(cur_table_ref->outer_join & JOIN_TYPE_RIGHT) || + (cur_table_ref->outer_join & JOIN_TYPE_FULL)) { TABLE_LIST *next; while ((next= it++)) @@ -6937,7 +6938,8 @@ TABLE_LIST *TABLE_LIST::last_leaf_for_name_resolution() 'join_list' are in reverse order, thus the last operand is in the end of the list. */ - if ((cur_table_ref->outer_join & JOIN_TYPE_RIGHT)) + if ((cur_table_ref->outer_join & JOIN_TYPE_RIGHT) && + !(cur_table_ref->outer_join & JOIN_TYPE_FULL)) { List_iterator_fast<TABLE_LIST> it(cur_nested_join->join_list); TABLE_LIST *next; @@ -7239,13 +7241,52 @@ TABLE *TABLE_LIST::get_real_join_table() } +/* + Return true when the given join list, or any join nest inside it, + contains a table reference flagged JOIN_TYPE_FULL. 
+*/ +static bool join_list_contains_full_join(List<TABLE_LIST> *join_list) +{ + List_iterator<TABLE_LIST> it(*join_list); + TABLE_LIST *tbl; + + while ((tbl= it++)) + { + if (tbl->outer_join & JOIN_TYPE_FULL) + return true; + if (tbl->nested_join && + join_list_contains_full_join(&tbl->nested_join->join_list)) + return true; + } + + return false; +} + + +bool TABLE_LIST::contains_full_join() const +{ + List<TABLE_LIST> *join_list= nullptr; + + if (view) + join_list= &view->first_select_lex()->top_join_list; + else if (derived) + join_list= &derived->first_select()->top_join_list; + else if (nested_join) + join_list= &nested_join->join_list; + else + return false; + + return join_list_contains_full_join(join_list); +} + + Natural_join_column::Natural_join_column(Field_translator *field_param, TABLE_LIST *tab) { DBUG_ASSERT(tab->field_translation); view_field= field_param; - table_field= NULL; + table_field= nullptr; table_ref= tab; + natural_full_join_field= nullptr; is_common= FALSE; } @@ -7257,12 +7298,16 @@ Natural_join_column::Natural_join_column(Item_field *field_param, table_field= field_param; view_field= NULL; table_ref= tab; + natural_full_join_field= nullptr; is_common= FALSE; } const Lex_ident_column Natural_join_column::name() { + if (natural_full_join_field) + return natural_full_join_field->name; + if (view_field) { DBUG_ASSERT(table_field == NULL); @@ -7275,12 +7320,25 @@ const Lex_ident_column Natural_join_column::name() Item *Natural_join_column::create_item(THD *thd) { + if (natural_full_join_field) + return natural_full_join_field; + if (view_field) { DBUG_ASSERT(table_field == NULL); return create_view_field(thd, table_ref, &view_field->item, &view_field->name); } + + return table_field; +} + + +Item *Natural_join_column::get_item() +{ + if (view_field) + return view_field->item; + return table_field; } @@ -10233,6 +10291,8 @@ bool TABLE_LIST::init_derived(THD *thd, bool init_view) hint_table_state(thd, this, MERGE_HINT_ENUM, optimizer_flag(thd, OPTIMIZER_SWITCH_DERIVED_MERGE)); // 
(2) + DBUG_ASSERT(!(outer_join & JOIN_TYPE_FULL) || foj_partner); + if (!is_materialized_derived() && unit->can_be_merged() && /* Following is special case of @@ -10260,7 +10320,13 @@ bool TABLE_LIST::init_derived(THD *thd, bool init_view) (thd->lex->sql_command == SQLCOM_DELETE && (((Sql_cmd_delete *) thd->lex->m_sql_cmd)->is_multitable() || thd->lex->query_tables->is_multitable())))) && - !is_recursive_with_table()) + !is_recursive_with_table() && + /* + Derived tables that participate in a FULL JOIN must not be + merged because the FULL JOIN null-complement logic only + works when the physical table is available. + */ + !foj_partner) set_merged_derived(); else set_materialized_derived(); diff --git a/sql/table.h b/sql/table.h index ba15cf45a1257..951d14188d8ad 100644 --- a/sql/table.h +++ b/sql/table.h @@ -2250,9 +2250,11 @@ class IS_table_read_plan; #define VIEW_ALGORITHM_MERGE_FRM 1U #define VIEW_ALGORITHM_TMPTABLE_FRM 2U -#define JOIN_TYPE_LEFT 1U -#define JOIN_TYPE_RIGHT 2U -#define JOIN_TYPE_OUTER 4U /* Marker that this is an outer join */ +#define JOIN_TYPE_LEFT 1U +#define JOIN_TYPE_RIGHT 2U +#define JOIN_TYPE_FULL 4U +#define JOIN_TYPE_OUTER 8U /* Marker that this is an outer join */ +#define JOIN_TYPE_NATURAL 16U /* view WITH CHECK OPTION parameter options */ #define VIEW_CHECK_NONE 0 @@ -2301,12 +2303,14 @@ struct Field_translator Field (for tables), or a Field_translator (for views). */ +class Item_func_coalesce; class Natural_join_column: public Sql_alloc { public: Field_translator *view_field; /* Column reference of merge view. */ Item_field *table_field; /* Column reference of table or temp view. */ TABLE_LIST *table_ref; /* Original base table/view reference. */ + Item_func_coalesce *natural_full_join_field; /* True if a common join column of two NATURAL/USING join operands. 
Notice that when we have a hierarchy of nested NATURAL/USING joins, a column can @@ -2320,6 +2324,7 @@ class Natural_join_column: public Sql_alloc Natural_join_column(Item_field *field_param, TABLE_LIST *tab); const Lex_ident_column name(); Item *create_item(THD *thd); + Item *get_item(); Field *field(); const Lex_ident_table safe_table_name() const; const Lex_ident_db safe_db_name() const; @@ -3071,7 +3076,7 @@ struct TABLE_LIST bool set_insert_values(MEM_ROOT *mem_root); void replace_view_error_with_generic(THD *thd); TABLE_LIST *find_underlying_table(TABLE *table); - TABLE_LIST *first_leaf_for_name_resolution(); + TABLE_LIST *first_leaf_for_name_resolution() const; TABLE_LIST *last_leaf_for_name_resolution(); /* System Versioning */ @@ -3104,7 +3109,14 @@ struct TABLE_LIST return tbl; } TABLE *get_real_join_table(); - bool is_leaf_for_name_resolution(); + + /* + returns true when *this represents either a VIEW, + derived table, or join nest which contains a FULL JOIN. + */ + bool contains_full_join() const; + + bool is_leaf_for_name_resolution() const; inline TABLE_LIST *top_table() { return belong_to_view ? belong_to_view : this; } inline bool prepare_check_option(THD *thd) @@ -3303,6 +3315,13 @@ struct TABLE_LIST tabledef_version.str= (const uchar *) version->str; tabledef_version.length= version->length; } + + /* + If not nullptr, then foj_partner points to the other + table in a FULL OUTER JOIN. For example, + SELECT ... FROM *this FULL OUTER JOIN foj_partner ... + */ + TABLE_LIST *foj_partner{nullptr}; private: bool prep_check_option(THD *thd, uint8 check_opt_type); bool prep_where(THD *thd, Item **conds, bool no_where_clause);