From 669a23ad94fc91a77590980394cee1500607f388 Mon Sep 17 00:00:00 2001 From: Richard Wesley Date: Wed, 20 Aug 2025 14:55:16 -0700 Subject: [PATCH] Internal #5662: IEJoin Test Plans * Check the main IEJoin tests to make sure they are generating IEJoins. fixes: duckdblabs/duckdb-internal#5662 --- .../join/iejoin/iejoin_issue_6314.test_slow | 36 +++++++++ test/sql/join/iejoin/iejoin_issue_6861.test | 27 +++++++ test/sql/join/iejoin/iejoin_issue_7278.test | 80 ++++++++++++++++--- .../join/iejoin/predicate_expressions.test | 42 ++++++++++ .../join/iejoin/test_iejoin_east_west.test | 50 +++++++++++- test/sql/join/iejoin/test_iejoin_events.test | 11 +++ .../join/iejoin/test_iejoin_null_keys.test | 24 ++++++ .../sql/join/iejoin/test_iejoin_overlaps.test | 22 +++++ .../iejoin/test_iejoin_sort_tasks.test_slow | 18 +++++ 9 files changed, 295 insertions(+), 15 deletions(-) diff --git a/test/sql/join/iejoin/iejoin_issue_6314.test_slow b/test/sql/join/iejoin/iejoin_issue_6314.test_slow index e480b14ebc9c..f1e7de0694ee 100644 --- a/test/sql/join/iejoin/iejoin_issue_6314.test_slow +++ b/test/sql/join/iejoin/iejoin_issue_6314.test_slow @@ -486,6 +486,15 @@ statement ok INSERT INTO flags VALUES('2023-02-10 20:18:13.432147','2023-02-13 15:22:40.650655','legacy','2/11'); # INNER join +query II +EXPLAIN +SELECT ota.*, flags.desc as flag +FROM ota +INNER JOIN flags ON ota.ts BETWEEN flags.start AND flags.end +ORDER BY ts; +---- +physical_plan <REGEX>:.*IE_JOIN.* + statement ok CREATE TABLE inner_join AS SELECT ota.*, flags.desc as flag FROM ota @@ -498,6 +507,15 @@ SELECT COUNT(*), COUNT(ts), COUNT(flag) FROM inner_join 87775 87775 87775 # LEFT join +query II +EXPLAIN +SELECT ota.*, flags.desc as flag +FROM ota +LEFT JOIN flags ON ota.ts BETWEEN flags.start AND flags.end +ORDER BY ts; +---- +physical_plan <REGEX>:.*IE_JOIN.* + statement ok CREATE TABLE left_join AS SELECT ota.*, flags.desc as flag FROM ota @@ -510,6 +528,15 @@ SELECT COUNT(*), COUNT(ts), COUNT(flag) FROM left_join 167137 167137 87775 
# RIGHT join +query II +EXPLAIN +SELECT ota.*, flags.desc as flag +FROM ota +RIGHT JOIN flags ON ota.ts BETWEEN flags.start AND flags.end +ORDER BY ts; +---- +physical_plan <REGEX>:.*IE_JOIN.* + statement ok CREATE TABLE right_join AS SELECT ota.*, flags.desc as flag FROM ota @@ -522,6 +549,15 @@ SELECT COUNT(*), COUNT(ts), COUNT(flag) FROM right_join 87842 87775 87842 # FULL OUTER join +query II +EXPLAIN +SELECT ota.*, flags.desc as flag +FROM ota +FULL OUTER JOIN flags ON ota.ts BETWEEN flags.start AND flags.end +ORDER BY ts; +---- +physical_plan <REGEX>:.*IE_JOIN.* + statement ok CREATE TABLE full_outer_join AS SELECT ota.*, flags.desc as flag FROM ota diff --git a/test/sql/join/iejoin/iejoin_issue_6861.test b/test/sql/join/iejoin/iejoin_issue_6861.test index 388e4a3171d7..5fbc94416a6f 100644 --- a/test/sql/join/iejoin/iejoin_issue_6861.test +++ b/test/sql/join/iejoin/iejoin_issue_6861.test @@ -11,6 +11,9 @@ CREATE TABLE test(x INT); statement ok SET merge_join_threshold=0 +statement ok +SET nested_loop_join_threshold=0; + query II SELECT * FROM test AS a, test AS b @@ -26,18 +29,42 @@ CREATE TABLE all_null AS SELECT * FROM test; statement ok UPDATE all_null SET x=(NULL); +query II +EXPLAIN +SELECT * +FROM all_null AS a, all_null AS b +WHERE (a.x BETWEEN b.x AND b.x); +---- +physical_plan <REGEX>:.*IE_JOIN.* + query II SELECT * FROM all_null AS a, all_null AS b WHERE (a.x BETWEEN b.x AND b.x); ---- +query II +EXPLAIN +SELECT * +FROM test AS a, all_null AS b +WHERE (a.x BETWEEN b.x AND b.x); +---- +physical_plan <REGEX>:.*IE_JOIN.* + query II SELECT * FROM test AS a, all_null AS b WHERE (a.x BETWEEN b.x AND b.x); ---- +query II +EXPLAIN +SELECT * +FROM all_null AS a, test AS b +WHERE (a.x BETWEEN b.x AND b.x); +---- +physical_plan <REGEX>:.*IE_JOIN.* + query II SELECT * FROM all_null AS a, test AS b diff --git a/test/sql/join/iejoin/iejoin_issue_7278.test b/test/sql/join/iejoin/iejoin_issue_7278.test index 915b0446ed5e..837e766221a4 100644 --- a/test/sql/join/iejoin/iejoin_issue_7278.test +++ 
b/test/sql/join/iejoin/iejoin_issue_7278.test @@ -25,11 +25,15 @@ create table snapshot_data as from generate_series(1,1000) t(i) ; -query I -with cal_last_13 as ( +# IEJoin is disabled for CTEs for some reason +statement ok +create table cal_last_13 as( select * from calendar where yyyyww in (SELECT yyyyww FROM calendar) -) +); + +query II +explain select count(*) from snapshot_data data @@ -37,16 +41,29 @@ join cal_last_13 cal on data.snapshot_ts >= cal.start_ts and data.snapshot_ts <= cal.end_ts ---- -1000 +physical_plan <REGEX>:.*IE_JOIN.* query I -with cal_last_13 as ( +select + count(*) +from snapshot_data data +join cal_last_13 cal + on data.snapshot_ts >= cal.start_ts + and data.snapshot_ts <= cal.end_ts +---- +1000 + +statement ok +create or replace table cal_last_13 as ( select * from calendar where yyyyww in (SELECT yyyyww FROM calendar) union all select * from calendar where yyyyww in (SELECT yyyyww FROM calendar) -) +); + +query II +explain select count(*) from snapshot_data data @@ -54,10 +71,20 @@ join cal_last_13 cal on data.snapshot_ts >= cal.start_ts and data.snapshot_ts <= cal.end_ts ---- -2000 +physical_plan <REGEX>:.*IE_JOIN.* query I -with cal_last_13 as ( +select + count(*) +from snapshot_data data +join cal_last_13 cal + on data.snapshot_ts >= cal.start_ts + and data.snapshot_ts <= cal.end_ts +---- +2000 + +statement ok +create or replace table cal_last_13 as ( select * from calendar where yyyyww in (SELECT yyyyww FROM calendar) union all @@ -66,7 +93,10 @@ with cal_last_13 as ( union all select * from calendar where yyyyww in (SELECT yyyyww FROM calendar) -) +); + +query II +explain select count(*) from snapshot_data data @@ -74,10 +104,20 @@ join cal_last_13 cal on data.snapshot_ts >= cal.start_ts and data.snapshot_ts <= cal.end_ts ---- -3000 +physical_plan <REGEX>:.*IE_JOIN.* query I -with cal_last_13 as ( +select + count(*) +from snapshot_data data +join cal_last_13 cal + on data.snapshot_ts >= cal.start_ts + and data.snapshot_ts <= cal.end_ts +---- +3000 + 
+statement ok +create or replace table cal_last_13 as ( select * from calendar where yyyyww in (SELECT yyyyww FROM calendar) union all @@ -86,7 +126,23 @@ with cal_last_13 as ( union all select * from calendar where yyyyww in (SELECT yyyyww FROM calendar) -) +); + +query II +explain +select + count(*) +from snapshot_data data +join cal_last_13 cal + on data.snapshot_ts >= cal.start_ts + and data.snapshot_ts <= cal.end_ts +join cal_last_13 cal2 + on data.snapshot_ts >= cal2.start_ts + and data.snapshot_ts <= cal2.end_ts +---- +physical_plan <REGEX>:.*IE_JOIN.* + +query I select count(*) from snapshot_data data diff --git a/test/sql/join/iejoin/predicate_expressions.test b/test/sql/join/iejoin/predicate_expressions.test index bb0f05302a60..ad8d23a0ecfe 100644 --- a/test/sql/join/iejoin/predicate_expressions.test +++ b/test/sql/join/iejoin/predicate_expressions.test @@ -8,6 +8,9 @@ PRAGMA enable_verification statement ok SET merge_join_threshold=0 +statement ok +PRAGMA explain_output = PHYSICAL_ONLY; + # Create a range of dates statement ok create table calendar as SELECT * @@ -34,6 +37,19 @@ from calendar cross join generate_series(1, 85) as n # Aggregate each table by using a range join +query II +explain +select + range, + count(*) as n +from scd2_non_null +inner join calendar + on range between range_start and ifnull(range_end,'2099-01-01') +group by range +order by range +---- +physical_plan <REGEX>:.*IE_JOIN.* + query II nosort expected select range, @@ -45,6 +61,19 @@ group by range order by range # First key should work +query II +explain +select + range, + count(*) as n +from scd2 +inner join calendar + on range <= ifnull(range_end,'2099-01-01') and range_start <= range +group by range +order by range +---- +physical_plan <REGEX>:.*IE_JOIN.* + query II nosort expected select range, @@ -56,6 +85,19 @@ group by range order by range # Second key should work +query II +explain +select + range, + count(*) as n +from scd2 +inner join calendar + on range between range_start and 
ifnull(range_end,'2099-01-01') +group by range +order by range +---- +physical_plan <REGEX>:.*IE_JOIN.* + query II nosort expected select range, diff --git a/test/sql/join/iejoin/test_iejoin_east_west.test b/test/sql/join/iejoin/test_iejoin_east_west.test index 0f4897179f26..44541f2b0e6a 100644 --- a/test/sql/join/iejoin/test_iejoin_east_west.test +++ b/test/sql/join/iejoin/test_iejoin_east_west.test @@ -5,9 +5,15 @@ statement ok PRAGMA enable_verification +statement ok +PRAGMA explain_output = PHYSICAL_ONLY; + statement ok SET merge_join_threshold=0 +statement ok +SET nested_loop_join_threshold=0; + # create tables statement ok CREATE TABLE east AS SELECT * FROM (VALUES @@ -25,6 +31,15 @@ CREATE TABLE west AS SELECT * FROM (VALUES ) west(rid, t_id, time, cost, cores) # Qs +query II +EXPLAIN +SELECT s1.rid, s2.rid +FROM west s1, west s2 +WHERE s1.time > s2.time +ORDER BY 1, 2 +---- +physical_plan <REGEX>:.*PIECEWISE_MERGE_JOIN.* + query II SELECT s1.rid, s2.rid FROM west s1, west s2 @@ -39,6 +54,15 @@ s2 s4 s4 s3 # Qp +query II +EXPLAIN +SELECT s1.rid, s2.rid +FROM west s1, west s2 +WHERE s1.time > s2.time AND s1.cost < s2.cost +ORDER BY 1, 2 +---- +physical_plan <REGEX>:.*IE_JOIN.* + query II SELECT s1.rid, s2.rid FROM west s1, west s2 @@ -49,6 +73,15 @@ s1 s3 s4 s3 # Qt +query II +EXPLAIN +SELECT east.rid, west.rid +FROM east, west +WHERE east.dur < west.time AND east.rev > west.cost +ORDER BY 1, 2 +---- +physical_plan <REGEX>:.*IE_JOIN.* + query II SELECT east.rid, west.rid FROM east, west @@ -58,11 +91,22 @@ ORDER BY 1, 2 r2 s2 # Test string comparisons -query II -WITH weststr AS ( +statement ok +CREATE TABLE weststr AS ( SELECT rid, time::VARCHAR AS time, cost::VARCHAR as cost FROM west -) +); + +query II +EXPLAIN +SELECT s1.rid, s2.rid +FROM weststr s1, weststr s2 +WHERE s1.time > s2.time AND s1.cost < s2.cost +ORDER BY 1, 2 +---- +physical_plan <REGEX>:.*IE_JOIN.* + +query II SELECT s1.rid, s2.rid FROM weststr s1, weststr s2 WHERE s1.time > s2.time AND s1.cost < s2.cost diff --git 
a/test/sql/join/iejoin/test_iejoin_events.test b/test/sql/join/iejoin/test_iejoin_events.test index afb8278b44a7..a454255765a7 100644 --- a/test/sql/join/iejoin/test_iejoin_events.test +++ b/test/sql/join/iejoin/test_iejoin_events.test @@ -34,6 +34,17 @@ CREATE TABLE events AS ( ) q ); +query II +EXPLAIN +SELECT COUNT(*) FROM ( + SELECT r.id, s.id + FROM events r, events s + WHERE r.start <= s.end AND r.end >= s.start + AND r.id <> s.id +) q2; +---- +physical_plan <REGEX>:.*IE_JOIN.* + query I SELECT COUNT(*) FROM ( SELECT r.id, s.id diff --git a/test/sql/join/iejoin/test_iejoin_null_keys.test b/test/sql/join/iejoin/test_iejoin_null_keys.test index 614ac896b8ea..3dd90794ee1c 100644 --- a/test/sql/join/iejoin/test_iejoin_null_keys.test +++ b/test/sql/join/iejoin/test_iejoin_null_keys.test @@ -5,6 +5,9 @@ statement ok pragma enable_verification +statement ok +PRAGMA explain_output = PHYSICAL_ONLY; + statement ok SET merge_join_threshold=0 @@ -14,6 +17,15 @@ create table tt (x int, y int, z int); statement ok insert into tt select nullif(r % 3, 0), nullif (r % 5, 0), r from range(10) tbl(r); +query II +EXPLAIN +select * +from tt t1 left join tt t2 +on t1.x < t2.x and t1.y < t2.y +order by t1.x nulls first, t1.y nulls first, t1.z, t2.x, t2.y, t2.z; +---- +physical_plan <REGEX>:.*IE_JOIN.* + query IIIIII select * from tt t1 left join tt t2 @@ -41,6 +53,18 @@ create table tt2 (x int); statement ok insert into tt2 select * from range(10); +query II +explain +select t1.x, t1.y +from ( + select (case when x < 100 then null else 99 end) x, (case when x < 100 then 99 else 99 end) y + from tt2 +) t1 left join tt2 t2 +on t1.x < t2.x and t1.y < t2.x +order by t1.x nulls first, t1.y nulls first; +---- +physical_plan <REGEX>:.*IE_JOIN.* + query II select t1.x, t1.y from ( diff --git a/test/sql/join/iejoin/test_iejoin_overlaps.test b/test/sql/join/iejoin/test_iejoin_overlaps.test index 25c63e9a141b..db7d5eebb5d6 100644 --- a/test/sql/join/iejoin/test_iejoin_overlaps.test +++ 
b/test/sql/join/iejoin/test_iejoin_overlaps.test @@ -8,8 +8,22 @@ PRAGMA enable_verification statement ok SET merge_join_threshold=0 +statement ok +SET nested_loop_join_threshold=0; + +statement ok +PRAGMA explain_output = PHYSICAL_ONLY; + # We read from CSVs to prevent the optimiser from # using statistics to decide the join is a NOP +query II +EXPLAIN +SELECT t1.x, t2.x +FROM 'test/sql/join/iejoin/overlap.left.csv' t1, 'test/sql/join/iejoin/overlap.right.csv' t2 +WHERE t1.x < t2.x AND t1.y > t2.y; +---- +physical_plan <REGEX>:.*IE_JOIN.* + query II SELECT t1.x, t2.x FROM 'test/sql/join/iejoin/overlap.left.csv' t1, 'test/sql/join/iejoin/overlap.right.csv' t2 @@ -22,3 +36,11 @@ SELECT t1.x, t2.x FROM 'test/sql/join/iejoin/overlap.left.csv' t1, 'test/sql/join/iejoin/overlap.right.csv' t2 WHERE t1.y > t2.y AND t1.x < t2.x; ---- + +query II +EXPLAIN +SELECT t1.x, t2.x +FROM 'test/sql/join/iejoin/overlap.left.csv' t1, 'test/sql/join/iejoin/overlap.right.csv' t2 +WHERE t1.y > t2.y AND t1.x < t2.x; +---- +physical_plan <REGEX>:.*IE_JOIN.* diff --git a/test/sql/join/iejoin/test_iejoin_sort_tasks.test_slow b/test/sql/join/iejoin/test_iejoin_sort_tasks.test_slow index 64030d0affc2..71a2760b88d9 100644 --- a/test/sql/join/iejoin/test_iejoin_sort_tasks.test_slow +++ b/test/sql/join/iejoin/test_iejoin_sort_tasks.test_slow @@ -14,6 +14,24 @@ statement ok SET merge_join_threshold=0 # Stream tables with minimal overlap that require merge tasks on both sides. +query II +EXPLAIN +SELECT lhs.begin, rhs.begin +FROM ( + SELECT + i AS id, + i AS begin, + i + 1 AS end + FROM range(1, 10000002) tbl(i)) lhs, + (SELECT + i - 100000000 AS id, + i AS begin, + i + 1 AS end + FROM range(10000001, 20000002) tbl(i)) rhs +WHERE lhs.begin < rhs.end AND rhs.begin < lhs.end +---- +physical_plan <REGEX>:.*IE_JOIN.* + query II SELECT lhs.begin, rhs.begin FROM (