--
-- exercises for the hash join code
--
-- GPDB: Cloudberry doesn't support parallel scans at the moment, so all the upstream
-- parallelism tests don't really apply. We still run the tests, but they won't
-- exercise parallelism.
begin;
-- GPDB requires allow_system_table_mods=on to modify pg_class.reltuples.
set allow_system_table_mods=on;
set local min_parallel_table_scan_size = 0;
set local parallel_setup_cost = 0;
set local enable_hashjoin = on;
-- Extract bucket and batch counts from an explain analyze plan. In
-- general we can't make assertions about how many batches (or
-- buckets) will be required because it can vary, but we can in some
-- special cases and we can check for growth.
create or replace function find_hash(node json)
returns json language plpgsql
as
$$
declare
x json;
child json;
begin
if node->>'Node Type' = 'Hash' then
return node;
else
for child in select json_array_elements(node->'Plans')
loop
x := find_hash(child);
if x is not null then
return x;
end if;
end loop;
return null;
end if;
end;
$$;
create or replace function hash_join_batches(query text)
returns table (original int, final int) language plpgsql
as
$$
declare
whole_plan json;
hash_node json;
begin
for whole_plan in
execute 'explain (analyze, format ''json'') ' || query
loop
hash_node := find_hash(json_extract_path(whole_plan, '0', 'Plan'));
original := hash_node->>'Original Hash Batches';
final := hash_node->>'Hash Batches';
return next;
end loop;
end;
$$;
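-- For reference, hash_join_batches() reads the "Original Hash Batches"
-- and "Hash Batches" counters from the Hash node of an EXPLAIN (ANALYZE,
-- FORMAT JSON) plan; e.g. a fragment like (illustrative values only)
--   {"Node Type": "Hash", "Original Hash Batches": 1, "Hash Batches": 4, ...}
-- becomes one (original, final) = (1, 4) row per plan returned.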
-- Make a simple relation with well distributed keys and correctly
-- estimated size.
create table simple as
select generate_series(1, 60000) AS id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa';
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
alter table simple set (parallel_workers = 2);
analyze simple;
-- Make a relation whose size we will under-estimate. We want stats
-- to say 1000 rows, but actually there are 60,000 rows.
create table bigger_than_it_looks as
select generate_series(1, 60000) as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa';
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
alter table bigger_than_it_looks set (autovacuum_enabled = 'false');
WARNING: autovacuum is not supported in Cloudberry
alter table bigger_than_it_looks set (parallel_workers = 2);
analyze bigger_than_it_looks;
update pg_class set reltuples = 1000 where relname = 'bigger_than_it_looks';
-- Make a relation whose size we underestimate and that also has a
-- kind of skew that breaks our batching scheme. We want stats to say
-- 2 rows, but actually there are 20,000 rows with the same key.
create table extremely_skewed (id int, t text);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
alter table extremely_skewed set (autovacuum_enabled = 'false');
WARNING: autovacuum is not supported in Cloudberry
alter table extremely_skewed set (parallel_workers = 2);
analyze extremely_skewed;
insert into extremely_skewed
select 42 as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
from generate_series(1, 20000);
update pg_class
set reltuples = 2, relpages = pg_relation_size('extremely_skewed') / 8192
where relname = 'extremely_skewed';
-- Make a relation with a couple of enormous tuples.
create table wide as select generate_series(1, 2) as id, rpad('', 320000, 'x') as t;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
alter table wide set (parallel_workers = 2);
ANALYZE wide;
-- The "optimal" case: the hash table fits in memory; we plan for 1
-- batch, we stick to that number, and peak memory usage stays within
-- our work_mem budget
-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
set local work_mem = '4MB';
explain (costs off)
select count(*) from simple r join simple s using (id);
QUERY PLAN
------------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Hash Join
Hash Cond: (r.id = s.id)
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: r.id
-> Seq Scan on simple r
-> Hash
-> Redistribute Motion 3:3 (slice3; segments: 3)
Hash Key: s.id
-> Seq Scan on simple s
Optimizer: Pivotal Optimizer (GPORCA)
(13 rows)
select count(*) from simple r join simple s using (id);
count
-------
60000
(1 row)
select original > 1 as initially_multibatch, final > original as increased_batches
from hash_join_batches(
$$
select count(*) from simple r join simple s using (id);
$$);
initially_multibatch | increased_batches
----------------------+-------------------
f | f
(1 row)
rollback to settings;
-- parallel with parallel-oblivious hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '4MB';
set local enable_parallel_hash = off;
explain (costs off)
select count(*) from simple r join simple s using (id);
QUERY PLAN
------------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Hash Join
Hash Cond: (r.id = s.id)
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: r.id
-> Seq Scan on simple r
-> Hash
-> Redistribute Motion 3:3 (slice3; segments: 3)
Hash Key: s.id
-> Seq Scan on simple s
Optimizer: Pivotal Optimizer (GPORCA)
(13 rows)
select count(*) from simple r join simple s using (id);
count
-------
60000
(1 row)
select original > 1 as initially_multibatch, final > original as increased_batches
from hash_join_batches(
$$
select count(*) from simple r join simple s using (id);
$$);
initially_multibatch | increased_batches
----------------------+-------------------
f | f
(1 row)
rollback to settings;
-- parallel with parallel-aware hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '4MB';
set local enable_parallel_hash = on;
explain (costs off)
select count(*) from simple r join simple s using (id);
QUERY PLAN
------------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Hash Join
Hash Cond: (r.id = s.id)
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: r.id
-> Seq Scan on simple r
-> Hash
-> Redistribute Motion 3:3 (slice3; segments: 3)
Hash Key: s.id
-> Seq Scan on simple s
Optimizer: Pivotal Optimizer (GPORCA)
(13 rows)
select count(*) from simple r join simple s using (id);
count
-------
60000
(1 row)
select original > 1 as initially_multibatch, final > original as increased_batches
from hash_join_batches(
$$
select count(*) from simple r join simple s using (id);
$$);
initially_multibatch | increased_batches
----------------------+-------------------
f | f
(1 row)
rollback to settings;
-- The "good" case: batches required, but we plan the right number; we
-- plan for some number of batches, and we stick to that number, and
-- peak memory usage says within our work_mem budget
-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
set local work_mem = '128kB';
set local statement_mem = '1000kB'; -- GPDB uses statement_mem instead of work_mem
explain (costs off)
select count(*) from simple r join simple s using (id);
QUERY PLAN
------------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Hash Join
Hash Cond: (r.id = s.id)
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: r.id
-> Seq Scan on simple r
-> Hash
-> Redistribute Motion 3:3 (slice3; segments: 3)
Hash Key: s.id
-> Seq Scan on simple s
Optimizer: Pivotal Optimizer (GPORCA)
(13 rows)
select count(*) from simple r join simple s using (id);
count
-------
60000
(1 row)
select original > 1 as initially_multibatch, final > original as increased_batches
from hash_join_batches(
$$
select count(*) from simple r join simple s using (id);
$$);
initially_multibatch | increased_batches
----------------------+-------------------
t | f
(1 row)
rollback to settings;
-- parallel with parallel-oblivious hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '128kB';
set local statement_mem = '1000kB'; -- GPDB uses statement_mem instead of work_mem
set local enable_parallel_hash = off;
explain (costs off)
select count(*) from simple r join simple s using (id);
QUERY PLAN
------------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Hash Join
Hash Cond: (r.id = s.id)
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: r.id
-> Seq Scan on simple r
-> Hash
-> Redistribute Motion 3:3 (slice3; segments: 3)
Hash Key: s.id
-> Seq Scan on simple s
Optimizer: Pivotal Optimizer (GPORCA)
(13 rows)
select count(*) from simple r join simple s using (id);
count
-------
60000
(1 row)
select original > 1 as initially_multibatch, final > original as increased_batches
from hash_join_batches(
$$
select count(*) from simple r join simple s using (id);
$$);
initially_multibatch | increased_batches
----------------------+-------------------
t | f
(1 row)
rollback to settings;
-- parallel with parallel-aware hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '192kB';
set local statement_mem = '1000kB'; -- GPDB uses statement_mem instead of work_mem
set local enable_parallel_hash = on;
explain (costs off)
select count(*) from simple r join simple s using (id);
QUERY PLAN
------------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Hash Join
Hash Cond: (r.id = s.id)
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: r.id
-> Seq Scan on simple r
-> Hash
-> Redistribute Motion 3:3 (slice3; segments: 3)
Hash Key: s.id
-> Seq Scan on simple s
Optimizer: Pivotal Optimizer (GPORCA)
(13 rows)
select count(*) from simple r join simple s using (id);
count
-------
60000
(1 row)
select original > 1 as initially_multibatch, final > original as increased_batches
from hash_join_batches(
$$
select count(*) from simple r join simple s using (id);
$$);
initially_multibatch | increased_batches
----------------------+-------------------
t | f
(1 row)
rollback to settings;
-- The "bad" case: during execution we need to increase number of
-- batches; in this case we plan for 1 batch, and increase at least a
-- couple of times, and peak memory usage stays within our work_mem
-- budget
-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
set local work_mem = '128kB';
set local statement_mem = '1000kB'; -- GPDB uses statement_mem instead of work_mem
explain (costs off)
select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id);
QUERY PLAN
------------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Hash Join
Hash Cond: (r.id = s.id)
-> Seq Scan on simple r
-> Hash
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Seq Scan on bigger_than_it_looks s
Optimizer: Pivotal Optimizer (GPORCA)
(10 rows)
select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id);
count
-------
60000
(1 row)
select original > 1 as initially_multibatch, final > original as increased_batches
from hash_join_batches(
$$
select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id);
$$);
initially_multibatch | increased_batches
----------------------+-------------------
f | t
(1 row)
rollback to settings;
-- parallel with parallel-oblivious hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '128kB';
set local statement_mem = '1000kB'; -- GPDB uses statement_mem instead of work_mem
set local enable_parallel_hash = off;
explain (costs off)
select count(*) from simple r join bigger_than_it_looks s using (id);
QUERY PLAN
------------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Hash Join
Hash Cond: (r.id = s.id)
-> Seq Scan on simple r
-> Hash
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Seq Scan on bigger_than_it_looks s
Optimizer: Pivotal Optimizer (GPORCA)
(13 rows)
select count(*) from simple r join bigger_than_it_looks s using (id);
count
-------
60000
(1 row)
select original > 1 as initially_multibatch, final > original as increased_batches
from hash_join_batches(
$$
select count(*) from simple r join bigger_than_it_looks s using (id);
$$);
initially_multibatch | increased_batches
----------------------+-------------------
f | t
(1 row)
rollback to settings;
-- parallel with parallel-aware hash join
savepoint settings;
set local max_parallel_workers_per_gather = 1;
set local work_mem = '192kB';
set local statement_mem = '1000kB'; -- GPDB uses statement_mem instead of work_mem
set local enable_parallel_hash = on;
explain (costs off)
select count(*) from simple r join bigger_than_it_looks s using (id);
QUERY PLAN
------------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Hash Join
Hash Cond: (r.id = s.id)
-> Seq Scan on simple r
-> Hash
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Seq Scan on bigger_than_it_looks s
Optimizer: Pivotal Optimizer (GPORCA)
(13 rows)
select count(*) from simple r join bigger_than_it_looks s using (id);
count
-------
60000
(1 row)
select original > 1 as initially_multibatch, final > original as increased_batches
from hash_join_batches(
$$
select count(*) from simple r join bigger_than_it_looks s using (id);
$$);
initially_multibatch | increased_batches
----------------------+-------------------
f | t
(1 row)
rollback to settings;
-- The "ugly" case: increasing the number of batches during execution
-- doesn't help, so stop trying to fit in work_mem and hope for the
-- best; in this case we plan for 1 batch, increases just once and
-- then stop increasing because that didn't help at all, so we blow
-- right through the work_mem budget and hope for the best...
-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
set local work_mem = '128kB';
set local statement_mem = '1000kB'; -- GPDB uses statement_mem instead of work_mem
explain (costs off)
select count(*) from simple r join extremely_skewed s using (id);
NOTICE: One or more columns in the following table(s) do not have statistics: extremely_skewed
HINT: For non-partitioned tables, run analyze <table_name>(<column_list>). For partitioned tables, run analyze rootpartition <table_name>(<column_list>). See log for columns missing statistics.
QUERY PLAN
---------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Hash Join
Hash Cond: (r.id = s.id)
-> Seq Scan on simple r
-> Hash
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Seq Scan on extremely_skewed s
Optimizer: Pivotal Optimizer (GPORCA)
(10 rows)
select count(*) from simple r join extremely_skewed s using (id);
NOTICE: One or more columns in the following table(s) do not have statistics: extremely_skewed
HINT: For non-partitioned tables, run analyze <table_name>(<column_list>). For partitioned tables, run analyze rootpartition <table_name>(<column_list>). See log for columns missing statistics.
count
-------
20000
(1 row)
select * from hash_join_batches(
$$
select count(*) from simple r join extremely_skewed s using (id);
$$);
NOTICE: One or more columns in the following table(s) do not have statistics: extremely_skewed
HINT: For non-partitioned tables, run analyze <table_name>(<column_list>). For partitioned tables, run analyze rootpartition <table_name>(<column_list>). See log for columns missing statistics.
original | final
----------+-------
1 | 2
(1 row)
rollback to settings;
-- parallel with parallel-oblivious hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '128kB';
set local statement_mem = '1000kB'; -- GPDB uses statement_mem instead of work_mem
set local enable_parallel_hash = off;
explain (costs off)
select count(*) from simple r join extremely_skewed s using (id);
NOTICE: One or more columns in the following table(s) do not have statistics: extremely_skewed
HINT: For non-partitioned tables, run analyze <table_name>(<column_list>). For partitioned tables, run analyze rootpartition <table_name>(<column_list>). See log for columns missing statistics.
QUERY PLAN
---------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Hash Join
Hash Cond: (r.id = s.id)
-> Seq Scan on simple r
-> Hash
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Seq Scan on extremely_skewed s
Optimizer: Pivotal Optimizer (GPORCA)
(10 rows)
select count(*) from simple r join extremely_skewed s using (id);
NOTICE: One or more columns in the following table(s) do not have statistics: extremely_skewed
HINT: For non-partitioned tables, run analyze <table_name>(<column_list>). For partitioned tables, run analyze rootpartition <table_name>(<column_list>). See log for columns missing statistics.
count
-------
20000
(1 row)
select * from hash_join_batches(
$$
select count(*) from simple r join extremely_skewed s using (id);
$$);
NOTICE: One or more columns in the following table(s) do not have statistics: extremely_skewed
HINT: For non-partitioned tables, run analyze <table_name>(<column_list>). For partitioned tables, run analyze rootpartition <table_name>(<column_list>). See log for columns missing statistics.
original | final
----------+-------
1 | 2
(1 row)
rollback to settings;
-- parallel with parallel-aware hash join
savepoint settings;
set local max_parallel_workers_per_gather = 1;
set local work_mem = '128kB';
set local statement_mem = '1000kB'; -- GPDB uses statement_mem instead of work_mem
set local enable_parallel_hash = on;
explain (costs off)
select count(*) from simple r join extremely_skewed s using (id);
NOTICE: One or more columns in the following table(s) do not have statistics: extremely_skewed
HINT: For non-partitioned tables, run analyze <table_name>(<column_list>). For partitioned tables, run analyze rootpartition <table_name>(<column_list>). See log for columns missing statistics.
QUERY PLAN
---------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Hash Join
Hash Cond: (r.id = s.id)
-> Seq Scan on simple r
-> Hash
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Seq Scan on extremely_skewed s
Optimizer: Pivotal Optimizer (GPORCA)
(10 rows)
select count(*) from simple r join extremely_skewed s using (id);
NOTICE: One or more columns in the following table(s) do not have statistics: extremely_skewed
HINT: For non-partitioned tables, run analyze <table_name>(<column_list>). For partitioned tables, run analyze rootpartition <table_name>(<column_list>). See log for columns missing statistics.
count
-------
20000
(1 row)
select * from hash_join_batches(
$$
select count(*) from simple r join extremely_skewed s using (id);
$$);
NOTICE: One or more columns in the following table(s) do not have statistics: extremely_skewed
HINT: For non-partitioned tables, run analyze <table_name>(<column_list>). For partitioned tables, run analyze rootpartition <table_name>(<column_list>). See log for columns missing statistics.
original | final
----------+-------
1 | 2
(1 row)
rollback to settings;
-- A couple of other hash join tests unrelated to work_mem management.
-- Check that EXPLAIN ANALYZE has data even if the leader doesn't participate
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '4MB';
set local parallel_leader_participation = off;
select * from hash_join_batches(
$$
select count(*) from simple r join simple s using (id);
$$);
original | final
----------+-------
1 | 1
(1 row)
rollback to settings;
-- Exercise rescans. We'll turn off parallel_leader_participation so
-- that we can check that instrumentation comes back correctly.
create table join_foo as select generate_series(1, 3) as id, 'xxxxx'::text as t;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
analyze join_foo;
alter table join_foo set (parallel_workers = 0);
create table join_bar as select generate_series(1, 20000) as id, 'xxxxx'::text as t;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
analyze join_bar;
alter table join_bar set (parallel_workers = 2);
-- multi-batch with rescan, parallel-oblivious
savepoint settings;
set enable_parallel_hash = off;
set parallel_leader_participation = off;
set min_parallel_table_scan_size = 0;
set parallel_setup_cost = 0;
set parallel_tuple_cost = 0;
set max_parallel_workers_per_gather = 2;
set enable_material = off;
set enable_mergejoin = off;
set work_mem = '64kB';
set local statement_mem = '1000kB'; -- GPDB uses statement_mem instead of work_mem
explain (costs off)
select count(*) from join_foo
left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
QUERY PLAN
------------------------------------------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Nested Loop Left Join
Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1)))
-> Seq Scan on join_foo
-> Materialize
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Hash Join
Hash Cond: (b1.id = b2.id)
-> Redistribute Motion 3:3 (slice3; segments: 3)
Hash Key: b1.id
-> Seq Scan on join_bar b1
-> Hash
-> Redistribute Motion 3:3 (slice4; segments: 3)
Hash Key: b2.id
-> Seq Scan on join_bar b2
Optimizer: Pivotal Optimizer (GPORCA)
(18 rows)
select count(*) from join_foo
left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
count
-------
3
(1 row)
select final > 1 as multibatch
from hash_join_batches(
$$
select count(*) from join_foo
left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
$$);
multibatch
------------
t
(1 row)
rollback to settings;
-- single-batch with rescan, parallel-oblivious
savepoint settings;
set enable_parallel_hash = off;
set parallel_leader_participation = off;
set min_parallel_table_scan_size = 0;
set parallel_setup_cost = 0;
set parallel_tuple_cost = 0;
set max_parallel_workers_per_gather = 2;
set enable_material = off;
set enable_mergejoin = off;
set work_mem = '4MB';
explain (costs off)
select count(*) from join_foo
left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
QUERY PLAN
------------------------------------------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Nested Loop Left Join
Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1)))
-> Seq Scan on join_foo
-> Materialize
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Hash Join
Hash Cond: (b1.id = b2.id)
-> Redistribute Motion 3:3 (slice3; segments: 3)
Hash Key: b1.id
-> Seq Scan on join_bar b1
-> Hash
-> Redistribute Motion 3:3 (slice4; segments: 3)
Hash Key: b2.id
-> Seq Scan on join_bar b2
Optimizer: Pivotal Optimizer (GPORCA)
(18 rows)
select count(*) from join_foo
left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
count
-------
3
(1 row)
select final > 1 as multibatch
from hash_join_batches(
$$
select count(*) from join_foo
left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
$$);
multibatch
------------
f
(1 row)
rollback to settings;
-- multi-batch with rescan, parallel-aware
savepoint settings;
set enable_parallel_hash = on;
set parallel_leader_participation = off;
set min_parallel_table_scan_size = 0;
set parallel_setup_cost = 0;
set parallel_tuple_cost = 0;
set max_parallel_workers_per_gather = 2;
set enable_material = off;
set enable_mergejoin = off;
set work_mem = '64kB';
set local statement_mem = '1000kB'; -- GPDB uses statement_mem instead of work_mem
explain (costs off)
select count(*) from join_foo
left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
QUERY PLAN
------------------------------------------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Nested Loop Left Join
Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1)))
-> Seq Scan on join_foo
-> Materialize
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Hash Join
Hash Cond: (b1.id = b2.id)
-> Redistribute Motion 3:3 (slice3; segments: 3)
Hash Key: b1.id
-> Seq Scan on join_bar b1
-> Hash
-> Redistribute Motion 3:3 (slice4; segments: 3)
Hash Key: b2.id
-> Seq Scan on join_bar b2
Optimizer: Pivotal Optimizer (GPORCA)
(18 rows)
select count(*) from join_foo
left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
count
-------
3
(1 row)
select final > 1 as multibatch
from hash_join_batches(
$$
select count(*) from join_foo
left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
$$);
multibatch
------------
t
(1 row)
rollback to settings;
-- single-batch with rescan, parallel-aware
savepoint settings;
set enable_parallel_hash = on;
set parallel_leader_participation = off;
set min_parallel_table_scan_size = 0;
set parallel_setup_cost = 0;
set parallel_tuple_cost = 0;
set max_parallel_workers_per_gather = 2;
set enable_material = off;
set enable_mergejoin = off;
set work_mem = '4MB';
explain (costs off)
select count(*) from join_foo
left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
QUERY PLAN
------------------------------------------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Nested Loop Left Join
Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1)))
-> Seq Scan on join_foo
-> Materialize
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Hash Join
Hash Cond: (b1.id = b2.id)
-> Redistribute Motion 3:3 (slice3; segments: 3)
Hash Key: b1.id
-> Seq Scan on join_bar b1
-> Hash
-> Redistribute Motion 3:3 (slice4; segments: 3)
Hash Key: b2.id
-> Seq Scan on join_bar b2
Optimizer: Pivotal Optimizer (GPORCA)
(18 rows)
select count(*) from join_foo
left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
count
-------
3
(1 row)
select final > 1 as multibatch
from hash_join_batches(
$$
select count(*) from join_foo
left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
$$);
multibatch
------------
f
(1 row)
rollback to settings;
-- A full outer join where every record is matched.
-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
explain (costs off)
select count(*) from simple r full outer join simple s using (id);
QUERY PLAN
------------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Hash Full Join
Hash Cond: (r.id = s.id)
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: r.id
-> Seq Scan on simple r
-> Hash
-> Redistribute Motion 3:3 (slice3; segments: 3)
Hash Key: s.id
-> Seq Scan on simple s
Optimizer: GPORCA
(13 rows)
select count(*) from simple r full outer join simple s using (id);
count
-------
60000
(1 row)
rollback to settings;
-- parallelism not possible with parallel-oblivious outer hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
explain (costs off)
select count(*) from simple r full outer join simple s using (id);
QUERY PLAN
------------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Hash Full Join
Hash Cond: (r.id = s.id)
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: r.id
-> Seq Scan on simple r
-> Hash
-> Redistribute Motion 3:3 (slice3; segments: 3)
Hash Key: s.id
-> Seq Scan on simple s
Optimizer: GPORCA
(13 rows)
select count(*) from simple r full outer join simple s using (id);
count
-------
60000
(1 row)
rollback to settings;
-- A full outer join where no record is matched.
-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
explain (costs off)
select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
QUERY PLAN
------------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Hash Full Join
Hash Cond: (r.id = (0 - s.id))
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: r.id
-> Seq Scan on simple r
-> Hash
-> Redistribute Motion 3:3 (slice3; segments: 3)
Hash Key: (0 - s.id)
-> Seq Scan on simple s
Optimizer: GPORCA
(13 rows)
select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
count
--------
120000
(1 row)
rollback to settings;
-- parallelism not possible with parallel-oblivious outer hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
explain (costs off)
select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
QUERY PLAN
------------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Hash Full Join
Hash Cond: (r.id = (0 - s.id))
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: r.id
-> Seq Scan on simple r
-> Hash
-> Redistribute Motion 3:3 (slice3; segments: 3)
Hash Key: (0 - s.id)
-> Seq Scan on simple s
Optimizer: GPORCA
(13 rows)
select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
count
--------
120000
(1 row)
rollback to settings;
-- exercise special code paths for huge tuples (note use of non-strict
-- expression and left join required to get the detoasted tuple into
-- the hash table)
-- parallel with parallel-aware hash join (hits ExecParallelHashLoadTuple and
-- sts_puttuple oversized tuple cases because it's multi-batch)
-- NOTE: I (Heikki) could not cajole the planner to create a
-- plan like in upstream. I accepted the plan you get, but now this doesn't
-- exercise the special code path it's supposed to.
-- Greenplum does not support parallel scan or parallel hash join at the
-- moment, so the following cases will not hit the code path they are
-- supposed to. We leave them here for possible future work.
savepoint settings;
set max_parallel_workers_per_gather = 2;
set enable_parallel_hash = on;
set work_mem = '128kB';
insert into wide select generate_series(3, 100) as id, rpad('', 320000, 'x') as t;
explain (costs off)
select length(max(s.t))
from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
QUERY PLAN
------------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Hash Right Join
Hash Cond: (wide.id = wide_1.id)
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: wide.id
-> Seq Scan on wide
-> Hash
-> Redistribute Motion 3:3 (slice3; segments: 3)
Hash Key: wide_1.id
-> Seq Scan on wide wide_1
Optimizer: Pivotal Optimizer (GPORCA)
(13 rows)
select length(max(s.t))
from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
length
--------
320000
(1 row)
select final > 1 as multibatch
from hash_join_batches(
$$
select length(max(s.t))
from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
$$);
multibatch
------------
f
(1 row)
rollback to settings;
-- If virtualbuckets is much larger than innerndistinct, and
-- outerndistinct is much larger than innerndistinct, then most
-- tuples of the outer table will hash to empty buckets. So when
-- we calculate the cost of traversing a bucket, we need to ignore
-- the tuples that match empty buckets.
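-- As a rough, illustrative calculation for the test below: join_hash_t_small
-- holds 3001 rows but only 100 distinct keys, while join_hash_t_big has
-- 100000 distinct keys, so with many more than 100 virtual buckets nearly
-- all probes from the big table land in empty buckets and should contribute
-- (almost) nothing to the estimated bucket-traversal cost.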
savepoint settings;
set max_parallel_workers_per_gather = 0;
create table join_hash_t_small(a int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table join_hash_t_big(b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'b' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into join_hash_t_small select i%100 from generate_series(0, 3000)i;
insert into join_hash_t_big select i%100000 from generate_series(1, 100000)i ;
analyze join_hash_t_small;
analyze join_hash_t_big;
explain (costs off) select * from join_hash_t_small, join_hash_t_big where a = b;
QUERY PLAN
--------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Hash Join
Hash Cond: (join_hash_t_big.b = join_hash_t_small.a)
-> Seq Scan on join_hash_t_big
-> Hash
-> Seq Scan on join_hash_t_small
Optimizer: Pivotal Optimizer (GPORCA)
(7 rows)
rollback to settings;
rollback;
-- Verify that hash key expressions reference the correct
-- nodes. Hashjoin's hashkeys need to reference its outer plan, Hash's
-- need to reference Hash's outer plan (which is below HashJoin's
-- inner plan). It's not trivial to verify that the references are
-- correct (we don't display the hashkeys themselves), but if the
-- hashkeys contain subplan references, those will be displayed. Force
-- subplans to appear just about everywhere.
--
-- Bug report:
-- https://www.postgresql.org/message-id/CAPpHfdvGVegF_TKKRiBrSmatJL2dR9uwFCuR%2BteQ_8tEXU8mxg%40mail.gmail.com
--
BEGIN;
SET LOCAL enable_sort = OFF; -- avoid mergejoins
SET LOCAL from_collapse_limit = 1; -- allows easy changing of join order
CREATE TABLE hjtest_1 (a text, b int, id int, c bool);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE TABLE hjtest_2 (a bool, id int, b text, c int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 2, 1, false); -- matches
INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 2, false); -- fails id join condition
INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 20, 1, false); -- fails < 50
INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 1, false); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 2); -- matches
INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 3, 'another', 7); -- fails id join condition
INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 90); -- fails < 55
INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 3); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'text', 1); -- fails hjtest_1.a <> hjtest_2.b;
EXPLAIN (COSTS OFF, VERBOSE)
SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
FROM hjtest_1, hjtest_2
WHERE
hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
AND (SELECT hjtest_1.b * 5) < 50
AND (SELECT hjtest_2.c * 5) < 55
AND hjtest_1.a <> hjtest_2.b;
QUERY PLAN
------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: hjtest_1.a, hjtest_2.a, ((hjtest_1.tableoid)::regclass), ((hjtest_2.tableoid)::regclass)
-> Hash Join
Output: hjtest_1.a, hjtest_2.a, hjtest_1.tableoid, hjtest_2.tableoid
Hash Cond: (((1) = hjtest_1.id) AND (((SubPlan 2)) = ((SubPlan 4))))
Join Filter: (hjtest_1.a <> hjtest_2.b)
-> Redistribute Motion 3:3 (slice2; segments: 3)
Output: hjtest_2.a, hjtest_2.b, hjtest_2.tableoid, (1), ((SubPlan 2))
Hash Key: (1)
-> Nested Loop Left Join
Output: hjtest_2.a, hjtest_2.b, hjtest_2.tableoid, (1), (SubPlan 2)
Join Filter: (hjtest_2.id = 1)
-> Seq Scan on public.hjtest_2
Output: hjtest_2.a, hjtest_2.id, hjtest_2.b, hjtest_2.c, hjtest_2.tableoid
Filter: (SubPlan 1)
SubPlan 1
-> Result
Output: true
Filter: (((hjtest_2.c * 5)) < 55)
-> Result
Output: (hjtest_2.c * 5)
-> Materialize
Output: (1)
-> Result
Output: 1
SubPlan 2
-> Result
Output: (hjtest_2.c * 5)
-> Result
Output: true
-> Hash
Output: hjtest_1.a, hjtest_1.id, hjtest_1.tableoid, ((SubPlan 4))
-> Redistribute Motion 3:3 (slice3; segments: 3)
Output: hjtest_1.a, hjtest_1.id, hjtest_1.tableoid, ((SubPlan 4))
Hash Key: hjtest_1.id
-> Seq Scan on public.hjtest_1
Output: hjtest_1.a, hjtest_1.id, hjtest_1.tableoid, (SubPlan 4)
Filter: (SubPlan 3)
SubPlan 4
-> Result
Output: (hjtest_1.b * 5)
-> Result
Output: true
SubPlan 3
-> Result
Output: true
Filter: (((hjtest_1.b * 5)) < 50)
-> Result
Output: (hjtest_1.b * 5)
Settings: enable_sort = 'off', from_collapse_limit = '1'
Optimizer: Pivotal Optimizer (GPORCA)
(51 rows)
SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
FROM hjtest_1, hjtest_2
WHERE
hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
AND (SELECT hjtest_1.b * 5) < 50
AND (SELECT hjtest_2.c * 5) < 55
AND hjtest_1.a <> hjtest_2.b;
a1 | a2 | t1 | t2
------+----+----------+----------
text | t | hjtest_1 | hjtest_2
(1 row)
EXPLAIN (COSTS OFF, VERBOSE)
SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
FROM hjtest_2, hjtest_1
WHERE
hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
AND (SELECT hjtest_1.b * 5) < 50
AND (SELECT hjtest_2.c * 5) < 55
AND hjtest_1.a <> hjtest_2.b;
QUERY PLAN
------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: hjtest_1.a, hjtest_2.a, ((hjtest_1.tableoid)::regclass), ((hjtest_2.tableoid)::regclass)
-> Hash Join
Output: hjtest_1.a, hjtest_2.a, hjtest_1.tableoid, hjtest_2.tableoid
Hash Cond: (((1) = hjtest_1.id) AND (((SubPlan 2)) = ((SubPlan 4))))
Join Filter: (hjtest_1.a <> hjtest_2.b)
-> Redistribute Motion 3:3 (slice2; segments: 3)
Output: hjtest_2.a, hjtest_2.b, hjtest_2.tableoid, (1), ((SubPlan 2))
Hash Key: (1), ((SubPlan 2))
-> Nested Loop Left Join
Output: hjtest_2.a, hjtest_2.b, hjtest_2.tableoid, (1), (SubPlan 2)
Join Filter: (hjtest_2.id = 1)
-> Seq Scan on public.hjtest_2
Output: hjtest_2.a, hjtest_2.id, hjtest_2.b, hjtest_2.c, hjtest_2.tableoid
Filter: (SubPlan 1)
SubPlan 1
-> Result
Output: true
Filter: (((hjtest_2.c * 5)) < 55)
-> Result
Output: (hjtest_2.c * 5)
-> Materialize
Output: (1)
-> Result
Output: 1
SubPlan 2
-> Result
Output: (hjtest_2.c * 5)
-> Result
Output: true
-> Hash
Output: hjtest_1.a, hjtest_1.id, hjtest_1.tableoid, ((SubPlan 4))
-> Redistribute Motion 3:3 (slice3; segments: 3)
Output: hjtest_1.a, hjtest_1.id, hjtest_1.tableoid, ((SubPlan 4))
Hash Key: hjtest_1.id, ((SubPlan 4))
-> Seq Scan on public.hjtest_1
Output: hjtest_1.a, hjtest_1.id, hjtest_1.tableoid, (SubPlan 4)
Filter: (SubPlan 3)
SubPlan 4
-> Result
Output: (hjtest_1.b * 5)
-> Result
Output: true
SubPlan 3
-> Result
Output: true
Filter: (((hjtest_1.b * 5)) < 50)
-> Result
Output: (hjtest_1.b * 5)
Settings: enable_sort = 'off', from_collapse_limit = '1'
Optimizer: Pivotal Optimizer (GPORCA)
(51 rows)
SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
FROM hjtest_2, hjtest_1
WHERE
hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
AND (SELECT hjtest_1.b * 5) < 50
AND (SELECT hjtest_2.c * 5) < 55
AND hjtest_1.a <> hjtest_2.b;
a1 | a2 | t1 | t2
------+----+----------+----------
text | t | hjtest_1 | hjtest_2
(1 row)
ROLLBACK;