| -- |
| -- exercises for the hash join code |
| -- |
| -- GPDB: Cloudberry doesn't support parallel scans at the moment, so all the upstream |
| -- parallelism tests don't really apply. We still run the tests, but they won't |
| -- exercise parallelism. |
| begin; |
| -- GPDB requires allow_system_table_mods=on to modify pg_class.reltuples. |
| set allow_system_table_mods=on; |
| set local min_parallel_table_scan_size = 0; |
| set local parallel_setup_cost = 0; |
| set local enable_hashjoin = on; |
| -- Extract bucket and batch counts from an explain analyze plan. In |
| -- general we can't make assertions about how many batches (or |
| -- buckets) will be required because it can vary, but in some special |
| -- cases we can, and we can also check for growth. |
| create or replace function find_hash(node json) |
| returns json language plpgsql |
| as |
| $$ |
| declare |
| x json; |
| child json; |
| begin |
| if node->>'Node Type' = 'Hash' then |
| return node; |
| else |
| for child in select json_array_elements(node->'Plans') |
| loop |
| x := find_hash(child); |
| if x is not null then |
| return x; |
| end if; |
| end loop; |
| return null; |
| end if; |
| end; |
| $$; |
| create or replace function hash_join_batches(query text) |
| returns table (original int, final int) language plpgsql |
| as |
| $$ |
| declare |
| whole_plan json; |
| hash_node json; |
| begin |
| for whole_plan in |
| execute 'explain (analyze, format ''json'') ' || query |
| loop |
| hash_node := find_hash(json_extract_path(whole_plan, '0', 'Plan')); |
| original := hash_node->>'Original Hash Batches'; |
| final := hash_node->>'Hash Batches'; |
| return next; |
| end loop; |
| end; |
| $$; |
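| -- For reference, a minimal sketch of how these two helpers compose. It is |
| -- kept in a comment rather than executed, since this file records the |
| -- expected output of every statement it runs ('simple' is the table |
| -- created just below): |
| --   select original > 1 as initially_multibatch, |
| --          final > original as increased_batches |
| --   from hash_join_batches($$ |
| --     select count(*) from simple r join simple s using (id); |
| --   $$); |
| -- Each returned row pairs the planned batch count ('Original Hash |
| -- Batches') with the final count ('Hash Batches') after any runtime growth. |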
| -- Make a simple relation with well distributed keys and correctly |
| -- estimated size. |
| create table simple as |
| select generate_series(1, 60000) AS id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry. |
| alter table simple set (parallel_workers = 2); |
| analyze simple; |
| -- Make a relation whose size we will under-estimate. We want stats |
| -- to say 1000 rows, but actually there are 60,000 rows. |
| create table bigger_than_it_looks as |
| select generate_series(1, 60000) as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry. |
| alter table bigger_than_it_looks set (autovacuum_enabled = 'false'); |
| WARNING: autovacuum is not supported in Cloudberry |
| alter table bigger_than_it_looks set (parallel_workers = 2); |
| analyze bigger_than_it_looks; |
| update pg_class set reltuples = 1000 where relname = 'bigger_than_it_looks'; |
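| -- A quick sanity check for the faked statistics (illustrative only, not |
| -- executed as part of this test; pg_class.reltuples is the planner's |
| -- row-count estimate): |
| --   select reltuples from pg_class where relname = 'bigger_than_it_looks'; |
| -- This should now report 1000 even though the table really holds 60,000 rows. |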
| -- Make a relation whose size we underestimate and that also has a |
| -- kind of skew that breaks our batching scheme. We want stats to say |
| -- 2 rows, but actually there are 20,000 rows with the same key. |
| create table extremely_skewed (id int, t text); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| alter table extremely_skewed set (autovacuum_enabled = 'false'); |
| WARNING: autovacuum is not supported in Cloudberry |
| alter table extremely_skewed set (parallel_workers = 2); |
| analyze extremely_skewed; |
| insert into extremely_skewed |
| select 42 as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' |
| from generate_series(1, 20000); |
| update pg_class |
| set reltuples = 2, relpages = pg_relation_size('extremely_skewed') / 8192 |
| where relname = 'extremely_skewed'; |
| -- Make a relation with a couple of enormous tuples. |
| create table wide as select generate_series(1, 2) as id, rpad('', 320000, 'x') as t; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry. |
| alter table wide set (parallel_workers = 2); |
| analyze wide; |
| -- The "optimal" case: the hash table fits in memory; we plan for 1 |
| -- batch, we stick to that number, and peak memory usage stays within |
| -- our work_mem budget |
| -- non-parallel |
| savepoint settings; |
| set local max_parallel_workers_per_gather = 0; |
| set local work_mem = '4MB'; |
| explain (costs off) |
| select count(*) from simple r join simple s using (id); |
| QUERY PLAN |
| ------------------------------------------------------------------------------ |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Hash Join |
| Hash Cond: (r.id = s.id) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: r.id |
| -> Seq Scan on simple r |
| -> Hash |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: s.id |
| -> Seq Scan on simple s |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (13 rows) |
| |
| select count(*) from simple r join simple s using (id); |
| count |
| ------- |
| 60000 |
| (1 row) |
| |
| select original > 1 as initially_multibatch, final > original as increased_batches |
| from hash_join_batches( |
| $$ |
| select count(*) from simple r join simple s using (id); |
| $$); |
| initially_multibatch | increased_batches |
| ----------------------+------------------- |
| f | f |
| (1 row) |
| |
| rollback to settings; |
| -- parallel with parallel-oblivious hash join |
| savepoint settings; |
| set local max_parallel_workers_per_gather = 2; |
| set local work_mem = '4MB'; |
| set local enable_parallel_hash = off; |
| explain (costs off) |
| select count(*) from simple r join simple s using (id); |
| QUERY PLAN |
| ------------------------------------------------------------------------------ |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Hash Join |
| Hash Cond: (r.id = s.id) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: r.id |
| -> Seq Scan on simple r |
| -> Hash |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: s.id |
| -> Seq Scan on simple s |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (13 rows) |
| |
| select count(*) from simple r join simple s using (id); |
| count |
| ------- |
| 60000 |
| (1 row) |
| |
| select original > 1 as initially_multibatch, final > original as increased_batches |
| from hash_join_batches( |
| $$ |
| select count(*) from simple r join simple s using (id); |
| $$); |
| initially_multibatch | increased_batches |
| ----------------------+------------------- |
| f | f |
| (1 row) |
| |
| rollback to settings; |
| -- parallel with parallel-aware hash join |
| savepoint settings; |
| set local max_parallel_workers_per_gather = 2; |
| set local work_mem = '4MB'; |
| set local enable_parallel_hash = on; |
| explain (costs off) |
| select count(*) from simple r join simple s using (id); |
| QUERY PLAN |
| ------------------------------------------------------------------------------ |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Hash Join |
| Hash Cond: (r.id = s.id) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: r.id |
| -> Seq Scan on simple r |
| -> Hash |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: s.id |
| -> Seq Scan on simple s |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (13 rows) |
| |
| select count(*) from simple r join simple s using (id); |
| count |
| ------- |
| 60000 |
| (1 row) |
| |
| select original > 1 as initially_multibatch, final > original as increased_batches |
| from hash_join_batches( |
| $$ |
| select count(*) from simple r join simple s using (id); |
| $$); |
| initially_multibatch | increased_batches |
| ----------------------+------------------- |
| f | f |
| (1 row) |
| |
| rollback to settings; |
| -- The "good" case: batches required, but we plan the right number; we |
| -- plan for some number of batches, and we stick to that number, and |
| -- peak memory usage stays within our work_mem budget |
| -- non-parallel |
| savepoint settings; |
| set local max_parallel_workers_per_gather = 0; |
| set local work_mem = '128kB'; |
| set local statement_mem = '1000kB'; -- GPDB uses statement_mem instead of work_mem |
| explain (costs off) |
| select count(*) from simple r join simple s using (id); |
| QUERY PLAN |
| ------------------------------------------------------------------------------ |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Hash Join |
| Hash Cond: (r.id = s.id) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: r.id |
| -> Seq Scan on simple r |
| -> Hash |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: s.id |
| -> Seq Scan on simple s |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (13 rows) |
| |
| select count(*) from simple r join simple s using (id); |
| count |
| ------- |
| 60000 |
| (1 row) |
| |
| select original > 1 as initially_multibatch, final > original as increased_batches |
| from hash_join_batches( |
| $$ |
| select count(*) from simple r join simple s using (id); |
| $$); |
| initially_multibatch | increased_batches |
| ----------------------+------------------- |
| t | f |
| (1 row) |
| |
| rollback to settings; |
| -- parallel with parallel-oblivious hash join |
| savepoint settings; |
| set local max_parallel_workers_per_gather = 2; |
| set local work_mem = '128kB'; |
| set local statement_mem = '1000kB'; -- GPDB uses statement_mem instead of work_mem |
| set local enable_parallel_hash = off; |
| explain (costs off) |
| select count(*) from simple r join simple s using (id); |
| QUERY PLAN |
| ------------------------------------------------------------------------------ |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Hash Join |
| Hash Cond: (r.id = s.id) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: r.id |
| -> Seq Scan on simple r |
| -> Hash |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: s.id |
| -> Seq Scan on simple s |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (13 rows) |
| |
| select count(*) from simple r join simple s using (id); |
| count |
| ------- |
| 60000 |
| (1 row) |
| |
| select original > 1 as initially_multibatch, final > original as increased_batches |
| from hash_join_batches( |
| $$ |
| select count(*) from simple r join simple s using (id); |
| $$); |
| initially_multibatch | increased_batches |
| ----------------------+------------------- |
| t | f |
| (1 row) |
| |
| rollback to settings; |
| -- parallel with parallel-aware hash join |
| savepoint settings; |
| set local max_parallel_workers_per_gather = 2; |
| set local work_mem = '192kB'; |
| set local statement_mem = '1000kB'; -- GPDB uses statement_mem instead of work_mem |
| set local enable_parallel_hash = on; |
| explain (costs off) |
| select count(*) from simple r join simple s using (id); |
| QUERY PLAN |
| ------------------------------------------------------------------------------ |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Hash Join |
| Hash Cond: (r.id = s.id) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: r.id |
| -> Seq Scan on simple r |
| -> Hash |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: s.id |
| -> Seq Scan on simple s |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (13 rows) |
| |
| select count(*) from simple r join simple s using (id); |
| count |
| ------- |
| 60000 |
| (1 row) |
| |
| select original > 1 as initially_multibatch, final > original as increased_batches |
| from hash_join_batches( |
| $$ |
| select count(*) from simple r join simple s using (id); |
| $$); |
| initially_multibatch | increased_batches |
| ----------------------+------------------- |
| t | f |
| (1 row) |
| |
| rollback to settings; |
| -- The "bad" case: during execution we need to increase number of |
| -- batches; in this case we plan for 1 batch, and increase at least a |
| -- couple of times, and peak memory usage stays within our work_mem |
| -- budget |
| -- non-parallel |
| savepoint settings; |
| set local max_parallel_workers_per_gather = 0; |
| set local work_mem = '128kB'; |
| set local statement_mem = '1000kB'; -- GPDB uses statement_mem instead of work_mem |
| explain (costs off) |
| select count(*) from simple r join bigger_than_it_looks s using (id); |
| QUERY PLAN |
| ------------------------------------------------------------------------------ |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Hash Join |
| Hash Cond: (r.id = s.id) |
| -> Seq Scan on simple r |
| -> Hash |
| -> Broadcast Motion 3:3 (slice2; segments: 3) |
| -> Seq Scan on bigger_than_it_looks s |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (10 rows) |
| |
| select count(*) from simple r join bigger_than_it_looks s using (id); |
| count |
| ------- |
| 60000 |
| (1 row) |
| |
| select original > 1 as initially_multibatch, final > original as increased_batches |
| from hash_join_batches( |
| $$ |
| select count(*) from simple r join bigger_than_it_looks s using (id); |
| $$); |
| initially_multibatch | increased_batches |
| ----------------------+------------------- |
| f | t |
| (1 row) |
| |
| rollback to settings; |
| -- parallel with parallel-oblivious hash join |
| savepoint settings; |
| set local max_parallel_workers_per_gather = 2; |
| set local work_mem = '128kB'; |
| set local statement_mem = '1000kB'; -- GPDB uses statement_mem instead of work_mem |
| set local enable_parallel_hash = off; |
| explain (costs off) |
| select count(*) from simple r join bigger_than_it_looks s using (id); |
| QUERY PLAN |
| ------------------------------------------------------------------------------ |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Hash Join |
| Hash Cond: (r.id = s.id) |
| -> Seq Scan on simple r |
| -> Hash |
| -> Broadcast Motion 3:3 (slice2; segments: 3) |
| -> Seq Scan on bigger_than_it_looks s |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (13 rows) |
| |
| select count(*) from simple r join bigger_than_it_looks s using (id); |
| count |
| ------- |
| 60000 |
| (1 row) |
| |
| select original > 1 as initially_multibatch, final > original as increased_batches |
| from hash_join_batches( |
| $$ |
| select count(*) from simple r join bigger_than_it_looks s using (id); |
| $$); |
| initially_multibatch | increased_batches |
| ----------------------+------------------- |
| f | t |
| (1 row) |
| |
| rollback to settings; |
| -- parallel with parallel-aware hash join |
| savepoint settings; |
| set local max_parallel_workers_per_gather = 1; |
| set local work_mem = '192kB'; |
| set local statement_mem = '1000kB'; -- GPDB uses statement_mem instead of work_mem |
| set local enable_parallel_hash = on; |
| explain (costs off) |
| select count(*) from simple r join bigger_than_it_looks s using (id); |
| QUERY PLAN |
| ------------------------------------------------------------------------------ |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Hash Join |
| Hash Cond: (r.id = s.id) |
| -> Seq Scan on simple r |
| -> Hash |
| -> Broadcast Motion 3:3 (slice2; segments: 3) |
| -> Seq Scan on bigger_than_it_looks s |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (10 rows) |
| |
| select count(*) from simple r join bigger_than_it_looks s using (id); |
| count |
| ------- |
| 60000 |
| (1 row) |
| |
| select original > 1 as initially_multibatch, final > original as increased_batches |
| from hash_join_batches( |
| $$ |
| select count(*) from simple r join bigger_than_it_looks s using (id); |
| $$); |
| initially_multibatch | increased_batches |
| ----------------------+------------------- |
| f | t |
| (1 row) |
| |
| rollback to settings; |
| -- The "ugly" case: increasing the number of batches during execution |
| -- doesn't help, so stop trying to fit in work_mem and hope for the |
| -- best; in this case we plan for 1 batch, increase just once and |
| -- then stop increasing because that didn't help at all, so we blow |
| -- right through the work_mem budget and hope for the best... |
| -- non-parallel |
| savepoint settings; |
| set local max_parallel_workers_per_gather = 0; |
| set local work_mem = '128kB'; |
| set local statement_mem = '1000kB'; -- GPDB uses statement_mem instead of work_mem |
| explain (costs off) |
| select count(*) from simple r join extremely_skewed s using (id); |
| NOTICE: One or more columns in the following table(s) do not have statistics: extremely_skewed |
| HINT: For non-partitioned tables, run analyze <table_name>(<column_list>). For partitioned tables, run analyze rootpartition <table_name>(<column_list>). See log for columns missing statistics. |
| QUERY PLAN |
| --------------------------------------------------------------------------- |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Hash Join |
| Hash Cond: (r.id = s.id) |
| -> Seq Scan on simple r |
| -> Hash |
| -> Broadcast Motion 3:3 (slice2; segments: 3) |
| -> Seq Scan on extremely_skewed s |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (10 rows) |
| |
| select count(*) from simple r join extremely_skewed s using (id); |
| NOTICE: One or more columns in the following table(s) do not have statistics: extremely_skewed |
| HINT: For non-partitioned tables, run analyze <table_name>(<column_list>). For partitioned tables, run analyze rootpartition <table_name>(<column_list>). See log for columns missing statistics. |
| count |
| ------- |
| 20000 |
| (1 row) |
| |
| select * from hash_join_batches( |
| $$ |
| select count(*) from simple r join extremely_skewed s using (id); |
| $$); |
| NOTICE: One or more columns in the following table(s) do not have statistics: extremely_skewed |
| HINT: For non-partitioned tables, run analyze <table_name>(<column_list>). For partitioned tables, run analyze rootpartition <table_name>(<column_list>). See log for columns missing statistics. |
| original | final |
| ----------+------- |
| 1 | 2 |
| (1 row) |
| |
| rollback to settings; |
| -- parallel with parallel-oblivious hash join |
| savepoint settings; |
| set local max_parallel_workers_per_gather = 2; |
| set local work_mem = '128kB'; |
| set local statement_mem = '1000kB'; -- GPDB uses statement_mem instead of work_mem |
| set local enable_parallel_hash = off; |
| explain (costs off) |
| select count(*) from simple r join extremely_skewed s using (id); |
| NOTICE: One or more columns in the following table(s) do not have statistics: extremely_skewed |
| HINT: For non-partitioned tables, run analyze <table_name>(<column_list>). For partitioned tables, run analyze rootpartition <table_name>(<column_list>). See log for columns missing statistics. |
| QUERY PLAN |
| --------------------------------------------------------------------------- |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Hash Join |
| Hash Cond: (r.id = s.id) |
| -> Seq Scan on simple r |
| -> Hash |
| -> Broadcast Motion 3:3 (slice2; segments: 3) |
| -> Seq Scan on extremely_skewed s |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (10 rows) |
| |
| select count(*) from simple r join extremely_skewed s using (id); |
| NOTICE: One or more columns in the following table(s) do not have statistics: extremely_skewed |
| HINT: For non-partitioned tables, run analyze <table_name>(<column_list>). For partitioned tables, run analyze rootpartition <table_name>(<column_list>). See log for columns missing statistics. |
| count |
| ------- |
| 20000 |
| (1 row) |
| |
| select * from hash_join_batches( |
| $$ |
| select count(*) from simple r join extremely_skewed s using (id); |
| $$); |
| NOTICE: One or more columns in the following table(s) do not have statistics: extremely_skewed |
| HINT: For non-partitioned tables, run analyze <table_name>(<column_list>). For partitioned tables, run analyze rootpartition <table_name>(<column_list>). See log for columns missing statistics. |
| original | final |
| ----------+------- |
| 1 | 2 |
| (1 row) |
| |
| rollback to settings; |
| -- parallel with parallel-aware hash join |
| savepoint settings; |
| set local max_parallel_workers_per_gather = 1; |
| set local work_mem = '128kB'; |
| set local statement_mem = '1000kB'; -- GPDB uses statement_mem instead of work_mem |
| set local enable_parallel_hash = on; |
| explain (costs off) |
| select count(*) from simple r join extremely_skewed s using (id); |
| NOTICE: One or more columns in the following table(s) do not have statistics: extremely_skewed |
| HINT: For non-partitioned tables, run analyze <table_name>(<column_list>). For partitioned tables, run analyze rootpartition <table_name>(<column_list>). See log for columns missing statistics. |
| QUERY PLAN |
| --------------------------------------------------------------------------- |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Hash Join |
| Hash Cond: (r.id = s.id) |
| -> Seq Scan on simple r |
| -> Hash |
| -> Broadcast Motion 3:3 (slice2; segments: 3) |
| -> Seq Scan on extremely_skewed s |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (10 rows) |
| |
| select count(*) from simple r join extremely_skewed s using (id); |
| NOTICE: One or more columns in the following table(s) do not have statistics: extremely_skewed |
| HINT: For non-partitioned tables, run analyze <table_name>(<column_list>). For partitioned tables, run analyze rootpartition <table_name>(<column_list>). See log for columns missing statistics. |
| count |
| ------- |
| 20000 |
| (1 row) |
| |
| select * from hash_join_batches( |
| $$ |
| select count(*) from simple r join extremely_skewed s using (id); |
| $$); |
| NOTICE: One or more columns in the following table(s) do not have statistics: extremely_skewed |
| HINT: For non-partitioned tables, run analyze <table_name>(<column_list>). For partitioned tables, run analyze rootpartition <table_name>(<column_list>). See log for columns missing statistics. |
| original | final |
| ----------+------- |
| 1 | 2 |
| (1 row) |
| |
| rollback to settings; |
| -- A couple of other hash join tests unrelated to work_mem management. |
| -- Check that EXPLAIN ANALYZE has data even if the leader doesn't participate |
| savepoint settings; |
| set local max_parallel_workers_per_gather = 2; |
| set local work_mem = '4MB'; |
| set local parallel_leader_participation = off; |
| select * from hash_join_batches( |
| $$ |
| select count(*) from simple r join simple s using (id); |
| $$); |
| original | final |
| ----------+------- |
| 1 | 1 |
| (1 row) |
| |
| rollback to settings; |
| -- Exercise rescans. We'll turn off parallel_leader_participation so |
| -- that we can check that instrumentation comes back correctly. |
| create table join_foo as select generate_series(1, 3) as id, 'xxxxx'::text as t; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry. |
| analyze join_foo; |
| alter table join_foo set (parallel_workers = 0); |
| create table join_bar as select generate_series(1, 20000) as id, 'xxxxx'::text as t; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry. |
| analyze join_bar; |
| alter table join_bar set (parallel_workers = 2); |
| -- multi-batch with rescan, parallel-oblivious |
| savepoint settings; |
| set enable_parallel_hash = off; |
| set parallel_leader_participation = off; |
| set min_parallel_table_scan_size = 0; |
| set parallel_setup_cost = 0; |
| set parallel_tuple_cost = 0; |
| set max_parallel_workers_per_gather = 2; |
| set enable_material = off; |
| set enable_mergejoin = off; |
| set work_mem = '64kB'; |
| set local statement_mem = '1000kB'; -- GPDB uses statement_mem instead of work_mem |
| explain (costs off) |
| select count(*) from join_foo |
| left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss |
| on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------------ |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Nested Loop Left Join |
| Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1))) |
| -> Seq Scan on join_foo |
| -> Materialize |
| -> Broadcast Motion 3:3 (slice2; segments: 3) |
| -> Hash Join |
| Hash Cond: (b1.id = b2.id) |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: b1.id |
| -> Seq Scan on join_bar b1 |
| -> Hash |
| -> Redistribute Motion 3:3 (slice4; segments: 3) |
| Hash Key: b2.id |
| -> Seq Scan on join_bar b2 |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (18 rows) |
| |
| select count(*) from join_foo |
| left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss |
| on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; |
| count |
| ------- |
| 3 |
| (1 row) |
| |
| select final > 1 as multibatch |
| from hash_join_batches( |
| $$ |
| select count(*) from join_foo |
| left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss |
| on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; |
| $$); |
| multibatch |
| ------------ |
| t |
| (1 row) |
| |
| rollback to settings; |
| -- single-batch with rescan, parallel-oblivious |
| savepoint settings; |
| set enable_parallel_hash = off; |
| set parallel_leader_participation = off; |
| set min_parallel_table_scan_size = 0; |
| set parallel_setup_cost = 0; |
| set parallel_tuple_cost = 0; |
| set max_parallel_workers_per_gather = 2; |
| set enable_material = off; |
| set enable_mergejoin = off; |
| set work_mem = '4MB'; |
| explain (costs off) |
| select count(*) from join_foo |
| left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss |
| on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------------ |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Nested Loop Left Join |
| Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1))) |
| -> Seq Scan on join_foo |
| -> Materialize |
| -> Broadcast Motion 3:3 (slice2; segments: 3) |
| -> Hash Join |
| Hash Cond: (b1.id = b2.id) |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: b1.id |
| -> Seq Scan on join_bar b1 |
| -> Hash |
| -> Redistribute Motion 3:3 (slice4; segments: 3) |
| Hash Key: b2.id |
| -> Seq Scan on join_bar b2 |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (18 rows) |
| |
| select count(*) from join_foo |
| left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss |
| on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; |
| count |
| ------- |
| 3 |
| (1 row) |
| |
| select final > 1 as multibatch |
| from hash_join_batches( |
| $$ |
| select count(*) from join_foo |
| left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss |
| on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; |
| $$); |
| multibatch |
| ------------ |
| f |
| (1 row) |
| |
| rollback to settings; |
| -- multi-batch with rescan, parallel-aware |
| savepoint settings; |
| set enable_parallel_hash = on; |
| set parallel_leader_participation = off; |
| set min_parallel_table_scan_size = 0; |
| set parallel_setup_cost = 0; |
| set parallel_tuple_cost = 0; |
| set max_parallel_workers_per_gather = 2; |
| set enable_material = off; |
| set enable_mergejoin = off; |
| set work_mem = '64kB'; |
| set local statement_mem = '1000kB'; -- GPDB uses statement_mem instead of work_mem |
| explain (costs off) |
| select count(*) from join_foo |
| left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss |
| on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------------ |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Nested Loop Left Join |
| Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1))) |
| -> Seq Scan on join_foo |
| -> Materialize |
| -> Broadcast Motion 3:3 (slice2; segments: 3) |
| -> Hash Join |
| Hash Cond: (b1.id = b2.id) |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: b1.id |
| -> Seq Scan on join_bar b1 |
| -> Hash |
| -> Redistribute Motion 3:3 (slice4; segments: 3) |
| Hash Key: b2.id |
| -> Seq Scan on join_bar b2 |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (18 rows) |
| |
| select count(*) from join_foo |
| left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss |
| on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; |
| count |
| ------- |
| 3 |
| (1 row) |
| |
| select final > 1 as multibatch |
| from hash_join_batches( |
| $$ |
| select count(*) from join_foo |
| left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss |
| on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; |
| $$); |
| multibatch |
| ------------ |
| t |
| (1 row) |
| |
| rollback to settings; |
| -- single-batch with rescan, parallel-aware |
| savepoint settings; |
| set enable_parallel_hash = on; |
| set parallel_leader_participation = off; |
| set min_parallel_table_scan_size = 0; |
| set parallel_setup_cost = 0; |
| set parallel_tuple_cost = 0; |
| set max_parallel_workers_per_gather = 2; |
| set enable_material = off; |
| set enable_mergejoin = off; |
| set work_mem = '4MB'; |
| explain (costs off) |
| select count(*) from join_foo |
| left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss |
| on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------------ |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Nested Loop Left Join |
| Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1))) |
| -> Seq Scan on join_foo |
| -> Materialize |
| -> Broadcast Motion 3:3 (slice2; segments: 3) |
| -> Hash Join |
| Hash Cond: (b1.id = b2.id) |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: b1.id |
| -> Seq Scan on join_bar b1 |
| -> Hash |
| -> Redistribute Motion 3:3 (slice4; segments: 3) |
| Hash Key: b2.id |
| -> Seq Scan on join_bar b2 |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (18 rows) |
| |
| select count(*) from join_foo |
| left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss |
| on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; |
| count |
| ------- |
| 3 |
| (1 row) |
| |
| select final > 1 as multibatch |
| from hash_join_batches( |
| $$ |
| select count(*) from join_foo |
| left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss |
| on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; |
| $$); |
| multibatch |
| ------------ |
| f |
| (1 row) |
| |
| rollback to settings; |
| -- A full outer join where every record is matched. |
| -- non-parallel |
| savepoint settings; |
| set local max_parallel_workers_per_gather = 0; |
| explain (costs off) |
| select count(*) from simple r full outer join simple s using (id); |
| QUERY PLAN |
| ------------------------------------------------------------------------------ |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Hash Full Join |
| Hash Cond: (r.id = s.id) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: r.id |
| -> Seq Scan on simple r |
| -> Hash |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: s.id |
| -> Seq Scan on simple s |
|    Optimizer: Pivotal Optimizer (GPORCA) |
| (13 rows) |
| |
| select count(*) from simple r full outer join simple s using (id); |
| count |
| ------- |
| 60000 |
| (1 row) |
| |
| rollback to settings; |
| -- parallelism not possible with parallel-oblivious outer hash join |
| savepoint settings; |
| set local max_parallel_workers_per_gather = 2; |
| explain (costs off) |
| select count(*) from simple r full outer join simple s using (id); |
| QUERY PLAN |
| ------------------------------------------------------------------------------ |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Hash Full Join |
| Hash Cond: (r.id = s.id) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: r.id |
| -> Seq Scan on simple r |
| -> Hash |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: s.id |
| -> Seq Scan on simple s |
|    Optimizer: Pivotal Optimizer (GPORCA) |
| (13 rows) |
| |
| select count(*) from simple r full outer join simple s using (id); |
| count |
| ------- |
| 60000 |
| (1 row) |
| |
| rollback to settings; |
| -- A full outer join where every record is not matched. |
| -- non-parallel |
| savepoint settings; |
| set local max_parallel_workers_per_gather = 0; |
| explain (costs off) |
| select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); |
| QUERY PLAN |
| ------------------------------------------------------------------------------ |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Hash Full Join |
| Hash Cond: (r.id = (0 - s.id)) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: r.id |
| -> Seq Scan on simple r |
| -> Hash |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: (0 - s.id) |
| -> Seq Scan on simple s |
|    Optimizer: Pivotal Optimizer (GPORCA) |
| (13 rows) |
| |
| select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); |
| count |
| -------- |
| 120000 |
| (1 row) |
| |
| rollback to settings; |
| -- parallelism not possible with parallel-oblivious outer hash join |
| savepoint settings; |
| set local max_parallel_workers_per_gather = 2; |
| explain (costs off) |
| select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); |
| QUERY PLAN |
| ------------------------------------------------------------------------------ |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Hash Full Join |
| Hash Cond: (r.id = (0 - s.id)) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: r.id |
| -> Seq Scan on simple r |
| -> Hash |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: (0 - s.id) |
| -> Seq Scan on simple s |
|    Optimizer: Pivotal Optimizer (GPORCA) |
| (13 rows) |
| |
| select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); |
| count |
| -------- |
| 120000 |
| (1 row) |
| |
| rollback to settings; |
| -- exercise special code paths for huge tuples (note use of non-strict |
| -- expression and left join required to get the detoasted tuple into |
| -- the hash table) |
| -- parallel with parallel-aware hash join (hits ExecParallelHashLoadTuple and |
| -- sts_puttuple oversized tuple cases because it's multi-batch) |
| -- NOTE: I (Heikki) could not cajole the planner into creating a |
| -- plan like the upstream one. I accepted the plan we get, but now this |
| -- doesn't exercise the special code path it's supposed to. |
| -- Greenplum does not support parallel scan or parallel hash join at the |
| -- moment, so the following cases will not hit the code paths they are |
| -- supposed to. We leave them here for possible future work. |
| savepoint settings; |
| set max_parallel_workers_per_gather = 2; |
| set enable_parallel_hash = on; |
| set work_mem = '128kB'; |
| insert into wide select generate_series(3, 100) as id, rpad('', 320000, 'x') as t; |
| explain (costs off) |
| select length(max(s.t)) |
| from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id); |
| QUERY PLAN |
| ------------------------------------------------------------------------------ |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Hash Right Join |
| Hash Cond: (wide.id = wide_1.id) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: wide.id |
| -> Seq Scan on wide |
| -> Hash |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: wide_1.id |
| -> Seq Scan on wide wide_1 |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (13 rows) |
| |
| select length(max(s.t)) |
| from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id); |
| length |
| -------- |
| 320000 |
| (1 row) |
| |
| select final > 1 as multibatch |
| from hash_join_batches( |
| $$ |
| select length(max(s.t)) |
| from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id); |
| $$); |
| multibatch |
| ------------ |
| f |
| (1 row) |
| |
| rollback to settings; |
| -- If virtualbuckets is much larger than innerndistinct, and |
| -- outerndistinct is much larger than innerndistinct, then most |
| -- tuples of the outer table will probe an empty bucket. So when |
| -- we calculate the cost of traversing a bucket, we need to ignore |
| -- the outer tuples that match an empty bucket. |
| savepoint settings; |
| set max_parallel_workers_per_gather = 0; |
| create table join_hash_t_small(a int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| create table join_hash_t_big(b int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'b' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| insert into join_hash_t_small select i%100 from generate_series(0, 3000) i; |
| insert into join_hash_t_big select i%100000 from generate_series(1, 100000) i; |
| analyze join_hash_t_small; |
| analyze join_hash_t_big; |
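| -- To see the distinct-value estimates the comment above relies on, one |
| -- could inspect the collected statistics (illustrative only, not executed |
| -- here; pg_stats is the standard statistics view): |
| --   select tablename, attname, n_distinct |
| --   from pg_stats |
| --   where tablename in ('join_hash_t_small', 'join_hash_t_big'); |
| -- join_hash_t_small should show about 100 distinct values, while |
| -- join_hash_t_big is estimated as nearly all-distinct (n_distinct is |
| -- reported as a negative fraction when it scales with the table size). |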
| explain (costs off) select * from join_hash_t_small, join_hash_t_big where a = b; |
| QUERY PLAN |
| -------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Hash Join |
| Hash Cond: (join_hash_t_big.b = join_hash_t_small.a) |
| -> Seq Scan on join_hash_t_big |
| -> Hash |
| -> Seq Scan on join_hash_t_small |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (7 rows) |
| |
| rollback to settings; |
| rollback; |
| -- Verify that hash key expressions reference the correct |
| -- nodes. Hashjoin's hashkeys need to reference its outer plan, Hash's |
| -- need to reference Hash's outer plan (which is below HashJoin's |
| -- inner plan). It's not trivial to verify that the references are |
| -- correct (we don't display the hashkeys themselves), but if the |
| -- hashkeys contain subplan references, those will be displayed. Force |
| -- subplans to appear just about everywhere. |
| -- |
| -- Bug report: |
| -- https://www.postgresql.org/message-id/CAPpHfdvGVegF_TKKRiBrSmatJL2dR9uwFCuR%2BteQ_8tEXU8mxg%40mail.gmail.com |
| -- |
| BEGIN; |
| SET LOCAL enable_sort = OFF; -- avoid mergejoins |
| SET LOCAL from_collapse_limit = 1; -- allows easy changing of join order |
| CREATE TABLE hjtest_1 (a text, b int, id int, c bool); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| CREATE TABLE hjtest_2 (a bool, id int, b text, c int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 2, 1, false); -- matches |
| INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 2, false); -- fails id join condition |
| INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 20, 1, false); -- fails < 50 |
| INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 1, false); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) |
| INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 2); -- matches |
| INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 3, 'another', 7); -- fails id join condition |
| INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 90); -- fails < 55 |
| INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 3); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) |
| INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'text', 1); -- fails hjtest_1.a <> hjtest_2.b; |
| EXPLAIN (COSTS OFF, VERBOSE) |
| SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 |
| FROM hjtest_1, hjtest_2 |
| WHERE |
| hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) |
| AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) |
| AND (SELECT hjtest_1.b * 5) < 50 |
| AND (SELECT hjtest_2.c * 5) < 55 |
| AND hjtest_1.a <> hjtest_2.b; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: hjtest_1.a, hjtest_2.a, ((hjtest_1.tableoid)::regclass), ((hjtest_2.tableoid)::regclass) |
| -> Hash Join |
| Output: hjtest_1.a, hjtest_2.a, hjtest_1.tableoid, hjtest_2.tableoid |
| Hash Cond: (((1) = hjtest_1.id) AND (((SubPlan 2)) = ((SubPlan 4)))) |
| Join Filter: (hjtest_1.a <> hjtest_2.b) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: hjtest_2.a, hjtest_2.b, hjtest_2.tableoid, (1), ((SubPlan 2)) |
| Hash Key: (1) |
| -> Nested Loop Left Join |
| Output: hjtest_2.a, hjtest_2.b, hjtest_2.tableoid, (1), (SubPlan 2) |
| Join Filter: (hjtest_2.id = 1) |
| -> Seq Scan on public.hjtest_2 |
| Output: hjtest_2.a, hjtest_2.id, hjtest_2.b, hjtest_2.c, hjtest_2.tableoid |
| Filter: (SubPlan 1) |
| SubPlan 1 |
| -> Result |
| Output: true |
| Filter: (((hjtest_2.c * 5)) < 55) |
| -> Result |
| Output: (hjtest_2.c * 5) |
| -> Materialize |
| Output: (1) |
| -> Result |
| Output: 1 |
| SubPlan 2 |
| -> Result |
| Output: (hjtest_2.c * 5) |
| -> Result |
| Output: true |
| -> Hash |
| Output: hjtest_1.a, hjtest_1.id, hjtest_1.tableoid, ((SubPlan 4)) |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Output: hjtest_1.a, hjtest_1.id, hjtest_1.tableoid, ((SubPlan 4)) |
| Hash Key: hjtest_1.id |
| -> Seq Scan on public.hjtest_1 |
| Output: hjtest_1.a, hjtest_1.id, hjtest_1.tableoid, (SubPlan 4) |
| Filter: (SubPlan 3) |
| SubPlan 4 |
| -> Result |
| Output: (hjtest_1.b * 5) |
| -> Result |
| Output: true |
| SubPlan 3 |
| -> Result |
| Output: true |
| Filter: (((hjtest_1.b * 5)) < 50) |
| -> Result |
| Output: (hjtest_1.b * 5) |
| Settings: enable_sort = 'off', from_collapse_limit = '1' |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (51 rows) |
| |
| SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 |
| FROM hjtest_1, hjtest_2 |
| WHERE |
| hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) |
| AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) |
| AND (SELECT hjtest_1.b * 5) < 50 |
| AND (SELECT hjtest_2.c * 5) < 55 |
| AND hjtest_1.a <> hjtest_2.b; |
| a1 | a2 | t1 | t2 |
| ------+----+----------+---------- |
| text | t | hjtest_1 | hjtest_2 |
| (1 row) |
| |
| EXPLAIN (COSTS OFF, VERBOSE) |
| SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 |
| FROM hjtest_2, hjtest_1 |
| WHERE |
| hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) |
| AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) |
| AND (SELECT hjtest_1.b * 5) < 50 |
| AND (SELECT hjtest_2.c * 5) < 55 |
| AND hjtest_1.a <> hjtest_2.b; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: hjtest_1.a, hjtest_2.a, ((hjtest_1.tableoid)::regclass), ((hjtest_2.tableoid)::regclass) |
| -> Hash Join |
| Output: hjtest_1.a, hjtest_2.a, hjtest_1.tableoid, hjtest_2.tableoid |
| Hash Cond: (((1) = hjtest_1.id) AND (((SubPlan 2)) = ((SubPlan 4)))) |
| Join Filter: (hjtest_1.a <> hjtest_2.b) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: hjtest_2.a, hjtest_2.b, hjtest_2.tableoid, (1), ((SubPlan 2)) |
| Hash Key: (1), ((SubPlan 2)) |
| -> Nested Loop Left Join |
| Output: hjtest_2.a, hjtest_2.b, hjtest_2.tableoid, (1), (SubPlan 2) |
| Join Filter: (hjtest_2.id = 1) |
| -> Seq Scan on public.hjtest_2 |
| Output: hjtest_2.a, hjtest_2.id, hjtest_2.b, hjtest_2.c, hjtest_2.tableoid |
| Filter: (SubPlan 1) |
| SubPlan 1 |
| -> Result |
| Output: true |
| Filter: (((hjtest_2.c * 5)) < 55) |
| -> Result |
| Output: (hjtest_2.c * 5) |
| -> Materialize |
| Output: (1) |
| -> Result |
| Output: 1 |
| SubPlan 2 |
| -> Result |
| Output: (hjtest_2.c * 5) |
| -> Result |
| Output: true |
| -> Hash |
| Output: hjtest_1.a, hjtest_1.id, hjtest_1.tableoid, ((SubPlan 4)) |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Output: hjtest_1.a, hjtest_1.id, hjtest_1.tableoid, ((SubPlan 4)) |
| Hash Key: hjtest_1.id, ((SubPlan 4)) |
| -> Seq Scan on public.hjtest_1 |
| Output: hjtest_1.a, hjtest_1.id, hjtest_1.tableoid, (SubPlan 4) |
| Filter: (SubPlan 3) |
| SubPlan 4 |
| -> Result |
| Output: (hjtest_1.b * 5) |
| -> Result |
| Output: true |
| SubPlan 3 |
| -> Result |
| Output: true |
| Filter: (((hjtest_1.b * 5)) < 50) |
| -> Result |
| Output: (hjtest_1.b * 5) |
| Settings: enable_sort = 'off', from_collapse_limit = '1' |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (51 rows) |
| |
| SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 |
| FROM hjtest_2, hjtest_1 |
| WHERE |
| hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) |
| AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) |
| AND (SELECT hjtest_1.b * 5) < 50 |
| AND (SELECT hjtest_2.c * 5) < 55 |
| AND hjtest_1.a <> hjtest_2.b; |
| a1 | a2 | t1 | t2 |
| ------+----+----------+---------- |
| text | t | hjtest_1 | hjtest_2 |
| (1 row) |
| |
| ROLLBACK; |