blob: 8b6e66c9d0e71b782a28ceda0c3537d6c98e705a [file] [log] [blame]
-- Additional GPDB-added tests for UNION
SET optimizer_trace_fallback=on;
create temp table t_union1 (a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
select distinct a, null::integer as c from t_union1 union select a, b from t_union1;
a | c
---+---
(0 rows)
drop table t_union1;
select null union select distinct null;
?column?
----------
(1 row)
select 1 union select distinct null::integer;
?column?
----------
1
(2 rows)
select 1 a, NULL b, NULL c UNION SELECT 2, 3, NULL UNION SELECT 3, NULL, 4;
a | b | c
---+---+---
1 | |
2 | 3 |
3 | | 4
(3 rows)
select ARRAY[1, 2, 3] union select distinct null::integer[];
array
---------
{1,2,3}
(2 rows)
select 1 intersect (select 1, 2 union all select 3, 4);
ERROR: each INTERSECT query must have the same number of columns
LINE 1: select 1 intersect (select 1, 2 union all select 3, 4);
^
select 1 a, row_number() over (partition by 'a') union all (select 1 a , 2 b);
a | row_number
---+------------
1 | 2
1 | 1
(2 rows)
-- This should preserve domain types
select pg_typeof(a) from (select 'a'::information_schema.sql_identifier a union all
select 'b'::information_schema.sql_identifier)a;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation
pg_typeof
-----------------------------------
information_schema.sql_identifier
information_schema.sql_identifier
(2 rows)
(select * from (
(select '1' as a union select null)
union
(select 1 union select distinct null::integer)
)s)
union
(select * from (
(select '1' union select null)
union
(select 1 union select distinct null::integer)
)s2);
a
---
1
(2 rows)
-- Yet, we keep behaviors on text-like columns
select pg_typeof(a) from(select 'foo' a union select 'foo'::name)s;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation
pg_typeof
-----------
name
(1 row)
select pg_typeof(a) from(select 1 x, 'foo' a union
select 1, 'foo' union select 1, 'foo'::name)s;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation
pg_typeof
-----------
text
(1 row)
select pg_typeof(a) from(select 1 x, 'foo' a union
(select 1, 'foo' union select 1, 'foo'::name))s;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation
pg_typeof
-----------
name
(1 row)
CREATE TABLE union_ctas (a, b) AS SELECT 1, 2 UNION SELECT 1, 1 UNION SELECT 1, 1;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
SELECT * FROM union_ctas;
a | b
---+---
1 | 1
1 | 2
(2 rows)
DROP TABLE union_ctas;
-- MPP-21075: push quals below union
CREATE TABLE union_quals1 (a, b) AS SELECT i, i%2 from generate_series(1,10) i;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE TABLE union_quals2 (a, b) AS SELECT i%2, i from generate_series(1,10) i;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
SELECT * FROM (SELECT a, b from union_quals1 UNION SELECT b, a from union_quals2) as foo(a,b) where a > b order by a;
a | b
----+---
2 | 0
3 | 1
4 | 0
5 | 1
6 | 0
7 | 1
8 | 0
9 | 1
10 | 0
(9 rows)
SELECT * FROM (SELECT a, max(b) over() from union_quals1 UNION SELECT * from union_quals2) as foo(a,b) where b > 6 order by a,b;
a | b
---+----
0 | 8
0 | 10
1 | 7
1 | 9
(4 rows)
-- MPP-22266: different combinations of set operations and distinct
select * from ((select 1, 'A' from (select distinct 'B') as foo) union (select 1, 'C')) as bar;
?column? | ?column?
----------+----------
1 | A
1 | C
(2 rows)
select 1 union (select distinct null::integer union select '10');
?column?
----------
1
10
(3 rows)
select 1 union (select 2 from (select distinct null::integer union select 1) as x);
?column?
----------
1
2
(2 rows)
select 1 union (select distinct 10 from (select 1, 3.0 union select distinct 2, null::integer) as foo);
?column?
----------
1
10
(2 rows)
select 1 union (select distinct '10' from (select 1, 3.0 union select distinct 2, null::integer) as foo);
ERROR: UNION types integer and text cannot be matched
LINE 1: select 1 union (select distinct '10' from (select 1, 3.0 uni...
^
select distinct a from (select 'A' union select 'B') as foo(a);
a
---
A
B
(2 rows)
select distinct a from (select distinct 'A' union select 'B') as foo(a);
a
---
A
B
(2 rows)
select distinct a from (select distinct 'A' union select distinct 'B') as foo(a);
a
---
A
B
(2 rows)
select distinct a from (select 'A' from (select distinct 'C' ) as bar union select distinct 'B') as foo(a);
a
---
A
B
(2 rows)
select distinct a from (select distinct 'A' from (select distinct 'C' ) as bar union select distinct 'B') as foo(a);
a
---
A
B
(2 rows)
select distinct a from (select distinct 'A' from (select 'C' from (select distinct 'D') as bar1 ) as bar union select distinct 'B') as foo(a);
a
---
A
B
(2 rows)
-- Test case where input to one branch of UNION resides on a single segment, and another on the QE.
-- The external table resides on QD, and the LIMIT on the test1 table forces the plan to be focused
-- on a single QE.
--
CREATE TABLE test1 (id int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into test1 values (1);
CREATE EXTERNAL WEB TABLE test2 (id int) EXECUTE 'echo 2' ON COORDINATOR FORMAT 'csv';
(SELECT 'test1' as branch, id FROM test1 LIMIT 1)
union
(SELECT 'test2' as branch, id FROM test2);
branch | id
--------+----
test1 | 1
test2 | 2
(2 rows)
explain (SELECT 'test1' as branch, id FROM test1 LIMIT 1)
union
(SELECT 'test2' as branch, id FROM test2);
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..984.78 rows=1125 width=12)
-> HashAggregate (cost=0.00..984.73 rows=375 width=12)
Group Key: ('test1'::text), test1.id
-> Append (cost=0.00..984.65 rows=334 width=12)
-> Redistribute Motion 1:3 (slice2) (cost=0.00..431.00 rows=1 width=12)
Hash Key: ('test1'::text), test1.id
-> GroupAggregate (cost=0.00..431.00 rows=1 width=12)
Group Key: ('test1'::text), test1.id
-> Sort (cost=0.00..431.00 rows=1 width=12)
Sort Key: ('test1'::text), test1.id
-> Limit (cost=0.00..431.00 rows=1 width=12)
-> Result (cost=0.00..431.00 rows=1 width=12)
-> Gather Motion 3:1 (slice3; segments: 3) (cost=0.00..431.00 rows=1 width=4)
-> Seq Scan on test1 (cost=0.00..431.00 rows=1 width=4)
-> HashAggregate (cost=0.00..553.64 rows=334 width=12)
Group Key: ('test2'::text), test2.id
-> Redistribute Motion 3:3 (slice4; segments: 3) (cost=0.00..553.56 rows=334 width=12)
Hash Key: ('test2'::text), test2.id
-> Streaming HashAggregate (cost=0.00..553.55 rows=334 width=12)
Group Key: 'test2'::text, test2.id
-> Result (cost=0.00..471.53 rows=333334 width=12)
-> Redistribute Motion 1:3 (slice5) (cost=0.00..467.53 rows=333334 width=4)
-> Foreign Scan on test2 (cost=0.00..449.70 rows=1000000 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(24 rows)
--
-- Test pulling up distribution key expression, when the different branches
-- of a UNION ALL have different typmods.
--
create table pullup_distkey_test(
a character varying,
b character varying(30)
) distributed by (b);
insert into pullup_distkey_test values ('foo', 'bar');
with base as
(
select a, b from pullup_distkey_test
union all
select 'xx' as a, 'bar' as b
)
select a from base
union all
select a from base where a = 'foo';
a
-----
foo
xx
foo
(3 rows)
--
-- Test union all two replicated tables with different numsegments
--
create table rep2(c1 int, c2 int) distributed replicated;
create table rep3(c1 int, c2 int) distributed replicated;
set allow_system_table_mods = on;
update gp_distribution_policy set numsegments = 2
where localoid = 'rep2'::regclass;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables
select localoid::regclass, policytype, numsegments
from gp_distribution_policy
where localoid::regclass in ('rep2', 'rep3');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables
localoid | policytype | numsegments
----------+------------+-------------
rep3 | r | 3
rep2 | r | 2
(2 rows)
explain select * from rep2 union all select * from rep3;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Unknown error: Partially Distributed Data
QUERY PLAN
---------------------------------------------------------------------------------------
Gather Motion 1:1 (slice1; segments: 1) (cost=1922.00..1922.00 rows=172200 width=8)
-> Append (cost=0.00..1922.00 rows=172200 width=8)
-> Seq Scan on rep2 (cost=0.00..961.00 rows=86100 width=8)
-> Seq Scan on rep3 (cost=0.00..961.00 rows=86100 width=8)
Optimizer: Postgres query optimizer
(5 rows)
select * from rep2 union all select * from rep3;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Unknown error: Partially Distributed Data
c1 | c2
----+----
(0 rows)
reset allow_system_table_mods;
drop table rep2;
drop table rep3;
--
-- Setup
--
--start_ignore
DROP TABLE IF EXISTS T_a1 CASCADE;
NOTICE: table "t_a1" does not exist, skipping
DROP TABLE IF EXISTS T_b2 CASCADE;
NOTICE: table "t_b2" does not exist, skipping
DROP TABLE IF EXISTS T_random CASCADE;
NOTICE: table "t_random" does not exist, skipping
--end_ignore
CREATE TABLE T_a1 (a1 int, a2 int) DISTRIBUTED BY(a1);
INSERT INTO T_a1 SELECT i, i%5 from generate_series(1,10) i;
CREATE TABLE T_b2 (b1 int, b2 int) DISTRIBUTED BY(b2);
INSERT INTO T_b2 SELECT i, i%5 from generate_series(1,20) i;
CREATE TABLE T_random (c1 int, c2 int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INSERT INTO T_random SELECT i, i%5 from generate_series(1,30) i;
--start_ignore
create language plpython3u;
--end_ignore
create or replace function count_operator(query text, operator text) returns int as
$$
rv = plpy.execute('EXPLAIN ' + query)
search_text = operator
result = 0
for i in range(len(rv)):
cur_line = rv[i]['QUERY PLAN']
if search_text.lower() in cur_line.lower():
result = result+1
return result
$$
language plpython3u;
--
-- N-ary UNION ALL results
--
with T_constant (d1, d2) as(
SELECT 100, 100
UNION ALL SELECT 200, 200
UNION ALL SELECT 300, 300)
(select a1 from T_a1)
UNION ALL
(select b1 from T_b2)
UNION ALL
(select c1 from T_random)
UNION ALL
(select d1 from T_constant)
order by 1;
a1
-----
1
1
1
2
2
2
3
3
3
4
4
4
5
5
5
6
6
6
7
7
7
8
8
8
9
9
9
10
10
10
11
11
12
12
13
13
14
14
15
15
16
16
17
17
18
18
19
19
20
20
21
22
23
24
25
26
27
28
29
30
100
200
300
(63 rows)
with T_constant (d1, d2) as(
SELECT 100, 100
UNION ALL SELECT 200, 200
UNION ALL SELECT 300, 300)
(select b1 from T_b2)
UNION ALL
(select a1 from T_a1)
UNION ALL
(select c1 from T_random)
UNION ALL
(select d1 from T_constant)
order by 1;
b1
-----
1
1
1
2
2
2
3
3
3
4
4
4
5
5
5
6
6
6
7
7
7
8
8
8
9
9
9
10
10
10
11
11
12
12
13
13
14
14
15
15
16
16
17
17
18
18
19
19
20
20
21
22
23
24
25
26
27
28
29
30
100
200
300
(63 rows)
with T_constant (d1, d2) as(
SELECT 100, 100
UNION ALL SELECT 200, 200
UNION ALL SELECT 300, 300)
(select c1 from T_random)
UNION ALL
(select a1 from T_a1)
UNION ALL
(select b1 from T_b2)
UNION ALL
(select d1 from T_constant)
order by 1;
c1
-----
1
1
1
2
2
2
3
3
3
4
4
4
5
5
5
6
6
6
7
7
7
8
8
8
9
9
9
10
10
10
11
11
12
12
13
13
14
14
15
15
16
16
17
17
18
18
19
19
20
20
21
22
23
24
25
26
27
28
29
30
100
200
300
(63 rows)
with T_constant (d1, d2) as(
SELECT 100, 100
UNION ALL SELECT 200, 200
UNION ALL SELECT 300, 300)
(select d1 from T_constant)
UNION ALL
(select c1 from T_random)
UNION ALL
(select a1 from T_a1)
UNION ALL
(select b1 from T_b2)
order by 1;
d1
-----
1
1
1
2
2
2
3
3
3
4
4
4
5
5
5
6
6
6
7
7
7
8
8
8
9
9
9
10
10
10
11
11
12
12
13
13
14
14
15
15
16
16
17
17
18
18
19
19
20
20
21
22
23
24
25
26
27
28
29
30
100
200
300
(63 rows)
--
-- N-ary UNION ALL explain
--
select count_operator('
with T_constant (d1, d2) as(
SELECT 100, 100
UNION ALL SELECT 200, 200
UNION ALL SELECT 300, 300)
(select a1 from T_a1)
UNION ALL
(select b1 from T_b2)
UNION ALL
(select c1 from T_random)
UNION ALL
(select d1 from T_constant)
order by 1;'
, 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
2
(1 row)
select count_operator('
with T_constant (d1, d2) as(
SELECT 100, 100
UNION ALL SELECT 200, 200
UNION ALL SELECT 300, 300)
(select b1 from T_b2)
UNION ALL
(select a1 from T_a1)
UNION ALL
(select c1 from T_random)
UNION ALL
(select d1 from T_constant)
order by 1;'
, 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
2
(1 row)
select count_operator('
with T_constant (d1, d2) as(
SELECT 100, 100
UNION ALL SELECT 200, 200
UNION ALL SELECT 300, 300)
(select c1 from T_random)
UNION ALL
(select a1 from T_a1)
UNION ALL
(select b1 from T_b2)
UNION ALL
(select d1 from T_constant)
order by 1;'
, 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
2
(1 row)
select count_operator('
with T_constant (d1, d2) as(
SELECT 100, 100
UNION ALL SELECT 200, 200
UNION ALL SELECT 300, 300)
(select d1 from T_constant)
UNION ALL
(select c1 from T_random)
UNION ALL
(select a1 from T_a1)
UNION ALL
(select b1 from T_b2)
order by 1;'
, 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
2
(1 row)
--
-- N-ary UNION results
--
with T_constant (d1, d2) as(
SELECT 100, 100
UNION SELECT 200, 200
UNION SELECT 300, 300)
(select a1 from T_a1)
UNION
(select b1 from T_b2)
UNION
(select c1 from T_random)
UNION
(select d1 from T_constant)
order by 1;
a1
-----
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
100
200
300
(33 rows)
with T_constant (d1, d2) as(
SELECT 100, 100
UNION SELECT 200, 200
UNION SELECT 300, 300)
(select b1 from T_b2)
UNION
(select a1 from T_a1)
UNION
(select c1 from T_random)
UNION ALL
(select d1 from T_constant)
order by 1;
b1
-----
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
100
200
300
(33 rows)
with T_constant (d1, d2) as(
SELECT 100, 100
UNION SELECT 200, 200
UNION SELECT 300, 300)
(select c1 from T_random)
UNION
(select a1 from T_a1)
UNION
(select b1 from T_b2)
UNION ALL
(select d1 from T_constant)
order by 1;
c1
-----
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
100
200
300
(33 rows)
with T_constant (d1, d2) as(
SELECT 100, 100
UNION SELECT 200, 200
UNION SELECT 300, 300)
(select d1 from T_constant)
UNION ALL
(select c1 from T_random)
UNION
(select a1 from T_a1)
UNION
(select b1 from T_b2)
order by 1;
d1
-----
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
100
200
300
(33 rows)
--
-- N-ary UNION explain
--
select count_operator('
with T_constant (d1, d2) as(
SELECT 100, 100
UNION SELECT 200, 200
UNION SELECT 300, 300)
(select a1 from T_a1)
UNION
(select b1 from T_b2)
UNION
(select c1 from T_random)
UNION
(select d1 from T_constant)
order by 1;'
, 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
2
(1 row)
select count_operator('
with T_constant (d1, d2) as(
SELECT 100, 100
UNION SELECT 200, 200
UNION SELECT 300, 300)
(select b1 from T_b2)
UNION
(select a1 from T_a1)
UNION
(select c1 from T_random)
UNION
(select d1 from T_constant)
order by 1;'
, 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
2
(1 row)
select count_operator('
with T_constant (d1, d2) as(
SELECT 100, 100
UNION SELECT 200, 200
UNION SELECT 300, 300)
(select c1 from T_random)
UNION
(select a1 from T_a1)
UNION
(select b1 from T_b2)
UNION
(select d1 from T_constant)
order by 1;'
, 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
2
(1 row)
select count_operator('
with T_constant (d1, d2) as(
SELECT 100, 100
UNION SELECT 200, 200
UNION SELECT 300, 300)
(select d1 from T_constant)
UNION
(select c1 from T_random)
UNION
(select a1 from T_a1)
UNION
(select b1 from T_b2)
order by 1;'
, 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
2
(1 row)
--
-- Binary UNION ALL results
--
(select a1 from T_a1) UNION ALL (select b1 from T_b2) order by 1;
a1
----
1
1
2
2
3
3
4
4
5
5
6
6
7
7
8
8
9
9
10
10
11
12
13
14
15
16
17
18
19
20
(30 rows)
(select b1 from T_b2) UNION ALL (select a1 from T_a1) order by 1;
b1
----
1
1
2
2
3
3
4
4
5
5
6
6
7
7
8
8
9
9
10
10
11
12
13
14
15
16
17
18
19
20
(30 rows)
(select a1 from T_a1) UNION ALL (select c1 from T_random) order by 1;
a1
----
1
1
2
2
3
3
4
4
5
5
6
6
7
7
8
8
9
9
10
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
(40 rows)
(select c1 from T_random) UNION ALL (select a1 from T_a1) order by 1;
c1
----
1
1
2
2
3
3
4
4
5
5
6
6
7
7
8
8
9
9
10
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
(40 rows)
(select * from T_a1) UNION ALL (select * from T_b2) order by 1;
a1 | a2
----+----
1 | 1
1 | 1
2 | 2
2 | 2
3 | 3
3 | 3
4 | 4
4 | 4
5 | 0
5 | 0
6 | 1
6 | 1
7 | 2
7 | 2
8 | 3
8 | 3
9 | 4
9 | 4
10 | 0
10 | 0
11 | 1
12 | 2
13 | 3
14 | 4
15 | 0
16 | 1
17 | 2
18 | 3
19 | 4
20 | 0
(30 rows)
(select * from T_a1) UNION ALL (select * from T_random) order by 1;
a1 | a2
----+----
1 | 1
1 | 1
2 | 2
2 | 2
3 | 3
3 | 3
4 | 4
4 | 4
5 | 0
5 | 0
6 | 1
6 | 1
7 | 2
7 | 2
8 | 3
8 | 3
9 | 4
9 | 4
10 | 0
10 | 0
11 | 1
12 | 2
13 | 3
14 | 4
15 | 0
16 | 1
17 | 2
18 | 3
19 | 4
20 | 0
21 | 1
22 | 2
23 | 3
24 | 4
25 | 0
26 | 1
27 | 2
28 | 3
29 | 4
30 | 0
(40 rows)
(select * from T_b2) UNION ALL (select * from T_random) order by 1;
b1 | b2
----+----
1 | 1
1 | 1
2 | 2
2 | 2
3 | 3
3 | 3
4 | 4
4 | 4
5 | 0
5 | 0
6 | 1
6 | 1
7 | 2
7 | 2
8 | 3
8 | 3
9 | 4
9 | 4
10 | 0
10 | 0
11 | 1
11 | 1
12 | 2
12 | 2
13 | 3
13 | 3
14 | 4
14 | 4
15 | 0
15 | 0
16 | 1
16 | 1
17 | 2
17 | 2
18 | 3
18 | 3
19 | 4
19 | 4
20 | 0
20 | 0
21 | 1
22 | 2
23 | 3
24 | 4
25 | 0
26 | 1
27 | 2
28 | 3
29 | 4
30 | 0
(50 rows)
with T_constant (d1, d2) as(
SELECT 100, 100
UNION ALL SELECT 200, 200
UNION ALL SELECT 300, 300)
(select a1 from T_a1) UNION ALL (select d1 from T_constant) order by 1;
a1
-----
1
2
3
4
5
6
7
8
9
10
100
200
300
(13 rows)
with T_constant (d1, d2) as(
SELECT 100, 100
UNION ALL SELECT 200, 200
UNION ALL SELECT 300, 300)
(select d1 from T_constant) UNION ALL (select a1 from T_a1) order by 1;
d1
-----
1
2
3
4
5
6
7
8
9
10
100
200
300
(13 rows)
with T_constant (d1, d2) as(
SELECT 100, 100
UNION ALL SELECT 200, 200
UNION ALL SELECT 300, 300)
(select c1 from T_random) UNION ALL (select d1 from T_constant) order by 1;
c1
-----
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
100
200
300
(33 rows)
with T_constant (d1, d2) as(
SELECT 100, 100
UNION ALL SELECT 200, 200
UNION ALL SELECT 300, 300)
(select d1 from T_constant) UNION ALL (select c1 from T_random) order by 1;
d1
-----
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
100
200
300
(33 rows)
--
-- Binary UNION ALL explain
--
select count_operator('(select a1 from T_a1) UNION ALL (select b1 from T_b2) order by 1;', 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
1
(1 row)
select count_operator('(select b1 from T_b2) UNION ALL (select a1 from T_a1) order by 1;', 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
1
(1 row)
select count_operator('(select a1 from T_a1) UNION ALL (select c1 from T_random) order by 1;', 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
1
(1 row)
select count_operator('(select c1 from T_random) UNION ALL (select a1 from T_a1) order by 1;', 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
1
(1 row)
select count_operator('(select * from T_a1) UNION ALL (select * from T_b2) order by 1;', 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
1
(1 row)
select count_operator('(select * from T_a1) UNION ALL (select * from T_random) order by 1;', 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
1
(1 row)
select count_operator('(select * from T_b2) UNION ALL (select * from T_random) order by 1;', 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
1
(1 row)
select count_operator('
with T_constant (d1, d2) as(
SELECT 100, 100
UNION ALL SELECT 200, 200
UNION ALL SELECT 300, 300)
(select a1 from T_a1) UNION ALL (select d1 from T_constant) order by 1;', 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
2
(1 row)
select count_operator('
with T_constant (d1, d2) as(
SELECT 100, 100
UNION ALL SELECT 200, 200
UNION ALL SELECT 300, 300)
(select d1 from T_constant) UNION ALL (select a1 from T_a1) order by 1;', 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
2
(1 row)
select count_operator('
with T_constant (d1, d2) as(
SELECT 100, 100
UNION ALL SELECT 200, 200
UNION ALL SELECT 300, 300)
(select c1 from T_random) UNION ALL (select d1 from T_constant) order by 1;', 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
2
(1 row)
select count_operator('with T_constant (d1, d2) as(
SELECT 100, 100
UNION ALL SELECT 200, 200
UNION ALL SELECT 300, 300)
(select d1 from T_constant) UNION ALL (select c1 from T_random) order by 1;', 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
2
(1 row)
--
-- Binary UNION results
--
(select a1 from T_a1) UNION (select b1 from T_b2) order by 1;
a1
----
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
(20 rows)
(select b1 from T_b2) UNION (select a1 from T_a1) order by 1;
b1
----
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
(20 rows)
(select a1 from T_a1) UNION (select c1 from T_random) order by 1;
a1
----
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
(30 rows)
(select c1 from T_random) UNION (select a1 from T_a1) order by 1;
c1
----
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
(30 rows)
(select * from T_a1) UNION (select * from T_b2) order by 1;
a1 | a2
----+----
1 | 1
2 | 2
3 | 3
4 | 4
5 | 0
6 | 1
7 | 2
8 | 3
9 | 4
10 | 0
11 | 1
12 | 2
13 | 3
14 | 4
15 | 0
16 | 1
17 | 2
18 | 3
19 | 4
20 | 0
(20 rows)
(select * from T_a1) UNION (select * from T_random) order by 1;
a1 | a2
----+----
1 | 1
2 | 2
3 | 3
4 | 4
5 | 0
6 | 1
7 | 2
8 | 3
9 | 4
10 | 0
11 | 1
12 | 2
13 | 3
14 | 4
15 | 0
16 | 1
17 | 2
18 | 3
19 | 4
20 | 0
21 | 1
22 | 2
23 | 3
24 | 4
25 | 0
26 | 1
27 | 2
28 | 3
29 | 4
30 | 0
(30 rows)
(select * from T_b2) UNION (select * from T_random) order by 1;
b1 | b2
----+----
1 | 1
2 | 2
3 | 3
4 | 4
5 | 0
6 | 1
7 | 2
8 | 3
9 | 4
10 | 0
11 | 1
12 | 2
13 | 3
14 | 4
15 | 0
16 | 1
17 | 2
18 | 3
19 | 4
20 | 0
21 | 1
22 | 2
23 | 3
24 | 4
25 | 0
26 | 1
27 | 2
28 | 3
29 | 4
30 | 0
(30 rows)
with T_constant (d1, d2) as(
SELECT 100, 100
UNION SELECT 200, 200
UNION SELECT 300, 300)
(select a1 from T_a1) UNION (select d1 from T_constant) order by 1;
a1
-----
1
2
3
4
5
6
7
8
9
10
100
200
300
(13 rows)
with T_constant (d1, d2) as(
SELECT 100, 100
UNION SELECT 200, 200
UNION SELECT 300, 300)
(select d1 from T_constant) UNION (select a1 from T_a1) order by 1;
d1
-----
1
2
3
4
5
6
7
8
9
10
100
200
300
(13 rows)
with T_constant (d1, d2) as(
SELECT 100, 100
UNION SELECT 200, 200
UNION SELECT 300, 300)
(select c1 from T_random) UNION (select d1 from T_constant) order by 1;
c1
-----
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
100
200
300
(33 rows)
with T_constant (d1, d2) as(
SELECT 100, 100
UNION SELECT 200, 200
UNION SELECT 300, 300)
(select d1 from T_constant) UNION (select c1 from T_random) order by 1;
d1
-----
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
100
200
300
(33 rows)
--
-- Binary UNION explain
--
select count_operator('(select a1 from T_a1) UNION (select b1 from T_b2) order by 1;', 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
1
(1 row)
select count_operator('(select b1 from T_b2) UNION (select a1 from T_a1) order by 1;', 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
1
(1 row)
select count_operator('(select a1 from T_a1) UNION (select c1 from T_random) order by 1;', 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
1
(1 row)
select count_operator('(select c1 from T_random) UNION (select a1 from T_a1) order by 1;', 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
1
(1 row)
select count_operator('(select * from T_a1) UNION (select * from T_b2) order by 1;', 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
1
(1 row)
select count_operator('(select * from T_a1) UNION (select * from T_random) order by 1;', 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
1
(1 row)
select count_operator('(select * from T_b2) UNION (select * from T_random) order by 1;', 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
1
(1 row)
select count_operator('
with T_constant (d1, d2) as(
SELECT 100, 100
UNION SELECT 200, 200
UNION SELECT 300, 300)
(select a1 from T_a1) UNION (select d1 from T_constant) order by 1;', 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
2
(1 row)
select count_operator('
with T_constant (d1, d2) as(
SELECT 100, 100
UNION SELECT 200, 200
UNION SELECT 300, 300)
(select d1 from T_constant) UNION (select a1 from T_a1) order by 1;', 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
2
(1 row)
select count_operator('
with T_constant (d1, d2) as(
SELECT 100, 100
UNION SELECT 200, 200
UNION SELECT 300, 300)
(select c1 from T_random) UNION (select d1 from T_constant) order by 1;', 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
2
(1 row)
select count_operator('with T_constant (d1, d2) as(
SELECT 100, 100
UNION SELECT 200, 200
UNION SELECT 300, 300)
(select d1 from T_constant) UNION (select c1 from T_random) order by 1;', 'APPEND');
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
count_operator
----------------
2
(1 row)
CREATE TABLE t1_setop(a int) DISTRIBUTED BY (a);
CREATE TABLE t2_setop(a int) DISTRIBUTED BY (a);
INSERT INTO t1_setop VALUES (1), (2), (3);
INSERT INTO t2_setop VALUES (3), (4), (5);
(SELECT a FROM t1_setop EXCEPT SELECT a FROM t2_setop ORDER BY a)
UNION
(SELECT a FROM t2_setop EXCEPT SELECT a FROM t1_setop ORDER BY a)
ORDER BY a;
a
---
1
2
4
5
(4 rows)
create table t1_ncols(a int, b int, c text, d date) distributed by (a);
create table t2_ncols(a smallint, b bigint, c varchar(20), d date) distributed by (c, b)
partition by range (a) (start (0) end (8) every (4));
create view v1_ncols(id, a, b, c, d) as select 1,* from t1_ncols union all select 2,* from t2_ncols;
insert into t1_ncols values (1, 11, 'one', '2001-01-01');
insert into t2_ncols values (2, 22, 'two', '2002-02-02');
insert into t2_ncols values (4, 44, 'four','2004-04-04');
select b from t1_ncols union all select a from t2_ncols;
b
----
4
2
11
(3 rows)
select a+100, b, d from t1_ncols union select b, a+200, d from t2_ncols order by 1;
?column? | b | d
----------+-----+------------
22 | 202 | 02-02-2002
44 | 204 | 04-04-2004
101 | 11 | 01-01-2001
(3 rows)
select c, a from v1_ncols;
c | a
------+---
one | 1
four | 4
two | 2
(3 rows)
with cte1(aa, b, c, d) as (select a*100, b, c, d from t1_ncols union select * from t2_ncols)
select x.aa/100 aaa, x.c, y.c from cte1 x join cte1 y on x.aa=y.aa;
aaa | c | c
-----+------+------
0 | two | two
0 | four | four
1 | one | one
(3 rows)
select from t2_ncols union select * from t2_ncols;
ERROR: each UNION query must have the same number of columns
LINE 1: select from t2_ncols union select * from t2_ncols;
^
-- Test the result of union of 2 tables distributed on different number of segments
-- start_ignore
drop schema if exists union_schema CASCADE;
NOTICE: schema "union_schema" does not exist, skipping
-- end_ignore
create schema union_schema;
create table union_schema.t1(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table union_schema.t2(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table union_schema.t3(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
set allow_system_table_mods = on;
update gp_distribution_policy set numsegments = 1
where localoid = 'union_schema.t1'::regclass::oid;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables
update gp_distribution_policy set numsegments = 2
where localoid = 'union_schema.t2'::regclass::oid;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables
select relname, policytype, numsegments, distkey
from pg_class, gp_distribution_policy, pg_namespace ns
where pg_class.oid = localoid and relnamespace = ns.oid
and nspname = 'union_schema'
and relname in ('t1', 't2', 't3')
order by relname;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation
relname | policytype | numsegments | distkey
---------+------------+-------------+---------
t1 | p | 1 | 1
t2 | p | 2 | 1
t3 | p | 3 | 1
(3 rows)
insert into union_schema.t1 select i, i from generate_series(1,10)i;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Unknown error: Partially Distributed Data
insert into union_schema.t2 select i, i from generate_series(1,20)i;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Unknown error: Partially Distributed Data
analyze union_schema.t1;
analyze union_schema.t2;
explain select * from union_schema.t1 join union_schema.t2
on union_schema.t1.a = union_schema.t2.b;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Unknown error: Partially Distributed Data
QUERY PLAN
-----------------------------------------------------------------------------------------------
Gather Motion 1:1 (slice1; segments: 1) (cost=1.23..3.00 rows=10 width=16)
-> Hash Join (cost=1.23..2.80 rows=10 width=16)
Hash Cond: (t2.b = t1.a)
-> Redistribute Motion 2:1 (slice2; segments: 2) (cost=0.00..1.40 rows=20 width=8)
Hash Key: t2.b
-> Seq Scan on t2 (cost=0.00..1.10 rows=10 width=8)
-> Hash (cost=1.10..1.10 rows=10 width=8)
-> Seq Scan on t1 (cost=0.00..1.10 rows=10 width=8)
Optimizer: Postgres query optimizer
(9 rows)
explain select union_schema.t1.a, union_schema.t2.b
from union_schema.t1 join union_schema.t2
on union_schema.t1.a = union_schema.t2.b
union all
select * from union_schema.t3;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Unknown error: Partially Distributed Data
QUERY PLAN
-----------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=1.23..1472.30 rows=86130 width=8)
-> Append (cost=1.23..323.90 rows=28710 width=8)
-> Hash Join (cost=1.23..2.80 rows=10 width=8)
Hash Cond: (t2.b = t1.a)
-> Redistribute Motion 2:3 (slice2; segments: 2) (cost=0.00..1.40 rows=20 width=4)
Hash Key: t2.b
Hash Module: 1
-> Seq Scan on t2 (cost=0.00..1.10 rows=10 width=4)
-> Hash (cost=1.10..1.10 rows=10 width=4)
-> Seq Scan on t1 (cost=0.00..1.10 rows=10 width=4)
-> Seq Scan on t3 (cost=0.00..321.00 rows=28700 width=8)
Optimizer: Postgres query optimizer
(12 rows)
select * from union_schema.t1 join union_schema.t2
on union_schema.t1.a = union_schema.t2.b;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Unknown error: Partially Distributed Data
a | b | a | b
----+----+----+----
2 | 2 | 2 | 2
3 | 3 | 3 | 3
4 | 4 | 4 | 4
6 | 6 | 6 | 6
7 | 7 | 7 | 7
8 | 8 | 8 | 8
9 | 9 | 9 | 9
10 | 10 | 10 | 10
1 | 1 | 1 | 1
5 | 5 | 5 | 5
(10 rows)
select union_schema.t1.a, union_schema.t2.b
from union_schema.t1 join union_schema.t2
on union_schema.t1.a = union_schema.t2.b
union all
select * from union_schema.t3;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Unknown error: Partially Distributed Data
a | b
----+----
2 | 2
3 | 3
4 | 4
6 | 6
7 | 7
8 | 8
9 | 9
10 | 10
1 | 1
5 | 5
(10 rows)
truncate union_schema.t1, union_schema.t2;
insert into union_schema.t1 select i, i from generate_series(1,20)i;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Unknown error: Partially Distributed Data
insert into union_schema.t2 select i, i from generate_series(1,10)i;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Unknown error: Partially Distributed Data
analyze union_schema.t1;
analyze union_schema.t2;
explain select * from union_schema.t1 join union_schema.t2
on union_schema.t1.a = union_schema.t2.b;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Unknown error: Partially Distributed Data
QUERY PLAN
-----------------------------------------------------------------------------------------------------
Gather Motion 1:1 (slice1; segments: 1) (cost=1.32..2.90 rows=10 width=16)
-> Hash Join (cost=1.32..2.70 rows=10 width=16)
Hash Cond: (t1.a = t2.b)
-> Seq Scan on t1 (cost=0.00..1.20 rows=20 width=8)
-> Hash (cost=1.20..1.20 rows=10 width=8)
-> Redistribute Motion 2:1 (slice2; segments: 2) (cost=0.00..1.20 rows=10 width=8)
Hash Key: t2.b
-> Seq Scan on t2 (cost=0.00..1.05 rows=5 width=8)
Optimizer: Postgres query optimizer
(9 rows)
explain select union_schema.t1.a, union_schema.t2.b
from union_schema.t1 join union_schema.t2
on union_schema.t1.a = union_schema.t2.b
union all
select * from union_schema.t3;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Unknown error: Partially Distributed Data
QUERY PLAN
-----------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=1.32..1472.20 rows=86130 width=8)
-> Append (cost=1.32..323.80 rows=28710 width=8)
-> Hash Join (cost=1.32..2.70 rows=10 width=8)
Hash Cond: (t1.a = t2.b)
-> Seq Scan on t1 (cost=0.00..1.20 rows=20 width=4)
-> Hash (cost=1.20..1.20 rows=10 width=4)
-> Redistribute Motion 2:3 (slice2; segments: 2) (cost=0.00..1.20 rows=10 width=4)
Hash Key: t2.b
Hash Module: 1
-> Seq Scan on t2 (cost=0.00..1.05 rows=5 width=4)
-> Seq Scan on t3 (cost=0.00..321.00 rows=28700 width=8)
Optimizer: Postgres query optimizer
(12 rows)
select * from union_schema.t1 join union_schema.t2
on union_schema.t1.a = union_schema.t2.b;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Unknown error: Partially Distributed Data
a | b | a | b
----+----+----+----
1 | 1 | 1 | 1
2 | 2 | 2 | 2
3 | 3 | 3 | 3
4 | 4 | 4 | 4
5 | 5 | 5 | 5
6 | 6 | 6 | 6
7 | 7 | 7 | 7
8 | 8 | 8 | 8
9 | 9 | 9 | 9
10 | 10 | 10 | 10
(10 rows)
select union_schema.t1.a, union_schema.t2.b
from union_schema.t1 join union_schema.t2
on union_schema.t1.a = union_schema.t2.b
union all
select * from union_schema.t3;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Unknown error: Partially Distributed Data
a | b
----+----
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
6 | 6
7 | 7
8 | 8
9 | 9
10 | 10
(10 rows)
reset allow_system_table_mods;
-- The following tests demonstrate the plan alternative
-- where ORCA requests union all's outer child to become
-- a Non-Singleton. This alternative is at times costed
-- lower than aligning the inner child's distribution spec
-- with that delivered by the outer child.
--
-- Replicated ∪ Hashed
create table rep (a int) distributed replicated;
insert into rep select i from generate_series (1, 10) i;
create table dist (a int);
insert into dist select i from generate_series (1, 1000) i;
analyze dist;
analyze rep;
-- It's more cost-effective to apply a duplicate-sensitive
-- random motion (non-phyiscal) on a replicated table,
-- than a broadcast motion on a distributed table.
explain select a from rep union all select a from dist;
QUERY PLAN
---------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.03 rows=1010 width=4)
-> Append (cost=0.00..862.01 rows=337 width=4)
-> Result (cost=0.00..431.00 rows=4 width=4)
One-Time Filter: (gp_execution_segment() = 2)
-> Seq Scan on rep (cost=0.00..431.00 rows=10 width=4)
-> Seq Scan on dist (cost=0.00..431.01 rows=334 width=4)
Optimizer: GPORCA
(7 rows)
-- Universal ∪ Random
create table rand (a int) distributed randomly;
insert into rand select i from generate_series (1, 10000) i;
analyze rand;
-- It's more cost-effective to apply a duplicate-sensitive
-- random motion (non-physical) on a universal TVF, than a
-- gather motion on a randomly distributed table.
explain select i from generate_series(1,1000) i union all select a from rand;
QUERY PLAN
----------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.28 rows=11000 width=4)
-> Append (cost=0.00..431.12 rows=3667 width=4)
-> Result (cost=0.00..0.01 rows=334 width=4)
One-Time Filter: (gp_execution_segment() = 2)
-> Function Scan on generate_series (cost=0.00..0.00 rows=334 width=4)
-> Seq Scan on rand (cost=0.00..431.06 rows=3334 width=4)
Optimizer: GPORCA
(7 rows)
-------------------------------------------------------------------------------
--Test case to check parallel union all with 'json' type 1st column in project list
-------------------------------------------------------------------------------
set optimizer_parallel_union to on;
drop table if exists my_table;
NOTICE: table "my_table" does not exist, skipping
create table my_table ( id serial primary key, json_data json);
insert into my_table (json_data) values ('{"name": "Name1", "age": 10}');
insert into my_table (json_data) values ('{"name": "Name2", "age": 20}');
insert into my_table (json_data) values ('{"name": "Name3", "age": 30}');
insert into my_table (json_data) values ('{"name": "Name4", "age": 40}');
explain select json_data from my_table where json_data->>'age' = '30' union all select json_data from my_table where json_data->>'age' = '40' ;
QUERY PLAN
------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=8)
-> Append (cost=0.00..431.00 rows=1 width=8)
-> Result (cost=0.00..431.00 rows=1 width=8)
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=8)
Hash Key: my_table.id
-> Seq Scan on my_table (cost=0.00..431.00 rows=1 width=12)
Filter: ((json_data ->> 'age'::text) = '30'::text)
-> Result (cost=0.00..431.00 rows=1 width=8)
-> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..431.00 rows=1 width=8)
Hash Key: my_table_1.id
-> Seq Scan on my_table my_table_1 (cost=0.00..431.00 rows=1 width=12)
Filter: ((json_data ->> 'age'::text) = '40'::text)
Optimizer: GPORCA
(13 rows)
select json_data from my_table where json_data->>'age' = '30' union all select json_data from my_table where json_data->>'age' = '40' ;
json_data
------------------------------
{"name": "Name3", "age": 30}
{"name": "Name4", "age": 40}
(2 rows)
explain select json_data,id from my_table where json_data->>'age' = '30' union all select json_data,id from my_table where json_data->>'age' = '40' ;
QUERY PLAN
-------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=12)
-> Append (cost=0.00..431.00 rows=1 width=12)
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=12)
Hash Key: my_table.id
-> Seq Scan on my_table (cost=0.00..431.00 rows=1 width=12)
Filter: ((json_data ->> 'age'::text) = '30'::text)
-> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..431.00 rows=1 width=12)
Hash Key: my_table_1.id
-> Seq Scan on my_table my_table_1 (cost=0.00..431.00 rows=1 width=12)
Filter: ((json_data ->> 'age'::text) = '40'::text)
Optimizer: GPORCA
(11 rows)
select json_data,id from my_table where json_data->>'age' = '30' union all select json_data,id from my_table where json_data->>'age' = '40' ;
json_data | id
------------------------------+----
{"name": "Name3", "age": 30} | 3
{"name": "Name4", "age": 40} | 4
(2 rows)
set optimizer_parallel_union to off;
drop table if exists my_table;
--
-- Clean up
--
DROP TABLE IF EXISTS T_a1 CASCADE;
DROP TABLE IF EXISTS T_b2 CASCADE;
DROP TABLE IF EXISTS T_random CASCADE;
DROP VIEW IF EXISTS v1_ncols CASCADE;
DROP TABLE IF EXISTS t1_ncols CASCADE;
DROP TABLE IF EXISTS t2_ncols CASCADE;
DROP SCHEMA IF EXISTS union_schema CASCADE;
NOTICE: drop cascades to 3 other objects
DETAIL: drop cascades to table union_schema.t1
drop cascades to table union_schema.t2
drop cascades to table union_schema.t3