blob: 24ee61ff642e2349ea136fe83e41062e92f6f06a [file] [log] [blame]
-- start_matchsubs
-- m/ERROR: could not devise a plan \(cdbpath.c:.*\)/
-- s/ERROR: could not devise a plan \(cdbpath.c:.*\)/ERROR: could not devise a plan/
-- m/ERROR: could not devise a plan\. \(cdbpath.c:.*\)/
-- s/ERROR: could not devise a plan\. \(cdbpath.c:.*\)/ERROR: could not devise a plan \. /
-- end_matchsubs
--
-- Basic tests for replicated table
--
create schema rpt;
set search_path to rpt;
---------
-- INSERT
---------
create table foo (x int, y int) distributed replicated;
create table foo1(like foo) distributed replicated;
create table bar (like foo) distributed randomly;
create table bar1 (like foo) distributed by (x);
-- values --> replicated table
-- random partitioned table --> replicated table
-- hash partitioned table --> replicated table
-- singleQE --> replicated table
-- replicated --> replicated table
insert into bar values (1, 1), (3, 1);
insert into bar1 values (1, 1), (3, 1);
insert into foo1 values (1, 1), (3, 1);
insert into foo select * from bar;
insert into foo select * from bar1;
insert into foo select * from bar order by x limit 1;
insert into foo select * from foo;
select * from foo order by x;
x | y
---+---
1 | 1
1 | 1
1 | 1
1 | 1
1 | 1
1 | 1
3 | 1
3 | 1
3 | 1
3 | 1
(10 rows)
select bar.x, bar.y from bar, (select * from foo) as t1 order by 1,2;
x | y
---+---
1 | 1
1 | 1
1 | 1
1 | 1
1 | 1
1 | 1
1 | 1
1 | 1
1 | 1
1 | 1
3 | 1
3 | 1
3 | 1
3 | 1
3 | 1
3 | 1
3 | 1
3 | 1
3 | 1
3 | 1
(20 rows)
select bar.x, bar.y from bar, (select * from foo order by x limit 1) as t1 order by 1,2;
x | y
---+---
1 | 1
3 | 1
(2 rows)
truncate foo;
truncate foo1;
truncate bar;
truncate bar1;
-- replicated table --> random partitioned table
-- replicated table --> hash partitioned table
insert into foo values (1, 1), (3, 1);
insert into bar select * from foo order by x limit 1;
insert into bar1 select * from foo order by x limit 1;
select * from foo order by x;
x | y
---+---
1 | 1
3 | 1
(2 rows)
select * from bar order by x;
x | y
---+---
1 | 1
(1 row)
select * from bar1 order by x;
x | y
---+---
1 | 1
(1 row)
drop table if exists foo;
drop table if exists foo1;
drop table if exists bar;
drop table if exists bar1;
--
-- CREATE UNIQUE INDEX
--
-- create unique index on non-distributed key.
create table foo (x int, y int) distributed replicated;
create table bar (x int, y int) distributed randomly;
-- success
create unique index foo_idx on foo (y);
-- should fail
create unique index bar_idx on bar (y);
ERROR: UNIQUE and DISTRIBUTED RANDOMLY are incompatible
drop table if exists foo;
drop table if exists bar;
--
-- CREATE TABLE with both PRIMARY KEY and UNIQUE constraints
--
create table foo (id int primary key, name text unique) distributed replicated;
-- success
insert into foo values (1,'aaa');
insert into foo values (2,'bbb');
-- fail
insert into foo values (1,'ccc');
ERROR: duplicate key value violates unique constraint "foo_pkey" (seg0 192.168.99.102:25432 pid=22681)
DETAIL: Key (id)=(1) already exists.
insert into foo values (3,'aaa');
ERROR: duplicate key value violates unique constraint "foo_name_key" (seg2 192.168.99.102:25434 pid=22683)
DETAIL: Key (name)=(aaa) already exists.
drop table if exists foo;
--
-- CREATE TABLE
--
--
-- Like
CREATE TABLE parent (
name text,
age int4,
location point
) distributed replicated;
CREATE TABLE child (like parent) distributed replicated;
CREATE TABLE child1 (like parent) DISTRIBUTED BY (name);
CREATE TABLE child2 (like parent);
NOTICE: table doesn't have 'DISTRIBUTED BY' clause, defaulting to distribution columns from LIKE table
-- should be replicated table
\d child
Table "rpt.child"
Column | Type | Collation | Nullable | Default
----------+---------+-----------+----------+---------
name | text | | |
age | integer | | |
location | point | | |
Distributed Replicated
-- should distributed by name
\d child1
Table "rpt.child1"
Column | Type | Collation | Nullable | Default
----------+---------+-----------+----------+---------
name | text | | |
age | integer | | |
location | point | | |
Distributed by: (name)
-- should be replicated table
\d child2
Table "rpt.child2"
Column | Type | Collation | Nullable | Default
----------+---------+-----------+----------+---------
name | text | | |
age | integer | | |
location | point | | |
Distributed Replicated
drop table if exists parent;
drop table if exists child;
drop table if exists child1;
drop table if exists child2;
-- Inherits
CREATE TABLE parent_rep (
name text,
age int4,
location point
) distributed replicated;
CREATE TABLE parent_part (
name text,
age int4,
location point
) distributed by (name);
-- inherits from a replicated table, should fail
CREATE TABLE child (
salary int4,
manager name
) INHERITS (parent_rep);
ERROR: cannot inherit from replicated table "parent_rep" to create table "child"
DETAIL: An inheritance hierarchy cannot contain a mixture of distributed and non-distributed tables.
-- replicated table can not have parents, should fail
CREATE TABLE child (
salary int4,
manager name
) INHERITS (parent_part) DISTRIBUTED REPLICATED;
ERROR: INHERITS clause cannot be used with DISTRIBUTED REPLICATED clause
drop table if exists parent_rep;
drop table if exists parent_part;
drop table if exists child;
NOTICE: table "child" does not exist, skipping
--
-- CTAS
--
-- CTAS from generate_series
create table foo as select i as c1, i as c2
from generate_series(1,3) i distributed replicated;
-- CTAS from replicated table
create table bar as select * from foo distributed replicated;
select * from bar;
c1 | c2
----+----
1 | 1
2 | 2
3 | 3
(3 rows)
drop table if exists foo;
drop table if exists bar;
-- CTAS from partition table table
create table foo as select i as c1, i as c2
from generate_series(1,3) i;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'c1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table bar as select * from foo distributed replicated;
select * from bar;
c1 | c2
----+----
1 | 1
2 | 2
3 | 3
(3 rows)
drop table if exists foo;
drop table if exists bar;
-- CTAS from singleQE
create table foo as select i as c1, i as c2
from generate_series(1,3) i;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'c1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
select * from foo;
c1 | c2
----+----
1 | 1
2 | 2
3 | 3
(3 rows)
create table bar as select * from foo order by c1 limit 1 distributed replicated;
select * from bar;
c1 | c2
----+----
1 | 1
(1 row)
drop table if exists foo;
drop table if exists bar;
-- Create view can work
create table foo(x int, y int) distributed replicated;
insert into foo values(1,1);
create view v_foo as select * from foo;
select * from v_foo;
x | y
---+---
1 | 1
(1 row)
drop view v_foo;
drop table if exists foo;
---------
-- Alter
--------
-- Drop distributed key column
create table foo(x int, y int) distributed replicated;
create table bar(like foo) distributed by (x);
insert into foo values(1,1);
insert into bar values(1,1);
-- success
alter table foo drop column x;
-- fail
alter table bar drop column x;
NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy
drop table if exists foo;
drop table if exists foo1;
NOTICE: table "foo1" does not exist, skipping
drop table if exists bar;
drop table if exists bar1;
NOTICE: table "bar1" does not exist, skipping
-- Alter gp_distribution_policy
create table foo(x int, y int) distributed replicated;
create table foo1(x int, y int) distributed replicated;
create table bar(x int, y int) distributed by (x);
create table bar1(x int, y int) distributed randomly;
insert into foo select i,i from generate_series(1,10) i;
insert into foo1 select i,i from generate_series(1,10) i;
insert into bar select i,i from generate_series(1,10) i;
insert into bar1 select i,i from generate_series(1,10) i;
-- alter distribution policy of replicated table
alter table foo set distributed by (x);
alter table foo1 set distributed randomly;
-- alter a partitioned table to replicated table
alter table bar set distributed replicated;
alter table bar1 set distributed replicated;
-- verify the new policies
\d foo
Table "rpt.foo"
Column | Type | Collation | Nullable | Default
--------+---------+-----------+----------+---------
x | integer | | |
y | integer | | |
Distributed by: (x)
\d foo1
Table "rpt.foo1"
Column | Type | Collation | Nullable | Default
--------+---------+-----------+----------+---------
x | integer | | |
y | integer | | |
Distributed randomly
\d bar
Table "rpt.bar"
Column | Type | Collation | Nullable | Default
--------+---------+-----------+----------+---------
x | integer | | |
y | integer | | |
Distributed Replicated
\d bar1
Table "rpt.bar1"
Column | Type | Collation | Nullable | Default
--------+---------+-----------+----------+---------
x | integer | | |
y | integer | | |
Distributed Replicated
-- verify the reorganized data
select * from foo;
x | y
----+----
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
6 | 6
7 | 7
8 | 8
9 | 9
10 | 10
(10 rows)
select * from foo1;
x | y
----+----
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
6 | 6
7 | 7
8 | 8
9 | 9
10 | 10
(10 rows)
select * from bar;
x | y
----+----
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
6 | 6
7 | 7
8 | 8
9 | 9
10 | 10
(10 rows)
select * from bar1;
x | y
----+----
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
6 | 6
7 | 7
8 | 8
9 | 9
10 | 10
(10 rows)
-- alter back
alter table foo set distributed replicated;
alter table foo1 set distributed replicated;
alter table bar set distributed by (x);
alter table bar1 set distributed randomly;
-- verify the policies again
\d foo
Table "rpt.foo"
Column | Type | Collation | Nullable | Default
--------+---------+-----------+----------+---------
x | integer | | |
y | integer | | |
Distributed Replicated
\d foo1
Table "rpt.foo1"
Column | Type | Collation | Nullable | Default
--------+---------+-----------+----------+---------
x | integer | | |
y | integer | | |
Distributed Replicated
\d bar
Table "rpt.bar"
Column | Type | Collation | Nullable | Default
--------+---------+-----------+----------+---------
x | integer | | |
y | integer | | |
Distributed by: (x)
\d bar1
Table "rpt.bar1"
Column | Type | Collation | Nullable | Default
--------+---------+-----------+----------+---------
x | integer | | |
y | integer | | |
Distributed randomly
-- verify the reorganized data again
select * from foo;
x | y
----+----
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
6 | 6
7 | 7
8 | 8
9 | 9
10 | 10
(10 rows)
select * from foo1;
x | y
----+----
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
6 | 6
7 | 7
8 | 8
9 | 9
10 | 10
(10 rows)
select * from bar;
x | y
----+----
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
6 | 6
7 | 7
8 | 8
9 | 9
10 | 10
(10 rows)
select * from bar1;
x | y
----+----
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
6 | 6
7 | 7
8 | 8
9 | 9
10 | 10
(10 rows)
drop table if exists foo;
drop table if exists foo1;
drop table if exists bar;
drop table if exists bar1;
---------
-- UPDATE / DELETE
---------
create table foo(x int, y int) distributed replicated;
create table bar(x int, y int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into foo values (1, 1), (2, 1);
insert into bar values (1, 2), (2, 2);
update foo set y = 2 where y = 1;
select * from foo;
x | y
---+---
1 | 2
2 | 2
(2 rows)
update foo set y = 1 from bar where bar.y = foo.y;
select * from foo;
x | y
---+---
1 | 1
2 | 1
(2 rows)
delete from foo where y = 1;
select * from foo;
x | y
---+---
(0 rows)
-- Test replicate table within init plan
insert into foo values (1, 1), (2, 1);
select * from bar where exists (select * from foo);
x | y
---+---
2 | 2
1 | 2
(2 rows)
------
-- Test Current Of is disabled for replicated table
------
begin;
declare c1 cursor for select * from foo;
fetch 1 from c1;
x | y
---+---
1 | 1
(1 row)
delete from foo where current of c1;
ERROR: "foo" is not simply updatable
abort;
begin;
declare c1 cursor for select * from foo;
fetch 1 from c1;
x | y
---+---
1 | 1
(1 row)
update foo set y = 1 where current of c1;
ERROR: "foo" is not simply updatable
abort;
-----
-- Test updatable view works for replicated table
----
truncate foo;
truncate bar;
insert into foo values (1, 1);
insert into foo values (2, 2);
insert into bar values (1, 1);
create view v_foo as select * from foo where y = 1;
begin;
update v_foo set y = 2;
select * from gp_dist_random('foo');
x | y
---+---
2 | 2
1 | 2
2 | 2
1 | 2
2 | 2
1 | 2
(6 rows)
abort;
update v_foo set y = 3 from bar where bar.y = v_foo.y;
select * from gp_dist_random('foo');
x | y
---+---
2 | 2
1 | 3
2 | 2
1 | 3
2 | 2
1 | 3
(6 rows)
-- Test gp_segment_id for replicated table
-- gp_segment_id is ambiguous for replicated table, it's been disabled now.
create table baz (c1 int, c2 int) distributed replicated;
create table qux (c1 int, c2 int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
select gp_segment_id from baz;
ERROR: column "gp_segment_id" does not exist
LINE 1: select gp_segment_id from baz;
^
select xmin from baz;
ERROR: column "xmin" does not exist
LINE 1: select xmin from baz;
^
select xmax from baz;
ERROR: column "xmax" does not exist
LINE 1: select xmax from baz;
^
select ctid from baz;
ERROR: column "ctid" does not exist
LINE 1: select ctid from baz;
^
select * from baz where c2 = gp_segment_id;
ERROR: column "gp_segment_id" does not exist
LINE 1: select * from baz where c2 = gp_segment_id;
^
select * from baz, qux where baz.c1 = gp_segment_id;
c1 | c2 | c1 | c2
----+----+----+----
(0 rows)
update baz set c2 = gp_segment_id;
ERROR: column "gp_segment_id" does not exist
LINE 1: update baz set c2 = gp_segment_id;
^
update baz set c2 = 1 where gp_segment_id = 1;
ERROR: column "gp_segment_id" does not exist
LINE 1: update baz set c2 = 1 where gp_segment_id = 1;
^
update baz set c2 = 1 from qux where gp_segment_id = baz.c1;
insert into baz select i, i from generate_series(1, 1000) i;
vacuum baz;
vacuum full baz;
analyze baz;
-- Test dependencies check when alter table to replicated table
create view v_qux as select ctid from qux;
alter table qux set distributed replicated;
ERROR: cannot set distributed replicated because other object depend on its system columns
DETAIL: view v_qux depends on column ctid of table qux
HINT: system columns of replicated table will be exposed to users after altering, resolve dependencies first
drop view v_qux;
alter table qux set distributed replicated;
-- Test cursor for update also works for replicated table
create table cursor_update (c1 int, c2 int) distributed replicated;
insert into cursor_update select i, i from generate_series(1, 10) i;
begin;
declare c1 cursor for select * from cursor_update order by c2 for update;
fetch next from c1;
c1 | c2
----+----
1 | 1
(1 row)
end;
-- Test MinMax path on replicated table
create table minmaxtest (x int, y int) distributed replicated;
create index on minmaxtest (x);
insert into minmaxtest select generate_series(1, 10);
set enable_seqscan=off;
select min(x) from minmaxtest;
min
-----
1
(1 row)
-- Test replicated on partition table
-- should fail
CREATE TABLE foopart (a int4, b int4) DISTRIBUTED REPLICATED PARTITION BY RANGE (a) (START (1) END (10));
ERROR: PARTITION BY clause cannot be used with DISTRIBUTED REPLICATED clause
CREATE TABLE foopart (a int4, b int4) PARTITION BY RANGE (a) (START (1) END (10)) ;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
NOTICE: CREATE TABLE will create partition "foopart_1_prt_1" for table "foopart"
-- should fail
ALTER TABLE foopart SET DISTRIBUTED REPLICATED;
ERROR: can't set the distribution policy of a partition table to REPLICATED
ALTER TABLE foopart_1_prt_1 SET DISTRIBUTED REPLICATED;
ERROR: can't set the distribution policy of "foopart_1_prt_1"
HINT: Distribution policy can be set for an entire partitioned table, not for one of its leaf parts or an interior branch.
DROP TABLE foopart;
-- Test that replicated table can't inherit a parent table, and it also
-- can't be inherited by a child table.
-- 1. Replicated table can't inherit a parent table.
CREATE TABLE parent (t text) DISTRIBUTED BY (t);
-- This is not allowed: should fail
CREATE TABLE child () INHERITS (parent) DISTRIBUTED REPLICATED;
ERROR: INHERITS clause cannot be used with DISTRIBUTED REPLICATED clause
CREATE TABLE child (t text) DISTRIBUTED REPLICATED;
-- should fail
ALTER TABLE child INHERIT parent;
ERROR: Replicated table cannot inherit a parent
DROP TABLE child, parent;
-- 2. Replicated table can't be inherited
CREATE TABLE parent (t text) DISTRIBUTED REPLICATED;
-- should fail
CREATE TABLE child () INHERITS (parent) DISTRIBUTED REPLICATED;
ERROR: INHERITS clause cannot be used with DISTRIBUTED REPLICATED clause
CREATE TABLE child () INHERITS (parent) DISTRIBUTED BY (t);
ERROR: cannot inherit from replicated table "parent" to create table "child"
DETAIL: An inheritance hierarchy cannot contain a mixture of distributed and non-distributed tables.
CREATE TABLE child (t text) DISTRIBUTED REPLICATED;
ALTER TABLE child INHERIT parent;
ERROR: Replicated table cannot inherit a parent
CREATE TABLE child2(t text) DISTRIBUTED BY (t);
ALTER TABLE child2 INHERIT parent;
ERROR: Replicated table cannot be inherited
DROP TABLE child, child2, parent;
-- volatile replicated
-- General and segmentGeneral locus imply that if the corresponding
-- slice is executed in many different segments should provide the
-- same result data set. Thus, in some cases, General and segmentGeneral
-- can be treated like broadcast. But if the segmentGeneral and general
-- locus path contain volatile functions, they lose the property and
-- can only be treated as singleQE. The following cases are to check that
-- we correctly handle all these cases.
-- FIXME: ORCA does not consider this, we need to fix the cases when ORCA
-- consider this.
create table t_hashdist(a int, b int, c int) distributed by (a);
create table t_replicate_volatile(a int, b int, c int) distributed replicated;
---- pushed down filter
explain (costs off) select * from t_replicate_volatile, t_hashdist where t_replicate_volatile.a > random();
QUERY PLAN
----------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Nested Loop
Join Filter: true
-> Seq Scan on t_hashdist
-> Materialize
-> Seq Scan on t_replicate_volatile
Filter: ((a)::double precision > random())
Optimizer: Pivotal Optimizer (GPORCA)
(8 rows)
-- join qual
explain (costs off) select * from t_hashdist, t_replicate_volatile x, t_replicate_volatile y where x.a + y.a > random();
QUERY PLAN
-----------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Nested Loop
Join Filter: (((x.a + y.a))::double precision > random())
-> Nested Loop
Join Filter: true
-> Seq Scan on t_hashdist
-> Seq Scan on t_replicate_volatile y
-> Seq Scan on t_replicate_volatile x
Optimizer: Pivotal Optimizer (GPORCA)
(9 rows)
-- sublink & subquery
explain (costs off) select * from t_hashdist where a > All (select random() from t_replicate_volatile);
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on t_hashdist
Filter: (SubPlan 1)
SubPlan 1
-> Result
Filter: ((CASE WHEN (sum((CASE WHEN ((t_hashdist.a)::double precision <= random()) THEN 1 ELSE 0 END)) IS NULL) THEN true WHEN (sum((CASE WHEN (random() IS NULL) THEN 1 ELSE 0 END)) > '0'::bigint) THEN NULL::boolean WHEN ((t_hashdist.a)::double precision IS NULL) THEN NULL::boolean WHEN (sum((CASE WHEN ((t_hashdist.a)::double precision <= random()) THEN 1 ELSE 0 END)) = '0'::bigint) THEN true ELSE false END) = true)
-> Aggregate
-> Seq Scan on t_replicate_volatile
Optimizer: Pivotal Optimizer (GPORCA)
(9 rows)
explain (costs off) select * from t_hashdist where a in (select random()::int from t_replicate_volatile);
QUERY PLAN
------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Hash Semi Join
Hash Cond: (t_hashdist.a = ((random())::integer))
-> Seq Scan on t_hashdist
-> Hash
-> Redistribute Motion 1:3 (slice2; segments: 1)
Hash Key: ((random())::integer)
-> Seq Scan on t_replicate_volatile
Optimizer: Pivotal Optimizer (GPORCA)
(9 rows)
-- subplan
explain (costs off, verbose) select * from t_hashdist left join t_replicate_volatile on t_hashdist.a > any (select random() from t_replicate_volatile);
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: t_hashdist.a, t_hashdist.b, t_hashdist.c, t_replicate_volatile.a, t_replicate_volatile.b, t_replicate_volatile.c
-> Nested Loop Left Join
Output: t_hashdist.a, t_hashdist.b, t_hashdist.c, t_replicate_volatile.a, t_replicate_volatile.b, t_replicate_volatile.c
Join Filter: ((SubPlan 1))
-> Seq Scan on rpt.t_hashdist
Output: t_hashdist.a, t_hashdist.b, t_hashdist.c, (SubPlan 1)
SubPlan 1
-> Materialize
Output: (random())
-> Broadcast Motion 1:3 (slice2; segments: 1)
Output: (random())
-> Seq Scan on rpt.t_replicate_volatile t_replicate_volatile_1
Output: random()
-> Materialize
Output: t_replicate_volatile.a, t_replicate_volatile.b, t_replicate_volatile.c
-> Seq Scan on rpt.t_replicate_volatile
Output: t_replicate_volatile.a, t_replicate_volatile.b, t_replicate_volatile.c
Optimizer: Postgres query optimizer
Settings: enable_seqscan=off, optimizer=on
(20 rows)
-- targetlist
explain (costs off) select * from t_hashdist cross join (select random () from t_replicate_volatile)x;
QUERY PLAN
----------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Nested Loop
Join Filter: true
-> Seq Scan on t_hashdist
-> Materialize
-> Seq Scan on t_replicate_volatile
Optimizer: Pivotal Optimizer (GPORCA)
(7 rows)
explain (costs off) select * from t_hashdist cross join (select a, sum(random()) from t_replicate_volatile group by a) x;
QUERY PLAN
----------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Nested Loop
Join Filter: true
-> Seq Scan on t_hashdist
-> GroupAggregate
Group Key: t_replicate_volatile.a
-> Sort
Sort Key: t_replicate_volatile.a
-> Seq Scan on t_replicate_volatile
Optimizer: Pivotal Optimizer (GPORCA)
(10 rows)
explain (costs off) select * from t_hashdist cross join (select random() as k, sum(a) from t_replicate_volatile group by k) x;
QUERY PLAN
------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Nested Loop
Join Filter: true
-> HashAggregate
Group Key: (random())
-> Redistribute Motion 1:3 (slice3; segments: 1)
Hash Key: (random())
-> Seq Scan on t_replicate_volatile
-> Materialize
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Seq Scan on t_hashdist
Optimizer: Pivotal Optimizer (GPORCA)
(12 rows)
explain (costs off) select * from t_hashdist cross join (select a, sum(b) as s from t_replicate_volatile group by a having sum(b) > random() order by a) x ;
QUERY PLAN
--------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Nested Loop
Join Filter: true
-> Seq Scan on t_hashdist
-> Materialize
-> Result
Filter: (((sum(t_replicate_volatile.b)))::double precision > random())
-> GroupAggregate
Group Key: t_replicate_volatile.a
-> Sort
Sort Key: t_replicate_volatile.a
-> Seq Scan on t_replicate_volatile
Optimizer: Pivotal Optimizer (GPORCA)
(13 rows)
-- insert
explain (costs off) insert into t_replicate_volatile select random() from t_replicate_volatile;
QUERY PLAN
---------------------------------------------------------------------------
Insert on t_replicate_volatile
-> Result
-> Broadcast Motion 1:3 (slice1; segments: 1)
-> Seq Scan on t_replicate_volatile t_replicate_volatile_1
Optimizer: Pivotal Optimizer (GPORCA)
(5 rows)
explain (costs off) insert into t_replicate_volatile select random(), a, a from generate_series(1, 10) a;
QUERY PLAN
----------------------------------------------------
Insert on t_replicate_volatile
-> Result
-> Broadcast Motion 1:3 (slice1)
-> Function Scan on generate_series
Optimizer: Pivotal Optimizer (GPORCA)
(5 rows)
create sequence seq_for_insert_replicated_table;
explain (costs off) insert into t_replicate_volatile select nextval('seq_for_insert_replicated_table');
QUERY PLAN
--------------------------------------------
Insert on t_replicate_volatile
-> Result
-> Broadcast Motion 1:3 (slice1)
-> Result
Optimizer: Pivotal Optimizer (GPORCA)
(5 rows)
explain (costs off) select a from t_replicate_volatile union all select * from nextval('seq_for_insert_replicated_table');
QUERY PLAN
-------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Append
-> Result
One-Time Filter: (gp_execution_segment() = 0)
-> Seq Scan on t_replicate_volatile
-> Redistribute Motion 1:3 (slice2)
-> Function Scan on nextval
Optimizer: GPORCA
(8 rows)
-- CTAS
explain (costs off) create table rpt_ctas as select random() from generate_series(1, 10) distributed replicated;
QUERY PLAN
___________
Result
-> Broadcast Motion 1:3 (slice1)
-> Function Scan on generate_series
GP_IGNORE:(4 rows)
explain (costs off) create table rpt_ctas as select a from generate_series(1, 10) a group by a having sum(a) > random() distributed replicated;
QUERY PLAN
___________
Result
-> Broadcast Motion 3:3 (slice1; segments: 3)
-> Result
Filter: (((sum(generate_series)))::double precision > random())
-> HashAggregate
Group Key: generate_series
-> Result
-> Function Scan on generate_series
Optimizer: Pivotal Optimizer (GPORCA)
GP_IGNORE:(9 rows)
-- update & delete
explain (costs off) update t_replicate_volatile set a = 1 where b > random();
ERROR: could not devise a plan (cdbpath.c:2578)
explain (costs off) update t_replicate_volatile set a = 1 from t_replicate_volatile x where x.a + random() = t_replicate_volatile.b;
ERROR: could not devise a plan (cdbpath.c:2578)
explain (costs off) update t_replicate_volatile set a = 1 from t_hashdist x where x.a + random() = t_replicate_volatile.b;
ERROR: could not devise a plan (cdbpath.c:2578)
explain (costs off) delete from t_replicate_volatile where a < random();
ERROR: could not devise a plan (cdbpath.c:2578)
explain (costs off) delete from t_replicate_volatile using t_replicate_volatile x where t_replicate_volatile.a + x.b < random();
ERROR: could not devise a plan (cdbpath.c:2578)
explain (costs off) update t_replicate_volatile set a = random();
ERROR: could not devise a plan. (cdbpath.c:2441)
-- limit
explain (costs off) insert into t_replicate_volatile select * from t_replicate_volatile limit random();
QUERY PLAN
---------------------------------------------------------------------------------------
Insert on t_replicate_volatile
-> Result
-> Broadcast Motion 1:3 (slice1)
-> Limit
-> Gather Motion 1:1 (slice2; segments: 1)
-> Seq Scan on t_replicate_volatile t_replicate_volatile_1
Optimizer: Pivotal Optimizer (GPORCA)
(7 rows)
explain (costs off) select * from t_hashdist cross join (select * from t_replicate_volatile limit random()) x;
QUERY PLAN
------------------------------------------------------
Nested Loop
Join Filter: true
-> Limit
-> Gather Motion 1:1 (slice2; segments: 1)
-> Seq Scan on t_replicate_volatile
-> Materialize
-> Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on t_hashdist
Optimizer: Pivotal Optimizer (GPORCA)
(9 rows)
-- ORCA
-- verify that JOIN derives the inner child distribution if the outer is tainted replicated (in this
-- case, the inner child is the hash distributed table, but the distribution is random because the
-- hash distribution key is not the JOIN key. we want to return the inner distribution because the
-- JOIN key determines the distribution of the JOIN output).
create table dist_tab (a integer, b integer) distributed by (a);
create table rep_tab (c integer) distributed replicated;
create index idx on dist_tab (b);
insert into dist_tab values (1, 2), (2, 2), (2, 1), (1, 1);
insert into rep_tab values (1), (2);
analyze dist_tab;
analyze rep_tab;
set optimizer_enable_hashjoin=off;
set enable_hashjoin=off;
set enable_nestloop=on;
explain select b from dist_tab where b in (select distinct c from rep_tab);
QUERY PLAN
---------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..443.00 rows=4 width=4)
-> Nested Loop (cost=0.00..443.00 rows=2 width=4)
Join Filter: true
-> GroupAggregate (cost=0.00..431.00 rows=2 width=4)
Group Key: rep_tab.c
-> Sort (cost=0.00..431.00 rows=2 width=4)
Sort Key: rep_tab.c
-> Seq Scan on rep_tab (cost=0.00..431.00 rows=2 width=4)
-> Index Scan using idx on dist_tab (cost=0.00..12.00 rows=1 width=4)
Index Cond: (b = rep_tab.c)
Optimizer: Pivotal Optimizer (GPORCA)
(11 rows)
select b from dist_tab where b in (select distinct c from rep_tab);
b
---
1
2
1
2
(4 rows)
reset optimizer_enable_hashjoin;
reset enable_hashjoin;
reset enable_nestloop;
create table rand_tab (d integer) distributed randomly;
insert into rand_tab values (1), (2);
analyze rand_tab;
-- Table Side Derives
-- rep_tab pdsOuter EdtTaintedReplicated
-- rep_tab pdsInner EdtHashed
--
-- join derives EdtHashed
explain select c from rep_tab where c in (select distinct c from rep_tab);
QUERY PLAN
-------------------------------------------------------------------------------------
Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..862.00 rows=2 width=4)
-> Hash Semi Join (cost=0.00..862.00 rows=6 width=4)
Hash Cond: (rep_tab.c = rep_tab_1.c)
-> Seq Scan on rep_tab (cost=0.00..431.00 rows=6 width=4)
-> Hash (cost=431.00..431.00 rows=6 width=4)
-> Seq Scan on rep_tab rep_tab_1 (cost=0.00..431.00 rows=6 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(7 rows)
select c from rep_tab where c in (select distinct c from rep_tab);
c
---
2
1
(2 rows)
-- Table Side Derives
-- dist_tab pdsOuter EdtHashed
-- rep_tab pdsInner EdtTaintedReplicated
--
-- join derives EdtHashed
explain select a from dist_tab where a in (select distinct c from rep_tab);
QUERY PLAN
------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=4 width=4)
-> Hash Semi Join (cost=0.00..862.00 rows=2 width=4)
Hash Cond: (dist_tab.a = rep_tab.c)
-> Seq Scan on dist_tab (cost=0.00..431.00 rows=2 width=4)
-> Hash (cost=431.00..431.00 rows=2 width=4)
-> Seq Scan on rep_tab (cost=0.00..431.00 rows=2 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(7 rows)
select a from dist_tab where a in (select distinct c from rep_tab);
a
---
1
1
2
2
(4 rows)
-- Table Side Derives
-- rand_tab pdsOuter EdtRandom
-- rep_tab pdsInner EdtTaintedReplicated
--
-- join derives EdtRandom
explain select d from rand_tab where d in (select distinct c from rep_tab);
QUERY PLAN
------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=2 width=4)
-> Hash Semi Join (cost=0.00..862.00 rows=1 width=4)
Hash Cond: (rand_tab.d = rep_tab.c)
-> Seq Scan on rand_tab (cost=0.00..431.00 rows=1 width=4)
-> Hash (cost=431.00..431.00 rows=2 width=4)
-> Seq Scan on rep_tab (cost=0.00..431.00 rows=2 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(7 rows)
select d from rand_tab where d in (select distinct c from rep_tab);
d
---
1
2
(2 rows)
-- Table Side Derives
-- rep_tab pdsOuter EdtTaintedReplicated
-- dist_tab pdsInner EdtHashed
--
-- join derives EdtHashed
explain select c from rep_tab where c in (select distinct a from dist_tab);
QUERY PLAN
----------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=2 width=4)
-> Hash Join (cost=0.00..862.00 rows=1 width=4)
Hash Cond: (dist_tab.a = rep_tab.c)
-> GroupAggregate (cost=0.00..431.00 rows=1 width=4)
Group Key: dist_tab.a
-> Sort (cost=0.00..431.00 rows=2 width=4)
Sort Key: dist_tab.a
-> Seq Scan on dist_tab (cost=0.00..431.00 rows=2 width=4)
-> Hash (cost=431.00..431.00 rows=2 width=4)
-> Seq Scan on rep_tab (cost=0.00..431.00 rows=2 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(11 rows)
select c from rep_tab where c in (select distinct a from dist_tab);
c
---
1
2
(2 rows)
-- Table Side Derives
-- rep_tab pdsOuter EdtTaintedReplicated
-- rand_tab pdsInner EdtHashed
--
-- join derives EdtHashed
explain select c from rep_tab where c in (select distinct d from rand_tab);
QUERY PLAN
------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=2 width=4)
-> Hash Semi Join (cost=0.00..862.00 rows=1 width=4)
Hash Cond: (rep_tab.c = rand_tab.d)
-> Result (cost=0.00..431.00 rows=1 width=4)
-> Seq Scan on rep_tab (cost=0.00..431.00 rows=2 width=4)
-> Hash (cost=431.00..431.00 rows=1 width=4)
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4)
Hash Key: rand_tab.d
-> Seq Scan on rand_tab (cost=0.00..431.00 rows=1 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(10 rows)
select c from rep_tab where c in (select distinct d from rand_tab);
c
---
2
1
(2 rows)
-- Github Issue 13532
create table t1_13532(a int, b int) distributed replicated;
create table t2_13532(a int, b int) distributed replicated;
create index idx_t2_13532 on t2_13532(b);
explain (costs off) select * from t1_13532 x, t2_13532 y where y.a < random() and x.b = y.b;
QUERY PLAN
----------------------------------------------------------
Gather Motion 1:1 (slice1; segments: 1)
-> Nested Loop
Join Filter: true
-> Seq Scan on t1_13532 x
-> Index Scan using idx_t2_13532 on t2_13532 y
Index Cond: (b = x.b)
Filter: ((a)::double precision < random())
Optimizer: Pivotal Optimizer (GPORCA)
(8 rows)
set enable_bitmapscan = off;
explain (costs off) select * from t1_13532 x, t2_13532 y where y.a < random() and x.b = y.b;
QUERY PLAN
----------------------------------------------------------
Gather Motion 1:1 (slice1; segments: 1)
-> Nested Loop
Join Filter: true
-> Seq Scan on t1_13532 x
-> Index Scan using idx_t2_13532 on t2_13532 y
Index Cond: (b = x.b)
Filter: ((a)::double precision < random())
Optimizer: Pivotal Optimizer (GPORCA)
(8 rows)
-- test for optimizer_enable_replicated_table
explain (costs off) select * from rep_tab;
QUERY PLAN
------------------------------------------
Gather Motion 1:1 (slice1; segments: 1)
-> Seq Scan on rep_tab
Optimizer: Pivotal Optimizer (GPORCA)
(3 rows)
set optimizer_enable_replicated_table=off;
set optimizer_trace_fallback=on;
explain (costs off) select * from rep_tab;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Use optimizer_enable_replicated_table to enable replicated tables
QUERY PLAN
------------------------------------------
Gather Motion 1:1 (slice1; segments: 1)
-> Seq Scan on rep_tab
Optimizer: Postgres query optimizer
(3 rows)
reset optimizer_trace_fallback;
reset optimizer_enable_replicated_table;
-- Ensure plan with Gather Motion node is generated.
drop table if exists t;
NOTICE: table "t" does not exist, skipping
create table t (i int, j int) distributed replicated;
insert into t values (1, 2);
explain (costs off) select j, (select j) AS "Correlated Field" from t;
QUERY PLAN
------------------------------------------
Gather Motion 1:1 (slice1; segments: 1)
-> Seq Scan on t
SubPlan 1
-> Result
-> Result
Optimizer: Pivotal Optimizer (GPORCA)
(6 rows)
select j, (select j) AS "Correlated Field" from t;
j | Correlated Field
---+------------------
2 | 2
(1 row)
explain (costs off) select j, (select 5) AS "Uncorrelated Field" from t;
QUERY PLAN
------------------------------------------
Gather Motion 1:1 (slice1; segments: 1)
-> Nested Loop Left Join
Join Filter: true
-> Seq Scan on t
-> Materialize
-> Result
Optimizer: Pivotal Optimizer (GPORCA)
(7 rows)
select j, (select 5) AS "Uncorrelated Field" from t;
j | Uncorrelated Field
---+--------------------
2 | 5
(1 row)
-- start_ignore
drop schema rpt cascade;
NOTICE: drop cascades to 7 other objects
DETAIL: drop cascades to table foo
drop cascades to table bar
drop cascades to table baz
drop cascades to table qux
-- end_ignore