| -- start_matchsubs |
| -- m/ERROR: could not devise a plan \(cdbpath.c:.*\)/ |
| -- s/ERROR: could not devise a plan \(cdbpath.c:.*\)/ERROR: could not devise a plan/ |
| -- m/ERROR: could not devise a plan\. \(cdbpath.c:.*\)/ |
| -- s/ERROR: could not devise a plan\. \(cdbpath.c:.*\)/ERROR: could not devise a plan \. / |
| -- end_matchsubs |
| -- |
| -- Basic tests for replicated table |
| -- |
| create schema rpt; |
| set search_path to rpt; |
| --------- |
| -- INSERT |
| --------- |
| create table foo (x int, y int) distributed replicated; |
| create table foo1(like foo) distributed replicated; |
| create table bar (like foo) distributed randomly; |
| create table bar1 (like foo) distributed by (x); |
| -- values --> replicated table |
| -- random partitioned table --> replicated table |
| -- hash partitioned table --> replicated table |
| -- singleQE --> replicated table |
| -- replicated --> replicated table |
| insert into bar values (1, 1), (3, 1); |
| insert into bar1 values (1, 1), (3, 1); |
| insert into foo1 values (1, 1), (3, 1); |
| insert into foo select * from bar; |
| insert into foo select * from bar1; |
| insert into foo select * from bar order by x limit 1; |
| insert into foo select * from foo; |
| select * from foo order by x; |
| x | y |
| ---+--- |
| 1 | 1 |
| 1 | 1 |
| 1 | 1 |
| 1 | 1 |
| 1 | 1 |
| 1 | 1 |
| 3 | 1 |
| 3 | 1 |
| 3 | 1 |
| 3 | 1 |
| (10 rows) |
| |
| select bar.x, bar.y from bar, (select * from foo) as t1 order by 1,2; |
| x | y |
| ---+--- |
| 1 | 1 |
| 1 | 1 |
| 1 | 1 |
| 1 | 1 |
| 1 | 1 |
| 1 | 1 |
| 1 | 1 |
| 1 | 1 |
| 1 | 1 |
| 1 | 1 |
| 3 | 1 |
| 3 | 1 |
| 3 | 1 |
| 3 | 1 |
| 3 | 1 |
| 3 | 1 |
| 3 | 1 |
| 3 | 1 |
| 3 | 1 |
| 3 | 1 |
| (20 rows) |
| |
| select bar.x, bar.y from bar, (select * from foo order by x limit 1) as t1 order by 1,2; |
| x | y |
| ---+--- |
| 1 | 1 |
| 3 | 1 |
| (2 rows) |
| |
| truncate foo; |
| truncate foo1; |
| truncate bar; |
| truncate bar1; |
| -- replicated table --> random partitioned table |
| -- replicated table --> hash partitioned table |
| insert into foo values (1, 1), (3, 1); |
| insert into bar select * from foo order by x limit 1; |
| insert into bar1 select * from foo order by x limit 1; |
| select * from foo order by x; |
| x | y |
| ---+--- |
| 1 | 1 |
| 3 | 1 |
| (2 rows) |
| |
| select * from bar order by x; |
| x | y |
| ---+--- |
| 1 | 1 |
| (1 row) |
| |
| select * from bar1 order by x; |
| x | y |
| ---+--- |
| 1 | 1 |
| (1 row) |
| |
| drop table if exists foo; |
| drop table if exists foo1; |
| drop table if exists bar; |
| drop table if exists bar1; |
| -- |
| -- CREATE UNIQUE INDEX |
| -- |
| -- create unique index on non-distributed key. |
| create table foo (x int, y int) distributed replicated; |
| create table bar (x int, y int) distributed randomly; |
| -- success |
| create unique index foo_idx on foo (y); |
| -- should fail |
| create unique index bar_idx on bar (y); |
| ERROR: UNIQUE and DISTRIBUTED RANDOMLY are incompatible |
| drop table if exists foo; |
| drop table if exists bar; |
| -- |
| -- CREATE TABLE with both PRIMARY KEY and UNIQUE constraints |
| -- |
| create table foo (id int primary key, name text unique) distributed replicated; |
| -- success |
| insert into foo values (1,'aaa'); |
| insert into foo values (2,'bbb'); |
| -- fail |
| insert into foo values (1,'ccc'); |
| ERROR: duplicate key value violates unique constraint "foo_pkey" (seg0 192.168.99.102:25432 pid=22681) |
| DETAIL: Key (id)=(1) already exists. |
| insert into foo values (3,'aaa'); |
| ERROR: duplicate key value violates unique constraint "foo_name_key" (seg2 192.168.99.102:25434 pid=22683) |
| DETAIL: Key (name)=(aaa) already exists. |
| drop table if exists foo; |
| -- |
| -- CREATE TABLE |
| -- |
| -- |
| -- Like |
| CREATE TABLE parent ( |
| name text, |
| age int4, |
| location point |
| ) distributed replicated; |
| CREATE TABLE child (like parent) distributed replicated; |
| CREATE TABLE child1 (like parent) DISTRIBUTED BY (name); |
| CREATE TABLE child2 (like parent); |
| NOTICE: table doesn't have 'DISTRIBUTED BY' clause, defaulting to distribution columns from LIKE table |
| -- should be replicated table |
| \d child |
| Table "rpt.child" |
| Column | Type | Collation | Nullable | Default |
| ----------+---------+-----------+----------+--------- |
| name | text | | | |
| age | integer | | | |
| location | point | | | |
| Distributed Replicated |
| |
| -- should distributed by name |
| \d child1 |
| Table "rpt.child1" |
| Column | Type | Collation | Nullable | Default |
| ----------+---------+-----------+----------+--------- |
| name | text | | | |
| age | integer | | | |
| location | point | | | |
| Distributed by: (name) |
| |
| -- should be replicated table |
| \d child2 |
| Table "rpt.child2" |
| Column | Type | Collation | Nullable | Default |
| ----------+---------+-----------+----------+--------- |
| name | text | | | |
| age | integer | | | |
| location | point | | | |
| Distributed Replicated |
| |
| drop table if exists parent; |
| drop table if exists child; |
| drop table if exists child1; |
| drop table if exists child2; |
| -- Inherits |
| CREATE TABLE parent_rep ( |
| name text, |
| age int4, |
| location point |
| ) distributed replicated; |
| CREATE TABLE parent_part ( |
| name text, |
| age int4, |
| location point |
| ) distributed by (name); |
| -- inherits from a replicated table, should fail |
| CREATE TABLE child ( |
| salary int4, |
| manager name |
| ) INHERITS (parent_rep); |
| ERROR: cannot inherit from replicated table "parent_rep" to create table "child" |
| DETAIL: An inheritance hierarchy cannot contain a mixture of distributed and non-distributed tables. |
| -- replicated table can not have parents, should fail |
| CREATE TABLE child ( |
| salary int4, |
| manager name |
| ) INHERITS (parent_part) DISTRIBUTED REPLICATED; |
| ERROR: INHERITS clause cannot be used with DISTRIBUTED REPLICATED clause |
| drop table if exists parent_rep; |
| drop table if exists parent_part; |
| drop table if exists child; |
| NOTICE: table "child" does not exist, skipping |
| -- |
| -- CTAS |
| -- |
| -- CTAS from generate_series |
| create table foo as select i as c1, i as c2 |
| from generate_series(1,3) i distributed replicated; |
| -- CTAS from replicated table |
| create table bar as select * from foo distributed replicated; |
| select * from bar; |
| c1 | c2 |
| ----+---- |
| 1 | 1 |
| 2 | 2 |
| 3 | 3 |
| (3 rows) |
| |
| drop table if exists foo; |
| drop table if exists bar; |
| -- CTAS from partition table table |
| create table foo as select i as c1, i as c2 |
| from generate_series(1,3) i; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'c1' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| create table bar as select * from foo distributed replicated; |
| select * from bar; |
| c1 | c2 |
| ----+---- |
| 1 | 1 |
| 2 | 2 |
| 3 | 3 |
| (3 rows) |
| |
| drop table if exists foo; |
| drop table if exists bar; |
| -- CTAS from singleQE |
| create table foo as select i as c1, i as c2 |
| from generate_series(1,3) i; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'c1' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| select * from foo; |
| c1 | c2 |
| ----+---- |
| 1 | 1 |
| 2 | 2 |
| 3 | 3 |
| (3 rows) |
| |
| create table bar as select * from foo order by c1 limit 1 distributed replicated; |
| select * from bar; |
| c1 | c2 |
| ----+---- |
| 1 | 1 |
| (1 row) |
| |
| drop table if exists foo; |
| drop table if exists bar; |
| -- Create view can work |
| create table foo(x int, y int) distributed replicated; |
| insert into foo values(1,1); |
| create view v_foo as select * from foo; |
| select * from v_foo; |
| x | y |
| ---+--- |
| 1 | 1 |
| (1 row) |
| |
| drop view v_foo; |
| drop table if exists foo; |
| --------- |
| -- Alter |
| -------- |
| -- Drop distributed key column |
| create table foo(x int, y int) distributed replicated; |
| create table bar(like foo) distributed by (x); |
| insert into foo values(1,1); |
| insert into bar values(1,1); |
| -- success |
| alter table foo drop column x; |
| -- fail |
| alter table bar drop column x; |
| NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy |
| drop table if exists foo; |
| drop table if exists foo1; |
| NOTICE: table "foo1" does not exist, skipping |
| drop table if exists bar; |
| drop table if exists bar1; |
| NOTICE: table "bar1" does not exist, skipping |
| -- Alter gp_distribution_policy |
| create table foo(x int, y int) distributed replicated; |
| create table foo1(x int, y int) distributed replicated; |
| create table bar(x int, y int) distributed by (x); |
| create table bar1(x int, y int) distributed randomly; |
| insert into foo select i,i from generate_series(1,10) i; |
| insert into foo1 select i,i from generate_series(1,10) i; |
| insert into bar select i,i from generate_series(1,10) i; |
| insert into bar1 select i,i from generate_series(1,10) i; |
| -- alter distribution policy of replicated table |
| alter table foo set distributed by (x); |
| alter table foo1 set distributed randomly; |
| -- alter a partitioned table to replicated table |
| alter table bar set distributed replicated; |
| alter table bar1 set distributed replicated; |
| -- verify the new policies |
| \d foo |
| Table "rpt.foo" |
| Column | Type | Collation | Nullable | Default |
| --------+---------+-----------+----------+--------- |
| x | integer | | | |
| y | integer | | | |
| Distributed by: (x) |
| |
| \d foo1 |
| Table "rpt.foo1" |
| Column | Type | Collation | Nullable | Default |
| --------+---------+-----------+----------+--------- |
| x | integer | | | |
| y | integer | | | |
| Distributed randomly |
| |
| \d bar |
| Table "rpt.bar" |
| Column | Type | Collation | Nullable | Default |
| --------+---------+-----------+----------+--------- |
| x | integer | | | |
| y | integer | | | |
| Distributed Replicated |
| |
| \d bar1 |
| Table "rpt.bar1" |
| Column | Type | Collation | Nullable | Default |
| --------+---------+-----------+----------+--------- |
| x | integer | | | |
| y | integer | | | |
| Distributed Replicated |
| |
| -- verify the reorganized data |
| select * from foo; |
| x | y |
| ----+---- |
| 1 | 1 |
| 2 | 2 |
| 3 | 3 |
| 4 | 4 |
| 5 | 5 |
| 6 | 6 |
| 7 | 7 |
| 8 | 8 |
| 9 | 9 |
| 10 | 10 |
| (10 rows) |
| |
| select * from foo1; |
| x | y |
| ----+---- |
| 1 | 1 |
| 2 | 2 |
| 3 | 3 |
| 4 | 4 |
| 5 | 5 |
| 6 | 6 |
| 7 | 7 |
| 8 | 8 |
| 9 | 9 |
| 10 | 10 |
| (10 rows) |
| |
| select * from bar; |
| x | y |
| ----+---- |
| 1 | 1 |
| 2 | 2 |
| 3 | 3 |
| 4 | 4 |
| 5 | 5 |
| 6 | 6 |
| 7 | 7 |
| 8 | 8 |
| 9 | 9 |
| 10 | 10 |
| (10 rows) |
| |
| select * from bar1; |
| x | y |
| ----+---- |
| 1 | 1 |
| 2 | 2 |
| 3 | 3 |
| 4 | 4 |
| 5 | 5 |
| 6 | 6 |
| 7 | 7 |
| 8 | 8 |
| 9 | 9 |
| 10 | 10 |
| (10 rows) |
| |
| -- alter back |
| alter table foo set distributed replicated; |
| alter table foo1 set distributed replicated; |
| alter table bar set distributed by (x); |
| alter table bar1 set distributed randomly; |
| -- verify the policies again |
| \d foo |
| Table "rpt.foo" |
| Column | Type | Collation | Nullable | Default |
| --------+---------+-----------+----------+--------- |
| x | integer | | | |
| y | integer | | | |
| Distributed Replicated |
| |
| \d foo1 |
| Table "rpt.foo1" |
| Column | Type | Collation | Nullable | Default |
| --------+---------+-----------+----------+--------- |
| x | integer | | | |
| y | integer | | | |
| Distributed Replicated |
| |
| \d bar |
| Table "rpt.bar" |
| Column | Type | Collation | Nullable | Default |
| --------+---------+-----------+----------+--------- |
| x | integer | | | |
| y | integer | | | |
| Distributed by: (x) |
| |
| \d bar1 |
| Table "rpt.bar1" |
| Column | Type | Collation | Nullable | Default |
| --------+---------+-----------+----------+--------- |
| x | integer | | | |
| y | integer | | | |
| Distributed randomly |
| |
| -- verify the reorganized data again |
| select * from foo; |
| x | y |
| ----+---- |
| 1 | 1 |
| 2 | 2 |
| 3 | 3 |
| 4 | 4 |
| 5 | 5 |
| 6 | 6 |
| 7 | 7 |
| 8 | 8 |
| 9 | 9 |
| 10 | 10 |
| (10 rows) |
| |
| select * from foo1; |
| x | y |
| ----+---- |
| 1 | 1 |
| 2 | 2 |
| 3 | 3 |
| 4 | 4 |
| 5 | 5 |
| 6 | 6 |
| 7 | 7 |
| 8 | 8 |
| 9 | 9 |
| 10 | 10 |
| (10 rows) |
| |
| select * from bar; |
| x | y |
| ----+---- |
| 1 | 1 |
| 2 | 2 |
| 3 | 3 |
| 4 | 4 |
| 5 | 5 |
| 6 | 6 |
| 7 | 7 |
| 8 | 8 |
| 9 | 9 |
| 10 | 10 |
| (10 rows) |
| |
| select * from bar1; |
| x | y |
| ----+---- |
| 1 | 1 |
| 2 | 2 |
| 3 | 3 |
| 4 | 4 |
| 5 | 5 |
| 6 | 6 |
| 7 | 7 |
| 8 | 8 |
| 9 | 9 |
| 10 | 10 |
| (10 rows) |
| |
| drop table if exists foo; |
| drop table if exists foo1; |
| drop table if exists bar; |
| drop table if exists bar1; |
| --------- |
| -- UPDATE / DELETE |
| --------- |
| create table foo(x int, y int) distributed replicated; |
| create table bar(x int, y int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| insert into foo values (1, 1), (2, 1); |
| insert into bar values (1, 2), (2, 2); |
| update foo set y = 2 where y = 1; |
| select * from foo; |
| x | y |
| ---+--- |
| 1 | 2 |
| 2 | 2 |
| (2 rows) |
| |
| update foo set y = 1 from bar where bar.y = foo.y; |
| select * from foo; |
| x | y |
| ---+--- |
| 1 | 1 |
| 2 | 1 |
| (2 rows) |
| |
| delete from foo where y = 1; |
| select * from foo; |
| x | y |
| ---+--- |
| (0 rows) |
| |
| -- Test replicate table within init plan |
| insert into foo values (1, 1), (2, 1); |
| select * from bar where exists (select * from foo); |
| x | y |
| ---+--- |
| 2 | 2 |
| 1 | 2 |
| (2 rows) |
| |
| ------ |
| -- Test Current Of is disabled for replicated table |
| ------ |
| begin; |
| declare c1 cursor for select * from foo; |
| fetch 1 from c1; |
| x | y |
| ---+--- |
| 1 | 1 |
| (1 row) |
| |
| delete from foo where current of c1; |
| ERROR: "foo" is not simply updatable |
| abort; |
| begin; |
| declare c1 cursor for select * from foo; |
| fetch 1 from c1; |
| x | y |
| ---+--- |
| 1 | 1 |
| (1 row) |
| |
| update foo set y = 1 where current of c1; |
| ERROR: "foo" is not simply updatable |
| abort; |
| ----- |
| -- Test updatable view works for replicated table |
| ---- |
| truncate foo; |
| truncate bar; |
| insert into foo values (1, 1); |
| insert into foo values (2, 2); |
| insert into bar values (1, 1); |
| create view v_foo as select * from foo where y = 1; |
| begin; |
| update v_foo set y = 2; |
| select * from gp_dist_random('foo'); |
| x | y |
| ---+--- |
| 2 | 2 |
| 1 | 2 |
| 2 | 2 |
| 1 | 2 |
| 2 | 2 |
| 1 | 2 |
| (6 rows) |
| |
| abort; |
| update v_foo set y = 3 from bar where bar.y = v_foo.y; |
| select * from gp_dist_random('foo'); |
| x | y |
| ---+--- |
| 2 | 2 |
| 1 | 3 |
| 2 | 2 |
| 1 | 3 |
| 2 | 2 |
| 1 | 3 |
| (6 rows) |
| |
| -- Test gp_segment_id for replicated table |
| -- gp_segment_id is ambiguous for replicated table, it's been disabled now. |
| create table baz (c1 int, c2 int) distributed replicated; |
| create table qux (c1 int, c2 int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| select gp_segment_id from baz; |
| ERROR: column "gp_segment_id" does not exist |
| LINE 1: select gp_segment_id from baz; |
| ^ |
| select xmin from baz; |
| ERROR: column "xmin" does not exist |
| LINE 1: select xmin from baz; |
| ^ |
| select xmax from baz; |
| ERROR: column "xmax" does not exist |
| LINE 1: select xmax from baz; |
| ^ |
| select ctid from baz; |
| ERROR: column "ctid" does not exist |
| LINE 1: select ctid from baz; |
| ^ |
| select * from baz where c2 = gp_segment_id; |
| ERROR: column "gp_segment_id" does not exist |
| LINE 1: select * from baz where c2 = gp_segment_id; |
| ^ |
| select * from baz, qux where baz.c1 = gp_segment_id; |
| c1 | c2 | c1 | c2 |
| ----+----+----+---- |
| (0 rows) |
| |
| update baz set c2 = gp_segment_id; |
| ERROR: column "gp_segment_id" does not exist |
| LINE 1: update baz set c2 = gp_segment_id; |
| ^ |
| update baz set c2 = 1 where gp_segment_id = 1; |
| ERROR: column "gp_segment_id" does not exist |
| LINE 1: update baz set c2 = 1 where gp_segment_id = 1; |
| ^ |
| update baz set c2 = 1 from qux where gp_segment_id = baz.c1; |
| insert into baz select i, i from generate_series(1, 1000) i; |
| vacuum baz; |
| vacuum full baz; |
| analyze baz; |
| -- Test dependencies check when alter table to replicated table |
| create view v_qux as select ctid from qux; |
| alter table qux set distributed replicated; |
| ERROR: cannot set distributed replicated because other object depend on its system columns |
| DETAIL: view v_qux depends on column ctid of table qux |
| HINT: system columns of replicated table will be exposed to users after altering, resolve dependencies first |
| drop view v_qux; |
| alter table qux set distributed replicated; |
| -- Test cursor for update also works for replicated table |
| create table cursor_update (c1 int, c2 int) distributed replicated; |
| insert into cursor_update select i, i from generate_series(1, 10) i; |
| begin; |
| declare c1 cursor for select * from cursor_update order by c2 for update; |
| fetch next from c1; |
| c1 | c2 |
| ----+---- |
| 1 | 1 |
| (1 row) |
| |
| end; |
| -- Test MinMax path on replicated table |
| create table minmaxtest (x int, y int) distributed replicated; |
| create index on minmaxtest (x); |
| insert into minmaxtest select generate_series(1, 10); |
| set enable_seqscan=off; |
| select min(x) from minmaxtest; |
| min |
| ----- |
| 1 |
| (1 row) |
| |
| -- Test replicated on partition table |
| -- should fail |
| CREATE TABLE foopart (a int4, b int4) DISTRIBUTED REPLICATED PARTITION BY RANGE (a) (START (1) END (10)); |
| ERROR: PARTITION BY clause cannot be used with DISTRIBUTED REPLICATED clause |
| CREATE TABLE foopart (a int4, b int4) PARTITION BY RANGE (a) (START (1) END (10)) ; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| NOTICE: CREATE TABLE will create partition "foopart_1_prt_1" for table "foopart" |
| -- should fail |
| ALTER TABLE foopart SET DISTRIBUTED REPLICATED; |
| ERROR: can't set the distribution policy of a partition table to REPLICATED |
| ALTER TABLE foopart_1_prt_1 SET DISTRIBUTED REPLICATED; |
| ERROR: can't set the distribution policy of "foopart_1_prt_1" |
| HINT: Distribution policy can be set for an entire partitioned table, not for one of its leaf parts or an interior branch. |
| DROP TABLE foopart; |
| -- Test that replicated table can't inherit a parent table, and it also |
| -- can't be inherited by a child table. |
| -- 1. Replicated table can't inherit a parent table. |
| CREATE TABLE parent (t text) DISTRIBUTED BY (t); |
| -- This is not allowed: should fail |
| CREATE TABLE child () INHERITS (parent) DISTRIBUTED REPLICATED; |
| ERROR: INHERITS clause cannot be used with DISTRIBUTED REPLICATED clause |
| CREATE TABLE child (t text) DISTRIBUTED REPLICATED; |
| -- should fail |
| ALTER TABLE child INHERIT parent; |
| ERROR: Replicated table cannot inherit a parent |
| DROP TABLE child, parent; |
| -- 2. Replicated table can't be inherited |
| CREATE TABLE parent (t text) DISTRIBUTED REPLICATED; |
| -- should fail |
| CREATE TABLE child () INHERITS (parent) DISTRIBUTED REPLICATED; |
| ERROR: INHERITS clause cannot be used with DISTRIBUTED REPLICATED clause |
| CREATE TABLE child () INHERITS (parent) DISTRIBUTED BY (t); |
| ERROR: cannot inherit from replicated table "parent" to create table "child" |
| DETAIL: An inheritance hierarchy cannot contain a mixture of distributed and non-distributed tables. |
| CREATE TABLE child (t text) DISTRIBUTED REPLICATED; |
| ALTER TABLE child INHERIT parent; |
| ERROR: Replicated table cannot inherit a parent |
| CREATE TABLE child2(t text) DISTRIBUTED BY (t); |
| ALTER TABLE child2 INHERIT parent; |
| ERROR: Replicated table cannot be inherited |
| DROP TABLE child, child2, parent; |
| -- volatile replicated |
| -- General and segmentGeneral locus imply that if the corresponding |
| -- slice is executed in many different segments should provide the |
| -- same result data set. Thus, in some cases, General and segmentGeneral |
| -- can be treated like broadcast. But if the segmentGeneral and general |
| -- locus path contain volatile functions, they lose the property and |
| -- can only be treated as singleQE. The following cases are to check that |
| -- we correctly handle all these cases. |
| -- FIXME: ORCA does not consider this, we need to fix the cases when ORCA |
| -- consider this. |
| create table t_hashdist(a int, b int, c int) distributed by (a); |
| create table t_replicate_volatile(a int, b int, c int) distributed replicated; |
| ---- pushed down filter |
| explain (costs off) select * from t_replicate_volatile, t_hashdist where t_replicate_volatile.a > random(); |
| QUERY PLAN |
| ---------------------------------------------------------------------- |
| Nested Loop |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Seq Scan on t_hashdist |
| -> Materialize |
| -> Result |
| -> Gather Motion 1:1 (slice2; segments: 1) |
| -> Seq Scan on t_replicate_volatile |
| Filter: ((a)::double precision > random()) |
| Optimizer: Postgres query optimizer |
| (9 rows) |
| |
| -- join qual |
| explain (costs off) select * from t_hashdist, t_replicate_volatile x, t_replicate_volatile y where x.a + y.a > random(); |
| QUERY PLAN |
| ------------------------------------------------------------------------------------- |
| Nested Loop |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Seq Scan on t_hashdist |
| -> Materialize |
| -> Result |
| -> Gather Motion 1:1 (slice2; segments: 1) |
| -> Nested Loop |
| Join Filter: (((x.a + y.a))::double precision > random()) |
| -> Seq Scan on t_replicate_volatile x |
| -> Materialize |
| -> Seq Scan on t_replicate_volatile y |
| Optimizer: Postgres query optimizer |
| (12 rows) |
| |
| -- sublink & subquery |
| explain (costs off) select * from t_hashdist where a > All (select random() from t_replicate_volatile); |
| QUERY PLAN |
| ------------------------------------------------------------------------------ |
| Nested Loop Left Anti Semi (Not-In) Join |
| Join Filter: ((t_hashdist.a)::double precision <= "NotIn_SUBQUERY".random) |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Seq Scan on t_hashdist |
| -> Materialize |
| -> Result |
| -> Gather Motion 1:1 (slice2; segments: 1) |
| -> Subquery Scan on "NotIn_SUBQUERY" |
| -> Seq Scan on t_replicate_volatile |
| Optimizer: Postgres query optimizer |
| (10 rows) |
| |
| explain (costs off) select * from t_hashdist where a in (select random()::int from t_replicate_volatile); |
| QUERY PLAN |
| ---------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Hash Join |
| Hash Cond: (t_hashdist.a = ((random())::integer)) |
| -> Seq Scan on t_hashdist |
| -> Hash |
| -> Redistribute Motion 1:3 (slice2; segments: 1) |
| Hash Key: ((random())::integer) |
| -> HashAggregate |
| Group Key: ((random())::integer) |
| -> Result |
| -> Seq Scan on t_replicate_volatile |
| Optimizer: Postgres query optimizer |
| (12 rows) |
| |
| -- subplan |
| explain (costs off, verbose) select * from t_hashdist left join t_replicate_volatile on t_hashdist.a > any (select random() from t_replicate_volatile); |
| QUERY PLAN |
| ---------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: t_hashdist.a, t_hashdist.b, t_hashdist.c, t_replicate_volatile.a, t_replicate_volatile.b, t_replicate_volatile.c |
| -> Nested Loop Left Join |
| Output: t_hashdist.a, t_hashdist.b, t_hashdist.c, t_replicate_volatile.a, t_replicate_volatile.b, t_replicate_volatile.c |
| Join Filter: ((SubPlan 1)) |
| -> Seq Scan on rpt.t_hashdist |
| Output: t_hashdist.a, t_hashdist.b, t_hashdist.c, (SubPlan 1) |
| SubPlan 1 |
| -> Materialize |
| Output: (random()) |
| -> Broadcast Motion 1:3 (slice2; segments: 1) |
| Output: (random()) |
| -> Seq Scan on rpt.t_replicate_volatile t_replicate_volatile_1 |
| Output: random() |
| -> Materialize |
| Output: t_replicate_volatile.a, t_replicate_volatile.b, t_replicate_volatile.c |
| -> Seq Scan on rpt.t_replicate_volatile |
| Output: t_replicate_volatile.a, t_replicate_volatile.b, t_replicate_volatile.c |
| Optimizer: Postgres query optimizer |
| Settings: enable_seqscan=off, optimizer=off |
| (20 rows) |
| |
| -- targetlist |
| explain (costs off) select * from t_hashdist cross join (select random () from t_replicate_volatile)x; |
| QUERY PLAN |
| ------------------------------------------------------------ |
| Nested Loop |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Seq Scan on t_hashdist |
| -> Materialize |
| -> Result |
| -> Gather Motion 1:1 (slice2; segments: 1) |
| -> Seq Scan on t_replicate_volatile |
| Optimizer: Postgres query optimizer |
| (8 rows) |
| |
| explain (costs off) select * from t_hashdist cross join (select a, sum(random()) from t_replicate_volatile group by a) x; |
| QUERY PLAN |
| ---------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Nested Loop |
| -> Broadcast Motion 1:3 (slice2; segments: 1) |
| -> Result |
| -> GroupAggregate |
| Group Key: t_replicate_volatile.a |
| -> Sort |
| Sort Key: t_replicate_volatile.a |
| -> Seq Scan on t_replicate_volatile |
| -> Materialize |
| -> Seq Scan on t_hashdist |
| Optimizer: Postgres query optimizer |
| (12 rows) |
| |
| explain (costs off) select * from t_hashdist cross join (select random() as k, sum(a) from t_replicate_volatile group by k) x; |
| QUERY PLAN |
| ---------------------------------------------------------------------- |
| Nested Loop |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Seq Scan on t_hashdist |
| -> Materialize |
| -> Result |
| -> Gather Motion 1:1 (slice2; segments: 1) |
| -> GroupAggregate |
| Group Key: (random()) |
| -> Sort |
| Sort Key: (random()) |
| -> Seq Scan on t_replicate_volatile |
| Optimizer: Postgres query optimizer |
| (12 rows) |
| |
| explain (costs off) select * from t_hashdist cross join (select a, sum(b) as s from t_replicate_volatile group by a having sum(b) > random() order by a) x ; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Nested Loop |
| -> Broadcast Motion 1:3 (slice2; segments: 1) |
| -> Result |
| -> GroupAggregate |
| Group Key: t_replicate_volatile.a |
| Filter: ((sum(t_replicate_volatile.b))::double precision > random()) |
| -> Sort |
| Sort Key: t_replicate_volatile.a |
| -> Seq Scan on t_replicate_volatile |
| -> Materialize |
| -> Seq Scan on t_hashdist |
| Optimizer: Postgres query optimizer |
| (13 rows) |
| |
| -- insert |
| explain (costs off) insert into t_replicate_volatile select random() from t_replicate_volatile; |
| QUERY PLAN |
| --------------------------------------------------------------------------- |
| Insert on t_replicate_volatile |
| -> Broadcast Motion 1:3 (slice1; segments: 1) |
| -> Result |
| -> Seq Scan on t_replicate_volatile t_replicate_volatile_1 |
| Optimizer: Postgres query optimizer |
| (5 rows) |
| |
| explain (costs off) insert into t_replicate_volatile select random(), a, a from generate_series(1, 10) a; |
| QUERY PLAN |
| ------------------------------------------------------ |
| Insert on t_replicate_volatile |
| -> Broadcast Motion 1:3 (slice1; segments: 1) |
| -> Subquery Scan on "*SELECT*" |
| -> Function Scan on generate_series a |
| Optimizer: Postgres query optimizer |
| (5 rows) |
| |
| create sequence seq_for_insert_replicated_table; |
| explain (costs off) insert into t_replicate_volatile select nextval('seq_for_insert_replicated_table'); |
| QUERY PLAN |
| --------------------------------------------------- |
| Insert on t_replicate_volatile |
| -> Broadcast Motion 1:3 (slice1; segments: 1) |
| -> Subquery Scan on "*SELECT*" |
| -> Result |
| Optimizer: Postgres query optimizer |
| (5 rows) |
| |
| explain (costs off) select a from t_replicate_volatile union all select * from nextval('seq_for_insert_replicated_table'); |
| QUERY PLAN |
| ---------------------------------------------------- |
| Append |
| -> Gather Motion 1:1 (slice1; segments: 1) |
| -> Subquery Scan on "*SELECT* 1" |
| -> Seq Scan on t_replicate_volatile |
| -> Function Scan on nextval |
| Optimizer: Postgres query optimizer |
| (6 rows) |
| |
| -- CTAS |
| explain (costs off) create table rpt_ctas as select random() from generate_series(1, 10) distributed replicated; |
| QUERY PLAN |
| --------------------------------------------- |
| Broadcast Motion 1:3 (slice1; segments: 1) |
| -> Function Scan on generate_series |
| Optimizer: Postgres query optimizer |
| (3 rows) |
| |
| explain (costs off) create table rpt_ctas as select a from generate_series(1, 10) a group by a having sum(a) > random() distributed replicated; |
| QUERY PLAN |
| --------------------------------------------------------- |
| Broadcast Motion 1:3 (slice1; segments: 1) |
| -> HashAggregate |
| Group Key: a |
| Filter: ((sum(a))::double precision > random()) |
| -> Function Scan on generate_series a |
| Optimizer: Postgres query optimizer |
| (6 rows) |
| |
| -- update & delete |
| explain (costs off) update t_replicate_volatile set a = 1 where b > random(); |
| ERROR: could not devise a plan (cdbpath.c:2578) |
| explain (costs off) update t_replicate_volatile set a = 1 from t_replicate_volatile x where x.a + random() = t_replicate_volatile.b; |
| ERROR: could not devise a plan (cdbpath.c:2578) |
| explain (costs off) update t_replicate_volatile set a = 1 from t_hashdist x where x.a + random() = t_replicate_volatile.b; |
| ERROR: could not devise a plan (cdbpath.c:2578) |
| explain (costs off) delete from t_replicate_volatile where a < random(); |
| ERROR: could not devise a plan (cdbpath.c:2578) |
| explain (costs off) delete from t_replicate_volatile using t_replicate_volatile x where t_replicate_volatile.a + x.b < random(); |
| ERROR: could not devise a plan (cdbpath.c:2578) |
| explain (costs off) update t_replicate_volatile set a = random(); |
| ERROR: could not devise a plan. (cdbpath.c:2440) |
| -- limit |
| explain (costs off) insert into t_replicate_volatile select * from t_replicate_volatile limit random(); |
| QUERY PLAN |
| --------------------------------------------------------------------------------- |
| Insert on t_replicate_volatile |
| -> Broadcast Motion 1:3 (slice1; segments: 1) |
| -> Result |
| -> Limit |
| -> Seq Scan on t_replicate_volatile t_replicate_volatile_1 |
| Optimizer: Postgres query optimizer |
| (6 rows) |
| |
| explain (costs off) select * from t_hashdist cross join (select * from t_replicate_volatile limit random()) x; |
| QUERY PLAN |
| ---------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Nested Loop |
| -> Broadcast Motion 1:3 (slice2; segments: 1) |
| -> Result |
| -> Limit |
| -> Seq Scan on t_replicate_volatile |
| -> Materialize |
| -> Seq Scan on t_hashdist |
| Optimizer: Postgres query optimizer |
| (9 rows) |
| |
| -- ORCA |
| -- verify that JOIN derives the inner child distribution if the outer is tainted replicated (in this |
| -- case, the inner child is the hash distributed table, but the distribution is random because the |
| -- hash distribution key is not the JOIN key. we want to return the inner distribution because the |
| -- JOIN key determines the distribution of the JOIN output). |
| create table dist_tab (a integer, b integer) distributed by (a); |
| create table rep_tab (c integer) distributed replicated; |
| create index idx on dist_tab (b); |
| insert into dist_tab values (1, 2), (2, 2), (2, 1), (1, 1); |
| insert into rep_tab values (1), (2); |
| analyze dist_tab; |
| analyze rep_tab; |
| set optimizer_enable_hashjoin=off; |
| set enable_hashjoin=off; |
| set enable_nestloop=on; |
| explain select b from dist_tab where b in (select distinct c from rep_tab); |
| QUERY PLAN |
| --------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) (cost=10000000001.16..10000000021.44 rows=4 width=4) |
| -> Nested Loop (cost=10000000001.16..10000000021.39 rows=1 width=4) |
| -> HashAggregate (cost=10000000001.02..10000000001.03 rows=2 width=4) |
| Group Key: rep_tab.c |
| -> Seq Scan on rep_tab (cost=10000000000.00..10000000001.02 rows=2 width=4) |
| -> Index Only Scan using idx on dist_tab (cost=0.13..10.16 rows=1 width=4) |
| Index Cond: (b = rep_tab.c) |
| Optimizer: Postgres query optimizer |
| (8 rows) |
| |
| select b from dist_tab where b in (select distinct c from rep_tab); |
| b |
| --- |
| 1 |
| 2 |
| 1 |
| 2 |
| (4 rows) |
| |
| reset optimizer_enable_hashjoin; |
| reset enable_hashjoin; |
| reset enable_nestloop; |
| create table rand_tab (d integer) distributed randomly; |
| insert into rand_tab values (1), (2); |
| analyze rand_tab; |
| -- Table Side Derives |
| -- rep_tab pdsOuter EdtTaintedReplicated |
| -- rep_tab pdsInner EdtHashed |
| -- |
| -- join derives EdtHashed |
| explain select c from rep_tab where c in (select distinct c from rep_tab); |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------- |
| Gather Motion 1:1 (slice1; segments: 1) (cost=20000000001.05..20000000002.11 rows=4 width=4) |
| -> Hash Semi Join (cost=20000000001.05..20000000002.11 rows=4 width=4) |
| Hash Cond: (rep_tab.c = rep_tab_1.c) |
| -> Seq Scan on rep_tab (cost=10000000000.00..10000000001.02 rows=2 width=4) |
| -> Hash (cost=10000000001.02..10000000001.02 rows=1 width=4) |
| -> Seq Scan on rep_tab rep_tab_1 (cost=10000000000.00..10000000001.02 rows=2 width=4) |
| Optimizer: Postgres query optimizer |
| (7 rows) |
| |
| select c from rep_tab where c in (select distinct c from rep_tab); |
| c |
| --- |
| 1 |
| 2 |
| (2 rows) |
| |
| -- Table Side Derives |
| -- dist_tab pdsOuter EdtHashed |
| -- rep_tab pdsInner EdtTaintedReplicated |
| -- |
| -- join derives EdtHashed |
| explain select a from dist_tab where a in (select distinct c from rep_tab); |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) (cost=20000000001.05..20000000003.14 rows=4 width=4) |
| -> Hash Semi Join (cost=20000000001.05..20000000003.14 rows=2 width=4) |
| Hash Cond: (dist_tab.a = rep_tab.c) |
| -> Seq Scan on dist_tab (cost=10000000000.00..10000000002.04 rows=2 width=4) |
| -> Hash (cost=10000000001.02..10000000001.02 rows=1 width=4) |
| -> Seq Scan on rep_tab (cost=10000000000.00..10000000001.02 rows=2 width=4) |
| Optimizer: Postgres query optimizer |
| (7 rows) |
| |
| select a from dist_tab where a in (select distinct c from rep_tab); |
| a |
| --- |
| 2 |
| 2 |
| 1 |
| 1 |
| (4 rows) |
| |
| -- Table Side Derives |
| -- rand_tab pdsOuter EdtRandom |
| -- rep_tab pdsInner EdtTaintedReplicated |
| -- |
| -- join derives EdtRandom |
| explain select d from rand_tab where d in (select distinct c from rep_tab); |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) (cost=20000000001.05..20000000002.11 rows=4 width=4) |
| -> Hash Semi Join (cost=20000000001.05..20000000002.11 rows=2 width=4) |
| Hash Cond: (rand_tab.d = rep_tab.c) |
| -> Seq Scan on rand_tab (cost=10000000000.00..10000000001.02 rows=1 width=4) |
| -> Hash (cost=10000000001.02..10000000001.02 rows=1 width=4) |
| -> Seq Scan on rep_tab (cost=10000000000.00..10000000001.02 rows=2 width=4) |
| Optimizer: Postgres query optimizer |
| (7 rows) |
| |
| select d from rand_tab where d in (select distinct c from rep_tab); |
| d |
| --- |
| 1 |
| 2 |
| (2 rows) |
| |
| -- Table Side Derives |
| -- rep_tab pdsOuter EdtTaintedReplicated |
| -- dist_tab pdsInner EdtHashed |
| -- |
| -- join derives EdtHashed |
| explain select c from rep_tab where c in (select distinct a from dist_tab); |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) (cost=20000000001.03..20000000002.14 rows=3 width=4) |
| -> Hash Semi Join (cost=20000000001.03..20000000002.09 rows=1 width=4) |
| Hash Cond: (rep_tab.c = dist_tab.a) |
| -> Redistribute Motion 1:3 (slice2; segments: 1) (cost=10000000000.00..10000000001.05 rows=1 width=4) |
| Hash Key: rep_tab.c |
| -> Seq Scan on rep_tab (cost=10000000000.00..10000000001.02 rows=2 width=4) |
| -> Hash (cost=10000000001.01..10000000001.01 rows=1 width=4) |
| -> Seq Scan on dist_tab (cost=10000000000.00..10000000001.01 rows=1 width=4) |
| Optimizer: Postgres query optimizer |
| (9 rows) |
| |
| select c from rep_tab where c in (select distinct a from dist_tab); |
| c |
| --- |
| 1 |
| 2 |
| (2 rows) |
| |
| -- Table Side Derives |
| -- rep_tab pdsOuter EdtTaintedReplicated |
| -- rand_tab pdsInner EdtHashed |
| -- |
| -- join derives EdtHashed |
| explain select c from rep_tab where c in (select distinct d from rand_tab); |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) (cost=20000000001.04..20000000002.15 rows=3 width=4) |
| -> Hash Semi Join (cost=20000000001.04..20000000002.11 rows=1 width=4) |
| Hash Cond: (rep_tab.c = rand_tab.d) |
| -> Redistribute Motion 1:3 (slice2; segments: 1) (cost=10000000000.00..10000000001.05 rows=1 width=4) |
| Hash Key: rep_tab.c |
| -> Seq Scan on rep_tab (cost=10000000000.00..10000000001.02 rows=2 width=4) |
| -> Hash (cost=10000000001.03..10000000001.03 rows=1 width=4) |
| -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=10000000000.00..10000000001.03 rows=1 width=4) |
| Hash Key: rand_tab.d |
| -> Seq Scan on rand_tab (cost=10000000000.00..10000000001.01 rows=1 width=4) |
| Optimizer: Postgres query optimizer |
| (11 rows) |
| |
| select c from rep_tab where c in (select distinct d from rand_tab); |
| c |
| --- |
| 1 |
| 2 |
| (2 rows) |
| |
| -- Github Issue 13532 |
| create table t1_13532(a int, b int) distributed replicated; |
| create table t2_13532(a int, b int) distributed replicated; |
| create index idx_t2_13532 on t2_13532(b); |
| explain (costs off) select * from t1_13532 x, t2_13532 y where y.a < random() and x.b = y.b; |
| QUERY PLAN |
| ---------------------------------------------------------------------- |
| Hash Join |
| Hash Cond: (x.b = y.b) |
| -> Gather Motion 1:1 (slice1; segments: 1) |
| -> Seq Scan on t1_13532 x |
| -> Hash |
| -> Result |
| -> Gather Motion 1:1 (slice2; segments: 1) |
| -> Bitmap Heap Scan on t2_13532 y |
| Filter: ((a)::double precision < random()) |
| -> Bitmap Index Scan on idx_t2_13532 |
| Optimizer: Postgres query optimizer |
| (11 rows) |
| |
| set enable_bitmapscan = off; |
| explain (costs off) select * from t1_13532 x, t2_13532 y where y.a < random() and x.b = y.b; |
| QUERY PLAN |
| ---------------------------------------------------------------------- |
| Hash Join |
| Hash Cond: (x.b = y.b) |
| -> Gather Motion 1:1 (slice1; segments: 1) |
| -> Seq Scan on t1_13532 x |
| -> Hash |
| -> Result |
| -> Gather Motion 1:1 (slice2; segments: 1) |
| -> Index Scan using idx_t2_13532 on t2_13532 y |
| Filter: ((a)::double precision < random()) |
| Optimizer: Postgres query optimizer |
| (10 rows) |
| |
| -- test for optimizer_enable_replicated_table |
| explain (costs off) select * from rep_tab; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 1:1 (slice1; segments: 1) |
| -> Seq Scan on rep_tab |
| Optimizer: Postgres query optimizer |
| (3 rows) |
| |
| set optimizer_enable_replicated_table=off; |
| set optimizer_trace_fallback=on; |
| explain (costs off) select * from rep_tab; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 1:1 (slice1; segments: 1) |
| -> Seq Scan on rep_tab |
| Optimizer: Postgres query optimizer |
| (3 rows) |
| |
| reset optimizer_trace_fallback; |
| reset optimizer_enable_replicated_table; |
| -- Ensure plan with Gather Motion node is generated. |
| drop table if exists t; |
| NOTICE: table "t" does not exist, skipping |
| create table t (i int, j int) distributed replicated; |
| insert into t values (1, 2); |
| explain (costs off) select j, (select j) AS "Correlated Field" from t; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 1:1 (slice1; segments: 1) |
| -> Seq Scan on t |
| SubPlan 1 |
| -> Result |
| Optimizer: Postgres query optimizer |
| (5 rows) |
| |
| select j, (select j) AS "Correlated Field" from t; |
| j | Correlated Field |
| ---+------------------ |
| 2 | 2 |
| (1 row) |
| |
| explain (costs off) select j, (select 5) AS "Uncorrelated Field" from t; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 1:1 (slice1; segments: 1) |
| InitPlan 1 (returns $0) (slice2) |
| -> Result |
| -> Seq Scan on t |
| Optimizer: Postgres query optimizer |
| (5 rows) |
| |
| select j, (select 5) AS "Uncorrelated Field" from t; |
| j | Uncorrelated Field |
| ---+-------------------- |
| 2 | 5 |
| (1 row) |
| |
| -- start_ignore |
| drop schema rpt cascade; |
| NOTICE: drop cascades to 7 other objects |
| DETAIL: drop cascades to table foo |
| drop cascades to table bar |
| drop cascades to table baz |
| drop cascades to table qux |
| -- end_ignore |