| -- MPP-21536: Duplicated rows inserted when ORCA is turned on |
| create schema bfv_dml; |
| set search_path=bfv_dml; |
| -- create test table |
| create table m(); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause, and no column type is suitable for a distribution key. Creating a NULL policy entry. |
| alter table m add column a int; |
| alter table m add column b int; |
| -- generate data for m |
| insert into m select i, i%5 from generate_series(1,10)i; |
| -- INSERT and UPDATE |
| create table yyy(a int, b int) distributed randomly; |
| insert into yyy select a,b from m; |
| select * from yyy order by 1, 2; |
| a | b |
| ----+--- |
| 1 | 1 |
| 2 | 2 |
| 3 | 3 |
| 4 | 4 |
| 5 | 0 |
| 6 | 1 |
| 7 | 2 |
| 8 | 3 |
| 9 | 4 |
| 10 | 0 |
| (10 rows) |
| |
| update yyy set a=m.b from m where m.a=yyy.b; |
| select * from yyy order by 1, 2; |
| a | b |
| ----+--- |
| 1 | 1 |
| 1 | 1 |
| 2 | 2 |
| 2 | 2 |
| 3 | 3 |
| 3 | 3 |
| 4 | 4 |
| 4 | 4 |
| 5 | 0 |
| 10 | 0 |
| (10 rows) |
| |
| drop table yyy; |
| -- UPDATE with different values |
| create table yyy(a int, b int) distributed randomly; |
| insert into yyy select a,b from m; |
| update yyy set b=m.b from m where m.a=yyy.a; |
| select * from yyy order by 1, 2; |
| a | b |
| ----+--- |
| 1 | 1 |
| 2 | 2 |
| 3 | 3 |
| 4 | 4 |
| 5 | 0 |
| 6 | 1 |
| 7 | 2 |
| 8 | 3 |
| 9 | 4 |
| 10 | 0 |
| (10 rows) |
| |
| drop table yyy; |
| -- DELETE |
| create table yyy(a int, b int) distributed randomly; |
| insert into yyy select a,b from m; |
| delete from yyy where a in (select a from m); |
| select * from yyy order by 1, 2; |
| a | b |
| ---+--- |
| (0 rows) |
| |
| drop table yyy; |
| create table yyy(a int, b int) distributed randomly; |
| insert into yyy select a,b from m; |
| delete from yyy where b in (select a from m); |
| select * from yyy order by 1, 2; |
| a | b |
| ----+--- |
| 5 | 0 |
| 10 | 0 |
| (2 rows) |
| |
| drop table yyy; |
| -- Now repeat all the above tests, but using a hacked master-only 'm' table |
| drop table m; |
| set optimizer_enable_master_only_queries=on; |
| -- create master-only table |
| create table m(); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause, and no column type is suitable for a distribution key. Creating a NULL policy entry. |
| set allow_system_table_mods=true; |
| delete from gp_distribution_policy where localoid='m'::regclass; |
| reset allow_system_table_mods; |
| alter table m add column a int; |
| alter table m add column b int; |
| -- generate data for m |
| insert into m select i, i%5 from generate_series(1,10)i; |
| create table zzz(a int primary key, b int) distributed by (a); |
| insert into zzz select a,b from m; |
| select * from zzz order by 1, 2; |
| a | b |
| ----+--- |
| 1 | 1 |
| 2 | 2 |
| 3 | 3 |
| 4 | 4 |
| 5 | 0 |
| 6 | 1 |
| 7 | 2 |
| 8 | 3 |
| 9 | 4 |
| 10 | 0 |
| (10 rows) |
| |
| delete from zzz where a in (select a from m); |
| select * from zzz order by 1, 2; |
| a | b |
| ---+--- |
| (0 rows) |
| |
| drop table zzz; |
| create table zzz(a int primary key, b int) distributed by (a); |
| insert into zzz select a,b from m; |
| delete from zzz where b in (select a from m); |
| select * from zzz order by 1, 2; |
| a | b |
| ----+--- |
| 5 | 0 |
| 10 | 0 |
| (2 rows) |
| |
| drop table zzz; |
| create table zzz(a int primary key, b int) distributed by (a); |
| insert into zzz select a,b from m; |
| -- This update fails with duplicate key error, but it varies which segment |
| -- reports it first, i.e. it varies which row it complaints first. Silence |
| -- that difference in the error DETAIL line |
| \set VERBOSITY terse |
| update zzz set a=m.b from m where m.a=zzz.b; |
| ERROR: duplicate key value violates unique constraint "zzz_pkey" (seg1 10.152.10.32:25433 pid=2877) |
| select * from zzz order by 1, 2; |
| a | b |
| ----+--- |
| 1 | 1 |
| 2 | 2 |
| 3 | 3 |
| 4 | 4 |
| 5 | 0 |
| 6 | 1 |
| 7 | 2 |
| 8 | 3 |
| 9 | 4 |
| 10 | 0 |
| (10 rows) |
| |
| drop table zzz; |
| create table zzz(a int primary key, b int) distributed by (a); |
| insert into zzz select a,b from m; |
| update zzz set b=m.b from m where m.a=zzz.a; |
| select * from zzz order by 1, 2; |
| a | b |
| ----+--- |
| 1 | 1 |
| 2 | 2 |
| 3 | 3 |
| 4 | 4 |
| 5 | 0 |
| 6 | 1 |
| 7 | 2 |
| 8 | 3 |
| 9 | 4 |
| 10 | 0 |
| (10 rows) |
| |
| drop table zzz; |
| drop table m; |
| -- MPP-21622 Update with primary key: only sort if the primary key is updated |
| -- |
| -- Aside from testing that bug, this also tests EXPLAIN of an DMLActionExpr |
| -- that ORCA generates for plans that update the primary key. |
| create table update_pk_test (a int primary key, b int) distributed by (a); |
| insert into update_pk_test values(1,1); |
| explain update update_pk_test set b = 5; |
| QUERY PLAN |
| --------------------------------------------------------------------- |
| Update on update_pk_test (cost=0.00..1.01 rows=1 width=14) |
| -> Seq Scan on update_pk_test (cost=0.00..1.01 rows=1 width=14) |
| Settings: optimizer=off |
| Optimizer status: Postgres query optimizer |
| (4 rows) |
| |
| update update_pk_test set b = 5; |
| select * from update_pk_test order by 1,2; |
| a | b |
| ---+--- |
| 1 | 5 |
| (1 row) |
| |
| explain update update_pk_test set a = 5; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------- |
| Update on update_pk_test (cost=0.00..1.05 rows=1 width=22) |
| -> Explicit Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..1.05 rows=1 width=22) |
| -> Split (cost=0.00..1.01 rows=1 width=22) |
| -> Seq Scan on update_pk_test (cost=0.00..1.01 rows=1 width=22) |
| Optimizer: Postgres query optimizer |
| (5 rows) |
| |
| update update_pk_test set a = 5; |
| select * from update_pk_test order by 1,2; |
| a | b |
| ---+--- |
| 5 | 5 |
| (1 row) |
| |
| -- MPP-22599 DML queries that fallback to planner don't check for updates on |
| -- the distribution key. |
| -- |
| -- So the bug was that if ORCA fell back to the planner, then the usual |
| -- check that prohibits updating the distribution key columns was not |
| -- performed like it should. So the idea of this test is to have an UPDATE |
| -- on distribution key column, with some features in the table or the query, |
| -- such that ORCA cannot produce a plan and it falls back to the Postgres |
| -- planner. |
| set optimizer_trace_fallback = on; |
| -- Subquery that returns a row rather than a single scalar isn't supported |
| -- in ORCA currently, so we can use that to trigger fallback. |
| update update_pk_test set a=1 where row(1,2) = (SELECT 1, 2); |
| select * from update_pk_test order by 1,2; |
| a | b |
| ---+--- |
| 1 | 5 |
| (1 row) |
| |
| reset optimizer_trace_fallback; |
| -- |
| -- Check that INSERT and DELETE triggers don't fire on UPDATE. |
| -- |
| -- It may seem weird how that could happen, but with ORCA, UPDATEs are |
| -- implemented as a "split update", which is really a DELETE and an INSERT. |
| -- |
| CREATE TABLE bfv_dml_trigger_test (id int4, t text); |
| INSERT INTO bfv_dml_trigger_test VALUES (1, 'foo'); |
| CREATE OR REPLACE FUNCTION bfv_dml_error_func() RETURNS trigger AS |
| $$ |
| BEGIN |
| RAISE EXCEPTION 'trigger was called!'; |
| RETURN NEW; |
| END |
| $$ LANGUAGE 'plpgsql'; |
| CREATE TRIGGER before_trigger BEFORE INSERT or DELETE ON bfv_dml_trigger_test |
| FOR EACH ROW |
| EXECUTE PROCEDURE bfv_dml_error_func(); |
| CREATE TRIGGER after_trigger AFTER INSERT or DELETE ON bfv_dml_trigger_test |
| FOR EACH ROW |
| EXECUTE PROCEDURE bfv_dml_error_func(); |
| UPDATE bfv_dml_trigger_test SET t = 'bar'; |
| UPDATE bfv_dml_trigger_test SET id = id + 1; |
| -- |
| -- Verify that ExecInsert doesn't scribble on the old tuple, when the new |
| -- tuple comes directly from the old table. |
| -- |
| CREATE TABLE execinsert_test (id int4, t text) DISTRIBUTED BY (id); |
| INSERT INTO execinsert_test values (1, 'foo'); |
| -- Insert another identical tuple, but roll it back. If the insertion |
| -- incorrectly modified the xmin on the old tuple, then it will become |
| -- invisible when we roll back. |
| begin; |
| INSERT INTO execinsert_test select * FROM execinsert_test; |
| rollback; |
| select * from execinsert_test; |
| id | t |
| ----+----- |
| 1 | foo |
| (1 row) |
| |
| drop table execinsert_test; |
| -- Repeat with a hacked master-only table, just in case the planner decides |
| -- to add a Motion node or something that hides the problem otherwise. |
| CREATE TABLE execinsert_test (id int4, t text) DISTRIBUTED BY (id); |
| set allow_system_table_mods=true; |
| delete from gp_distribution_policy where localoid='execinsert_test'::regclass; |
| reset allow_system_table_mods; |
| INSERT INTO execinsert_test values (1, 'foo'); |
| begin; |
| INSERT INTO execinsert_test select * FROM execinsert_test; |
| rollback; |
| select * from execinsert_test; |
| id | t |
| ----+----- |
| 1 | foo |
| (1 row) |
| |
| drop table execinsert_test; |
| -- |
| -- Verify that DELETE properly redistributes in the case of joins |
| -- |
| drop table if exists foo; |
| NOTICE: table "foo" does not exist, skipping |
| drop table if exists bar; |
| NOTICE: table "bar" does not exist, skipping |
| create table foo (a int, b int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| create table bar(a int, b int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| insert into foo select generate_series(1,10); |
| insert into bar select generate_series(1,10); |
| -- Previously, table foo is defined as randomly distributed and |
| -- that might lead to flaky result of the explain statement |
| -- since random cost. We set policy to random without move the |
| -- data after data is all inserted. This method can both have |
| -- a random dist table and a stable test result. |
| -- Following cases are using the same skill here. |
| alter table foo set with(REORGANIZE=false) distributed randomly; |
| analyze foo; |
| analyze bar; |
| explain delete from foo using bar where foo.a=bar.a; |
| QUERY PLAN |
| --------------------------------------------------------------------------------------------------------------- |
| Delete on foo (cost=679.75..178739.85 rows=2471070 width=16) |
| -> Explicit Redistribute Motion 3:3 (slice1; segments: 3) (cost=679.75..178739.85 rows=2471070 width=16) |
| -> Hash Join (cost=679.75..129318.45 rows=2471070 width=16) |
| Hash Cond: (foo.a = bar.a) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..895.00 rows=28700 width=14) |
| Hash Key: foo.a |
| -> Seq Scan on foo (cost=0.00..321.00 rows=28700 width=14) |
| -> Hash (cost=321.00..321.00 rows=28700 width=10) |
| -> Seq Scan on bar (cost=0.00..321.00 rows=28700 width=10) |
| Optimizer: Postgres query optimizer |
| (10 rows) |
| |
| delete from foo using bar where foo.a=bar.a; |
| select * from foo; |
| a | b |
| ---+--- |
| (0 rows) |
| |
| drop table foo; |
| drop table bar; |
| create table foo (a int, b int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| create table bar(a int, b int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| insert into foo select generate_series(1,10); |
| insert into bar select generate_series(1,10); |
| alter table foo set with(REORGANIZE=false) distributed randomly; |
| analyze foo; |
| analyze bar; |
| explain delete from foo using bar where foo.a = bar.a returning foo.*; |
| QUERY PLAN |
| --------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) (cost=679.75..277582.65 rows=7413210 width=16) |
| -> Delete on foo (cost=679.75..178739.85 rows=2471070 width=16) |
| -> Explicit Redistribute Motion 3:3 (slice2; segments: 3) (cost=679.75..178739.85 rows=2471070 width=16) |
| -> Hash Join (cost=679.75..129318.45 rows=2471070 width=16) |
| Hash Cond: (foo.a = bar.a) |
| -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..895.00 rows=28700 width=14) |
| Hash Key: foo.a |
| -> Seq Scan on foo (cost=0.00..321.00 rows=28700 width=14) |
| -> Hash (cost=321.00..321.00 rows=28700 width=10) |
| -> Seq Scan on bar (cost=0.00..321.00 rows=28700 width=10) |
| Optimizer: Postgres query optimizer |
| (11 rows) |
| |
| delete from foo using bar where foo.a = bar.a returning foo.*; |
| a | b |
| ----+--- |
| 1 | |
| 9 | |
| 7 | |
| 10 | |
| 2 | |
| 3 | |
| 8 | |
| 5 | |
| 6 | |
| 4 | |
| (10 rows) |
| |
| select * from foo; |
| a | b |
| ---+--- |
| (0 rows) |
| |
| drop table foo; |
| drop table bar; |
| create table foo (a int, b int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| insert into foo select generate_series(1,10); |
| alter table foo set with(REORGANIZE=false) distributed randomly; |
| analyze foo; |
| explain delete from foo where foo.a=1; |
| QUERY PLAN |
| ------------------------------------------------------------- |
| Delete on foo (cost=0.00..392.75 rows=29 width=10) |
| -> Seq Scan on foo (cost=0.00..392.75 rows=29 width=10) |
| Filter: (a = 1) |
| Optimizer: Postgres query optimizer |
| (4 rows) |
| |
| delete from foo where foo.a=1; |
| drop table foo; |
| create table foo (a int, b int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| create table bar(a int, b int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| insert into foo select generate_series(1,10); |
| insert into bar select generate_series(1,10); |
| alter table foo set with(REORGANIZE=false) distributed randomly; |
| analyze foo; |
| analyze bar; |
| explain delete from foo using bar where foo.a=bar.b; |
| QUERY PLAN |
| ----------------------------------------------------------------------------------------------------------------- |
| Delete on foo (cost=1253.75..179313.85 rows=2471070 width=16) |
| -> Explicit Redistribute Motion 3:3 (slice1; segments: 3) (cost=1253.75..179313.85 rows=2471070 width=16) |
| -> Hash Join (cost=1253.75..129892.45 rows=2471070 width=16) |
| Hash Cond: (foo.a = bar.b) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..895.00 rows=28700 width=14) |
| Hash Key: foo.a |
| -> Seq Scan on foo (cost=0.00..321.00 rows=28700 width=14) |
| -> Hash (cost=895.00..895.00 rows=28700 width=10) |
| -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..895.00 rows=28700 width=10) |
| Hash Key: bar.b |
| -> Seq Scan on bar (cost=0.00..321.00 rows=28700 width=10) |
| Optimizer: Postgres query optimizer |
| (12 rows) |
| |
| delete from foo using bar where foo.a=bar.b; |
| drop table foo; |
| drop table bar; |
| create table foo (a int, b int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| insert into foo select generate_series(1,10); |
| alter table foo set with(REORGANIZE=false) distributed randomly; |
| analyze foo; |
| -- Turn off redistribute motion for ORCA just for this case. |
| -- This is to get a broadcast motion over foo_1 so that no |
| -- motion is above the resultrelation foo thus no ExplicitMotion. |
| set optimizer_enable_motion_redistribute = off; |
| explain delete from foo using foo foo_1 where foo_1.a=foo.a; |
| QUERY PLAN |
| ----------------------------------------------------------------------------------------------------------- |
| Delete on foo (cost=1.14..2.35 rows=3 width=16) |
| -> Explicit Redistribute Motion 3:3 (slice1; segments: 3) (cost=1.14..2.35 rows=3 width=16) |
| -> Hash Join (cost=1.14..2.29 rows=3 width=16) |
| Hash Cond: (foo.a = foo_1.a) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..895.00 rows=28700 width=14) |
| Hash Key: foo.a |
| -> Seq Scan on foo (cost=0.00..321.00 rows=28700 width=14) |
| -> Hash (cost=895.00..895.00 rows=28700 width=10) |
| -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..895.00 rows=28700 width=10) |
| Hash Key: foo_1.a |
| -> Seq Scan on foo foo_1 (cost=0.00..321.00 rows=28700 width=10) |
| Optimizer: Postgres query optimizer |
| (12 rows) |
| |
| delete from foo using foo foo_1 where foo_1.a=foo.a; |
| reset optimizer_enable_motion_redistribute; |
| drop table foo; |
| create table foo (a int, b int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| insert into foo select generate_series(1,10); |
| alter table foo set with(REORGANIZE=false) distributed randomly; |
| analyze foo; |
| explain delete from foo; |
| QUERY PLAN |
| ---------------------------------------------------------------- |
| Delete on foo (cost=0.00..321.00 rows=28700 width=10) |
| -> Seq Scan on foo (cost=0.00..321.00 rows=28700 width=10) |
| Optimizer: Postgres query optimizer |
| (3 rows) |
| |
| delete from foo; |
| drop table foo; |
| create table foo (a int, b int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| create table bar(a int, b int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| insert into foo select generate_series(1,10); |
| insert into bar select generate_series(1,10); |
| alter table foo set with(REORGANIZE=false) distributed randomly; |
| alter table bar set with(REORGANIZE=false) distributed randomly; |
| analyze foo; |
| analyze bar; |
| explain delete from foo using bar; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------- |
| Delete on foo (cost=10000000000.00..10030890236.75 rows=2471070000 width=16) |
| -> Nested Loop (cost=10000000000.00..10030890236.75 rows=2471070000 width=16) |
| -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..1469.00 rows=86100 width=6) |
| -> Seq Scan on bar (cost=0.00..321.00 rows=28700 width=6) |
| -> Materialize (cost=0.00..464.50 rows=28700 width=10) |
| -> Seq Scan on foo (cost=0.00..321.00 rows=28700 width=10) |
| Optimizer: Postgres query optimizer |
| (7 rows) |
| |
| delete from foo using bar; |
| drop table foo; |
| drop table bar; |
| create table foo (a int, b int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| create table bar(a int, b int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| insert into bar select i, i from generate_series(1, 1000)i; |
| insert into foo select i,i from generate_series(1, 10)i; |
| alter table foo set with(REORGANIZE=false) distributed randomly; |
| alter table bar set with(REORGANIZE=false) distributed randomly; |
| analyze foo; |
| analyze bar; |
| set optimizer_enable_motion_redistribute=off; |
| explain delete from foo using bar where foo.b=bar.b; |
| QUERY PLAN |
| --------------------------------------------------------------------------------------------------------- |
| Delete on foo (cost=1.29..6.56 rows=3 width=16) |
| -> Explicit Redistribute Motion 3:3 (slice1; segments: 3) (cost=1.29..6.56 rows=3 width=16) |
| -> Hash Join (cost=1.29..6.50 rows=3 width=16) |
| Hash Cond: (bar.b = foo.b) |
| -> Seq Scan on bar (cost=0.00..4.33 rows=333 width=10) |
| -> Hash (cost=1.17..1.17 rows=10 width=14) |
| -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1.17 rows=10 width=14) |
| -> Seq Scan on foo (cost=0.00..1.03 rows=3 width=14) |
| Optimizer: Postgres query optimizer |
| (9 rows) |
| |
| delete from foo using bar where foo.b=bar.b; |
| drop table foo; |
| drop table bar; |
| reset optimizer_enable_motion_redistribute; |
| create table foo (a int, b int) distributed randomly; |
| create table bar (a int, b int) distributed randomly; |
| insert into foo (a, b) values (1, 2); |
| explain insert into bar select * from foo; |
| QUERY PLAN |
| --------------------------------------------------------------- |
| Insert on bar (cost=0.00..321.00 rows=28700 width=8) |
| -> Seq Scan on foo (cost=0.00..321.00 rows=28700 width=8) |
| Optimizer: Postgres query optimizer |
| (3 rows) |
| |
| insert into bar select * from foo; |
| select * from bar; |
| a | b |
| ---+--- |
| 1 | 2 |
| (1 row) |
| |
| drop table foo; |
| drop table bar; |
| create table foo (a int, b int) distributed randomly; |
| create table bar (a int, b int) distributed randomly; |
| insert into foo (a, b) values (1, 2); |
| insert into bar (a, b) values (1, 2); |
| explain update foo set a=4 from bar where foo.a=bar.a; |
| QUERY PLAN |
| ----------------------------------------------------------------------------------------------------------------- |
| Update on foo (cost=1253.75..179313.85 rows=2471070 width=24) |
| -> Explicit Redistribute Motion 3:3 (slice1; segments: 3) (cost=1253.75..179313.85 rows=2471070 width=24) |
| -> Hash Join (cost=1253.75..129892.45 rows=2471070 width=24) |
| Hash Cond: (foo.a = bar.a) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..895.00 rows=28700 width=18) |
| Hash Key: foo.a |
| -> Seq Scan on foo (cost=0.00..321.00 rows=28700 width=18) |
| -> Hash (cost=895.00..895.00 rows=28700 width=10) |
| -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..895.00 rows=28700 width=10) |
| Hash Key: bar.a |
| -> Seq Scan on bar (cost=0.00..321.00 rows=28700 width=10) |
| Optimizer: Postgres query optimizer |
| (12 rows) |
| |
| update foo set a=4 from bar where foo.a=bar.a; |
| select * from foo; |
| a | b |
| ---+--- |
| 4 | 2 |
| (1 row) |
| |
| drop table foo; |
| drop table bar; |
| create table foo (a int, b int) distributed randomly; |
| create table bar (a int, b int) distributed randomly; |
| create table jazz (a int, b int) distributed randomly; |
| insert into foo (a, b) values (1, 2); |
| insert into bar (a, b) values (1, 2); |
| insert into jazz (a, b) values (1, 2); |
| explain insert into foo select bar.a from bar, jazz where bar.a=jazz.a; |
| QUERY PLAN |
| ---------------------------------------------------------------------------------------------------------- |
| Insert on foo (cost=1253.75..129892.45 rows=2471070 width=8) |
| -> Hash Join (cost=1253.75..129892.45 rows=2471070 width=8) |
| Hash Cond: (bar.a = jazz.a) |
| -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..895.00 rows=28700 width=4) |
| Hash Key: bar.a |
| -> Seq Scan on bar (cost=0.00..321.00 rows=28700 width=4) |
| -> Hash (cost=895.00..895.00 rows=28700 width=4) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..895.00 rows=28700 width=4) |
| Hash Key: jazz.a |
| -> Seq Scan on jazz (cost=0.00..321.00 rows=28700 width=4) |
| Optimizer: Postgres query optimizer |
| (11 rows) |
| |
| insert into foo select bar.a from bar, jazz where bar.a=jazz.a; |
| select * from foo; |
| a | b |
| ---+--- |
| 1 | 2 |
| 1 | |
| (2 rows) |
| |
| drop table foo; |
| drop table bar; |
| drop table jazz; |
| create table foo (a int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| create table bar (b int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'b' as the Apache Cloudberry data distribution key for this table. |
| insert into foo select i from generate_series(1, 10)i; |
| insert into bar select i from generate_series(1, 10)i; |
| alter table foo set with(REORGANIZE=false) distributed randomly; |
| alter table bar set with(REORGANIZE=false) distributed randomly; |
| analyze foo; |
| analyze bar; |
| explain delete from foo using (select a from foo union all select b from bar) v; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------------------------------- |
| Delete on foo (cost=10000000000.00..10200932780.50 rows=6182460000 width=42) |
| -> Explicit Redistribute Motion 3:3 (slice1; segments: 3) (cost=10000000000.00..10200932780.50 rows=6182460000 width=42) |
| -> Nested Loop (cost=10000000000.00..10077283580.50 rows=6182460000 width=42) |
| -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1639.00 rows=96300 width=10) |
| -> Seq Scan on foo (cost=0.00..355.00 rows=32100 width=10) |
| -> Materialize (cost=0.00..1352.00 rows=64200 width=32) |
| -> Append (cost=0.00..1031.00 rows=64200 width=32) |
| -> Seq Scan on foo foo_1 (cost=0.00..355.00 rows=32100 width=32) |
| -> Seq Scan on bar (cost=0.00..355.00 rows=32100 width=32) |
| Optimizer: Postgres query optimizer |
| (10 rows) |
| |
| delete from foo using (select a from foo union all select b from bar) v; |
| select * from foo; |
| a |
| --- |
| (0 rows) |
| |
| drop table foo; |
| drop table bar; |
| -- This test is to verify ORCA can generate plans with empty |
| -- target lists. This can happen when inserting rows with no |
| -- columns into a table with no columns |
| create table test(); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause, and no column type is suitable for a distribution key. Creating a NULL policy entry. |
| explain (analyze, costs off, timing off, summary off) insert into test default values; |
| QUERY PLAN |
| ------------------------------------------------------------------------------ |
| Insert on test (actual rows=0 loops=1) |
| -> Redistribute Motion 1:3 (slice1; segments: 1) (actual rows=1 loops=1) |
| -> Result (actual rows=1 loops=1) |
| Optimizer: Postgres query optimizer |
| (4 rows) |
| |
| -- Test delete on partition table with dropped/added columns |
| CREATE TABLE part ( |
| a int, |
| b int, |
| c text, |
| d numeric) |
| DISTRIBUTED BY (b) |
| partition by range(a) ( |
| start(1) end(6) every(2), |
| default partition def); |
| alter table part add column e int; |
| insert into part select i, i, 'abc', i*1.01,i from generate_series(1,10)i; |
| alter table part drop column b; |
| NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy |
| NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy |
| NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy |
| NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy |
| NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy |
| alter table part set WITH (reorganize=true) distributed by (e); |
| -- test delete with dropped column |
| explain delete from part where d>9; |
| QUERY PLAN |
| --------------------------------------------------------------------------------------- |
| Delete on part (cost=0.00..742.33 rows=0 width=0) |
| Delete on part_1_prt_2 part_1 |
| Delete on part_1_prt_3 part_2 |
| Delete on part_1_prt_4 part_3 |
| Delete on part_1_prt_def part_4 |
| -> Append (cost=0.00..742.33 rows=14267 width=14) |
| -> Seq Scan on part_1_prt_2 part_1 (cost=0.00..167.75 rows=3567 width=14) |
| Filter: (d > '9'::numeric) |
| -> Seq Scan on part_1_prt_3 part_2 (cost=0.00..167.75 rows=3567 width=14) |
| Filter: (d > '9'::numeric) |
| -> Seq Scan on part_1_prt_4 part_3 (cost=0.00..167.75 rows=3567 width=14) |
| Filter: (d > '9'::numeric) |
| -> Seq Scan on part_1_prt_def part_4 (cost=0.00..167.75 rows=3567 width=14) |
| Filter: (d > '9'::numeric) |
| Optimizer: Postgres query optimizer |
| (15 rows) |
| |
| delete from part where d>9; |
| select count(*) from part; |
| count |
| ------- |
| 8 |
| (1 row) |
| |
| -- test delete with added partition key |
| explain delete from part where e=3; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------- |
| Delete on part (cost=0.00..671.21 rows=0 width=0) |
| Delete on part_1_prt_2 part_1 |
| Delete on part_1_prt_3 part_2 |
| Delete on part_1_prt_4 part_3 |
| Delete on part_1_prt_def part_4 |
| -> Append (cost=0.00..671.21 rows=43 width=14) |
| -> Seq Scan on part_1_prt_2 part_1 (cost=0.00..167.75 rows=11 width=14) |
| Filter: (e = 3) |
| -> Seq Scan on part_1_prt_3 part_2 (cost=0.00..167.75 rows=11 width=14) |
| Filter: (e = 3) |
| -> Seq Scan on part_1_prt_4 part_3 (cost=0.00..167.75 rows=11 width=14) |
| Filter: (e = 3) |
| -> Seq Scan on part_1_prt_def part_4 (cost=0.00..167.75 rows=11 width=14) |
| Filter: (e = 3) |
| Optimizer: Postgres query optimizer |
| (15 rows) |
| |
| delete from part where e=3; |
| select count(*) from part; |
| count |
| ------- |
| 7 |
| (1 row) |
| |
| -- test delete from default partition |
| explain delete from part where a=8; |
| QUERY PLAN |
| ------------------------------------------------------------------------------- |
| Delete on part (cost=0.00..167.75 rows=0 width=0) |
| Delete on part_1_prt_def part_1 |
| -> Seq Scan on part_1_prt_def part_1 (cost=0.00..167.75 rows=11 width=14) |
| Filter: (a = 8) |
| Optimizer: Postgres query optimizer |
| (5 rows) |
| |
| delete from part where a=8; |
| select count(*) from part; |
| count |
| ------- |
| 6 |
| (1 row) |
| |
| DROP TABLE IF EXISTS part; |
| CREATE TABLE part ( |
| a int, |
| b int, |
| partkey int, |
| c text, |
| d numeric) |
| DISTRIBUTED BY (b) |
| partition by range(partkey) ( |
| start(1) end(6) every(2), |
| default partition def); |
| alter table part add column e int; |
| insert into part select i, i, i, 'abc', i*1.01,i from generate_series(1,10)i; |
| alter table part drop column b; |
| NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy |
| NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy |
| NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy |
| NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy |
| NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy |
| alter table part set WITH (reorganize=true) distributed by (e); |
| -- test delete with column order change |
| explain delete from part where d>9; |
| QUERY PLAN |
| --------------------------------------------------------------------------------------- |
| Delete on part (cost=0.00..717.78 rows=0 width=0) |
| Delete on part_1_prt_2 part_1 |
| Delete on part_1_prt_3 part_2 |
| Delete on part_1_prt_4 part_3 |
| Delete on part_1_prt_def part_4 |
| -> Append (cost=0.00..717.78 rows=13689 width=14) |
| -> Seq Scan on part_1_prt_2 part_1 (cost=0.00..162.33 rows=3422 width=14) |
| Filter: (d > '9'::numeric) |
| -> Seq Scan on part_1_prt_3 part_2 (cost=0.00..162.33 rows=3422 width=14) |
| Filter: (d > '9'::numeric) |
| -> Seq Scan on part_1_prt_4 part_3 (cost=0.00..162.33 rows=3422 width=14) |
| Filter: (d > '9'::numeric) |
| -> Seq Scan on part_1_prt_def part_4 (cost=0.00..162.33 rows=3422 width=14) |
| Filter: (d > '9'::numeric) |
| Optimizer: Postgres query optimizer |
| (15 rows) |
| |
| delete from part where d>9; |
| select count(*) from part; |
| count |
| ------- |
| 8 |
| (1 row) |
| |
| -- Test delete on mid-level partitions. Ensure Orca properly handles tuple routing |
| create table deep_part ( |
| i int, |
| j int, |
| k int, |
| s char(5) |
| ) distributed by (i) partition by list(s) subpartition by range (j) subpartition template ( |
| start(1) end(3) every(1) |
| ) ( |
| partition p1 |
| values |
| ('A'), |
| partition p2 |
| values |
| ('B') |
| ); |
| insert into deep_part values (1,1,1,'A'); |
| delete from deep_part_1_prt_p1 where j=1; |