blob: 3fdefb0b0b68bd2d97817729848fcef4a7d6776e [file]
-- MPP-21536: Duplicated rows inserted when ORCA is turned on
create schema bfv_dml;
set search_path=bfv_dml;
-- create test table
create table m();
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause, and no column type is suitable for a distribution key. Creating a NULL policy entry.
alter table m add column a int;
alter table m add column b int;
-- generate data for m
insert into m select i, i%5 from generate_series(1,10)i;
-- INSERT and UPDATE
create table yyy(a int, b int) distributed randomly;
insert into yyy select a,b from m;
select * from yyy order by 1, 2;
a | b
----+---
1 | 1
2 | 2
3 | 3
4 | 4
5 | 0
6 | 1
7 | 2
8 | 3
9 | 4
10 | 0
(10 rows)
update yyy set a=m.b from m where m.a=yyy.b;
select * from yyy order by 1, 2;
a | b
----+---
1 | 1
1 | 1
2 | 2
2 | 2
3 | 3
3 | 3
4 | 4
4 | 4
5 | 0
10 | 0
(10 rows)
drop table yyy;
-- UPDATE with different values
create table yyy(a int, b int) distributed randomly;
insert into yyy select a,b from m;
update yyy set b=m.b from m where m.a=yyy.a;
select * from yyy order by 1, 2;
a | b
----+---
1 | 1
2 | 2
3 | 3
4 | 4
5 | 0
6 | 1
7 | 2
8 | 3
9 | 4
10 | 0
(10 rows)
drop table yyy;
-- DELETE
create table yyy(a int, b int) distributed randomly;
insert into yyy select a,b from m;
delete from yyy where a in (select a from m);
select * from yyy order by 1, 2;
a | b
---+---
(0 rows)
drop table yyy;
create table yyy(a int, b int) distributed randomly;
insert into yyy select a,b from m;
delete from yyy where b in (select a from m);
select * from yyy order by 1, 2;
a | b
----+---
5 | 0
10 | 0
(2 rows)
drop table yyy;
-- Now repeat all the above tests, but using a hacked master-only 'm' table
drop table m;
set optimizer_enable_master_only_queries=on;
-- create master-only table
create table m();
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause, and no column type is suitable for a distribution key. Creating a NULL policy entry.
set allow_system_table_mods=true;
delete from gp_distribution_policy where localoid='m'::regclass;
reset allow_system_table_mods;
alter table m add column a int;
alter table m add column b int;
-- generate data for m
insert into m select i, i%5 from generate_series(1,10)i;
create table zzz(a int primary key, b int) distributed by (a);
insert into zzz select a,b from m;
select * from zzz order by 1, 2;
a | b
----+---
1 | 1
2 | 2
3 | 3
4 | 4
5 | 0
6 | 1
7 | 2
8 | 3
9 | 4
10 | 0
(10 rows)
delete from zzz where a in (select a from m);
select * from zzz order by 1, 2;
a | b
---+---
(0 rows)
drop table zzz;
create table zzz(a int primary key, b int) distributed by (a);
insert into zzz select a,b from m;
delete from zzz where b in (select a from m);
select * from zzz order by 1, 2;
a | b
----+---
5 | 0
10 | 0
(2 rows)
drop table zzz;
create table zzz(a int primary key, b int) distributed by (a);
insert into zzz select a,b from m;
-- This update fails with duplicate key error, but it varies which segment
-- reports it first, i.e. it varies which row it complaints first. Silence
-- that difference in the error DETAIL line
\set VERBOSITY terse
update zzz set a=m.b from m where m.a=zzz.b;
ERROR: duplicate key value violates unique constraint "zzz_pkey" (seg1 10.152.10.32:25433 pid=2877)
select * from zzz order by 1, 2;
a | b
----+---
1 | 1
2 | 2
3 | 3
4 | 4
5 | 0
6 | 1
7 | 2
8 | 3
9 | 4
10 | 0
(10 rows)
drop table zzz;
create table zzz(a int primary key, b int) distributed by (a);
insert into zzz select a,b from m;
update zzz set b=m.b from m where m.a=zzz.a;
select * from zzz order by 1, 2;
a | b
----+---
1 | 1
2 | 2
3 | 3
4 | 4
5 | 0
6 | 1
7 | 2
8 | 3
9 | 4
10 | 0
(10 rows)
drop table zzz;
drop table m;
-- MPP-21622 Update with primary key: only sort if the primary key is updated
--
-- Aside from testing that bug, this also tests EXPLAIN of an DMLActionExpr
-- that ORCA generates for plans that update the primary key.
create table update_pk_test (a int primary key, b int) distributed by (a);
insert into update_pk_test values(1,1);
explain update update_pk_test set b = 5;
QUERY PLAN
---------------------------------------------------------------------
Update on update_pk_test (cost=0.00..1.01 rows=1 width=14)
-> Seq Scan on update_pk_test (cost=0.00..1.01 rows=1 width=14)
Settings: optimizer=off
Optimizer status: Postgres query optimizer
(4 rows)
update update_pk_test set b = 5;
select * from update_pk_test order by 1,2;
a | b
---+---
1 | 5
(1 row)
explain update update_pk_test set a = 5;
QUERY PLAN
--------------------------------------------------------------------------------------------------
Update on update_pk_test (cost=0.00..1.05 rows=1 width=22)
-> Explicit Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..1.05 rows=1 width=22)
-> Split (cost=0.00..1.01 rows=1 width=22)
-> Seq Scan on update_pk_test (cost=0.00..1.01 rows=1 width=22)
Optimizer: Postgres query optimizer
(5 rows)
update update_pk_test set a = 5;
select * from update_pk_test order by 1,2;
a | b
---+---
5 | 5
(1 row)
-- MPP-22599 DML queries that fallback to planner don't check for updates on
-- the distribution key.
--
-- So the bug was that if ORCA fell back to the planner, then the usual
-- check that prohibits updating the distribution key columns was not
-- performed like it should. So the idea of this test is to have an UPDATE
-- on distribution key column, with some features in the table or the query,
-- such that ORCA cannot produce a plan and it falls back to the Postgres
-- planner.
set optimizer_trace_fallback = on;
-- Subquery that returns a row rather than a single scalar isn't supported
-- in ORCA currently, so we can use that to trigger fallback.
update update_pk_test set a=1 where row(1,2) = (SELECT 1, 2);
select * from update_pk_test order by 1,2;
a | b
---+---
1 | 5
(1 row)
reset optimizer_trace_fallback;
--
-- Check that INSERT and DELETE triggers don't fire on UPDATE.
--
-- It may seem weird how that could happen, but with ORCA, UPDATEs are
-- implemented as a "split update", which is really a DELETE and an INSERT.
--
CREATE TABLE bfv_dml_trigger_test (id int4, t text);
INSERT INTO bfv_dml_trigger_test VALUES (1, 'foo');
CREATE OR REPLACE FUNCTION bfv_dml_error_func() RETURNS trigger AS
$$
BEGIN
RAISE EXCEPTION 'trigger was called!';
RETURN NEW;
END
$$ LANGUAGE 'plpgsql';
CREATE TRIGGER before_trigger BEFORE INSERT or DELETE ON bfv_dml_trigger_test
FOR EACH ROW
EXECUTE PROCEDURE bfv_dml_error_func();
CREATE TRIGGER after_trigger AFTER INSERT or DELETE ON bfv_dml_trigger_test
FOR EACH ROW
EXECUTE PROCEDURE bfv_dml_error_func();
UPDATE bfv_dml_trigger_test SET t = 'bar';
UPDATE bfv_dml_trigger_test SET id = id + 1;
--
-- Verify that ExecInsert doesn't scribble on the old tuple, when the new
-- tuple comes directly from the old table.
--
CREATE TABLE execinsert_test (id int4, t text) DISTRIBUTED BY (id);
INSERT INTO execinsert_test values (1, 'foo');
-- Insert another identical tuple, but roll it back. If the insertion
-- incorrectly modified the xmin on the old tuple, then it will become
-- invisible when we roll back.
begin;
INSERT INTO execinsert_test select * FROM execinsert_test;
rollback;
select * from execinsert_test;
id | t
----+-----
1 | foo
(1 row)
drop table execinsert_test;
-- Repeat with a hacked master-only table, just in case the planner decides
-- to add a Motion node or something that hides the problem otherwise.
CREATE TABLE execinsert_test (id int4, t text) DISTRIBUTED BY (id);
set allow_system_table_mods=true;
delete from gp_distribution_policy where localoid='execinsert_test'::regclass;
reset allow_system_table_mods;
INSERT INTO execinsert_test values (1, 'foo');
begin;
INSERT INTO execinsert_test select * FROM execinsert_test;
rollback;
select * from execinsert_test;
id | t
----+-----
1 | foo
(1 row)
drop table execinsert_test;
--
-- Verify that DELETE properly redistributes in the case of joins
--
drop table if exists foo;
NOTICE: table "foo" does not exist, skipping
drop table if exists bar;
NOTICE: table "bar" does not exist, skipping
create table foo (a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
create table bar(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
insert into foo select generate_series(1,10);
insert into bar select generate_series(1,10);
-- Previously, table foo is defined as randomly distributed and
-- that might lead to flaky result of the explain statement
-- since random cost. We set policy to random without move the
-- data after data is all inserted. This method can both have
-- a random dist table and a stable test result.
-- Following cases are using the same skill here.
alter table foo set with(REORGANIZE=false) distributed randomly;
analyze foo;
analyze bar;
explain delete from foo using bar where foo.a=bar.a;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------
Delete on foo (cost=679.75..178739.85 rows=2471070 width=16)
-> Explicit Redistribute Motion 3:3 (slice1; segments: 3) (cost=679.75..178739.85 rows=2471070 width=16)
-> Hash Join (cost=679.75..129318.45 rows=2471070 width=16)
Hash Cond: (foo.a = bar.a)
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..895.00 rows=28700 width=14)
Hash Key: foo.a
-> Seq Scan on foo (cost=0.00..321.00 rows=28700 width=14)
-> Hash (cost=321.00..321.00 rows=28700 width=10)
-> Seq Scan on bar (cost=0.00..321.00 rows=28700 width=10)
Optimizer: Postgres query optimizer
(10 rows)
delete from foo using bar where foo.a=bar.a;
select * from foo;
a | b
---+---
(0 rows)
drop table foo;
drop table bar;
create table foo (a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
create table bar(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
insert into foo select generate_series(1,10);
insert into bar select generate_series(1,10);
alter table foo set with(REORGANIZE=false) distributed randomly;
analyze foo;
analyze bar;
explain delete from foo using bar where foo.a = bar.a returning foo.*;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=679.75..277582.65 rows=7413210 width=16)
-> Delete on foo (cost=679.75..178739.85 rows=2471070 width=16)
-> Explicit Redistribute Motion 3:3 (slice2; segments: 3) (cost=679.75..178739.85 rows=2471070 width=16)
-> Hash Join (cost=679.75..129318.45 rows=2471070 width=16)
Hash Cond: (foo.a = bar.a)
-> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..895.00 rows=28700 width=14)
Hash Key: foo.a
-> Seq Scan on foo (cost=0.00..321.00 rows=28700 width=14)
-> Hash (cost=321.00..321.00 rows=28700 width=10)
-> Seq Scan on bar (cost=0.00..321.00 rows=28700 width=10)
Optimizer: Postgres query optimizer
(11 rows)
delete from foo using bar where foo.a = bar.a returning foo.*;
a | b
----+---
1 |
9 |
7 |
10 |
2 |
3 |
8 |
5 |
6 |
4 |
(10 rows)
select * from foo;
a | b
---+---
(0 rows)
drop table foo;
drop table bar;
create table foo (a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
insert into foo select generate_series(1,10);
alter table foo set with(REORGANIZE=false) distributed randomly;
analyze foo;
explain delete from foo where foo.a=1;
QUERY PLAN
-------------------------------------------------------------
Delete on foo (cost=0.00..392.75 rows=29 width=10)
-> Seq Scan on foo (cost=0.00..392.75 rows=29 width=10)
Filter: (a = 1)
Optimizer: Postgres query optimizer
(4 rows)
delete from foo where foo.a=1;
drop table foo;
create table foo (a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
create table bar(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
insert into foo select generate_series(1,10);
insert into bar select generate_series(1,10);
alter table foo set with(REORGANIZE=false) distributed randomly;
analyze foo;
analyze bar;
explain delete from foo using bar where foo.a=bar.b;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------
Delete on foo (cost=1253.75..179313.85 rows=2471070 width=16)
-> Explicit Redistribute Motion 3:3 (slice1; segments: 3) (cost=1253.75..179313.85 rows=2471070 width=16)
-> Hash Join (cost=1253.75..129892.45 rows=2471070 width=16)
Hash Cond: (foo.a = bar.b)
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..895.00 rows=28700 width=14)
Hash Key: foo.a
-> Seq Scan on foo (cost=0.00..321.00 rows=28700 width=14)
-> Hash (cost=895.00..895.00 rows=28700 width=10)
-> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..895.00 rows=28700 width=10)
Hash Key: bar.b
-> Seq Scan on bar (cost=0.00..321.00 rows=28700 width=10)
Optimizer: Postgres query optimizer
(12 rows)
delete from foo using bar where foo.a=bar.b;
drop table foo;
drop table bar;
create table foo (a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
insert into foo select generate_series(1,10);
alter table foo set with(REORGANIZE=false) distributed randomly;
analyze foo;
-- Turn off redistribute motion for ORCA just for this case.
-- This is to get a broadcast motion over foo_1 so that no
-- motion is above the resultrelation foo thus no ExplicitMotion.
set optimizer_enable_motion_redistribute = off;
explain delete from foo using foo foo_1 where foo_1.a=foo.a;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------
Delete on foo (cost=1.14..2.35 rows=3 width=16)
-> Explicit Redistribute Motion 3:3 (slice1; segments: 3) (cost=1.14..2.35 rows=3 width=16)
-> Hash Join (cost=1.14..2.29 rows=3 width=16)
Hash Cond: (foo.a = foo_1.a)
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..895.00 rows=28700 width=14)
Hash Key: foo.a
-> Seq Scan on foo (cost=0.00..321.00 rows=28700 width=14)
-> Hash (cost=895.00..895.00 rows=28700 width=10)
-> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..895.00 rows=28700 width=10)
Hash Key: foo_1.a
-> Seq Scan on foo foo_1 (cost=0.00..321.00 rows=28700 width=10)
Optimizer: Postgres query optimizer
(12 rows)
delete from foo using foo foo_1 where foo_1.a=foo.a;
reset optimizer_enable_motion_redistribute;
drop table foo;
create table foo (a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
insert into foo select generate_series(1,10);
alter table foo set with(REORGANIZE=false) distributed randomly;
analyze foo;
explain delete from foo;
QUERY PLAN
----------------------------------------------------------------
Delete on foo (cost=0.00..321.00 rows=28700 width=10)
-> Seq Scan on foo (cost=0.00..321.00 rows=28700 width=10)
Optimizer: Postgres query optimizer
(3 rows)
delete from foo;
drop table foo;
create table foo (a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
create table bar(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
insert into foo select generate_series(1,10);
insert into bar select generate_series(1,10);
alter table foo set with(REORGANIZE=false) distributed randomly;
alter table bar set with(REORGANIZE=false) distributed randomly;
analyze foo;
analyze bar;
explain delete from foo using bar;
QUERY PLAN
--------------------------------------------------------------------------------------------------
Delete on foo (cost=10000000000.00..10030890236.75 rows=2471070000 width=16)
-> Nested Loop (cost=10000000000.00..10030890236.75 rows=2471070000 width=16)
-> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..1469.00 rows=86100 width=6)
-> Seq Scan on bar (cost=0.00..321.00 rows=28700 width=6)
-> Materialize (cost=0.00..464.50 rows=28700 width=10)
-> Seq Scan on foo (cost=0.00..321.00 rows=28700 width=10)
Optimizer: Postgres query optimizer
(7 rows)
delete from foo using bar;
drop table foo;
drop table bar;
create table foo (a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
create table bar(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
insert into bar select i, i from generate_series(1, 1000)i;
insert into foo select i,i from generate_series(1, 10)i;
alter table foo set with(REORGANIZE=false) distributed randomly;
alter table bar set with(REORGANIZE=false) distributed randomly;
analyze foo;
analyze bar;
set optimizer_enable_motion_redistribute=off;
explain delete from foo using bar where foo.b=bar.b;
QUERY PLAN
---------------------------------------------------------------------------------------------------------
Delete on foo (cost=1.29..6.56 rows=3 width=16)
-> Explicit Redistribute Motion 3:3 (slice1; segments: 3) (cost=1.29..6.56 rows=3 width=16)
-> Hash Join (cost=1.29..6.50 rows=3 width=16)
Hash Cond: (bar.b = foo.b)
-> Seq Scan on bar (cost=0.00..4.33 rows=333 width=10)
-> Hash (cost=1.17..1.17 rows=10 width=14)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1.17 rows=10 width=14)
-> Seq Scan on foo (cost=0.00..1.03 rows=3 width=14)
Optimizer: Postgres query optimizer
(9 rows)
delete from foo using bar where foo.b=bar.b;
drop table foo;
drop table bar;
reset optimizer_enable_motion_redistribute;
create table foo (a int, b int) distributed randomly;
create table bar (a int, b int) distributed randomly;
insert into foo (a, b) values (1, 2);
explain insert into bar select * from foo;
QUERY PLAN
---------------------------------------------------------------
Insert on bar (cost=0.00..321.00 rows=28700 width=8)
-> Seq Scan on foo (cost=0.00..321.00 rows=28700 width=8)
Optimizer: Postgres query optimizer
(3 rows)
insert into bar select * from foo;
select * from bar;
a | b
---+---
1 | 2
(1 row)
drop table foo;
drop table bar;
create table foo (a int, b int) distributed randomly;
create table bar (a int, b int) distributed randomly;
insert into foo (a, b) values (1, 2);
insert into bar (a, b) values (1, 2);
explain update foo set a=4 from bar where foo.a=bar.a;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------
Update on foo (cost=1253.75..179313.85 rows=2471070 width=24)
-> Explicit Redistribute Motion 3:3 (slice1; segments: 3) (cost=1253.75..179313.85 rows=2471070 width=24)
-> Hash Join (cost=1253.75..129892.45 rows=2471070 width=24)
Hash Cond: (foo.a = bar.a)
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..895.00 rows=28700 width=18)
Hash Key: foo.a
-> Seq Scan on foo (cost=0.00..321.00 rows=28700 width=18)
-> Hash (cost=895.00..895.00 rows=28700 width=10)
-> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..895.00 rows=28700 width=10)
Hash Key: bar.a
-> Seq Scan on bar (cost=0.00..321.00 rows=28700 width=10)
Optimizer: Postgres query optimizer
(12 rows)
update foo set a=4 from bar where foo.a=bar.a;
select * from foo;
a | b
---+---
4 | 2
(1 row)
drop table foo;
drop table bar;
create table foo (a int, b int) distributed randomly;
create table bar (a int, b int) distributed randomly;
create table jazz (a int, b int) distributed randomly;
insert into foo (a, b) values (1, 2);
insert into bar (a, b) values (1, 2);
insert into jazz (a, b) values (1, 2);
explain insert into foo select bar.a from bar, jazz where bar.a=jazz.a;
QUERY PLAN
----------------------------------------------------------------------------------------------------------
Insert on foo (cost=1253.75..129892.45 rows=2471070 width=8)
-> Hash Join (cost=1253.75..129892.45 rows=2471070 width=8)
Hash Cond: (bar.a = jazz.a)
-> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..895.00 rows=28700 width=4)
Hash Key: bar.a
-> Seq Scan on bar (cost=0.00..321.00 rows=28700 width=4)
-> Hash (cost=895.00..895.00 rows=28700 width=4)
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..895.00 rows=28700 width=4)
Hash Key: jazz.a
-> Seq Scan on jazz (cost=0.00..321.00 rows=28700 width=4)
Optimizer: Postgres query optimizer
(11 rows)
insert into foo select bar.a from bar, jazz where bar.a=jazz.a;
select * from foo;
a | b
---+---
1 | 2
1 |
(2 rows)
drop table foo;
drop table bar;
drop table jazz;
create table foo (a int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
create table bar (b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'b' as the Apache Cloudberry data distribution key for this table.
insert into foo select i from generate_series(1, 10)i;
insert into bar select i from generate_series(1, 10)i;
alter table foo set with(REORGANIZE=false) distributed randomly;
alter table bar set with(REORGANIZE=false) distributed randomly;
analyze foo;
analyze bar;
explain delete from foo using (select a from foo union all select b from bar) v;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------
Delete on foo (cost=10000000000.00..10200932780.50 rows=6182460000 width=42)
-> Explicit Redistribute Motion 3:3 (slice1; segments: 3) (cost=10000000000.00..10200932780.50 rows=6182460000 width=42)
-> Nested Loop (cost=10000000000.00..10077283580.50 rows=6182460000 width=42)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1639.00 rows=96300 width=10)
-> Seq Scan on foo (cost=0.00..355.00 rows=32100 width=10)
-> Materialize (cost=0.00..1352.00 rows=64200 width=32)
-> Append (cost=0.00..1031.00 rows=64200 width=32)
-> Seq Scan on foo foo_1 (cost=0.00..355.00 rows=32100 width=32)
-> Seq Scan on bar (cost=0.00..355.00 rows=32100 width=32)
Optimizer: Postgres query optimizer
(10 rows)
delete from foo using (select a from foo union all select b from bar) v;
select * from foo;
a
---
(0 rows)
drop table foo;
drop table bar;
-- This test is to verify ORCA can generate plans with empty
-- target lists. This can happen when inserting rows with no
-- columns into a table with no columns
create table test();
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause, and no column type is suitable for a distribution key. Creating a NULL policy entry.
explain (analyze, costs off, timing off, summary off) insert into test default values;
QUERY PLAN
------------------------------------------------------------------------------
Insert on test (actual rows=0 loops=1)
-> Redistribute Motion 1:3 (slice1; segments: 1) (actual rows=1 loops=1)
-> Result (actual rows=1 loops=1)
Optimizer: Postgres query optimizer
(4 rows)
-- Test delete on partition table with dropped/added columns
CREATE TABLE part (
a int,
b int,
c text,
d numeric)
DISTRIBUTED BY (b)
partition by range(a) (
start(1) end(6) every(2),
default partition def);
alter table part add column e int;
insert into part select i, i, 'abc', i*1.01,i from generate_series(1,10)i;
alter table part drop column b;
NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy
NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy
NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy
NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy
NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy
alter table part set WITH (reorganize=true) distributed by (e);
-- test delete with dropped column
explain delete from part where d>9;
QUERY PLAN
---------------------------------------------------------------------------------------
Delete on part (cost=0.00..742.33 rows=0 width=0)
Delete on part_1_prt_2 part_1
Delete on part_1_prt_3 part_2
Delete on part_1_prt_4 part_3
Delete on part_1_prt_def part_4
-> Append (cost=0.00..742.33 rows=14267 width=14)
-> Seq Scan on part_1_prt_2 part_1 (cost=0.00..167.75 rows=3567 width=14)
Filter: (d > '9'::numeric)
-> Seq Scan on part_1_prt_3 part_2 (cost=0.00..167.75 rows=3567 width=14)
Filter: (d > '9'::numeric)
-> Seq Scan on part_1_prt_4 part_3 (cost=0.00..167.75 rows=3567 width=14)
Filter: (d > '9'::numeric)
-> Seq Scan on part_1_prt_def part_4 (cost=0.00..167.75 rows=3567 width=14)
Filter: (d > '9'::numeric)
Optimizer: Postgres query optimizer
(15 rows)
delete from part where d>9;
select count(*) from part;
count
-------
8
(1 row)
-- test delete with added partition key
explain delete from part where e=3;
QUERY PLAN
-------------------------------------------------------------------------------------
Delete on part (cost=0.00..671.21 rows=0 width=0)
Delete on part_1_prt_2 part_1
Delete on part_1_prt_3 part_2
Delete on part_1_prt_4 part_3
Delete on part_1_prt_def part_4
-> Append (cost=0.00..671.21 rows=43 width=14)
-> Seq Scan on part_1_prt_2 part_1 (cost=0.00..167.75 rows=11 width=14)
Filter: (e = 3)
-> Seq Scan on part_1_prt_3 part_2 (cost=0.00..167.75 rows=11 width=14)
Filter: (e = 3)
-> Seq Scan on part_1_prt_4 part_3 (cost=0.00..167.75 rows=11 width=14)
Filter: (e = 3)
-> Seq Scan on part_1_prt_def part_4 (cost=0.00..167.75 rows=11 width=14)
Filter: (e = 3)
Optimizer: Postgres query optimizer
(15 rows)
delete from part where e=3;
select count(*) from part;
count
-------
7
(1 row)
-- test delete from default partition
explain delete from part where a=8;
QUERY PLAN
-------------------------------------------------------------------------------
Delete on part (cost=0.00..167.75 rows=0 width=0)
Delete on part_1_prt_def part_1
-> Seq Scan on part_1_prt_def part_1 (cost=0.00..167.75 rows=11 width=14)
Filter: (a = 8)
Optimizer: Postgres query optimizer
(5 rows)
delete from part where a=8;
select count(*) from part;
count
-------
6
(1 row)
DROP TABLE IF EXISTS part;
CREATE TABLE part (
a int,
b int,
partkey int,
c text,
d numeric)
DISTRIBUTED BY (b)
partition by range(partkey) (
start(1) end(6) every(2),
default partition def);
alter table part add column e int;
insert into part select i, i, i, 'abc', i*1.01,i from generate_series(1,10)i;
alter table part drop column b;
NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy
NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy
NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy
NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy
NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy
alter table part set WITH (reorganize=true) distributed by (e);
-- test delete with column order change
explain delete from part where d>9;
QUERY PLAN
---------------------------------------------------------------------------------------
Delete on part (cost=0.00..717.78 rows=0 width=0)
Delete on part_1_prt_2 part_1
Delete on part_1_prt_3 part_2
Delete on part_1_prt_4 part_3
Delete on part_1_prt_def part_4
-> Append (cost=0.00..717.78 rows=13689 width=14)
-> Seq Scan on part_1_prt_2 part_1 (cost=0.00..162.33 rows=3422 width=14)
Filter: (d > '9'::numeric)
-> Seq Scan on part_1_prt_3 part_2 (cost=0.00..162.33 rows=3422 width=14)
Filter: (d > '9'::numeric)
-> Seq Scan on part_1_prt_4 part_3 (cost=0.00..162.33 rows=3422 width=14)
Filter: (d > '9'::numeric)
-> Seq Scan on part_1_prt_def part_4 (cost=0.00..162.33 rows=3422 width=14)
Filter: (d > '9'::numeric)
Optimizer: Postgres query optimizer
(15 rows)
delete from part where d>9;
select count(*) from part;
count
-------
8
(1 row)
-- Test delete on mid-level partitions. Ensure Orca properly handles tuple routing
create table deep_part (
i int,
j int,
k int,
s char(5)
) distributed by (i) partition by list(s) subpartition by range (j) subpartition template (
start(1) end(3) every(1)
) (
partition p1
values
('A'),
partition p2
values
('B')
);
insert into deep_part values (1,1,1,'A');
delete from deep_part_1_prt_p1 where j=1;