blob: 44f3ed1e9a108d5221a87370f588e30257a3bd1b [file] [log] [blame]
-- Test DELETE and UPDATE on an inherited table.
-- The special aspect of this table is that the inherited table has
-- a different distribution key. 'p' table's distribution key matches
-- that of 'r', but 'p2's doesn't. Test that the planner adds a Motion
-- node correctly for p2.
create table todelete (a int) distributed by (a);
create table parent (a int, b int, c int) distributed by (a);
create table child (a int, b int, c int) inherits (parent) distributed by (b);
NOTICE: merging column "a" with inherited definition
NOTICE: merging column "b" with inherited definition
NOTICE: merging column "c" with inherited definition
insert into parent select g, g, g from generate_series(1,5) g;
insert into child select g, g, g from generate_series(6,10) g;
insert into todelete select generate_series(3,4);
delete from parent using todelete where parent.a = todelete.a;
insert into todelete select generate_series(5,7);
update parent set c=c+100 from todelete where parent.a = todelete.a;
select * from parent;
a | b | c
----+----+-----
1 | 1 | 1
2 | 2 | 2
8 | 8 | 8
9 | 9 | 9
10 | 10 | 10
5 | 5 | 105
6 | 6 | 106
7 | 7 | 107
(8 rows)
drop table todelete;
drop table child;
drop table parent;
-- This is similar to the above, but with a partitioned table (which is
-- implemented by inheritance) rather than an explicitly inherited table.
-- The scans on some of the partitions degenerate into Result nodes with
-- False one-time filter, which don't need a Motion node.
create table todelete (a int, b int) distributed by (a);
create table target (a int, b int, c int)
distributed by (a)
partition by range (c) (start(1) end(5) every(1), default partition extra);
insert into todelete select g, g % 4 from generate_series(1, 10) g;
insert into target select g, 0, 3 from generate_series(1, 5) g;
insert into target select g, 0, 1 from generate_series(1, 5) g;
delete from target where c = 3 and a in (select b from todelete);
insert into todelete values (1, 5);
update target set b=target.b+100 where c = 3 and a in (select b from todelete);
select * from target;
a | b | c
---+-----+---
1 | 0 | 1
2 | 0 | 1
4 | 0 | 3
5 | 100 | 3
3 | 0 | 1
4 | 0 | 1
5 | 0 | 1
(7 rows)
-- Also test an update with a qual that doesn't match any partition. The
-- Append degenerates into a dummy Result with false One-Time Filter.
alter table target drop default partition;
update target set b = 10 where c = 10;
drop table todelete;
drop table target;
--
-- Test updated on inheritance parent table, where some child tables need a
-- Split Update, but not all.
--
create table base_tbl (a int4, b int4) distributed by (a);
create table child_a (a int4, b int4) inherits (base_tbl) distributed by (a);
NOTICE: merging column "a" with inherited definition
NOTICE: merging column "b" with inherited definition
create table child_b (a int4, b int4) inherits (base_tbl) distributed by (b);
NOTICE: merging column "a" with inherited definition
NOTICE: merging column "b" with inherited definition
insert into base_tbl select g, g from generate_series(1, 5) g;
explain (costs off) update base_tbl set a=a+1;
ERROR: can't split update for inherit table: base_tbl (preptlist.c:138)
--start_ignore
QUERY PLAN
---------------------------------------------------------------
Update on base_tbl
Update on base_tbl
Update on child_a base_tbl_1
Update on child_b base_tbl_2
-> Explicit Redistribute Motion 3:3 (slice1; segments: 3)
-> Split
-> Seq Scan on base_tbl
-> Explicit Redistribute Motion 3:3 (slice2; segments: 3)
-> Split
-> Seq Scan on child_a base_tbl_1
-> Seq Scan on child_b base_tbl_2
Optimizer: Postgres query optimizer
(12 rows)
--end_ignore
update base_tbl set a = 5;
ERROR: can't split update for inherit table: base_tbl (preptlist.c:138)
--
-- Explicit Distribution motion must be added if any of the child nodes
-- contains any motion excluding the motions in initplans.
-- These test cases and expectation are applicable for GPDB planner not for ORCA.
--
SET gp_autostats_mode = NONE;
CREATE TABLE keo1 ( user_vie_project_code_pk character varying(24), user_vie_fiscal_year_period_sk character varying(24), user_vie_act_cntr_marg_cum character varying(24)) DISTRIBUTED RANDOMLY;
INSERT INTO keo1 VALUES ('1', '1', '1');
CREATE TABLE keo2 ( projects_pk character varying(24)) DISTRIBUTED RANDOMLY;
INSERT INTO keo2 VALUES ('1');
CREATE TABLE keo3 ( sky_per character varying(24), bky_per character varying(24)) DISTRIBUTED BY (sky_per);
INSERT INTO keo3 VALUES ('1', '1');
CREATE TABLE keo4 ( keo_para_required_period character varying(6), keo_para_budget_date character varying(24)) DISTRIBUTED RANDOMLY;
INSERT INTO keo4 VALUES ('1', '1');
ANALYZE keo1, keo2, keo3, keo4;
-- Explicit Redistribution motion should be added in case of GPDB Planner (test case not applicable for ORCA)
EXPLAIN (COSTS OFF) UPDATE keo1 SET user_vie_act_cntr_marg_cum = 234.682 FROM
( SELECT a.user_vie_project_code_pk FROM keo1 a INNER JOIN keo2 b
ON b.projects_pk=a.user_vie_project_code_pk
WHERE a.user_vie_fiscal_year_period_sk =
(SELECT MAX (sky_per) FROM keo3 WHERE bky_per =
(SELECT keo4.keo_para_required_period FROM keo4 WHERE keo_para_budget_date =
(SELECT min (keo4.keo_para_budget_date) FROM keo4)))
) t1
WHERE t1.user_vie_project_code_pk = keo1.user_vie_project_code_pk;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Update on keo1
-> Hash Join
Hash Cond: ((keo1.user_vie_project_code_pk)::text = (a.user_vie_project_code_pk)::text)
-> Seq Scan on keo1
-> Hash
-> Broadcast Motion 3:3 (slice1; segments: 3)
-> Hash Join
Hash Cond: ((a.user_vie_project_code_pk)::text = (b.projects_pk)::text)
-> Hash Join
Hash Cond: ((max((keo3.sky_per)::text)) = (a.user_vie_fiscal_year_period_sk)::text)
-> Redistribute Motion 1:3 (slice2; segments: 1)
-> Aggregate
-> Hash Join
Hash Cond: ((keo3.bky_per)::text = (keo4_1.keo_para_required_period)::text)
-> Gather Motion 3:1 (slice3; segments: 3)
-> Seq Scan on keo3
-> Hash
-> Assert
Assert Cond: ((row_number() OVER (?)) = 1)
-> WindowAgg
-> Gather Motion 3:1 (slice4; segments: 3)
-> Hash Join
Hash Cond: ((min((keo4.keo_para_budget_date)::text)) = (keo4_1.keo_para_budget_date)::text)
-> Redistribute Motion 1:3 (slice5; segments: 1)
-> Aggregate
-> Gather Motion 3:1 (slice6; segments: 3)
-> Seq Scan on keo4
-> Hash
-> Broadcast Motion 3:3 (slice7; segments: 3)
-> Seq Scan on keo4 keo4_1
-> Hash
-> Broadcast Motion 3:3 (slice8; segments: 3)
-> Seq Scan on keo1 a
-> Hash
-> Broadcast Motion 3:3 (slice9; segments: 3)
-> Seq Scan on keo2 b
Optimizer: Pivotal Optimizer (GPORCA)
(37 rows)
UPDATE keo1 SET user_vie_act_cntr_marg_cum = 234.682 FROM
( SELECT a.user_vie_project_code_pk FROM keo1 a INNER JOIN keo2 b
ON b.projects_pk=a.user_vie_project_code_pk
WHERE a.user_vie_fiscal_year_period_sk =
(SELECT MAX (sky_per) FROM keo3 WHERE bky_per =
(SELECT keo4.keo_para_required_period FROM keo4 WHERE keo_para_budget_date =
(SELECT min (keo4.keo_para_budget_date) FROM keo4)))
) t1
WHERE t1.user_vie_project_code_pk = keo1.user_vie_project_code_pk;
SELECT user_vie_act_cntr_marg_cum FROM keo1;
user_vie_act_cntr_marg_cum
----------------------------
234.682
(1 row)
-- Explicit Redistribution motion should not be added in case of GPDB Planner (test case not applicable to ORCA)
CREATE TABLE keo5 (x int, y int) DISTRIBUTED BY (x);
INSERT INTO keo5 VALUES (1,1);
EXPLAIN (COSTS OFF) DELETE FROM keo5 WHERE x IN (SELECT x FROM keo5 WHERE EXISTS (SELECT x FROM keo5 WHERE x < 2));
QUERY PLAN
------------------------------------------------------------------------------------
Delete on keo5
-> Hash Semi Join
Hash Cond: (keo5.x = keo5_2.x)
-> Seq Scan on keo5
-> Hash
-> Nested Loop Semi Join
Join Filter: true
-> Seq Scan on keo5 keo5_2
-> Materialize
-> Broadcast Motion 1:3 (slice1; segments: 1)
-> Limit
-> Gather Motion 3:1 (slice2; segments: 3)
-> Seq Scan on keo5 keo5_1
Filter: (x < 2)
Optimizer: Pivotal Optimizer (GPORCA)
(15 rows)
DELETE FROM keo5 WHERE x IN (SELECT x FROM keo5 WHERE EXISTS (SELECT x FROM keo5 WHERE x < 2));
SELECT x FROM keo5;
x
---
(0 rows)
RESET gp_autostats_mode;
DROP TABLE keo1;
DROP TABLE keo2;
DROP TABLE keo3;
DROP TABLE keo4;
DROP TABLE keo5;
--
-- text types. We should support the following updates.
--
CREATE TEMP TABLE ttab1 (a varchar(15), b integer) DISTRIBUTED BY (a);
CREATE TEMP TABLE ttab2 (a varchar(15), b integer) DISTRIBUTED BY (a);
UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a;
DROP TABLE ttab1;
DROP TABLE ttab2;
CREATE TEMP TABLE ttab1 (a text, b integer) DISTRIBUTED BY (a);
CREATE TEMP TABLE ttab2 (a text, b integer) DISTRIBUTED BY (a);
UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a;
DROP TABLE ttab1;
DROP TABLE ttab2;
CREATE TEMP TABLE ttab1 (a varchar, b integer) DISTRIBUTED BY (a);
CREATE TEMP TABLE ttab2 (a varchar, b integer) DISTRIBUTED BY (a);
UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a;
DROP TABLE ttab1;
DROP TABLE ttab2;
CREATE TEMP TABLE ttab1 (a char(15), b integer) DISTRIBUTED BY (a);
CREATE TEMP TABLE ttab2 (a char(15), b integer) DISTRIBUTED BY (a);
UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a;
DROP TABLE IF EXISTS update_distr_key;
NOTICE: table "update_distr_key" does not exist, skipping
CREATE TEMP TABLE update_distr_key (a int, b int) DISTRIBUTED BY (a);
INSERT INTO update_distr_key select i, i* 10 from generate_series(0, 9) i;
UPDATE update_distr_key SET a = 5 WHERE b = 10;
SELECT * from update_distr_key;
a | b
---+----
0 | 0
2 | 20
4 | 40
6 | 60
8 | 80
5 | 10
3 | 30
5 | 50
7 | 70
9 | 90
(10 rows)
DROP TABLE update_distr_key;
-- below cases is to test multi-hash-cols
CREATE TABLE tab3(c1 int, c2 int, c3 int, c4 int, c5 int) DISTRIBUTED BY (c1, c2, c3);
CREATE TABLE tab5(c1 int, c2 int, c3 int, c4 int, c5 int) DISTRIBUTED BY (c1, c2, c3, c4, c5);
INSERT INTO tab3 SELECT i, i, i, i, i FROM generate_series(1, 10)i;
INSERT INTO tab5 SELECT i, i, i, i, i FROM generate_series(1, 10)i;
-- test tab3
SELECT gp_segment_id, * FROM tab3;
gp_segment_id | c1 | c2 | c3 | c4 | c5
---------------+----+----+----+----+----
0 | 5 | 5 | 5 | 5 | 5
0 | 8 | 8 | 8 | 8 | 8
1 | 3 | 3 | 3 | 3 | 3
1 | 4 | 4 | 4 | 4 | 4
1 | 9 | 9 | 9 | 9 | 9
1 | 10 | 10 | 10 | 10 | 10
2 | 1 | 1 | 1 | 1 | 1
2 | 2 | 2 | 2 | 2 | 2
2 | 6 | 6 | 6 | 6 | 6
2 | 7 | 7 | 7 | 7 | 7
(10 rows)
UPDATE tab3 set c1 = 9 where c4 = 1;
SELECT gp_segment_id, * FROM tab3;
gp_segment_id | c1 | c2 | c3 | c4 | c5
---------------+----+----+----+----+----
1 | 3 | 3 | 3 | 3 | 3
1 | 4 | 4 | 4 | 4 | 4
1 | 9 | 9 | 9 | 9 | 9
1 | 10 | 10 | 10 | 10 | 10
2 | 2 | 2 | 2 | 2 | 2
2 | 6 | 6 | 6 | 6 | 6
2 | 7 | 7 | 7 | 7 | 7
0 | 5 | 5 | 5 | 5 | 5
0 | 8 | 8 | 8 | 8 | 8
0 | 9 | 1 | 1 | 1 | 1
(10 rows)
UPDATE tab3 set (c1,c2) = (5,6) where c4 = 1;
SELECT gp_segment_id, * FROM tab3;
gp_segment_id | c1 | c2 | c3 | c4 | c5
---------------+----+----+----+----+----
1 | 3 | 3 | 3 | 3 | 3
1 | 4 | 4 | 4 | 4 | 4
1 | 9 | 9 | 9 | 9 | 9
1 | 10 | 10 | 10 | 10 | 10
0 | 5 | 5 | 5 | 5 | 5
0 | 8 | 8 | 8 | 8 | 8
2 | 2 | 2 | 2 | 2 | 2
2 | 6 | 6 | 6 | 6 | 6
2 | 7 | 7 | 7 | 7 | 7
2 | 5 | 6 | 1 | 1 | 1
(10 rows)
UPDATE tab3 set (c1,c2,c3) = (3,2,1) where c4 = 1;
SELECT gp_segment_id, * FROM tab3;
gp_segment_id | c1 | c2 | c3 | c4 | c5
---------------+----+----+----+----+----
2 | 2 | 2 | 2 | 2 | 2
2 | 6 | 6 | 6 | 6 | 6
2 | 7 | 7 | 7 | 7 | 7
2 | 3 | 2 | 1 | 1 | 1
1 | 3 | 3 | 3 | 3 | 3
1 | 4 | 4 | 4 | 4 | 4
1 | 9 | 9 | 9 | 9 | 9
1 | 10 | 10 | 10 | 10 | 10
0 | 5 | 5 | 5 | 5 | 5
0 | 8 | 8 | 8 | 8 | 8
(10 rows)
UPDATE tab3 set c1 = 11 where c2 = 10 and c2 < 1;
SELECT gp_segment_id, * FROM tab3;
gp_segment_id | c1 | c2 | c3 | c4 | c5
---------------+----+----+----+----+----
0 | 5 | 5 | 5 | 5 | 5
0 | 8 | 8 | 8 | 8 | 8
2 | 2 | 2 | 2 | 2 | 2
2 | 6 | 6 | 6 | 6 | 6
2 | 7 | 7 | 7 | 7 | 7
2 | 3 | 2 | 1 | 1 | 1
1 | 3 | 3 | 3 | 3 | 3
1 | 4 | 4 | 4 | 4 | 4
1 | 9 | 9 | 9 | 9 | 9
1 | 10 | 10 | 10 | 10 | 10
(10 rows)
-- test tab5
SELECT gp_segment_id, * FROM tab5;
gp_segment_id | c1 | c2 | c3 | c4 | c5
---------------+----+----+----+----+----
0 | 4 | 4 | 4 | 4 | 4
0 | 9 | 9 | 9 | 9 | 9
0 | 10 | 10 | 10 | 10 | 10
1 | 1 | 1 | 1 | 1 | 1
1 | 2 | 2 | 2 | 2 | 2
1 | 3 | 3 | 3 | 3 | 3
1 | 5 | 5 | 5 | 5 | 5
2 | 6 | 6 | 6 | 6 | 6
2 | 7 | 7 | 7 | 7 | 7
2 | 8 | 8 | 8 | 8 | 8
(10 rows)
UPDATE tab5 set c1 = 1000 where c4 = 1;
SELECT gp_segment_id, * FROM tab5;
gp_segment_id | c1 | c2 | c3 | c4 | c5
---------------+------+----+----+----+----
0 | 4 | 4 | 4 | 4 | 4
0 | 9 | 9 | 9 | 9 | 9
0 | 10 | 10 | 10 | 10 | 10
1 | 2 | 2 | 2 | 2 | 2
1 | 3 | 3 | 3 | 3 | 3
1 | 5 | 5 | 5 | 5 | 5
1 | 1000 | 1 | 1 | 1 | 1
2 | 6 | 6 | 6 | 6 | 6
2 | 7 | 7 | 7 | 7 | 7
2 | 8 | 8 | 8 | 8 | 8
(10 rows)
UPDATE tab5 set (c1,c2) = (9,10) where c4 = 1;
SELECT gp_segment_id, * FROM tab5;
gp_segment_id | c1 | c2 | c3 | c4 | c5
---------------+----+----+----+----+----
0 | 4 | 4 | 4 | 4 | 4
0 | 9 | 9 | 9 | 9 | 9
0 | 10 | 10 | 10 | 10 | 10
0 | 9 | 10 | 1 | 1 | 1
1 | 2 | 2 | 2 | 2 | 2
1 | 3 | 3 | 3 | 3 | 3
1 | 5 | 5 | 5 | 5 | 5
2 | 6 | 6 | 6 | 6 | 6
2 | 7 | 7 | 7 | 7 | 7
2 | 8 | 8 | 8 | 8 | 8
(10 rows)
UPDATE tab5 set (c1,c2,c4) = (5,8,6) where c4 = 1;
SELECT gp_segment_id, * FROM tab5;
gp_segment_id | c1 | c2 | c3 | c4 | c5
---------------+----+----+----+----+----
0 | 4 | 4 | 4 | 4 | 4
0 | 9 | 9 | 9 | 9 | 9
0 | 10 | 10 | 10 | 10 | 10
0 | 5 | 8 | 1 | 6 | 1
1 | 2 | 2 | 2 | 2 | 2
1 | 3 | 3 | 3 | 3 | 3
1 | 5 | 5 | 5 | 5 | 5
2 | 6 | 6 | 6 | 6 | 6
2 | 7 | 7 | 7 | 7 | 7
2 | 8 | 8 | 8 | 8 | 8
(10 rows)
UPDATE tab5 set (c1,c2,c3,c4,c5) = (1,2,3,0,6) where c5 = 1;
SELECT gp_segment_id, * FROM tab5;
gp_segment_id | c1 | c2 | c3 | c4 | c5
---------------+----+----+----+----+----
0 | 4 | 4 | 4 | 4 | 4
0 | 9 | 9 | 9 | 9 | 9
0 | 10 | 10 | 10 | 10 | 10
1 | 2 | 2 | 2 | 2 | 2
1 | 3 | 3 | 3 | 3 | 3
1 | 5 | 5 | 5 | 5 | 5
1 | 1 | 2 | 3 | 0 | 6
2 | 6 | 6 | 6 | 6 | 6
2 | 7 | 7 | 7 | 7 | 7
2 | 8 | 8 | 8 | 8 | 8
(10 rows)
UPDATE tab5 set c1 = 11 where c3 = 10 and c3 < 1;
SELECT gp_segment_id, * FROM tab5;
gp_segment_id | c1 | c2 | c3 | c4 | c5
---------------+----+----+----+----+----
0 | 4 | 4 | 4 | 4 | 4
0 | 9 | 9 | 9 | 9 | 9
0 | 10 | 10 | 10 | 10 | 10
1 | 2 | 2 | 2 | 2 | 2
1 | 3 | 3 | 3 | 3 | 3
1 | 5 | 5 | 5 | 5 | 5
1 | 1 | 2 | 3 | 0 | 6
2 | 6 | 6 | 6 | 6 | 6
2 | 7 | 7 | 7 | 7 | 7
2 | 8 | 8 | 8 | 8 | 8
(10 rows)
EXPLAIN (COSTS OFF ) UPDATE tab3 SET C1 = C1 + 1, C5 = C5+1;
QUERY PLAN
------------------------------------------------------------------
Update on tab3
-> Result
One-Time Filter: true
-> Result
-> Redistribute Motion 3:3 (slice1; segments: 3)
Hash Key: c1, c2, c3
-> Split
-> Seq Scan on tab3
Optimizer: Pivotal Optimizer (GPORCA)
(9 rows)
-- clean up
drop table tab3;
drop table tab5;
-- Update distribution key
-- start_ignore
drop table if exists r;
NOTICE: table "r" does not exist, skipping
drop table if exists s;
NOTICE: table "s" does not exist, skipping
drop table if exists update_dist;
NOTICE: table "update_dist" does not exist, skipping
drop table if exists update_ao_table;
NOTICE: table "update_ao_table" does not exist, skipping
drop table if exists update_aoco_table;
NOTICE: table "update_aoco_table" does not exist, skipping
-- end_ignore
-- Update normal table distribution key
create table update_dist(a int) distributed by (a);
insert into update_dist values(1);
update update_dist set a=0 where a=1;
select * from update_dist;
a
---
0
(1 row)
-- Update distribution key with join
create table r (a int, b int) distributed by (a);
create table s (a int, b int) distributed by (a);
insert into r select generate_series(1, 5), generate_series(1, 5) * 2;
insert into s select generate_series(1, 5), generate_series(1, 5) * 2;
select * from r;
a | b
---+----
1 | 2
2 | 4
3 | 6
4 | 8
5 | 10
(5 rows)
select * from s;
a | b
---+----
1 | 2
2 | 4
3 | 6
4 | 8
5 | 10
(5 rows)
update r set a = r.a + 1 from s where r.a = s.a;
select * from r;
a | b
---+----
4 | 6
5 | 8
6 | 10
3 | 4
2 | 2
(5 rows)
update r set a = r.a + 1 where a in (select a from s);
select * from r;
a | b
---+----
6 | 10
3 | 2
5 | 6
6 | 8
4 | 4
(5 rows)
-- Update redistribution
delete from r;
delete from s;
insert into r select generate_series(1, 5), generate_series(1, 5);
insert into s select generate_series(1, 5), generate_series(1, 5) * 2;
select * from r;
a | b
---+---
3 | 3
4 | 4
5 | 5
1 | 1
2 | 2
(5 rows)
select * from s;
a | b
---+----
3 | 6
4 | 8
5 | 10
1 | 2
2 | 4
(5 rows)
update r set a = r.a + 1 from s where r.b = s.b;
select * from r;
a | b
---+---
3 | 3
5 | 5
3 | 2
5 | 4
1 | 1
(5 rows)
update r set a = r.a + 1 where b in (select b from s);
select * from r;
a | b
---+---
3 | 3
5 | 5
6 | 4
4 | 2
1 | 1
(5 rows)
-- Update hash aggreate group by
delete from r;
delete from s;
insert into r select generate_series(1, 5), generate_series(1, 5) * 2;
insert into s select generate_series(1, 5), generate_series(1, 5);
select * from r;
a | b
---+----
1 | 2
2 | 4
3 | 6
4 | 8
5 | 10
(5 rows)
select * from s;
a | b
---+---
3 | 3
4 | 4
5 | 5
1 | 1
2 | 2
(5 rows)
update s set a = s.a + 1 where exists (select 1 from r where s.a = r.b);
select * from s;
a | b
---+---
1 | 1
3 | 3
5 | 5
5 | 4
3 | 2
(5 rows)
-- Update ao table distribution key
create table update_ao_table (a int, b int) WITH (appendonly=true) distributed by (a);
insert into update_ao_table select g, g from generate_series(1, 5) g;
select * from update_ao_table;
a | b
---+---
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
(5 rows)
update update_ao_table set a = a + 1 where b = 3;
select * from update_ao_table;
a | b
---+---
1 | 1
2 | 2
4 | 4
5 | 5
4 | 3
(5 rows)
-- Update aoco table distribution key
create table update_aoco_table (a int, b int) WITH (appendonly=true, orientation=column) distributed by (a);
insert into update_aoco_table select g,g from generate_series(1, 5) g;
select * from update_aoco_table;
a | b
---+---
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
(5 rows)
update update_aoco_table set a = a + 1 where b = 3;
select * from update_aoco_table;
a | b
---+---
1 | 1
2 | 2
4 | 4
5 | 5
4 | 3
(5 rows)
-- Update prepare
delete from s;
insert into s select generate_series(1, 5), generate_series(1, 5);
select * from r;
a | b
---+----
1 | 2
2 | 4
3 | 6
4 | 8
5 | 10
(5 rows)
select * from s;
a | b
---+---
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
(5 rows)
prepare update_s(int) as update s set a = s.a + $1 where exists (select 1 from r where s.a = r.b);
execute update_s(10);
select * from s;
a | b
----+---
1 | 1
14 | 4
3 | 3
5 | 5
12 | 2
(5 rows)
-- Confirm that a split update is not created for a table excluded by
-- constraints in the planner.
create table nosplitupdate (a int) distributed by (a);
explain update nosplitupdate set a=0 where a=1 and a<1;
QUERY PLAN
-------------------------------------------------------------------------
Update on nosplitupdate (cost=0.00..0.00 rows=0 width=1)
-> Result (cost=0.00..0.00 rows=0 width=0)
One-Time Filter: true
-> Result (cost=0.00..0.00 rows=0 width=22)
-> Result (cost=0.00..0.00 rows=0 width=18)
-> Split (cost=0.00..0.00 rows=0 width=18)
-> Result (cost=0.00..0.00 rows=0 width=18)
One-Time Filter: false
Optimizer: Pivotal Optimizer (GPORCA)
(9 rows)
-- test split-update when split-node's flow is entry
create table tsplit_entry (c int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into tsplit_entry values (1), (2);
analyze tsplit_entry;
-- start_ignore
explain update tsplit_entry set c = s.a from (select count(*) as a from gp_segment_configuration) s;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------
Update on tsplit_entry (cost=0.00..882689.54 rows=1 width=1)
-> Result (cost=0.00..0.00 rows=0 width=0)
One-Time Filter: true
-> Result (cost=0.00..0.00 rows=0 width=0)
-> Redistribute Motion 1:3 (slice1) (cost=0.00..882689.42 rows=4 width=22)
Hash Key: tsplit_entry_1.c
-> Result (cost=0.00..882689.42 rows=4 width=22)
-> Split (cost=0.00..882689.42 rows=4 width=18)
-> Nested Loop (cost=0.00..882689.42 rows=2 width=22)
Join Filter: true
-> Aggregate (cost=0.00..0.00 rows=1 width=8)
-> Function Scan on gp_get_segment_configuration (cost=0.00..0.00 rows=1000 width=1)
-> Materialize (cost=0.00..431.00 rows=2 width=14)
-> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..431.00 rows=2 width=14)
-> Seq Scan on tsplit_entry tsplit_entry_1 (cost=0.00..431.00 rows=1 width=14)
Optimizer: Pivotal Optimizer (GPORCA)
(16 rows)
-- end_ignore
update tsplit_entry set c = s.a from (select count(*) as a from gp_segment_configuration) s;
CREATE TABLE update_gp_foo (
a_dist int,
b int,
c_part int,
d int
)
WITH (appendonly=false) DISTRIBUTED BY (a_dist) PARTITION BY RANGE(c_part)
(
PARTITION p20190305 START (1) END (2) WITH (tablename='update_gp_foo_1_prt_p20190305', appendonly=false)
);
CREATE TABLE update_gp_foo1 (
a_dist int,
b int,
c_part int,
d int
)
WITH (appendonly=false) DISTRIBUTED BY (a_dist) PARTITION BY RANGE(c_part)
(
PARTITION p20190305 START (1) END (2) WITH (tablename='update_gp_foo1_1_prt_p20190305', appendonly=false)
);
INSERT INTO update_gp_foo VALUES (12, 40, 1, 50);
INSERT INTO update_gp_foo1 VALUES (12, 3, 1, 50);
UPDATE update_gp_foo
SET b = update_gp_foo.c_part,
d = update_gp_foo1.a_dist
FROM update_gp_foo1;
SELECT * from update_gp_foo;
a_dist | b | c_part | d
--------+---+--------+----
12 | 1 | 1 | 12
(1 row)
-- Test insert on conflict do update
-- Insert on conflict do update is an insert statement but might
-- invoke ExecUpdate on segments, but updating distkeys of a table
-- may lead to wrong data distribution. We will check this before
-- planning, if a `insert on conflict do update` statement set the
-- dist keys of the table, it will raise an error.
-- See github issue: https://github.com/greenplum-db/gpdb/issues/9444
create table t_insert_on_conflict_update_distkey(a int, b int) distributed by (a);
create unique index uidx_t_insert_on_conflict_update_distkey on t_insert_on_conflict_update_distkey(a, b);
-- the following statement should error out because the on conflict update want to
-- modify the tuple's distkey which might lead to wrong data distribution
insert into t_insert_on_conflict_update_distkey values (1, 1) on conflict(a, b) do update set a = 1;
ERROR: modification of distribution columns in OnConflictUpdate is not supported
drop index uidx_t_insert_on_conflict_update_distkey;
drop table t_insert_on_conflict_update_distkey;
-- randomly distributed table cannot add unique constrain, so next we test replicated table
create table t_insert_on_conflict_update_distkey(a int, b int) distributed replicated;
create unique index uidx_t_insert_on_conflict_update_distkey on t_insert_on_conflict_update_distkey(a, b);
-- the following statement should succeed because replicated table does not contain distkey
insert into t_insert_on_conflict_update_distkey values (1, 1) on conflict(a, b) do update set a = 1;
-- Some tests on a partitioned table.
CREATE TABLE update_gp_rangep (a int, b int, orig_a int) DISTRIBUTED BY (b) PARTITION BY RANGE (a);
CREATE TABLE update_gp_rangep_1_to_10 PARTITION OF update_gp_rangep FOR VALUES FROM (1) TO (10);
NOTICE: table has parent, setting distribution columns to match parent table
CREATE TABLE update_gp_rangep_10_to_20 PARTITION OF update_gp_rangep FOR VALUES FROM (10) TO (20);
NOTICE: table has parent, setting distribution columns to match parent table
INSERT INTO update_gp_rangep SELECT g, g, g FROM generate_series(1, 4) g;
-- Simple case: Same partition, same node.
UPDATE update_gp_rangep SET a = 9 WHERE a = 1;
-- Distribution key update, same partition.
UPDATE update_gp_rangep SET b = 1 WHERE a = 2;
-- Move row to different partition, but no change in distribution key
UPDATE update_gp_rangep SET a = 10 WHERE a = 3;
-- Move row to different partition and also change distribution key
UPDATE update_gp_rangep SET a = 11, b = 1 WHERE a = 4;
SELECT tableoid::regclass, * FROM update_gp_rangep ORDER BY orig_a;
tableoid | a | b | orig_a
---------------------------+----+---+--------
update_gp_rangep_1_to_10 | 9 | 1 | 1
update_gp_rangep_1_to_10 | 2 | 1 | 2
update_gp_rangep_10_to_20 | 10 | 3 | 3
update_gp_rangep_10_to_20 | 11 | 1 | 4
(4 rows)
-- Also do a lookup with specific distribution key. If the rows were not
-- correctly moved across segments, this would fail to find them, assuming
-- that direct dispatch is effective.
SELECT tableoid::regclass, * FROM update_gp_rangep WHERE b = 1;
tableoid | a | b | orig_a
---------------------------+----+---+--------
update_gp_rangep_1_to_10 | 9 | 1 | 1
update_gp_rangep_1_to_10 | 2 | 1 | 2
update_gp_rangep_10_to_20 | 11 | 1 | 4
(3 rows)
-- Test for update with LASJ_NOTIN
-- See Issue: https://github.com/greenplum-db/gpdb/issues/13265
-- Actually master branch does not have the above issue even master
-- does have the same problematic code (other parts of code are
-- refactored). Also cherry-pick the case to master and keep it
-- since more test cases do no harm.
create table t1_13265(a int, b int, c int, d int) distributed by (a);
create table t2_13265(a int, b int, c int, d int) distributed by (a);
insert into t1_13265 values (1, null, 1, 1);
insert into t2_13265 values (2, null, 2, 2);
explain (verbose, costs off)
update t1_13265 set b = 2 where
(c, d) not in (select c, d from t2_13265 where a = 2);
QUERY PLAN
-------------------------------------------------------------------------------------
Update on public.t1_13265
-> Nested Loop Left Anti Semi (Not-In) Join
Output: 2, t1_13265.ctid, t1_13265.gp_segment_id, t2_13265.ctid
Join Filter: ((t1_13265.c = t2_13265.c) AND (t1_13265.d = t2_13265.d))
-> Seq Scan on public.t1_13265
Output: t1_13265.ctid, t1_13265.gp_segment_id, t1_13265.c, t1_13265.d
-> Materialize
Output: t2_13265.ctid, t2_13265.c, t2_13265.d
-> Broadcast Motion 1:3 (slice1; segments: 1)
Output: t2_13265.ctid, t2_13265.c, t2_13265.d
-> Seq Scan on public.t2_13265
Output: t2_13265.ctid, t2_13265.c, t2_13265.d
Filter: (t2_13265.a = 2)
Settings: optimizer = 'on'
Optimizer: Postgres query optimizer
(15 rows)
update t1_13265 set b = 2 where
(c, d) not in (select c, d from t2_13265 where a = 2);
select * from t1_13265;
a | b | c | d
---+---+---+---
1 | 2 | 1 | 1
(1 row)
-- test for update on partition table
CREATE TABLE into_table (
a numeric(10,0) NOT NULL,
b numeric(10,0) NOT NULL,
c numeric(10,0) NOT NULL,
d character varying(4),
e character varying(10),
f int
) DISTRIBUTED BY (a, b, c) PARTITION BY RANGE(f) (start (1) end(5) every(1));
NOTICE: CREATE TABLE will create partition "into_table_1_prt_1" for table "into_table"
NOTICE: CREATE TABLE will create partition "into_table_1_prt_2" for table "into_table"
NOTICE: CREATE TABLE will create partition "into_table_1_prt_3" for table "into_table"
NOTICE: CREATE TABLE will create partition "into_table_1_prt_4" for table "into_table"
CREATE TABLE from_table (
a numeric(10,0) NOT NULL,
b numeric(10,0) NOT NULL,
c numeric(10,0) NOT NULL,
d character varying(4),
e character varying(10),
f int
) DISTRIBUTED BY (a);
insert into into_table select i*1.5,i*2,i*3,'dd'||i,'ee'||i, i from generate_series(1,4) i;
insert into from_table select i*1.5,i*2,i*3,'xx'||i,'yy'||i, i+1 from generate_series(1,3) i;
explain (costs off)
update into_table set d=from_table.d, e=from_table.e, f=from_table.f from from_table
where into_table.a=from_table.a and into_table.b=from_table.b and into_table.c=from_table.c;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------
Update on into_table
Update on into_table_1_prt_1 into_table_1
Update on into_table_1_prt_2 into_table_2
Update on into_table_1_prt_3 into_table_3
Update on into_table_1_prt_4 into_table_4
-> Hash Join
Hash Cond: ((into_table.a = from_table.a) AND (into_table.b = from_table.b) AND (into_table.c = from_table.c))
-> Append
-> Seq Scan on into_table_1_prt_1 into_table_1
-> Seq Scan on into_table_1_prt_2 into_table_2
-> Seq Scan on into_table_1_prt_3 into_table_3
-> Seq Scan on into_table_1_prt_4 into_table_4
-> Hash
-> Redistribute Motion 3:3 (slice1; segments: 3)
Hash Key: from_table.a, from_table.b, from_table.c
-> Seq Scan on from_table
Optimizer: Postgres query optimizer
(17 rows)
update into_table set d=from_table.d, e=from_table.e, f=from_table.f from from_table
where into_table.a=from_table.a and into_table.b=from_table.b and into_table.c=from_table.c;
select * from into_table order by a;
a | b | c | d | e | f
---+---+----+-----+-----+---
2 | 2 | 3 | xx1 | yy1 | 2
3 | 4 | 6 | xx2 | yy2 | 3
5 | 6 | 9 | xx3 | yy3 | 4
6 | 8 | 12 | dd4 | ee4 | 4
(4 rows)
-- start_ignore
drop table r;
drop table s;
drop table update_dist;
drop table update_ao_table;
drop table update_aoco_table;
drop table nosplitupdate;
drop table tsplit_entry;
drop table t1_13265;
drop table t2_13265;
drop table into_table;
drop table from_table;
-- end_ignore