blob: 2e019cf7f8c12a6f33e74edbfcd53d96adae1cf9 [file] [log] [blame]
-- ALTER TABLE ... SET DISTRIBUTED BY
-- This is the main interface for system expansion
\set DATA values(1, 2), (2, 3), (3, 4)
-- Basic sanity tests
set optimizer_print_missing_stats = off;
create table atsdb (i int, j text) distributed by (i);
insert into atsdb :DATA;
analyze atsdb;
-- should fail
alter table atsdb set distributed by ();
ERROR: syntax error at or near ")"
LINE 1: alter table atsdb set distributed by ();
^
alter table atsdb set distributed by (m);
ERROR: column "m" does not exist
alter table atsdb set distributed by (i, i);
ERROR: duplicate column in DISTRIBUTED BY clause
LINE 1: alter table atsdb set distributed by (i, i);
^
alter table atsdb set distributed by (i, m);
ERROR: column "m" does not exist
alter table atsdb set distributed by (i);
WARNING: distribution policy of relation "atsdb" already set to (i)
HINT: Use ALTER TABLE "atsdb" SET WITH (REORGANIZE=TRUE) DISTRIBUTED BY (i) to force redistribution
-- should work
alter table atsdb set distributed randomly;
select localoid::regclass, distkey from gp_distribution_policy where localoid = 'atsdb'::regclass;
localoid | distkey
----------+---------
atsdb |
(1 row)
-- not possible to correctly verify random distribution
alter table atsdb set distributed by (j);
select localoid::regclass, distkey from gp_distribution_policy where localoid = 'atsdb'::regclass;
localoid | distkey
----------+---------
atsdb | 2
(1 row)
-- verify that the data is correctly redistributed by building a fresh
-- table with the same policy
create table ats_test (i int, j text) distributed by (j);
insert into ats_test :DATA;
select gp_segment_id, * from ats_test except
select gp_segment_id, * from atsdb;
gp_segment_id | i | j
---------------+---+---
(0 rows)
drop table ats_test;
alter table atsdb set distributed by (i, j);
select localoid::regclass, distkey from gp_distribution_policy where localoid = 'atsdb'::regclass;
localoid | distkey
----------+---------
atsdb | 1 2
(1 row)
-- verify
create table ats_test (i int, j text) distributed by (i, j);
insert into ats_test :DATA;
select gp_segment_id, * from ats_test except
select gp_segment_id, * from atsdb;
gp_segment_id | i | j
---------------+---+---
(0 rows)
drop table ats_test;
alter table atsdb set distributed by (j, i);
select localoid::regclass, distkey from gp_distribution_policy where localoid = 'atsdb'::regclass;
localoid | distkey
----------+---------
atsdb | 2 1
(1 row)
-- verify
create table ats_test (i int, j text) distributed by (j, i);
insert into ats_test :DATA;
select gp_segment_id, * from ats_test except
select gp_segment_id, * from atsdb;
gp_segment_id | i | j
---------------+---+---
(0 rows)
drop table ats_test;
-- Now make sure indexes work.
create index atsdb_i_idx on atsdb(i);
set enable_seqscan to off;
explain select * from atsdb where i = 1;
QUERY PLAN
---------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..200.27 rows=1 width=6)
-> Index Scan using atsdb_i_idx on atsdb (cost=0.00..200.27 rows=1 width=6)
Index Cond: i = 1
Settings: enable_seqscan=off
(4 rows)
select * from atsdb where i = 1;
i | j
---+---
1 | 2
(1 row)
alter table atsdb set distributed by (i);
explain select * from atsdb where i = 1;
QUERY PLAN
---------------------------------------------------------------------------------
Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..200.27 rows=1 width=6)
-> Index Scan using atsdb_i_idx on atsdb (cost=0.00..200.27 rows=1 width=6)
Index Cond: i = 1
Settings: enable_seqscan=off
(4 rows)
select * from atsdb where i = 1;
i | j
---+---
1 | 2
(1 row)
drop table atsdb;
-- Now try AO
create table atsdb_ao (i int, j text) with (appendonly=true) distributed by (i);
insert into atsdb_ao select i, (i+1)::text from generate_series(1, 100) i;
insert into atsdb_ao select i, (i+1)::text from generate_series(1, 100) i;
-- check that we're an AO table
select count(*) from pg_appendonly where relid='atsdb_ao'::regclass;
count
-------
1
(1 row)
select count(*) from atsdb_ao;
count
-------
200
(1 row)
alter table atsdb_ao set distributed by (j);
-- Still AO?
select count(*) from pg_appendonly where relid='atsdb_ao'::regclass;
count
-------
1
(1 row)
select count(*) from atsdb_ao;
count
-------
200
(1 row)
-- check alter, vacuum analyze, and then alter
delete from atsdb_ao where i = any(array(select generate_series(1,90)));
vacuum analyze atsdb_ao;
alter table atsdb_ao set distributed randomly;
select count(*) from atsdb_ao;
count
-------
20
(1 row)
drop table atsdb_ao;
-- Can't redistribute system catalogs
alter table pg_class set distributed by (relname);
ERROR: permission denied: "pg_class" is a system catalog
alter table pg_class set with(appendonly = true);
ERROR: permission denied: "pg_class" is a system catalog
alter table pg_class set with(appendonly = true);
ERROR: permission denied: "pg_class" is a system catalog
-- WITH clause
create table atsdb (i int, j text) distributed by (j);
insert into atsdb select i, i::text from generate_series(1, 10) i;
alter table atsdb set with(appendonly = true);
select relname, segrelid != 0, reloptions from pg_class, pg_appendonly where pg_class.oid =
'atsdb'::regclass and relid = pg_class.oid;
relname | ?column? | reloptions
---------+----------+------------
atsdb | t |
(1 row)
select * from atsdb;
i | j
----+----
1 | 1
4 | 4
7 | 7
2 | 2
5 | 5
8 | 8
3 | 3
6 | 6
9 | 9
10 | 10
(10 rows)
drop table atsdb;
create view distcheck as select relname as rel, attname from
gp_distribution_policy g, pg_attribute p, pg_class c
where g.localoid = p.attrelid and attnum = any(g.distkey) and
c.oid = p.attrelid;
-- dropped columns
create table atsdb (i int, j int, t text, n numeric) distributed by (j);
insert into atsdb select i, i+1, i+2, i+3 from generate_series(1, 100) i;
alter table atsdb drop column i;
select * from atsdb;
j | t | n
-----+-----+-----
2 | 3 | 4
4 | 5 | 6
6 | 7 | 8
8 | 9 | 10
10 | 11 | 12
12 | 13 | 14
14 | 15 | 16
16 | 17 | 18
18 | 19 | 20
20 | 21 | 22
22 | 23 | 24
24 | 25 | 26
26 | 27 | 28
28 | 29 | 30
30 | 31 | 32
32 | 33 | 34
34 | 35 | 36
36 | 37 | 38
38 | 39 | 40
40 | 41 | 42
42 | 43 | 44
44 | 45 | 46
46 | 47 | 48
48 | 49 | 50
50 | 51 | 52
52 | 53 | 54
54 | 55 | 56
56 | 57 | 58
58 | 59 | 60
60 | 61 | 62
62 | 63 | 64
64 | 65 | 66
66 | 67 | 68
68 | 69 | 70
70 | 71 | 72
72 | 73 | 74
74 | 75 | 76
76 | 77 | 78
78 | 79 | 80
80 | 81 | 82
82 | 83 | 84
84 | 85 | 86
86 | 87 | 88
88 | 89 | 90
90 | 91 | 92
92 | 93 | 94
94 | 95 | 96
96 | 97 | 98
98 | 99 | 100
100 | 101 | 102
3 | 4 | 5
5 | 6 | 7
7 | 8 | 9
9 | 10 | 11
11 | 12 | 13
13 | 14 | 15
15 | 16 | 17
17 | 18 | 19
19 | 20 | 21
21 | 22 | 23
23 | 24 | 25
25 | 26 | 27
27 | 28 | 29
29 | 30 | 31
31 | 32 | 33
33 | 34 | 35
35 | 36 | 37
37 | 38 | 39
39 | 40 | 41
41 | 42 | 43
43 | 44 | 45
45 | 46 | 47
47 | 48 | 49
49 | 50 | 51
51 | 52 | 53
53 | 54 | 55
55 | 56 | 57
57 | 58 | 59
59 | 60 | 61
61 | 62 | 63
63 | 64 | 65
65 | 66 | 67
67 | 68 | 69
69 | 70 | 71
71 | 72 | 73
73 | 74 | 75
75 | 76 | 77
77 | 78 | 79
79 | 80 | 81
81 | 82 | 83
83 | 84 | 85
85 | 86 | 87
87 | 88 | 89
89 | 90 | 91
91 | 92 | 93
93 | 94 | 95
95 | 96 | 97
97 | 98 | 99
99 | 100 | 101
101 | 102 | 103
(100 rows)
alter table atsdb set distributed by (t);
select * from distcheck where rel = 'atsdb';
rel | attname
-------+---------
atsdb | t
(1 row)
alter table atsdb drop column n;
alter table atsdb set with(appendonly = true, compresslevel = 3);
select relname, segrelid != 0, reloptions from pg_class, pg_appendonly where pg_class.oid =
'atsdb'::regclass and relid = pg_class.oid;
relname | ?column? | reloptions
---------+----------+-------------------
atsdb | t | {compresslevel=3}
(1 row)
select * from distcheck where rel = 'atsdb';
rel | attname
-------+---------
atsdb | t
(1 row)
select * from atsdb;
j | t
-----+-----
9 | 10
11 | 12
13 | 14
15 | 16
17 | 18
29 | 30
31 | 32
33 | 34
35 | 36
37 | 38
49 | 50
51 | 52
53 | 54
55 | 56
57 | 58
69 | 70
71 | 72
73 | 74
75 | 76
77 | 78
89 | 90
91 | 92
93 | 94
95 | 96
97 | 98
99 | 100
101 | 102
2 | 3
4 | 5
6 | 7
8 | 9
20 | 21
22 | 23
24 | 25
26 | 27
28 | 29
40 | 41
42 | 43
44 | 45
46 | 47
48 | 49
60 | 61
62 | 63
64 | 65
66 | 67
68 | 69
80 | 81
82 | 83
84 | 85
86 | 87
88 | 89
3 | 4
5 | 6
7 | 8
19 | 20
21 | 22
23 | 24
25 | 26
27 | 28
39 | 40
41 | 42
43 | 44
45 | 46
47 | 48
59 | 60
61 | 62
63 | 64
65 | 66
67 | 68
79 | 80
81 | 82
83 | 84
85 | 86
87 | 88
10 | 11
12 | 13
14 | 15
16 | 17
18 | 19
30 | 31
32 | 33
34 | 35
36 | 37
38 | 39
50 | 51
52 | 53
54 | 55
56 | 57
58 | 59
70 | 71
72 | 73
74 | 75
76 | 77
78 | 79
90 | 91
92 | 93
94 | 95
96 | 97
98 | 99
100 | 101
(100 rows)
alter table atsdb set distributed by (j);
select * from distcheck where rel = 'atsdb';
rel | attname
-------+---------
atsdb | j
(1 row)
select relname, segrelid != 0, reloptions from pg_class, pg_appendonly where pg_class.oid =
'atsdb'::regclass and relid = pg_class.oid;
relname | ?column? | reloptions
---------+----------+-------------------
atsdb | t | {compresslevel=3}
(1 row)
select * from atsdb;
j | t
-----+-----
14 | 15
15 | 16
31 | 32
33 | 34
36 | 37
37 | 38
48 | 49
52 | 53
70 | 71
84 | 85
85 | 86
2 | 3
16 | 17
29 | 30
34 | 35
49 | 50
68 | 69
71 | 72
83 | 84
13 | 14
17 | 18
28 | 29
30 | 31
35 | 36
50 | 51
51 | 52
63 | 64
69 | 70
72 | 73
86 | 87
3 | 4
6 | 7
18 | 19
20 | 21
42 | 43
53 | 54
73 | 74
76 | 77
77 | 78
88 | 89
90 | 91
91 | 92
5 | 6
19 | 20
22 | 23
38 | 39
40 | 41
41 | 42
54 | 55
55 | 56
74 | 75
87 | 88
97 | 98
4 | 5
7 | 8
21 | 22
39 | 40
56 | 57
57 | 58
75 | 76
89 | 90
96 | 97
12 | 13
23 | 24
26 | 27
27 | 28
43 | 44
58 | 59
60 | 61
65 | 66
82 | 83
93 | 94
99 | 100
8 | 9
9 | 10
25 | 26
32 | 33
46 | 47
47 | 48
59 | 60
62 | 63
64 | 65
78 | 79
80 | 81
81 | 82
92 | 93
94 | 95
95 | 96
100 | 101
101 | 102
10 | 11
11 | 12
24 | 25
44 | 45
45 | 46
61 | 62
66 | 67
67 | 68
79 | 80
98 | 99
(100 rows)
-- validate parameters
alter table atsdb set with (appendonly = ff);
ERROR: appendonly requires a Boolean value
alter table atsdb set with (reorganize = true);
alter table atsdb set with (fgdfgef = asds);
ERROR: invalid storage type
LINE 1: alter table atsdb set with (fgdfgef = asds);
^
alter table atsdb set with(reorganize = true, reorganize = false) distributed
randomly;
ERROR: cannot specify more than one option in WITH clause
drop table atsdb;
-- check distribution after dropping distribution key column.
create table atsdb (i int, j int, t text, n numeric) distributed by (i, j);
insert into atsdb select i, i+1, i+2, i+3 from generate_series(1, 20) i;
alter table atsdb drop column i;
NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy
select * from atsdb;
j | t | n
----+----+----
2 | 3 | 4
9 | 10 | 11
11 | 12 | 13
12 | 13 | 14
21 | 22 | 23
4 | 5 | 6
7 | 8 | 9
8 | 9 | 10
10 | 11 | 12
14 | 15 | 16
15 | 16 | 17
16 | 17 | 18
18 | 19 | 20
19 | 20 | 21
20 | 21 | 22
3 | 4 | 5
5 | 6 | 7
6 | 7 | 8
13 | 14 | 15
17 | 18 | 19
(20 rows)
select * from distcheck where rel = 'atsdb';
rel | attname
-----+---------
(0 rows)
drop table atsdb;
-- check DROP TYPE..CASCADE updates distribution policy to random if
-- any a dist key column is dropped for multi key distribution key columns
create domain int_new as int;
create table atsdb (i int_new, j int, t text, n numeric) distributed by (i, j);
insert into atsdb select i, i+1, i+2, i+3 from generate_series(1, 20) i;
drop type int_new cascade;
NOTICE: drop cascades to column i of table atsdb
NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy
select * from atsdb;
j | t | n
----+----+----
4 | 5 | 6
7 | 8 | 9
8 | 9 | 10
10 | 11 | 12
14 | 15 | 16
15 | 16 | 17
16 | 17 | 18
18 | 19 | 20
19 | 20 | 21
20 | 21 | 22
3 | 4 | 5
5 | 6 | 7
6 | 7 | 8
13 | 14 | 15
17 | 18 | 19
2 | 3 | 4
9 | 10 | 11
11 | 12 | 13
12 | 13 | 14
21 | 22 | 23
(20 rows)
select * from distcheck where rel = 'atsdb';
rel | attname
-----+---------
(0 rows)
drop table atsdb;
-- Check that we correctly cascade for partitioned tables
create table atsdb (i int, j int, k int) distributed by (i) partition by range(k)
(start(1) end(10) every(1));
insert into atsdb select i+2, i+1, i from generate_series(1, 9) i;
select * from distcheck where rel like 'atsdb%';
rel | attname
---------------+---------
atsdb | i
atsdb_1_prt_1 | i
atsdb_1_prt_2 | i
atsdb_1_prt_3 | i
atsdb_1_prt_4 | i
atsdb_1_prt_5 | i
atsdb_1_prt_6 | i
atsdb_1_prt_7 | i
atsdb_1_prt_8 | i
atsdb_1_prt_9 | i
(10 rows)
alter table atsdb set distributed by (j);
select * from distcheck where rel like 'atsdb%';
rel | attname
---------------+---------
atsdb | j
atsdb_1_prt_1 | j
atsdb_1_prt_2 | j
atsdb_1_prt_3 | j
atsdb_1_prt_4 | j
atsdb_1_prt_5 | j
atsdb_1_prt_6 | j
atsdb_1_prt_7 | j
atsdb_1_prt_8 | j
atsdb_1_prt_9 | j
(10 rows)
select * from atsdb order by 1, 2, 3;
i | j | k
----+----+---
3 | 2 | 1
4 | 3 | 2
5 | 4 | 3
6 | 5 | 4
7 | 6 | 5
8 | 7 | 6
9 | 8 | 7
10 | 9 | 8
11 | 10 | 9
(9 rows)
alter table atsdb set with(appendonly = true);
ERROR: cannot change access method of a partitioned table
select relname, a.blocksize, compresslevel, compresstype, checksum from pg_class c, pg_appendonly a where
relname like 'atsdb%' and c.oid = a.relid order by 1;
relname | blocksize | compresslevel | compresstype | checksum
---------+-----------+---------------+--------------+----------
(0 rows)
select * from atsdb order by 1, 2, 3;
i | j | k
----+----+---
3 | 2 | 1
4 | 3 | 2
5 | 4 | 3
6 | 5 | 4
7 | 6 | 5
8 | 7 | 6
9 | 8 | 7
10 | 9 | 8
11 | 10 | 9
(9 rows)
insert into atsdb select i+2, i+1, i from generate_series(1, 9) i;
select * from atsdb order by 1, 2, 3;
i | j | k
----+----+---
3 | 2 | 1
3 | 2 | 1
4 | 3 | 2
4 | 3 | 2
5 | 4 | 3
5 | 4 | 3
6 | 5 | 4
6 | 5 | 4
7 | 6 | 5
7 | 6 | 5
8 | 7 | 6
8 | 7 | 6
9 | 8 | 7
9 | 8 | 7
10 | 9 | 8
10 | 9 | 8
11 | 10 | 9
11 | 10 | 9
(18 rows)
drop table atsdb;
-- check distribution correctly cascaded for inherited tables
create table dropColumnCascade (a int, b int, e int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table dropColumnCascadeChild (c int) inherits (dropColumnCascade);
NOTICE: table has parent, setting distribution columns to match parent table
create table dropColumnCascadeAnother (d int) inherits (dropColumnCascadeChild);
NOTICE: table has parent, setting distribution columns to match parent table
insert into dropColumnCascadeAnother select i,i,i from generate_series(1,10)i;
alter table dropColumnCascade drop column a;
NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy
NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy
NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy
select * from distcheck where rel like 'dropcolumnicascade%';
rel | attname
-----+---------
(0 rows)
drop table dropColumnCascade cascade;
NOTICE: drop cascades to 2 other objects
DETAIL: drop cascades to table dropcolumncascadechild
drop cascades to table dropcolumncascadeanother
-- check DROP TYPE..CASCADE for dist key type for inherited tables
-- distribution should be set to randomly for base and inherited tables
create domain int_new as int;
create table dropColumnCascade (a int_new, b int, e int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table dropColumnCascadeChild (c int) inherits (dropColumnCascade);
NOTICE: table has parent, setting distribution columns to match parent table
create table dropColumnCascadeAnother (d int) inherits (dropColumnCascadeChild);
NOTICE: table has parent, setting distribution columns to match parent table
insert into dropColumnCascadeAnother select i,i,i from generate_series(1,10)i;
drop type int_new cascade;
NOTICE: drop cascades to 3 other objects
DETAIL: drop cascades to column a of table dropcolumncascade
drop cascades to column a of table dropcolumncascadechild
drop cascades to column a of table dropcolumncascadeanother
NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy
NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy
NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy
select * from distcheck where rel like 'dropcolumncascade%';
rel | attname
-----+---------
(0 rows)
drop table dropColumnCascade cascade;
NOTICE: drop cascades to 2 other objects
DETAIL: drop cascades to table dropcolumncascadechild
drop cascades to table dropcolumncascadeanother
-- Test corner cases in dropping distkey as inherited columns
create table p1 (f1 int, f2 int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table c1 (f1 int not null) inherits(p1);
NOTICE: table has parent, setting distribution columns to match parent table
NOTICE: merging column "f1" with inherited definition
alter table p1 drop column f1;
NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy
-- only p1 is randomly distributed, c1 is still distributed by c1.f1
select * from distcheck where rel in ('p1', 'c1');
rel | attname
-----+---------
c1 | f1
(1 row)
alter table c1 drop column f1;
NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy
-- both c1 and p1 randomly distributed
select * from distcheck where rel in ('p1', 'c1');
rel | attname
-----+---------
(0 rows)
drop table p1 cascade;
NOTICE: drop cascades to table c1
create table p1 (f1 int, f2 int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table c1 () inherits(p1);
NOTICE: table has parent, setting distribution columns to match parent table
alter table only p1 drop column f1;
NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy
-- only p1 is randomly distributed, c1 is still distributed by c1.f1
select * from distcheck where rel in ('p1', 'c1');
rel | attname
-----+---------
c1 | f1
(1 row)
drop table p1 cascade;
NOTICE: drop cascades to table c1
-- check DROP TYPE..CASCADE for dist key type for inherited tables
-- distribution should be set to randomly for base and inherited tables
create domain int_new as int;
create table p1 (f1 int_new, f2 int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table c1 (f1 int_new not null) inherits(p1);
NOTICE: table has parent, setting distribution columns to match parent table
NOTICE: merging column "f1" with inherited definition
create table p1_inh (f1 int_new, f2 int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table c1_inh () inherits(p1_inh);
NOTICE: table has parent, setting distribution columns to match parent table
drop type int_new cascade;
NOTICE: drop cascades to 4 other objects
DETAIL: drop cascades to column f1 of table p1
drop cascades to column f1 of table c1
drop cascades to column f1 of table p1_inh
drop cascades to column f1 of table c1_inh
NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy
NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy
NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy
NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy
-- all above tables set to randomly distributed
select * from distcheck where rel in ('p1', 'c1');
rel | attname
-----+---------
(0 rows)
drop table p1 cascade;
NOTICE: drop cascades to table c1
drop table p1_inh cascade;
NOTICE: drop cascades to table c1_inh
drop view distcheck;
-- MPP-5452
-- Should succeed
create table atsdb (i int, k int) distributed by (i) partition by range(i) (start (1) end(10)
every(1));
alter table atsdb alter partition for (5) set distributed by (i);
WARNING: distribution policy of relation "atsdb_1_prt_5" already set to (i)
HINT: Use ALTER TABLE "atsdb_1_prt_5" SET WITH (REORGANIZE=TRUE) DISTRIBUTED BY (i) to force redistribution
alter table atsdb alter partition for (5) set distributed by (i);
WARNING: distribution policy of relation "atsdb_1_prt_5" already set to (i)
HINT: Use ALTER TABLE "atsdb_1_prt_5" SET WITH (REORGANIZE=TRUE) DISTRIBUTED BY (i) to force redistribution
alter table atsdb alter partition for (5) set distributed by (i);
WARNING: distribution policy of relation "atsdb_1_prt_5" already set to (i)
HINT: Use ALTER TABLE "atsdb_1_prt_5" SET WITH (REORGANIZE=TRUE) DISTRIBUTED BY (i) to force redistribution
drop table atsdb;
--MPP-5500
CREATE TABLE test_add_drop_rename_column_change_datatype(
text_col text,
bigint_col bigint,
char_vary_col character varying(30),
numeric_col numeric,
int_col int4,
float_col float4,
int_array_col int[],
drop_col numeric,
before_rename_col int4,
change_datatype_col numeric,
a_ts_without timestamp without time zone,
b_ts_with timestamp with time zone,
date_column date) distributed randomly;
insert into test_add_drop_rename_column_change_datatype values ('0_zero', 0, '0_zero', 0, 0, 0, '{0}', 0, 0, 0, '2004-10-19 10:23:54', '2004-10-19 10:23:54+02', '1-1-2000');
insert into test_add_drop_rename_column_change_datatype values ('1_zero', 1, '1_zero', 1, 1, 1, '{1}', 1, 1, 1, '2005-10-19 10:23:54', '2005-10-19 10:23:54+02', '1-1-2001');
insert into test_add_drop_rename_column_change_datatype values ('2_zero', 2, '2_zero', 2, 2, 2, '{2}', 2, 2, 2, '2006-10-19 10:23:54', '2006-10-19 10:23:54+02', '1-1-2002');
ALTER TABLE test_add_drop_rename_column_change_datatype ADD COLUMN added_col character varying(30);
ALTER TABLE test_add_drop_rename_column_change_datatype DROP COLUMN drop_col ;
ALTER TABLE test_add_drop_rename_column_change_datatype RENAME COLUMN before_rename_col TO after_rename_col;
ALTER TABLE test_add_drop_rename_column_change_datatype ALTER COLUMN change_datatype_col TYPE int4;
alter table test_add_drop_rename_column_change_datatype set with(reorganize =
true) distributed randomly;
select * from test_add_drop_rename_column_change_datatype ;
text_col | bigint_col | char_vary_col | numeric_col | int_col | float_col | int_array_col | after_rename_col | change_datatype_col | a_ts_without | b_ts_with | date_column | added_col
----------+------------+---------------+-------------+---------+-----------+---------------+------------------+---------------------+--------------------------+------------------------------+-------------+-----------
1_zero | 1 | 1_zero | 1 | 1 | 1 | {1} | 1 | 1 | Wed Oct 19 10:23:54 2005 | Wed Oct 19 01:23:54 2005 PDT | 01-01-2001 |
2_zero | 2 | 2_zero | 2 | 2 | 2 | {2} | 2 | 2 | Thu Oct 19 10:23:54 2006 | Thu Oct 19 01:23:54 2006 PDT | 01-01-2002 |
0_zero | 0 | 0_zero | 0 | 0 | 0 | {0} | 0 | 0 | Tue Oct 19 10:23:54 2004 | Tue Oct 19 01:23:54 2004 PDT | 01-01-2000 |
(3 rows)
drop table test_add_drop_rename_column_change_datatype ;
-- MPP-5501
-- should run without error
create table atsdb with (appendonly=true) as select * from
generate_series(1,1000);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'generate_series' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
alter table only atsdb set with(reorganize=true) distributed by (generate_series);
select count(*) from atsdb;
count
-------
1000
(1 row)
drop table atsdb;
-- MPP-5746
create table mpp5746 (c int[], t text);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into mpp5746 select array[i], i from generate_series(1, 100) i;
alter table mpp5746 set with (reorganize=true, appendonly = true);
ERROR: reorganize isn't supported with other options in SET WITH
select * from mpp5746 order by 1;
c | t
-------+-----
{1} | 1
{2} | 2
{3} | 3
{4} | 4
{5} | 5
{6} | 6
{7} | 7
{8} | 8
{9} | 9
{10} | 10
{11} | 11
{12} | 12
{13} | 13
{14} | 14
{15} | 15
{16} | 16
{17} | 17
{18} | 18
{19} | 19
{20} | 20
{21} | 21
{22} | 22
{23} | 23
{24} | 24
{25} | 25
{26} | 26
{27} | 27
{28} | 28
{29} | 29
{30} | 30
{31} | 31
{32} | 32
{33} | 33
{34} | 34
{35} | 35
{36} | 36
{37} | 37
{38} | 38
{39} | 39
{40} | 40
{41} | 41
{42} | 42
{43} | 43
{44} | 44
{45} | 45
{46} | 46
{47} | 47
{48} | 48
{49} | 49
{50} | 50
{51} | 51
{52} | 52
{53} | 53
{54} | 54
{55} | 55
{56} | 56
{57} | 57
{58} | 58
{59} | 59
{60} | 60
{61} | 61
{62} | 62
{63} | 63
{64} | 64
{65} | 65
{66} | 66
{67} | 67
{68} | 68
{69} | 69
{70} | 70
{71} | 71
{72} | 72
{73} | 73
{74} | 74
{75} | 75
{76} | 76
{77} | 77
{78} | 78
{79} | 79
{80} | 80
{81} | 81
{82} | 82
{83} | 83
{84} | 84
{85} | 85
{86} | 86
{87} | 87
{88} | 88
{89} | 89
{90} | 90
{91} | 91
{92} | 92
{93} | 93
{94} | 94
{95} | 95
{96} | 96
{97} | 97
{98} | 98
{99} | 99
{100} | 100
(100 rows)
alter table mpp5746 drop column t;
select * from mpp5746 order by 1;
c
-------
{1}
{2}
{3}
{4}
{5}
{6}
{7}
{8}
{9}
{10}
{11}
{12}
{13}
{14}
{15}
{16}
{17}
{18}
{19}
{20}
{21}
{22}
{23}
{24}
{25}
{26}
{27}
{28}
{29}
{30}
{31}
{32}
{33}
{34}
{35}
{36}
{37}
{38}
{39}
{40}
{41}
{42}
{43}
{44}
{45}
{46}
{47}
{48}
{49}
{50}
{51}
{52}
{53}
{54}
{55}
{56}
{57}
{58}
{59}
{60}
{61}
{62}
{63}
{64}
{65}
{66}
{67}
{68}
{69}
{70}
{71}
{72}
{73}
{74}
{75}
{76}
{77}
{78}
{79}
{80}
{81}
{82}
{83}
{84}
{85}
{86}
{87}
{88}
{89}
{90}
{91}
{92}
{93}
{94}
{95}
{96}
{97}
{98}
{99}
{100}
(100 rows)
alter table mpp5746 set with (reorganize=true, appendonly = false);
ERROR: reorganize isn't supported with other options in SET WITH
select * from mpp5746 order by 1;
c
-------
{1}
{2}
{3}
{4}
{5}
{6}
{7}
{8}
{9}
{10}
{11}
{12}
{13}
{14}
{15}
{16}
{17}
{18}
{19}
{20}
{21}
{22}
{23}
{24}
{25}
{26}
{27}
{28}
{29}
{30}
{31}
{32}
{33}
{34}
{35}
{36}
{37}
{38}
{39}
{40}
{41}
{42}
{43}
{44}
{45}
{46}
{47}
{48}
{49}
{50}
{51}
{52}
{53}
{54}
{55}
{56}
{57}
{58}
{59}
{60}
{61}
{62}
{63}
{64}
{65}
{66}
{67}
{68}
{69}
{70}
{71}
{72}
{73}
{74}
{75}
{76}
{77}
{78}
{79}
{80}
{81}
{82}
{83}
{84}
{85}
{86}
{87}
{88}
{89}
{90}
{91}
{92}
{93}
{94}
{95}
{96}
{97}
{98}
{99}
{100}
(100 rows)
drop table mpp5746;
-- MPP-5738
create table mpp5738 (a int, b int, c int, d int)
partition by range(d) (start(1) end(10) inclusive every(1));
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into mpp5738 select i, i+1, i+2, i from generate_series(1, 10) i;
select * from mpp5738;
a | b | c | d
----+----+----+----
3 | 4 | 5 | 3
4 | 5 | 6 | 4
5 | 6 | 7 | 5
6 | 7 | 8 | 6
7 | 8 | 9 | 7
1 | 2 | 3 | 1
2 | 3 | 4 | 2
8 | 9 | 10 | 8
9 | 10 | 11 | 9
10 | 11 | 12 | 10
(10 rows)
alter table mpp5738 alter partition for (1) set with (appendonly=true);
ERROR: table "mpp5738_1_prt_1" is not partitioned
select * from mpp5738;
a | b | c | d
----+----+----+----
1 | 2 | 3 | 1
2 | 3 | 4 | 2
3 | 4 | 5 | 3
4 | 5 | 6 | 4
5 | 6 | 7 | 5
6 | 7 | 8 | 6
7 | 8 | 9 | 7
8 | 9 | 10 | 8
9 | 10 | 11 | 9
10 | 11 | 12 | 10
(10 rows)
drop table mpp5738;
drop table if exists mpp5754;
NOTICE: table "mpp5754" does not exist, skipping
CREATE TABLE mpp5754 (
N_NATIONKEY INTEGER,
N_NAME CHAR(25),
N_REGIONKEY INTEGER,
N_COMMENT VARCHAR(152)
) with (appendonly = true, checksum = true)
distributed by (N_NATIONKEY);
copy mpp5754 from stdin with delimiter '|';
select * from mpp5754 order by n_nationkey;
n_nationkey | n_name | n_regionkey | n_comment
-------------+---------------------------+-------------+-----------------------------------------------------
0 | ALGERIA | 0 | haggle. carefully final deposits detect slyly agai
(1 row)
alter table mpp5754 set distributed randomly;
select count(*) from mpp5754;
count
-------
1
(1 row)
alter table mpp5754 set distributed by (n_nationkey);
select * from mpp5754 order by n_nationkey;
n_nationkey | n_name | n_regionkey | n_comment
-------------+---------------------------+-------------+-----------------------------------------------------
0 | ALGERIA | 0 | haggle. carefully final deposits detect slyly agai
(1 row)
drop table mpp5754;
-- MPP-5918
create role atsdb;
NOTICE: resource queue required -- using default resource queue "pg_default"
create table owner_test(i int, toast text) distributed randomly;
alter table owner_test owner to atsdb;
alter table owner_test set with (reorganize = true) distributed by (i);
-- verify, atsdb should own all three
select a.relname,
x.rolname as relowner,
y.rolname as toastowner,
z.rolname as toastidxowner
from pg_class a
inner join pg_class b on b.oid = a.reltoastrelid
inner join pg_index ti on ti.indrelid = b.oid
inner join pg_class c on c.oid = ti.indexrelid,
pg_authid x, pg_authid y, pg_authid z
where a.relname='owner_test'
and x.oid = a.relowner
and y.oid = b.relowner
and z.oid = c.relowner;
relname | relowner | toastowner | toastidxowner
------------+----------+------------+---------------
owner_test | atsdb | atsdb | atsdb
(1 row)
-- MPP-9663 - Check that the ownership is consistent on the segments as well
select a.relname,
x.rolname as relowner,
y.rolname as toastowner,
z.rolname as toastidxowner
from gp_dist_random('pg_class') a
inner join gp_dist_random('pg_class') b on b.oid = a.reltoastrelid
inner join pg_index ti on ti.indrelid = b.oid
inner join gp_dist_random('pg_class') c on c.oid = ti.indexrelid,
pg_authid x, pg_authid y, pg_authid z
where a.relname='owner_test'
and x.oid = a.relowner
and y.oid = b.relowner
and z.oid = c.relowner
and a.gp_segment_id = 0
and b.gp_segment_id = 0
and c.gp_segment_id = 0;
relname | relowner | toastowner | toastidxowner
------------+----------+------------+---------------
owner_test | atsdb | atsdb | atsdb
(1 row)
-- MPP-9663 - The code path is different when the table has dropped columns
alter table owner_test add column d text;
alter table owner_test drop column d;
alter table owner_test set with (reorganize = true) distributed by (i);
select a.relname,
x.rolname as relowner,
y.rolname as toastowner,
z.rolname as toastidxowner
from gp_dist_random('pg_class') a
inner join gp_dist_random('pg_class') b on b.oid = a.reltoastrelid
inner join pg_index ti on ti.indrelid = b.oid
inner join gp_dist_random('pg_class') c on c.oid = ti.indexrelid,
pg_authid x, pg_authid y, pg_authid z
where a.relname='owner_test'
and x.oid = a.relowner
and y.oid = b.relowner
and z.oid = c.relowner
and a.gp_segment_id = 0
and b.gp_segment_id = 0
and c.gp_segment_id = 0;
relname | relowner | toastowner | toastidxowner
------------+----------+------------+---------------
owner_test | atsdb | atsdb | atsdb
(1 row)
drop table owner_test;
drop role atsdb;
-- MPP-6332
create table abc (a int, b int, c int) distributed by (a);
Alter table abc set distributed randomly;
Alter table abc set with (reorganize=false) distributed randomly;
WARNING: distribution policy of relation "abc" already set to DISTRIBUTED RANDOMLY
HINT: Use ALTER TABLE "abc" SET WITH (REORGANIZE=TRUE) DISTRIBUTED RANDOMLY to force a random redistribution.
drop table abc;
-- MPP-18660: duplicate entry in gp_distribution_policy
set enable_indexscan=on;
set enable_seqscan=off;
drop table if exists distrib_index_test;
NOTICE: table "distrib_index_test" does not exist, skipping
create table distrib_index_test (a int, b text) distributed by (a);
select count(*) from gp_distribution_policy
where localoid in (select oid from pg_class where relname='distrib_index_test');
count
-------
1
(1 row)
begin;
drop table distrib_index_test;
rollback;
select count(*) from gp_distribution_policy
where localoid in (select oid from pg_class where relname='distrib_index_test');
count
-------
1
(1 row)
reset enable_indexscan;
reset enable_seqscan;
drop table distrib_index_test;
-- alter partitioned table crash
-- Alter partitioned table set distributed by will crash when:
-- 1. reorganize = false.
-- 2. table have index.
-- 3. partition table have "with" option.
drop index if exists distrib_part_test_idx;
NOTICE: index "distrib_part_test_idx" does not exist, skipping
drop table if exists distrib_part_test;
NOTICE: table "distrib_part_test" does not exist, skipping
CREATE TABLE distrib_part_test
(
col1 int,
col2 decimal,
col3 text,
col4 bool
)
distributed by (col1)
partition by list(col2)
(
partition part1 values(1,2,3,4,5,6,7,8,9,10) WITH (appendonly=false )
);
create index distrib_part_test_idx on distrib_part_test(col1);
ALTER TABLE public.distrib_part_test SET with (reorganize=false) DISTRIBUTED RANDOMLY;
-- MPP-23801
--
-- ALTER TABLE set distribution key should check compatible with unique index.
-- case 1
CREATE TABLE t_dist1(col1 INTEGER, col2 INTEGER, CONSTRAINT pk_t_dist1 PRIMARY KEY(col2)) DISTRIBUTED BY(col2);
ALTER TABLE t_dist1 SET DISTRIBUTED BY(col1);
ERROR: distribution policy is not compatible with the table's PRIMARY KEY
DETAIL: Distribution key column "col1" is not included in the constraint.
-- case 2
CREATE TABLE t_dist2(col1 INTEGER, col2 INTEGER, col3 INTEGER, col4 INTEGER) DISTRIBUTED BY(col1);
CREATE UNIQUE INDEX idx1_t_dist2 ON t_dist2(col1, col2);
CREATE UNIQUE INDEX idx2_t_dist2 ON t_dist2(col1, col2, col3);
CREATE UNIQUE INDEX idx3_t_dist2 ON t_dist2(col1, col2, col4);
ALTER TABLE t_dist2 SET DISTRIBUTED BY(col1);
WARNING: distribution policy of relation "t_dist2" already set to (col1)
HINT: Use ALTER TABLE "t_dist2" SET WITH (REORGANIZE=TRUE) DISTRIBUTED BY (col1) to force redistribution
ALTER TABLE t_dist2 SET DISTRIBUTED BY(col2);
ALTER TABLE t_dist2 SET DISTRIBUTED BY(col1, col2);
ALTER TABLE t_dist2 SET DISTRIBUTED BY(col1, col2, col3);
ERROR: distribution policy is not compatible with UNIQUE index "idx1_t_dist2"
DETAIL: Distribution key column "col3" is not included in the constraint.
ALTER TABLE t_dist2 SET DISTRIBUTED BY(col3);
ERROR: distribution policy is not compatible with UNIQUE index "idx1_t_dist2"
DETAIL: Distribution key column "col3" is not included in the constraint.
ALTER TABLE t_dist2 SET DISTRIBUTED BY(col4);
ERROR: distribution policy is not compatible with UNIQUE index "idx1_t_dist2"
DETAIL: Distribution key column "col4" is not included in the constraint.
-- Altering distribution policy for temp tables
create temp table atsdb (c1 int, c2 int) distributed randomly;
select * from atsdb;
c1 | c2
----+----
(0 rows)
alter table atsdb set distributed by (c1);
select * from atsdb;
c1 | c2
----+----
(0 rows)
alter table atsdb set distributed by (c2);
select * from atsdb;
c1 | c2
----+----
(0 rows)
--
-- ALTER TABLE SET DATA TYPE tests, where the column is part of the
-- distribution key.
--
CREATE TABLE distpol_typechange (i int2) DISTRIBUTED BY (i);
INSERT INTO distpol_typechange values (123);
ALTER TABLE distpol_typechange ALTER COLUMN i SET DATA TYPE int4;
DROP TABLE distpol_typechange;
CREATE TABLE distpol_typechange (p text) DISTRIBUTED BY (p);
-- This should throw an error, you can't change the datatype of a distribution
-- key column.
INSERT INTO distpol_typechange VALUES ('(1,1)');
ALTER TABLE distpol_typechange ALTER COLUMN p TYPE point USING p::point;
ERROR: cannot alter type of a column used in a distribution policy
-- unless it's completely empty! But 'point' doesn't have hash a opclass,
-- so it cannot be part of the distribution key. We silently turn the
-- table randomly distributed.
TRUNCATE distpol_typechange;
ALTER TABLE distpol_typechange ALTER COLUMN p TYPE point USING p::point;
select policytype, distkey, distclass from gp_distribution_policy where localoid='distpol_typechange'::regclass;
policytype | distkey | distclass
------------+---------+-----------
p | |
(1 row)
-- Similar case, but with CREATE UNIQUE INDEX, rather than ALTER TABLE.
-- Creating a unique index on a completely empty table automatically updates
-- the distribution key to match the unique index. (This allows the common
-- case, where no DISTRIBUTED BY was given explicitly, and the system just
-- picked the first column, which isn't compatible with the unique index
-- that's created later, to work.) But not if the unique column doesn't
-- have a hash opclass!
CREATE TABLE tstab (i int4, t tsvector) distributed by (i);
CREATE UNIQUE INDEX tstab_idx ON tstab(t);
ERROR: UNIQUE index must contain all columns in the table's distribution key
DETAIL: Distribution key column "i" is not included in the constraint.
INSERT INTO tstab VALUES (1, 'foo');
-- ALTER TABLE SET DISTRIBUTED RANDOMLY should not work on a table
-- that has a primary key or unique index.
CREATE TABLE alter_table_with_primary_key (a int primary key);
ALTER TABLE alter_table_with_primary_key SET DISTRIBUTED RANDOMLY;
ERROR: cannot set to DISTRIBUTED RANDOMLY because relation has primary Key
HINT: Drop the primary key first
CREATE TABLE alter_table_with_unique_index (a int unique);
ALTER TABLE alter_table_with_unique_index SET DISTRIBUTED RANDOMLY;
ERROR: cannot set to DISTRIBUTED RANDOMLY because relation has unique index
HINT: Drop the unique index first.
-- Enable reorg partition leaf table
create table reorg_leaf (a int, b int, c int) distributed by (c)
partition by range(a)
subpartition by range (b)
subpartition template
(start(0) end (10) every (5))
(partition p0 start (0) end (5),
partition p1 start (5) end (10));
insert into reorg_leaf select i, i, i from generate_series(0, 9) i;
select *, gp_segment_id from reorg_leaf_1_prt_p0;
a | b | c | gp_segment_id
---+---+---+---------------
2 | 2 | 2 | 0
3 | 3 | 3 | 0
4 | 4 | 4 | 0
0 | 0 | 0 | 1
1 | 1 | 1 | 1
(5 rows)
-- fail: cannot change the distribution key of one partition
alter table reorg_leaf_1_prt_p0 set with (reorganize=true) distributed by(b);
ERROR: can't set the distribution policy of "reorg_leaf_1_prt_p0"
HINT: Distribution policy can not be set for an interior branch.
-- distribution key is already 'c', so this is allowed
alter table reorg_leaf_1_prt_p0 set with (reorganize=true) distributed by(c);
ERROR: can't set the distribution policy of "reorg_leaf_1_prt_p0"
HINT: Distribution policy can not be set for an interior branch.
alter table reorg_leaf_1_prt_p0 set with (reorganize=true);
-- same with a leaf partition
alter table reorg_leaf_1_prt_p0_2_prt_1 set with (reorganize=true) distributed by(b);
ERROR: can't set the distribution policy of "reorg_leaf_1_prt_p0_2_prt_1"
HINT: Distribution policy of a partition can only be the same as its parent's.
alter table reorg_leaf_1_prt_p0_2_prt_1 set with (reorganize=true) distributed by(c);
select *, gp_segment_id from reorg_leaf_1_prt_p0;
a | b | c | gp_segment_id
---+---+---+---------------
2 | 2 | 2 | 0
3 | 3 | 3 | 0
4 | 4 | 4 | 0
0 | 0 | 0 | 1
1 | 1 | 1 | 1
(5 rows)
alter table reorg_leaf_1_prt_p0_2_prt_1 set with (reorganize=true);
select *, gp_segment_id from reorg_leaf_1_prt_p0;
a | b | c | gp_segment_id
---+---+---+---------------
2 | 2 | 2 | 0
3 | 3 | 3 | 0
4 | 4 | 4 | 0
0 | 0 | 0 | 1
1 | 1 | 1 | 1
(5 rows)
--
-- Test case for GUC gp_force_random_redistribution.
-- Manually toggle the GUC should control the behavior of redistribution for randomly-distributed tables.
-- But REORGANIZE=true should redistribute no matter what.
--
-- this only affects postgres planner;
set optimizer = false;
-- check the distribution difference between 't1' and 't2' after executing 'query_string'
-- return true if data distribution changed, otherwise false.
-- Note: in extremely rare cases, even after 't2' being randomly-distributed from 't1', they could still have the
-- exact same distribution. So let the tables have a reasonably large number of rows to reduce that possibility.
CREATE OR REPLACE FUNCTION check_redistributed(query_string text, t1 text, t2 text)
RETURNS BOOLEAN AS
$$
DECLARE
before_query TEXT;
after_query TEXT;
comparison_query TEXT;
comparison_count INT;
BEGIN
-- Prepare the query strings
before_query := format('SELECT gp_segment_id as segid, count(*) AS tupcount FROM %I GROUP BY gp_segment_id', t1);
after_query := format('SELECT gp_segment_id as segid, count(*) AS tupcount FROM %I GROUP BY gp_segment_id', t2);
comparison_query := format('SELECT COUNT(*) FROM ((TABLE %I EXCEPT TABLE %I) UNION ALL (TABLE %I EXCEPT TABLE %I))q', 'distribution1', 'distribution2', 'distribution2', 'distribution1');
-- Create temp tables to store the result
EXECUTE format('CREATE TEMP TABLE distribution1 AS %s DISTRIBUTED REPLICATED', before_query);
-- Execute provided query string
EXECUTE query_string;
EXECUTE format('CREATE TEMP TABLE distribution2 AS %s DISTRIBUTED REPLICATED', after_query);
-- Compare the tables using EXCEPT clause
EXECUTE comparison_query INTO comparison_count;
-- Drop temp tables
EXECUTE 'DROP TABLE distribution1';
EXECUTE 'DROP TABLE distribution2';
-- If count is greater than zero, then there's a difference
RETURN comparison_count > 0;
END;
$$
LANGUAGE plpgsql;
-- CO table builds temp table first instead of doing CTAS during REORGANIZE=true
create table t_reorganize(a int, b int) using ao_column distributed by (a);
insert into t_reorganize select 0,i from generate_series(1,1000)i;
select gp_segment_id, count(*) from t_reorganize group by gp_segment_id;
gp_segment_id | count
---------------+-------
1 | 1000
(1 row)
-- firstly, no redistribute
set gp_force_random_redistribution = off;
select check_redistributed('alter table t_reorganize set with (reorganize=true) distributed randomly', 't_reorganize', 't_reorganize');
check_redistributed
---------------------
t
(1 row)
-- reorganize from randomly to randomly should still redistribute
select check_redistributed('alter table t_reorganize set with (reorganize=true) distributed randomly', 't_reorganize', 't_reorganize');
check_redistributed
---------------------
t
(1 row)
-- but insert into table won't redistribute
create table t_random (like t_reorganize) distributed randomly;
select check_redistributed('insert into t_random select * from t_reorganize', 't_reorganize', 't_random');
check_redistributed
---------------------
f
(1 row)
-- but insert into a different distribution policy would still redistribute
create table t_distbya (like t_reorganize) distributed by (a);
select check_redistributed('insert into t_distbya select * from t_reorganize', 't_reorganize', 't_distbya');
check_redistributed
---------------------
t
(1 row)
-- now force distribute should redistribute in all cases
set gp_force_random_redistribution = on;
select check_redistributed('alter table t_reorganize set with (reorganize=true) distributed randomly', 't_reorganize', 't_reorganize');
check_redistributed
---------------------
t
(1 row)
select check_redistributed('alter table t_reorganize set with (reorganize=true) distributed randomly', 't_reorganize', 't_reorganize');
check_redistributed
---------------------
t
(1 row)
create table t_random (like t_reorganize) distributed randomly;
ERROR: relation "t_random" already exists
select check_redistributed('insert into t_random select * from t_reorganize', 't_reorganize', 't_random');
check_redistributed
---------------------
t
(1 row)
create table t_distbya (like t_reorganize) distributed by (a);
ERROR: relation "t_distbya" already exists
select check_redistributed('insert into t_distbya select * from t_reorganize', 't_reorganize', 't_distbya');
check_redistributed
---------------------
t
(1 row)
reset optimizer;
reset gp_force_random_redistribution;
-- When reorganize=false, we won't reorganize and this shouldn't be affected by the existing reloptions.
CREATE TABLE public.t_reorganize_false (
a integer,
b integer
) with (appendonly=false, autovacuum_enabled=false) DISTRIBUTED BY (a);
-- Insert values which will all be on one segment
INSERT INTO t_reorganize_false VALUES (0, generate_series(1,100));
SELECT gp_segment_id,count(*) from t_reorganize_false GROUP BY 1;
gp_segment_id | 
---------------+-----
1 | 100
(1 row)
-- Change the distribution policy but because REORGANIZE=false, it should NOT be re-distributed
ALTER TABLE t_reorganize_false SET WITH (REORGANIZE=false) DISTRIBUTED RANDOMLY;
SELECT gp_segment_id,count(*) from t_reorganize_false GROUP BY 1;
gp_segment_id | 
---------------+-----
1 | 100
(1 row)
DROP TABLE t_reorganize_false;
-- Same rule should apply to partitioned table too
CREATE TABLE public.t_reorganize_false (
a integer,
b integer
)
DISTRIBUTED BY (a) PARTITION BY RANGE(b)
(
PARTITION "00" START (0) END (1000) WITH (tablename='t_reorganize_false_0', appendonly='false', autovacuum_enabled=false),
PARTITION "01" START (1000) END (2000) WITH (tablename='t_reorganize_false_1', appendonly='false', autovacuum_enabled=false),
DEFAULT PARTITION def WITH (tablename='t_reorganize_false_def', appendonly='false', autovacuum_enabled=false)
);
-- Insert values which will all be on one segment
INSERT INTO t_reorganize_false VALUES (0, generate_series(1,100));
SELECT gp_segment_id,count(*) from t_reorganize_false GROUP BY 1;
gp_segment_id | count
---------------+-------
1 | 100
(1 row)
-- Should NOT be re-distributed
ALTER TABLE t_reorganize_false SET WITH (REORGANIZE=false) DISTRIBUTED RANDOMLY;
SELECT gp_segment_id,count(*) from t_reorganize_false GROUP BY 1;
gp_segment_id | count
---------------+-------
1 | 100
(1 row)
DROP TABLE t_reorganize_false;
-- Check that AT SET DISTRIBUTED BY cannot be combined with other subcommands
-- on the same table
CREATE TABLE atsdby_multiple(i int, j int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
ALTER TABLE atsdby_multiple SET DISTRIBUTED BY(j), ADD COLUMN k int;
ERROR: cannot alter distribution with other subcommands for relation "atsdby_multiple"
HINT: consider separating into multiple statements
ALTER TABLE atsdby_multiple SET WITH (reorganize=true), ADD COLUMN k int;
ERROR: cannot alter distribution with other subcommands for relation "atsdby_multiple"
HINT: consider separating into multiple statements