| set optimizer_print_missing_stats = off; |
| set optimizer_trace_fallback = on; |
| drop table if exists ctas_src; |
| NOTICE: table "ctas_src" does not exist, skipping |
| drop table if exists ctas_dst; |
| NOTICE: table "ctas_dst" does not exist, skipping |
| create table ctas_src (domain integer, class integer, attr text, value integer) distributed by (domain); |
| insert into ctas_src values(1, 1, 'A', 1); |
| insert into ctas_src values(2, 1, 'A', 0); |
| insert into ctas_src values(3, 0, 'B', 1); |
| -- MPP-2859 |
| explain (costs off) create table ctas_dst as |
| SELECT attr, class, (select count(distinct class) from ctas_src) as dclass FROM ctas_src GROUP BY attr, class distributed by (attr); |
| QUERY PLAN |
| -------------------------------------------------------------------------------- |
| HashAggregate |
| Group Key: ctas_src.attr, ctas_src.class |
| InitPlan 1 (returns $0) (slice2) |
| -> Finalize Aggregate |
| -> Gather Motion 3:1 (slice3; segments: 3) |
| -> Partial Aggregate |
| -> HashAggregate |
| Group Key: ctas_src_1.class |
| -> Redistribute Motion 3:3 (slice4; segments: 3) |
| Hash Key: ctas_src_1.class |
| -> Streaming HashAggregate |
| Group Key: ctas_src_1.class |
| -> Seq Scan on ctas_src ctas_src_1 |
| -> Redistribute Motion 3:3 (slice1; segments: 3) |
| Hash Key: ctas_src.attr |
| -> HashAggregate |
| Group Key: ctas_src.attr, ctas_src.class |
| -> Seq Scan on ctas_src |
| Optimizer: Postgres query optimizer |
| (19 rows) |
| |
| create table ctas_dst as |
| SELECT attr, class, (select count(distinct class) from ctas_src) as dclass FROM ctas_src GROUP BY attr, class distributed by (attr); |
| select * from ctas_dst; |
| attr | class | dclass |
| ------+-------+-------- |
| B | 0 | 2 |
| A | 1 | 2 |
| (2 rows) |
| |
| drop table ctas_dst; |
| explain (costs off) create table ctas_dst as |
| SELECT attr, class, (select max(class) from ctas_src) as maxclass FROM ctas_src GROUP BY attr, class distributed by (attr); |
| QUERY PLAN |
| ----------------------------------------------------------- |
| HashAggregate |
| Group Key: ctas_src.attr, ctas_src.class |
| InitPlan 1 (returns $0) (slice2) |
| -> Finalize Aggregate |
| -> Gather Motion 3:1 (slice3; segments: 3) |
| -> Partial Aggregate |
| -> Seq Scan on ctas_src ctas_src_1 |
| -> Redistribute Motion 3:3 (slice1; segments: 3) |
| Hash Key: ctas_src.attr |
| -> HashAggregate |
| Group Key: ctas_src.attr, ctas_src.class |
| -> Seq Scan on ctas_src |
| Optimizer: Postgres query optimizer |
| (13 rows) |
| |
| create table ctas_dst as |
| SELECT attr, class, (select max(class) from ctas_src) as maxclass FROM ctas_src GROUP BY attr, class distributed by (attr); |
| select * from ctas_dst; |
| attr | class | maxclass |
| ------+-------+---------- |
| B | 0 | 1 |
| A | 1 | 1 |
| (2 rows) |
| |
| drop table ctas_dst; |
| explain (costs off) create table ctas_dst as |
| SELECT attr, class, (select count(distinct class) from ctas_src) as dclass, (select max(class) from ctas_src) as maxclass, (select min(class) from ctas_src) as minclass FROM ctas_src GROUP BY attr, class distributed by (attr); |
| QUERY PLAN |
| -------------------------------------------------------------------------------- |
| HashAggregate |
| Group Key: ctas_src.attr, ctas_src.class |
| InitPlan 1 (returns $0) (slice2) |
| -> Finalize Aggregate |
| -> Gather Motion 3:1 (slice3; segments: 3) |
| -> Partial Aggregate |
| -> HashAggregate |
| Group Key: ctas_src_1.class |
| -> Redistribute Motion 3:3 (slice4; segments: 3) |
| Hash Key: ctas_src_1.class |
| -> Streaming HashAggregate |
| Group Key: ctas_src_1.class |
| -> Seq Scan on ctas_src ctas_src_1 |
| InitPlan 2 (returns $1) (slice5) |
| -> Finalize Aggregate |
| -> Gather Motion 3:1 (slice6; segments: 3) |
| -> Partial Aggregate |
| -> Seq Scan on ctas_src ctas_src_2 |
| InitPlan 3 (returns $2) (slice7) |
| -> Finalize Aggregate |
| -> Gather Motion 3:1 (slice8; segments: 3) |
| -> Partial Aggregate |
| -> Seq Scan on ctas_src ctas_src_3 |
| -> Redistribute Motion 3:3 (slice1; segments: 3) |
| Hash Key: ctas_src.attr |
| -> HashAggregate |
| Group Key: ctas_src.attr, ctas_src.class |
| -> Seq Scan on ctas_src |
| Optimizer: Postgres query optimizer |
| (29 rows) |
| |
| create table ctas_dst as |
| SELECT attr, class, (select count(distinct class) from ctas_src) as dclass, (select max(class) from ctas_src) as maxclass, (select min(class) from ctas_src) as minclass FROM ctas_src GROUP BY attr, class distributed by (attr); |
| select * from ctas_dst; |
| attr | class | dclass | maxclass | minclass |
| ------+-------+--------+----------+---------- |
| A | 1 | 2 | 1 | 0 |
| B | 0 | 2 | 1 | 0 |
| (2 rows) |
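| -- Sanity check of the values above (reasoning only, nothing extra is executed here): |
| -- ctas_src holds class values 1, 1 and 0, so count(distinct class) = 2, |
| -- max(class) = 1 and min(class) = 0, matching dclass/maxclass/minclass in every |
| -- row returned by the three CTAS variants above. |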
| |
| -- MPP-4298: "unknown" datatypes. |
| drop table if exists ctas_foo; |
| NOTICE: table "ctas_foo" does not exist, skipping |
| drop table if exists ctas_bar; |
| NOTICE: table "ctas_bar" does not exist, skipping |
| drop table if exists ctas_baz; |
| NOTICE: table "ctas_baz" does not exist, skipping |
| create table ctas_foo as select * from generate_series(1, 100); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry. |
| create table ctas_bar as select a.generate_series as a, b.generate_series as b from ctas_foo a, ctas_foo b; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry. |
| create table ctas_baz as select 'delete me' as action, * from ctas_bar distributed by (a); |
| -- "action" becomes text |
| \d ctas_baz |
| Table "public.ctas_baz" |
| Column | Type | Collation | Nullable | Default |
| --------+---------+-----------+----------+--------- |
| action | text | | | |
| a | integer | | | |
| b | integer | | | |
| Distributed by: (a) |
| |
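| -- The string literal 'delete me' has type "unknown" in the SELECT list and CTAS |
| -- resolves it to text, as the \d output above shows.  Illustrative sketch only, |
| -- not executed in this test (ctas_baz2 is a hypothetical table name): an explicit |
| -- cast pins the column type instead of relying on that resolution. |
| --   create table ctas_baz2 as |
| --     select 'delete me'::varchar(20) as action, * from ctas_bar |
| --     distributed by (a); |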
| select action, b from ctas_baz order by 1,2 limit 5; |
| action | b |
| -----------+--- |
| delete me | 1 |
| delete me | 1 |
| delete me | 1 |
| delete me | 1 |
| delete me | 1 |
| (5 rows) |
| |
| select action, b from ctas_baz order by 2 limit 5; |
| action | b |
| -----------+--- |
| delete me | 1 |
| delete me | 1 |
| delete me | 1 |
| delete me | 1 |
| delete me | 1 |
| (5 rows) |
| |
| select action::text, b from ctas_baz order by 1,2 limit 5; |
| action | b |
| -----------+--- |
| delete me | 1 |
| delete me | 1 |
| delete me | 1 |
| delete me | 1 |
| delete me | 1 |
| (5 rows) |
| |
| alter table ctas_baz alter column action type text; |
| \d ctas_baz |
| Table "public.ctas_baz" |
| Column | Type | Collation | Nullable | Default |
| --------+---------+-----------+----------+--------- |
| action | text | | | |
| a | integer | | | |
| b | integer | | | |
| Distributed by: (a) |
| |
| select action, b from ctas_baz order by 1,2 limit 5; |
| action | b |
| -----------+--- |
| delete me | 1 |
| delete me | 1 |
| delete me | 1 |
| delete me | 1 |
| delete me | 1 |
| (5 rows) |
| |
| select action, b from ctas_baz order by 2 limit 5; |
| action | b |
| -----------+--- |
| delete me | 1 |
| delete me | 1 |
| delete me | 1 |
| delete me | 1 |
| delete me | 1 |
| (5 rows) |
| |
| select action::text, b from ctas_baz order by 1,2 limit 5; |
| action | b |
| -----------+--- |
| delete me | 1 |
| delete me | 1 |
| delete me | 1 |
| delete me | 1 |
| delete me | 1 |
| (5 rows) |
| |
| -- Test CTAS with a function that executes another query that's dispatched. |
| -- Once upon a time, we had a bug in dispatching the table's OID in this |
| -- scenario. |
| create table ctas_input(x int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| insert into ctas_input select * from generate_series(1, 10); |
| CREATE FUNCTION ctas_inputArray() RETURNS INT[] AS $$ |
| DECLARE theArray INT[]; |
| BEGIN |
| SELECT array(SELECT * FROM ctas_input ORDER BY x) INTO theArray; |
| RETURN theArray; |
| --EXCEPTION WHEN OTHERS THEN RAISE NOTICE 'Catching the exception ...%', SQLERRM; |
| END; |
| $$ LANGUAGE plpgsql; |
| create table ctas_output as select ctas_inputArray()::int[] as x; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'x' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
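| -- Illustrative check, not executed as part of this expected output: the single |
| -- row in ctas_output should hold the ordered array 1..10, since ctas_input was |
| -- populated from generate_series(1, 10). |
| --   select array_length(x, 1) from ctas_output;   -- expected: 10 |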
| -- Test CTAS with VALUES. |
| CREATE TEMP TABLE yolo(i, j, k) AS (VALUES (0,0,0), (1, NULL, 0), (2, NULL, 0), (3, NULL, 0)) DISTRIBUTED BY (i); |
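| -- The column list (i, j, k) only names the target columns; their types are |
| -- inferred from the VALUES rows (all integer here).  Illustrative sketch, not |
| -- executed in this test: |
| --   \d yolo   -- would show i, j and k as integer columns, distributed by (i) |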
| -- |
| -- Test that the rows are distributed correctly in CTAS, even if the query |
| -- has an ORDER BY. This used to tickle a bug at one point. |
| -- |
| DROP TABLE IF EXISTS ctas_src, ctas_dst; |
| CREATE TABLE ctas_src( |
| col1 int4, |
| col2 decimal, |
| col3 char, |
| col4 boolean, |
| col5 int |
| ) DISTRIBUTED by(col4); |
| -- I'm not sure why, but dropping a column was crucial to tickling the |
| -- original bug. |
| ALTER TABLE ctas_src DROP COLUMN col2; |
| INSERT INTO ctas_src(col1, col3,col4,col5) |
| SELECT g, 'a',True,g from generate_series(1,5) g; |
| CREATE TABLE ctas_dst as SELECT col1,col3,col4,col5 FROM ctas_src order by 1; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry. |
| -- This will fail to find some of the rows if they're distributed incorrectly. |
| SELECT * FROM ctas_src, ctas_dst WHERE ctas_src.col1 = ctas_dst.col1; |
| col1 | col3 | col4 | col5 | col1 | col3 | col4 | col5 |
| ------+------+------+------+------+------+------+------ |
| 5 | a | t | 5 | 5 | a | t | 5 |
| 2 | a | t | 2 | 2 | a | t | 2 |
| 3 | a | t | 3 | 3 | a | t | 3 |
| 4 | a | t | 4 | 4 | a | t | 4 |
| 1 | a | t | 1 | 1 | a | t | 1 |
| (5 rows) |
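| -- Illustrative sketch, not executed here: the per-segment placement of the CTAS |
| -- result can also be inspected directly through the gp_segment_id system column. |
| --   select gp_segment_id, count(*) from ctas_dst group by 1 order by 1; |
| -- Misplaced rows would instead surface as missing matches in the join above. |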
| |
| -- Github Issue 9365: https://github.com/greenplum-db/gpdb/issues/9365 |
| -- Previously, the following CTAS case missed dispatching OIDs to the QEs, which |
| -- led to errors. |
| CREATE OR REPLACE FUNCTION array_unnest_2d_to_1d( |
| x ANYARRAY, |
| OUT unnest_row_id INT, |
| OUT unnest_result ANYARRAY |
| ) |
| RETURNS SETOF RECORD |
| AS |
| $BODY$ |
| SELECT t2.r::int, array_agg($1[t2.r][t2.c] order by t2.c) FROM |
| ( |
| SELECT generate_series(array_lower($1,2),array_upper($1,2)) as c, t1.r |
| FROM |
| ( |
| SELECT generate_series(array_lower($1,1),array_upper($1,1)) as r |
| ) t1 |
| ) t2 |
| GROUP BY t2.r |
| $BODY$ LANGUAGE SQL IMMUTABLE |
| ; |
| DROP TABLE IF EXISTS unnest_2d_tbl01; |
| NOTICE: table "unnest_2d_tbl01" does not exist, skipping |
| CREATE TABLE unnest_2d_tbl01 (id INT, val DOUBLE PRECISION[][]); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| INSERT INTO unnest_2d_tbl01 VALUES |
| (1, ARRAY[[1::float8,2],[3::float8,4],[5::float8,6],[7::float8,8]]), |
| (2, ARRAY[[101::float8,202],[303::float8,404],[505::float8,606]]) |
| ; |
| DROP TABLE IF EXISTS unnest_2d_tbl01_out; |
| NOTICE: table "unnest_2d_tbl01_out" does not exist, skipping |
| -- The following CTAS failed previously; see Github Issue 9365 |
| CREATE TABLE unnest_2d_tbl01_out AS |
| SELECT id, (array_unnest_2d_to_1d(val)).* FROM unnest_2d_tbl01; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
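| -- Illustrative check, not executed as part of this expected output: with the two |
| -- input rows above, unnest_2d_tbl01_out should contain 4 + 3 = 7 rows, one per |
| -- first-dimension slice of each input array. |
| --   select id, unnest_row_id, unnest_result |
| --   from unnest_2d_tbl01_out order by id, unnest_row_id; |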
| -- Github issue 9790. |
| -- Previously, CTAS with no data did not handle the 'WITH' (storage options) clause |
| CREATE TABLE ctas_base(a int, b int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| CREATE TABLE ctas_aocs WITH (appendonly=true, orientation=column) AS SELECT * FROM ctas_base WITH NO DATA; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry. |
| SELECT * FROM ctas_aocs; |
| a | b |
| ---+--- |
| (0 rows) |
| |
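| -- WITH NO DATA copies only the column definitions plus the storage and |
| -- distribution options; the table can be populated later with a plain INSERT. |
| -- Illustrative sketch, not executed in this test: |
| --   insert into ctas_aocs select g, g from generate_series(1, 3) g; |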
| DROP TABLE ctas_base; |
| DROP TABLE ctas_aocs; |
| -- Github Issue 10760 |
| -- Previously, CTAS with an InitPlan would dispatch OIDs twice |
| -- start_ignore |
| DROP TABLE IF EXISTS t1_github_issue_10760; |
| NOTICE: table "t1_github_issue_10760" does not exist, skipping |
| DROP TABLE IF EXISTS t2_github_issue_10760; |
| NOTICE: table "t2_github_issue_10760" does not exist, skipping |
| create table t1_github_issue_10760(a int, b int) distributed randomly; |
| -- end_ignore |
| -- Because ORCA does not generate an InitPlan for this test case, issue 10760 |
| -- does not reproduce under ORCA. So manually disable ORCA here to ensure that |
| -- there is an InitPlan in the execution plan. |
| set optimizer=off; |
| explain select * from t1_github_issue_10760 where b > (select count(*) from t1_github_issue_10760); |
| QUERY PLAN |
| ----------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) (cost=392.82..1168.23 rows=28700 width=8) |
| InitPlan 1 (returns $0) (slice2) |
| -> Finalize Aggregate (cost=392.81..392.82 rows=1 width=8) |
| -> Gather Motion 3:1 (slice3; segments: 3) (cost=392.75..392.80 rows=3 width=8) |
| -> Partial Aggregate (cost=392.75..392.76 rows=1 width=8) |
| -> Seq Scan on t1_github_issue_10760 t1_github_issue_10760_1 (cost=0.00..321.00 rows=28700 width=0) |
| -> Seq Scan on t1_github_issue_10760 (cost=0.00..392.75 rows=9567 width=8) |
| Filter: (b > $0) |
| Optimizer: Postgres query optimizer |
| (9 rows) |
| |
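| -- The plan above contains "InitPlan 1 (returns $0)", which is exactly the plan |
| -- shape needed to exercise the double OID dispatch from issue 10760. |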
| begin; |
| create table t2_github_issue_10760 as select * from t1_github_issue_10760 where b > (select count(*) from t1_github_issue_10760) distributed randomly; |
| drop table t2_github_issue_10760; |
| create table t2_github_issue_10760 as select * from t1_github_issue_10760 where b > (select count(*) from t1_github_issue_10760) distributed randomly; |
| end; |
| select count (distinct oid) from (select oid from pg_class where relname = 't2_github_issue_10760' union all select oid from gp_dist_random('pg_class') where relname = 't2_github_issue_10760')x; |
| count |
| ------- |
| 1 |
| (1 row) |
| |
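| -- gp_dist_random('pg_class') reads pg_class from every segment, so the UNION ALL |
| -- above collects the OID recorded on the coordinator and on each segment.  A count |
| -- of 1 distinct OID means they all agree; a duplicate or inconsistent dispatch |
| -- would have produced more than one.  Illustrative variant, not executed here: |
| --   select gp_segment_id, oid from gp_dist_random('pg_class') |
| --   where relname = 't2_github_issue_10760'; |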
| drop table t1_github_issue_10760; |
| drop table t2_github_issue_10760; |
| reset optimizer; |
| -- CTAS with no data should not lead to catalog inconsistency |
| -- See github issue: https://github.com/greenplum-db/gpdb/issues/11999 |
| create or replace function mv_action_select_issue_11999() returns bool language sql as |
| 'declare c cursor for select 1/0; select true'; |
| create materialized view sro_mv_issue_11999 as select mv_action_select_issue_11999() with no data; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'mv_action_select_issue_11999' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| create table t_sro_mv_issue_11999 as select mv_action_select_issue_11999() with no data; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'mv_action_select_issue_11999' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| select count(*) |
| from |
| ( |
| select localoid, policytype, numsegments, distkey |
| from gp_distribution_policy |
| where localoid::regclass::text = 'sro_mv_issue_11999' or |
| localoid::regclass::text = 't_sro_mv_issue_11999' |
| union all |
| select localoid, policytype, numsegments, distkey |
| from gp_dist_random('gp_distribution_policy') |
| where localoid::regclass::text = 'sro_mv_issue_11999' or |
| localoid::regclass::text = 't_sro_mv_issue_11999' |
| )x; |
| count |
| ------- |
| 8 |
| (1 row) |
| |
| select count(distinct (localoid, policytype, numsegments, distkey)) |
| from |
| ( |
| select localoid, policytype, numsegments, distkey |
| from gp_distribution_policy |
| where localoid::regclass::text = 'sro_mv_issue_11999' or |
| localoid::regclass::text = 't_sro_mv_issue_11999' |
| union all |
| select localoid, policytype, numsegments, distkey |
| from gp_dist_random('gp_distribution_policy') |
| where localoid::regclass::text = 'sro_mv_issue_11999' or |
| localoid::regclass::text = 't_sro_mv_issue_11999' |
| )x; |
| count |
| ------- |
| 2 |
| (1 row) |
| |
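| -- On this 3-segment cluster each relation has one gp_distribution_policy row on |
| -- the coordinator and one per segment, so the two relations account for 4 + 4 = 8 |
| -- rows in total, while only 2 distinct policies exist, i.e. the coordinator and |
| -- all segments agree on each relation's distribution policy. |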
| -- The refresh should then error out |
| refresh materialized view sro_mv_issue_11999; |
| ERROR: division by zero |
| CONTEXT: SQL function "mv_action_select_issue_11999" statement 1 |
| -- Test CTAS + InitPlan where an exception is raised in preprocess_initplans |
| CREATE OR REPLACE FUNCTION public.exception_func() |
| RETURNS refcursor |
| LANGUAGE plpgsql |
| AS $function$declare cname refcursor = 'result'; begin open cname for select 1; raise sqlstate '02000'; return cname; exception when sqlstate '02000' then return cname; end;$function$; |
| SELECT exception_func() INTO TEMPORARY test_tmp1; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named '' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| SELECT * FROM test_tmp1; |
| exception_func |
| ---------------- |
| result |
| (1 row) |
| |
| CREATE TEMPORARY TABLE test_tmp2 AS SELECT exception_func(); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named '' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| SELECT * FROM test_tmp2; |
| exception_func |
| ---------------- |
| result |
| (1 row) |
| |
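| -- Note: SELECT ... INTO is equivalent to CREATE TABLE AS, so both statements above |
| -- exercise the same CTAS path; the exception raised inside exception_func is |
| -- caught by its own handler, so each CTAS still completes. |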
| DROP FUNCTION public.exception_func(); |
| DROP TABLE test_tmp1; |
| DROP TABLE test_tmp2; |
| reset optimizer_trace_fallback; |