blob: cb0c2536bfe897ecab61d644cd59941611f8047a [file] [log] [blame]
-- Regression fixture for CREATE TABLE AS (CTAS) on a distributed cluster.
-- NOTE(review): this is an expected-output (.out) file; the NOTICE / plan /
-- result lines below are literal psql output and must stay byte-identical.
-- Setup: quiet missing-stats chatter, trace planner fallback, and build a
-- small source table distributed by "domain".
set optimizer_print_missing_stats = off;
set optimizer_trace_fallback = on;
drop table if exists ctas_src;
NOTICE: table "ctas_src" does not exist, skipping
drop table if exists ctas_dst;
NOTICE: table "ctas_dst" does not exist, skipping
create table ctas_src (domain integer, class integer, attr text, value integer) distributed by (domain);
insert into ctas_src values(1, 1, 'A', 1);
insert into ctas_src values(2, 1, 'A', 0);
insert into ctas_src values(3, 0, 'B', 1);
-- MPP-2859
-- CTAS whose target list has a scalar subquery with count(distinct ...);
-- the subquery is planned as InitPlan 1 (returns $0), evaluated once before
-- the outer aggregate, with its own motion slices for the distinct grouping.
explain (costs off) create table ctas_dst as
SELECT attr, class, (select count(distinct class) from ctas_src) as dclass FROM ctas_src GROUP BY attr, class distributed by (attr);
QUERY PLAN
--------------------------------------------------------------------------------
HashAggregate
Group Key: ctas_src.attr, ctas_src.class
InitPlan 1 (returns $0) (slice2)
-> Finalize Aggregate
-> Gather Motion 3:1 (slice3; segments: 3)
-> Partial Aggregate
-> HashAggregate
Group Key: ctas_src_1.class
-> Redistribute Motion 3:3 (slice4; segments: 3)
Hash Key: ctas_src_1.class
-> Streaming HashAggregate
Group Key: ctas_src_1.class
-> Seq Scan on ctas_src ctas_src_1
-> Redistribute Motion 3:3 (slice1; segments: 3)
Hash Key: ctas_src.attr
-> HashAggregate
Group Key: ctas_src.attr, ctas_src.class
-> Seq Scan on ctas_src
Optimizer: Postgres query optimizer
(19 rows)
create table ctas_dst as
SELECT attr, class, (select count(distinct class) from ctas_src) as dclass FROM ctas_src GROUP BY attr, class distributed by (attr);
select * from ctas_dst;
attr | class | dclass
------+-------+--------
B | 0 | 2
A | 1 | 2
(2 rows)
drop table ctas_dst;
-- Same pattern with max(class): a plain two-stage aggregate InitPlan, so no
-- distinct-grouping redistribute motion is needed under the InitPlan.
explain (costs off) create table ctas_dst as
SELECT attr, class, (select max(class) from ctas_src) as maxclass FROM ctas_src GROUP BY attr, class distributed by (attr);
QUERY PLAN
-----------------------------------------------------------
HashAggregate
Group Key: ctas_src.attr, ctas_src.class
InitPlan 1 (returns $0) (slice2)
-> Finalize Aggregate
-> Gather Motion 3:1 (slice3; segments: 3)
-> Partial Aggregate
-> Seq Scan on ctas_src ctas_src_1
-> Redistribute Motion 3:3 (slice1; segments: 3)
Hash Key: ctas_src.attr
-> HashAggregate
Group Key: ctas_src.attr, ctas_src.class
-> Seq Scan on ctas_src
Optimizer: Postgres query optimizer
(13 rows)
create table ctas_dst as
SELECT attr, class, (select max(class) from ctas_src) as maxclass FROM ctas_src GROUP BY attr, class distributed by (attr);
select * from ctas_dst;
attr | class | maxclass
------+-------+----------
B | 0 | 1
A | 1 | 1
(2 rows)
drop table ctas_dst;
-- Three scalar subqueries in one CTAS target list: each becomes its own
-- InitPlan ($0, $1, $2), each with its own slice pair.
explain (costs off) create table ctas_dst as
SELECT attr, class, (select count(distinct class) from ctas_src) as dclass, (select max(class) from ctas_src) as maxclass, (select min(class) from ctas_src) as minclass FROM ctas_src GROUP BY attr, class distributed by (attr);
QUERY PLAN
--------------------------------------------------------------------------------
HashAggregate
Group Key: ctas_src.attr, ctas_src.class
InitPlan 1 (returns $0) (slice2)
-> Finalize Aggregate
-> Gather Motion 3:1 (slice3; segments: 3)
-> Partial Aggregate
-> HashAggregate
Group Key: ctas_src_1.class
-> Redistribute Motion 3:3 (slice4; segments: 3)
Hash Key: ctas_src_1.class
-> Streaming HashAggregate
Group Key: ctas_src_1.class
-> Seq Scan on ctas_src ctas_src_1
InitPlan 2 (returns $1) (slice5)
-> Finalize Aggregate
-> Gather Motion 3:1 (slice6; segments: 3)
-> Partial Aggregate
-> Seq Scan on ctas_src ctas_src_2
InitPlan 3 (returns $2) (slice7)
-> Finalize Aggregate
-> Gather Motion 3:1 (slice8; segments: 3)
-> Partial Aggregate
-> Seq Scan on ctas_src ctas_src_3
-> Redistribute Motion 3:3 (slice1; segments: 3)
Hash Key: ctas_src.attr
-> HashAggregate
Group Key: ctas_src.attr, ctas_src.class
-> Seq Scan on ctas_src
Optimizer: Postgres query optimizer
(29 rows)
create table ctas_dst as
SELECT attr, class, (select count(distinct class) from ctas_src) as dclass, (select max(class) from ctas_src) as maxclass, (select min(class) from ctas_src) as minclass FROM ctas_src GROUP BY attr, class distributed by (attr);
select * from ctas_dst;
attr | class | dclass | maxclass | minclass
------+-------+--------+----------+----------
A | 1 | 2 | 1 | 0
B | 0 | 2 | 1 | 0
(2 rows)
-- MPP-4298: "unknown" datatypes.
-- A bare string literal in a CTAS target list has type "unknown"; it must be
-- coerced to text in the created table (checked via \d and repeated selects,
-- before and after an explicit ALTER COLUMN ... TYPE text no-op).
drop table if exists ctas_foo;
NOTICE: table "ctas_foo" does not exist, skipping
drop table if exists ctas_bar;
NOTICE: table "ctas_bar" does not exist, skipping
drop table if exists ctas_baz;
NOTICE: table "ctas_baz" does not exist, skipping
create table ctas_foo as select * from generate_series(1, 100);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
create table ctas_bar as select a.generate_series as a, b.generate_series as b from ctas_foo a, ctas_foo b;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
create table ctas_baz as select 'delete me' as action, * from ctas_bar distributed by (a);
-- "action" becomes text
\d ctas_baz
Table "public.ctas_baz"
Column | Type | Collation | Nullable | Default
--------+---------+-----------+----------+---------
action | text | | |
a | integer | | |
b | integer | | |
Distributed by: (a)
select action, b from ctas_baz order by 1,2 limit 5;
action | b
-----------+---
delete me | 1
delete me | 1
delete me | 1
delete me | 1
delete me | 1
(5 rows)
select action, b from ctas_baz order by 2 limit 5;
action | b
-----------+---
delete me | 1
delete me | 1
delete me | 1
delete me | 1
delete me | 1
(5 rows)
select action::text, b from ctas_baz order by 1,2 limit 5;
action | b
-----------+---
delete me | 1
delete me | 1
delete me | 1
delete me | 1
delete me | 1
(5 rows)
alter table ctas_baz alter column action type text;
\d ctas_baz
Table "public.ctas_baz"
Column | Type | Collation | Nullable | Default
--------+---------+-----------+----------+---------
action | text | | |
a | integer | | |
b | integer | | |
Distributed by: (a)
select action, b from ctas_baz order by 1,2 limit 5;
action | b
-----------+---
delete me | 1
delete me | 1
delete me | 1
delete me | 1
delete me | 1
(5 rows)
select action, b from ctas_baz order by 2 limit 5;
action | b
-----------+---
delete me | 1
delete me | 1
delete me | 1
delete me | 1
delete me | 1
(5 rows)
select action::text, b from ctas_baz order by 1,2 limit 5;
action | b
-----------+---
delete me | 1
delete me | 1
delete me | 1
delete me | 1
delete me | 1
(5 rows)
-- Test CTAS with a function that executes another query that's dispatched.
-- Once upon a time, we had a bug in dispatching the table's OID in this
-- scenario.
create table ctas_input(x int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into ctas_input select * from generate_series(1, 10);
-- Helper: runs a dispatched SELECT internally and returns its result as an
-- int array; calling it from CTAS exercises nested-dispatch OID handling.
CREATE FUNCTION ctas_inputArray() RETURNS INT[] AS $$
DECLARE theArray INT[];
BEGIN
SELECT array(SELECT * FROM ctas_input ORDER BY x) INTO theArray;
RETURN theArray;
--EXCEPTION WHEN OTHERS THEN RAISE NOTICE 'Catching the exception ...%', SQLERRM;
END;
$$ LANGUAGE plpgsql;
create table ctas_output as select ctas_inputArray()::int[] as x;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'x' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
-- Test CTAS with VALUES.
CREATE TEMP TABLE yolo(i, j, k) AS (VALUES (0,0,0), (1, NULL, 0), (2, NULL, 0), (3, NULL, 0)) DISTRIBUTED BY (i);
--
-- Test that the rows are distributed correctly in CTAS, even if the query
-- has an ORDER BY. This used to tickle a bug at one point.
--
DROP TABLE IF EXISTS ctas_src, ctas_dst;
CREATE TABLE ctas_src(
col1 int4,
col2 decimal,
col3 char,
col4 boolean,
col5 int
) DISTRIBUTED by(col4);
-- I'm not sure why, but dropping a column was crucial to tickling the
-- original bug.
ALTER TABLE ctas_src DROP COLUMN col2;
INSERT INTO ctas_src(col1, col3,col4,col5)
SELECT g, 'a',True,g from generate_series(1,5) g;
CREATE TABLE ctas_dst as SELECT col1,col3,col4,col5 FROM ctas_src order by 1;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
-- This will fail to find some of the rows, if they're distributed incorrectly.
SELECT * FROM ctas_src, ctas_dst WHERE ctas_src.col1 = ctas_dst.col1;
col1 | col3 | col4 | col5 | col1 | col3 | col4 | col5
------+------+------+------+------+------+------+------
5 | a | t | 5 | 5 | a | t | 5
2 | a | t | 2 | 2 | a | t | 2
3 | a | t | 3 | 3 | a | t | 3
4 | a | t | 4 | 4 | a | t | 4
1 | a | t | 1 | 1 | a | t | 1
(5 rows)
-- Github Issue 9365: https://github.com/greenplum-db/gpdb/issues/9365
-- Previously, the following CTAS case miss dispatching OIDs to QEs, which leads to
-- errors.
-- Helper SRF: flattens a 2-D array into one row per first-dimension index,
-- returning (row_id, 1-D slice) pairs.
CREATE OR REPLACE FUNCTION array_unnest_2d_to_1d(
x ANYARRAY,
OUT unnest_row_id INT,
OUT unnest_result ANYARRAY
)
RETURNS SETOF RECORD
AS
$BODY$
SELECT t2.r::int, array_agg($1[t2.r][t2.c] order by t2.c) FROM
(
SELECT generate_series(array_lower($1,2),array_upper($1,2)) as c, t1.r
FROM
(
SELECT generate_series(array_lower($1,1),array_upper($1,1)) as r
) t1
) t2
GROUP BY t2.r
$BODY$ LANGUAGE SQL IMMUTABLE
;
DROP TABLE IF EXISTS unnest_2d_tbl01;
NOTICE: table "unnest_2d_tbl01" does not exist, skipping
CREATE TABLE unnest_2d_tbl01 (id INT, val DOUBLE PRECISION[][]);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INSERT INTO unnest_2d_tbl01 VALUES
(1, ARRAY[[1::float8,2],[3::float8,4],[5::float8,6],[7::float8,8]]),
(2, ARRAY[[101::float8,202],[303::float8,404],[505::float8,606]])
;
DROP TABLE IF EXISTS unnest_2d_tbl01_out;
NOTICE: table "unnest_2d_tbl01_out" does not exist, skipping
-- The following CTAS fails previously, see Github Issue 9365
CREATE TABLE unnest_2d_tbl01_out AS
SELECT id, (array_unnest_2d_to_1d(val)).* FROM unnest_2d_tbl01;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
-- Github issue 9790.
-- Previously, CTAS with no data won't handle the 'WITH' clause
-- (storage options like appendonly/orientation were dropped when
-- WITH NO DATA was present).
CREATE TABLE ctas_base(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE TABLE ctas_aocs WITH (appendonly=true, orientation=column) AS SELECT * FROM ctas_base WITH NO DATA;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
SELECT * FROM ctas_aocs;
a | b
---+---
(0 rows)
DROP TABLE ctas_base;
DROP TABLE ctas_aocs;
-- Github Issue 10760
-- Previously CTAS with Initplan will dispatch oids twice
-- start_ignore
DROP TABLE IF EXISTS t1_github_issue_10760;
NOTICE: table "t1_github_issue_10760" does not exist, skipping
DROP TABLE IF EXISTS t2_github_issue_10760;
NOTICE: table "t2_github_issue_10760" does not exist, skipping
create table t1_github_issue_10760(a int, b int) distributed randomly;
-- end_ignore
-- Because there is no Initplan in ORCA of this test case, there is no
-- 10760 problem in ORCA. So here manually set the optimizer to
-- ensure that there is Initplan in the execution plan.
set optimizer=off;
explain select * from t1_github_issue_10760 where b > (select count(*) from t1_github_issue_10760);
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=392.82..1168.23 rows=28700 width=8)
InitPlan 1 (returns $0) (slice2)
-> Finalize Aggregate (cost=392.81..392.82 rows=1 width=8)
-> Gather Motion 3:1 (slice3; segments: 3) (cost=392.75..392.80 rows=3 width=8)
-> Partial Aggregate (cost=392.75..392.76 rows=1 width=8)
-> Seq Scan on t1_github_issue_10760 t1_github_issue_10760_1 (cost=0.00..321.00 rows=28700 width=0)
-> Seq Scan on t1_github_issue_10760 (cost=0.00..392.75 rows=9567 width=8)
Filter: (b > $0)
Optimizer: Postgres query optimizer
(9 rows)
-- Create the same table twice inside one transaction: with the double-OID
-- dispatch bug the second create collided. The final count(distinct oid)
-- across QD and all QEs must be 1, i.e. the OID is consistent cluster-wide.
begin;
create table t2_github_issue_10760 as select * from t1_github_issue_10760 where b > (select count(*) from t1_github_issue_10760) distributed randomly;
drop table t2_github_issue_10760;
create table t2_github_issue_10760 as select * from t1_github_issue_10760 where b > (select count(*) from t1_github_issue_10760) distributed randomly;
end;
select count (distinct oid) from (select oid from pg_class where relname = 't2_github_issue_10760' union all select oid from gp_dist_random('pg_class') where relname = 't2_github_issue_10760')x;
count
-------
1
(1 row)
drop table t1_github_issue_10760;
drop table t2_github_issue_10760;
reset optimizer;
-- CTAS with no data will not lead to catalog inconsistent
-- See github issue: https://github.com/greenplum-db/gpdb/issues/11999
-- The function body would raise division-by-zero if executed; WITH NO DATA
-- must skip execution yet still record identical distribution policies on
-- the QD and every QE (8 total rows on a 1+3-segment-with-mirrors layout,
-- exactly 2 distinct policies -- one per relation).
create or replace function mv_action_select_issue_11999() returns bool language sql as
'declare c cursor for select 1/0; select true';
create materialized view sro_mv_issue_11999 as select mv_action_select_issue_11999() with no data;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'mv_action_select_issue_11999' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table t_sro_mv_issue_11999 as select mv_action_select_issue_11999() with no data;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'mv_action_select_issue_11999' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
select count(*)
from
(
select localoid, policytype, numsegments, distkey
from gp_distribution_policy
where localoid::regclass::text = 'sro_mv_issue_11999' or
localoid::regclass::text = 't_sro_mv_issue_11999'
union all
select localoid, policytype, numsegments, distkey
from gp_dist_random('gp_distribution_policy')
where localoid::regclass::text = 'sro_mv_issue_11999' or
localoid::regclass::text = 't_sro_mv_issue_11999'
)x;
count
-------
8
(1 row)
select count(distinct (localoid, policytype, numsegments, distkey))
from
(
select localoid, policytype, numsegments, distkey
from gp_distribution_policy
where localoid::regclass::text = 'sro_mv_issue_11999' or
localoid::regclass::text = 't_sro_mv_issue_11999'
union all
select localoid, policytype, numsegments, distkey
from gp_dist_random('gp_distribution_policy')
where localoid::regclass::text = 'sro_mv_issue_11999' or
localoid::regclass::text = 't_sro_mv_issue_11999'
)x;
count
-------
2
(1 row)
-- then refresh should error out
refresh materialized view sro_mv_issue_11999;
ERROR: division by zero
CONTEXT: SQL function "mv_action_select_issue_11999" statement 1
-- Test CTAS + initplan, and an exception was raised in preprocess_initplans
-- exception_func opens a cursor, raises sqlstate '02000' (no_data), catches
-- it in its own handler, and still returns the refcursor name; both the
-- SELECT INTO and the CTAS form must succeed and store 'result'.
CREATE OR REPLACE FUNCTION public.exception_func()
RETURNS refcursor
LANGUAGE plpgsql
AS $function$declare cname refcursor = 'result'; begin open cname for select 1; raise sqlstate '02000'; return cname; exception when sqlstate '02000' then return cname; end;$function$;
SELECT exception_func() INTO TEMPORARY test_tmp1;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named '' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
SELECT * FROM test_tmp1;
exception_func
----------------
result
(1 row)
CREATE TEMPORARY TABLE test_tmp2 AS SELECT exception_func();
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named '' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
SELECT * FROM test_tmp2;
exception_func
----------------
result
(1 row)
DROP FUNCTION public.exception_func();
DROP TABLE test_tmp1;
DROP TABLE test_tmp2;
reset optimizer_trace_fallback;