blob: cb0c2536bfe897ecab61d644cd59941611f8047a [file] [log] [blame]
-- Regression fixture for CREATE TABLE AS (CTAS) on a distributed cluster.
-- NOTE(review): this is an expected-output (.out) file; the NOTICE / plan /
-- result lines below are literal psql output and must stay byte-identical.
-- Setup: quiet missing-stats chatter, trace planner fallback, and build a
-- small source table distributed by "domain".
set optimizer_print_missing_stats = off;
set optimizer_trace_fallback = on;
drop table if exists ctas_src;
NOTICE: table "ctas_src" does not exist, skipping
drop table if exists ctas_dst;
NOTICE: table "ctas_dst" does not exist, skipping
create table ctas_src (domain integer, class integer, attr text, value integer) distributed by (domain);
insert into ctas_src values(1, 1, 'A', 1);
insert into ctas_src values(2, 1, 'A', 0);
insert into ctas_src values(3, 0, 'B', 1);
-- MPP-2859
-- CTAS whose target list has a scalar subquery with count(distinct ...);
-- the subquery is planned as InitPlan 1 (returns $0), evaluated once before
-- the outer aggregate, with its own motion slices for the distinct grouping.
explain (costs off) create table ctas_dst as
SELECT attr, class, (select count(distinct class) from ctas_src) as dclass FROM ctas_src GROUP BY attr, class distributed by (attr);
QUERY PLAN
--------------------------------------------------------------------------------
HashAggregate
Group Key: ctas_src.attr, ctas_src.class
InitPlan 1 (returns $0) (slice2)
-> Finalize Aggregate
-> Gather Motion 3:1 (slice3; segments: 3)
-> Partial Aggregate
-> HashAggregate
Group Key: ctas_src_1.class
-> Redistribute Motion 3:3 (slice4; segments: 3)
Hash Key: ctas_src_1.class
-> Streaming HashAggregate
Group Key: ctas_src_1.class
-> Seq Scan on ctas_src ctas_src_1
-> Redistribute Motion 3:3 (slice1; segments: 3)
Hash Key: ctas_src.attr
-> HashAggregate
Group Key: ctas_src.attr, ctas_src.class
-> Seq Scan on ctas_src
Optimizer: Postgres query optimizer
(19 rows)
create table ctas_dst as
SELECT attr, class, (select count(distinct class) from ctas_src) as dclass FROM ctas_src GROUP BY attr, class distributed by (attr);
select * from ctas_dst;
attr | class | dclass
------+-------+--------
B | 0 | 2
A | 1 | 2
(2 rows)
drop table ctas_dst;
-- Same pattern with max(class): a plain two-stage aggregate InitPlan, so no
-- distinct-grouping redistribute motion is needed under the InitPlan.
explain (costs off) create table ctas_dst as
SELECT attr, class, (select max(class) from ctas_src) as maxclass FROM ctas_src GROUP BY attr, class distributed by (attr);
QUERY PLAN
-----------------------------------------------------------
HashAggregate
Group Key: ctas_src.attr, ctas_src.class
InitPlan 1 (returns $0) (slice2)
-> Finalize Aggregate
-> Gather Motion 3:1 (slice3; segments: 3)
-> Partial Aggregate
-> Seq Scan on ctas_src ctas_src_1
-> Redistribute Motion 3:3 (slice1; segments: 3)
Hash Key: ctas_src.attr
-> HashAggregate
Group Key: ctas_src.attr, ctas_src.class
-> Seq Scan on ctas_src
Optimizer: Postgres query optimizer
(13 rows)
create table ctas_dst as
SELECT attr, class, (select max(class) from ctas_src) as maxclass FROM ctas_src GROUP BY attr, class distributed by (attr);
select * from ctas_dst;
attr | class | maxclass
------+-------+----------
B | 0 | 1
A | 1 | 1
(2 rows)
drop table ctas_dst;
-- Three scalar subqueries in one CTAS target list: each becomes its own
-- InitPlan ($0, $1, $2), each with its own slice pair.
explain (costs off) create table ctas_dst as
SELECT attr, class, (select count(distinct class) from ctas_src) as dclass, (select max(class) from ctas_src) as maxclass, (select min(class) from ctas_src) as minclass FROM ctas_src GROUP BY attr, class distributed by (attr);
QUERY PLAN
--------------------------------------------------------------------------------
HashAggregate
Group Key: ctas_src.attr, ctas_src.class
InitPlan 1 (returns $0) (slice2)
-> Finalize Aggregate
-> Gather Motion 3:1 (slice3; segments: 3)
-> Partial Aggregate
-> HashAggregate
Group Key: ctas_src_1.class
-> Redistribute Motion 3:3 (slice4; segments: 3)
Hash Key: ctas_src_1.class
-> Streaming HashAggregate
Group Key: ctas_src_1.class
-> Seq Scan on ctas_src ctas_src_1
InitPlan 2 (returns $1) (slice5)
-> Finalize Aggregate
-> Gather Motion 3:1 (slice6; segments: 3)
-> Partial Aggregate
-> Seq Scan on ctas_src ctas_src_2
InitPlan 3 (returns $2) (slice7)
-> Finalize Aggregate
-> Gather Motion 3:1 (slice8; segments: 3)
-> Partial Aggregate
-> Seq Scan on ctas_src ctas_src_3
-> Redistribute Motion 3:3 (slice1; segments: 3)
Hash Key: ctas_src.attr
-> HashAggregate
Group Key: ctas_src.attr, ctas_src.class
-> Seq Scan on ctas_src
Optimizer: Postgres query optimizer
(29 rows)
create table ctas_dst as
SELECT attr, class, (select count(distinct class) from ctas_src) as dclass, (select max(class) from ctas_src) as maxclass, (select min(class) from ctas_src) as minclass FROM ctas_src GROUP BY attr, class distributed by (attr);
select * from ctas_dst;
attr | class | dclass | maxclass | minclass
------+-------+--------+----------+----------
A | 1 | 2 | 1 | 0
B | 0 | 2 | 1 | 0
(2 rows)
-- MPP-4298: "unknown" datatypes.
-- A bare string literal in a CTAS target list has type "unknown"; it must be
-- coerced to text in the created table (checked via \d and repeated selects,
-- before and after an explicit ALTER COLUMN ... TYPE text no-op).
drop table if exists ctas_foo;
NOTICE: table "ctas_foo" does not exist, skipping
drop table if exists ctas_bar;
NOTICE: table "ctas_bar" does not exist, skipping
drop table if exists ctas_baz;
NOTICE: table "ctas_baz" does not exist, skipping
create table ctas_foo as select * from generate_series(1, 100);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
create table ctas_bar as select a.generate_series as a, b.generate_series as b from ctas_foo a, ctas_foo b;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
create table ctas_baz as select 'delete me' as action, * from ctas_bar distributed by (a);
-- "action" becomes text
\d ctas_baz
Table "public.ctas_baz"
Column | Type | Collation | Nullable | Default
--------+---------+-----------+----------+---------
action | text | | |
a | integer | | |
b | integer | | |
Distributed by: (a)
select action, b from ctas_baz order by 1,2 limit 5;
action | b
-----------+---
delete me | 1
delete me | 1
delete me | 1
delete me | 1
delete me | 1
(5 rows)
select action, b from ctas_baz order by 2 limit 5;
action | b
-----------+---
delete me | 1
delete me | 1
delete me | 1
delete me | 1
delete me | 1
(5 rows)
select action::text, b from ctas_baz order by 1,2 limit 5;
action | b
-----------+---
delete me | 1
delete me | 1
delete me | 1
delete me | 1
delete me | 1
(5 rows)
alter table ctas_baz alter column action type text;
\d ctas_baz
Table "public.ctas_baz"
Column | Type | Collation | Nullable | Default
--------+---------+-----------+----------+---------
action | text | | |
a | integer | | |
b | integer | | |
Distributed by: (a)
select action, b from ctas_baz order by 1,2 limit 5;
action | b
-----------+---
delete me | 1
delete me | 1
delete me | 1
delete me | 1
delete me | 1
(5 rows)
select action, b from ctas_baz order by 2 limit 5;
action | b
-----------+---
delete me | 1
delete me | 1
delete me | 1
delete me | 1
delete me | 1
(5 rows)
select action::text, b from ctas_baz order by 1,2 limit 5;
action | b
-----------+---
delete me | 1
delete me | 1
delete me | 1
delete me | 1
delete me | 1
(5 rows)
-- Test CTAS with a function that executes another query that's dispatched.
-- Once upon a time, we had a bug in dispatching the table's OID in this
-- scenario.
create table ctas_input(x int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into ctas_input select * from generate_series(1, 10);
-- Helper: runs a dispatched SELECT internally and returns its result as an
-- int array; calling it from CTAS exercises nested-dispatch OID handling.
CREATE FUNCTION ctas_inputArray() RETURNS INT[] AS $$
DECLARE theArray INT[];
BEGIN
SELECT array(SELECT * FROM ctas_input ORDER BY x) INTO theArray;
RETURN theArray;
--EXCEPTION WHEN OTHERS THEN RAISE NOTICE 'Catching the exception ...%', SQLERRM;
END;
$$ LANGUAGE plpgsql;
create table ctas_output as select ctas_inputArray()::int[] as x;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'x' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
-- Test CTAS with VALUES.
CREATE TEMP TABLE yolo(i, j, k) AS (VALUES (0,0,0), (1, NULL, 0), (2, NULL, 0), (3, NULL, 0)) DISTRIBUTED BY (i);
--
-- Test that the rows are distributed correctly in CTAS, even if the query
-- has an ORDER BY. This used to tickle a bug at one point.
--
DROP TABLE IF EXISTS ctas_src, ctas_dst;
CREATE TABLE ctas_src(
col1 int4,
col2 decimal,
col3 char,
col4 boolean,
col5 int
) DISTRIBUTED by(col4);
-- I'm not sure why, but dropping a column was crucial to tickling the
-- original bug.
ALTER TABLE ctas_src DROP COLUMN col2;
INSERT INTO ctas_src(col1, col3,col4,col5)
SELECT g, 'a',True,g from generate_series(1,5) g;
CREATE TABLE ctas_dst as SELECT col1,col3,col4,col5 FROM ctas_src order by 1;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
-- This will fail to find some of the rows, if they're distributed incorrectly.
SELECT * FROM ctas_src, ctas_dst WHERE ctas_src.col1 = ctas_dst.col1;
col1 | col3 | col4 | col5 | col1 | col3 | col4 | col5
------+------+------+------+------+------+------+------
5 | a | t | 5 | 5 | a | t | 5
2 | a | t | 2 | 2 | a | t | 2
3 | a | t | 3 | 3 | a | t | 3
4 | a | t | 4 | 4 | a | t | 4
1 | a | t | 1 | 1 | a | t | 1
(5 rows)
-- Github Issue 9365: https://github.com/greenplum-db/gpdb/issues/9365
-- Previously, the following CTAS case miss dispatching OIDs to QEs, which leads to
-- errors.
-- Helper SRF: flattens a 2-D array into one row per first-dimension index,
-- returning (row_id, 1-D slice) pairs.
CREATE OR REPLACE FUNCTION array_unnest_2d_to_1d(
x ANYARRAY,
OUT unnest_row_id INT,
OUT unnest_result ANYARRAY
)
RETURNS SETOF RECORD
AS
$BODY$
SELECT t2.r::int, array_agg($1[t2.r][t2.c] order by t2.c) FROM
(
SELECT generate_series(array_lower($1,2),array_upper($1,2)) as c, t1.r
FROM
(
SELECT generate_series(array_lower($1,1),array_upper($1,1)) as r
) t1
) t2
GROUP BY t2.r
$BODY$ LANGUAGE SQL IMMUTABLE
;
DROP TABLE IF EXISTS unnest_2d_tbl01;
NOTICE: table "unnest_2d_tbl01" does not exist, skipping
CREATE TABLE unnest_2d_tbl01 (id INT, val DOUBLE PRECISION[][]);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INSERT INTO unnest_2d_tbl01 VALUES
(1, ARRAY[[1::float8,2],[3::float8,4],[5::float8,6],[7::float8,8]]),
(2, ARRAY[[101::float8,202],[303::float8,404],[505::float8,606]])
;
DROP TABLE IF EXISTS unnest_2d_tbl01_out;
NOTICE: table "unnest_2d_tbl01_out" does not exist, skipping
-- The following CTAS fails previously, see Github Issue 9365
CREATE TABLE unnest_2d_tbl01_out AS
SELECT id, (array_unnest_2d_to_1d(val)).* FROM unnest_2d_tbl01;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
-- Github issue 9790.
-- Previously, CTAS with no data won't handle the 'WITH' clause
-- (storage options like appendonly/orientation were dropped when
-- WITH NO DATA was present).
CREATE TABLE ctas_base(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE TABLE ctas_aocs WITH (appendonly=true, orientation=column) AS SELECT * FROM ctas_base WITH NO DATA;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
SELECT * FROM ctas_aocs;
a | b
---+---
(0 rows)
DROP TABLE ctas_base;
DROP TABLE ctas_aocs;
-- Github Issue 10760
-- Previously CTAS with Initplan will dispatch oids twice
-- start_ignore
DROP TABLE IF EXISTS t1_github_issue_10760;
NOTICE: table "t1_github_issue_10760" does not exist, skipping
DROP TABLE IF EXISTS t2_github_issue_10760;
NOTICE: table "t2_github_issue_10760" does not exist, skipping
create table t1_github_issue_10760(a int, b int) distributed randomly;
-- end_ignore
-- Because there is no Initplan in ORCA of this test case, there is no
-- 10760 problem in ORCA. So here manually set the optimizer to
-- ensure that there is Initplan in the execution plan.
set optimizer=off;
explain select * from t1_github_issue_10760 where b > (select count(*) from t1_github_issue_10760);
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=392.82..1168.23 rows=28700 width=8)
InitPlan 1 (returns $0) (slice2)
-> Finalize Aggregate (cost=392.81..392.82 rows=1 width=8)
-> Gather Motion 3:1 (slice3; segments: 3) (cost=392.75..392.80 rows=3 width=8)
-> Partial Aggregate (cost=392.75..392.76 rows=1 width=8)
-> Seq Scan on t1_github_issue_10760 t1_github_issue_10760_1 (cost=0.00..321.00 rows=28700 width=0)
-> Seq Scan on t1_github_issue_10760 (cost=0.00..392.75 rows=9567 width=8)
Filter: (b > $0)
Optimizer: Postgres query optimizer
(9 rows)
-- Create the same table twice inside one transaction: with the double-OID
-- dispatch bug the second create collided. The final count(distinct oid)
-- across QD and all QEs must be 1, i.e. the OID is consistent cluster-wide.
begin;
create table t2_github_issue_10760 as select * from t1_github_issue_10760 where b > (select count(*) from t1_github_issue_10760) distributed randomly;
drop table t2_github_issue_10760;
create table t2_github_issue_10760 as select * from t1_github_issue_10760 where b > (select count(*) from t1_github_issue_10760) distributed randomly;
end;
select count (distinct oid) from (select oid from pg_class where relname = 't2_github_issue_10760' union all select oid from gp_dist_random('pg_class') where relname = 't2_github_issue_10760')x;
count
-------
1
(1 row)
drop table t1_github_issue_10760;
drop table t2_github_issue_10760;
reset optimizer;
-- CTAS with no data will not lead to catalog inconsistent
-- See github issue: https://github.com/greenplum-db/gpdb/issues/11999
-- The function body would raise division-by-zero if executed; WITH NO DATA
-- must skip execution yet still record identical distribution policies on
-- the QD and every QE (8 total rows on a 1+3-segment-with-mirrors layout,
-- exactly 2 distinct policies -- one per relation).
create or replace function mv_action_select_issue_11999() returns bool language sql as
'declare c cursor for select 1/0; select true';
create materialized view sro_mv_issue_11999 as select mv_action_select_issue_11999() with no data;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'mv_action_select_issue_11999' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table t_sro_mv_issue_11999 as select mv_action_select_issue_11999() with no data;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'mv_action_select_issue_11999' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
select count(*)
from
(
select localoid, policytype, numsegments, distkey
from gp_distribution_policy
where localoid::regclass::text = 'sro_mv_issue_11999' or
localoid::regclass::text = 't_sro_mv_issue_11999'
union all
select localoid, policytype, numsegments, distkey
from gp_dist_random('gp_distribution_policy')
where localoid::regclass::text = 'sro_mv_issue_11999' or
localoid::regclass::text = 't_sro_mv_issue_11999'
)x;
count
-------
8
(1 row)
select count(distinct (localoid, policytype, numsegments, distkey))
from
(
select localoid, policytype, numsegments, distkey
from gp_distribution_policy
where localoid::regclass::text = 'sro_mv_issue_11999' or
localoid::regclass::text = 't_sro_mv_issue_11999'
union all
select localoid, policytype, numsegments, distkey
from gp_dist_random('gp_distribution_policy')
where localoid::regclass::text = 'sro_mv_issue_11999' or
localoid::regclass::text = 't_sro_mv_issue_11999'
)x;
count
-------
2
(1 row)
-- then refresh should error out
refresh materialized view sro_mv_issue_11999;
ERROR: division by zero
CONTEXT: SQL function "mv_action_select_issue_11999" statement 1
-- Test CTAS + initplan, and an exception was raised in preprocess_initplans
-- exception_func opens a cursor, raises sqlstate '02000' (no_data), catches
-- it in its own handler, and still returns the refcursor name; both the
-- SELECT INTO and the CTAS form must succeed and store 'result'.
CREATE OR REPLACE FUNCTION public.exception_func()
RETURNS refcursor
LANGUAGE plpgsql
AS $function$declare cname refcursor = 'result'; begin open cname for select 1; raise sqlstate '02000'; return cname; exception when sqlstate '02000' then return cname; end;$function$;
SELECT exception_func() INTO TEMPORARY test_tmp1;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named '' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
SELECT * FROM test_tmp1;
exception_func
----------------
result
(1 row)
CREATE TEMPORARY TABLE test_tmp2 AS SELECT exception_func();
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named '' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
SELECT * FROM test_tmp2;
exception_func
----------------
result
(1 row)
DROP FUNCTION public.exception_func();
DROP TABLE test_tmp1;
DROP TABLE test_tmp2;
reset optimizer_trace_fallback;