src/test/regress/expected/subselect_gp2.out - cloudberry - Git at Google

 -- Test using an external table in a subquery.
 --
 -- We used to have a bug where the scan on the external table was not
 -- broadcast to all nodes, so each segment scanned only its own portion
 -- of the external table, when the scan was in a subquery. In that case,
 -- the count(*) calculated for each value below was 1, but it should be
 -- equal to the number of segments, because this external table produces
 -- the same rows on every segment.
 CREATE EXTERNAL WEB TABLE echotable (c1 int, c2 int, c3 int) EXECUTE
 'echo "1,2,3"; echo "4,5,6";' FORMAT 'TEXT' (DELIMITER ',');
 create table test_ext_foo (c1 int, c2 int4);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 insert into test_ext_foo select g, g from generate_series(1, 20) g;
 -- This should return 2 and 5, as the two rows are duplicated in
 -- every segment (assuming you have at least two segments in your
 -- cluster).
 select c2 from echotable group by c2 having count(*) >= 2;
  c2
 ----
   2
   5
 (2 rows)

 select * from test_ext_foo as o
 where (select count(*) from echotable as i where i.c2 = o.c2) >= 2;
  c1 | c2
 ----+----
   5 |  5
   2 |  2
 (2 rows)

 -- Planner test to make sure the initplan is not removed for function scan
 explain select * from generate_series(1,2) s1 join pg_class on true where s1=(select pg_backend_pid());
                                   QUERY PLAN
 ------------------------------------------------------------------------------
  Nested Loop  (cost=10000000000.01..10000000027.63 rows=456 width=206)
    InitPlan 1 (returns $0)
      ->  Result  (cost=0.00..0.01 rows=1 width=0)
    ->  Function Scan on generate_series s1  (cost=0.00..12.50 rows=1 width=4)
          Filter: (s1 = $0)
    ->  Seq Scan on pg_class  (cost=0.00..10.56 rows=456 width=202)
  Optimizer: Postgres query optimizer
 (7 rows)

 -- Planner test: constant folding in subplan testexpr  produces no error
 create table subselect_t1 (a int, b int, c int) distributed by (a);
 create table subselect_t2 (a int, b int, c int) distributed by (a);
 insert into subselect_t1 values (1,1,1);
 insert into subselect_t2 values (1,1,1);
 select * from subselect_t1 where NULL in (select c from subselect_t2);
  a | b | c
 ---+---+---
 (0 rows)

 select * from subselect_t1 where NULL in (select c from subselect_t2) and exists (select generate_series(1,2));
  a | b | c
 ---+---+---
 (0 rows)

 -- Planner test to make sure initplan is removed when no param is used
 select * from subselect_t2 where false and exists (select generate_series(1,2));
  a | b | c
 ---+---+---
 (0 rows)

 --
 -- Test running Init Plans in a query that runs in a function in a QE.
 -- Init Plans get special treatment in the QD, for queries that are
 -- dispatched. The point of this test is to make sure the Init Plans work
 -- correctly when they *don't* need the special treatment, in local queries
 -- in QEs.
 --
 create temp table datetab (start timestamp, stop timestamp);
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'start' as the Apache Cloudberry data distribution key for this table.
 insert into datetab values ('2019-01-01', '2019-01-10');
 -- A function, that contains a query with a subquery that can be turned into
 -- an Init Plan.
 create or replace function number_of_days(start timestamp, stop timestamp) returns text
 as $$
 declare
   result text;
 begin
   result := 'full days: ' || (select count(g) from generate_series(start, stop, '1 day') g)::text;

   return result;
 end;
 $$ language plpgsql;
 -- Run the function in QEs.
 select number_of_days(start, stop) from datetab;
  number_of_days
 ----------------
  full days: 10
 (1 row)

 -- Check delay eager free in squelch functions
 CREATE TABLE subselect2_foo (a int, b int);
 CREATE TABLE subselect2_bar (c int, d int);
 CREATE TABLE subselect2_baz (x int, y int);
 INSERT INTO subselect2_foo VALUES (1,1), (1,2);
 INSERT INTO subselect2_bar VALUES (1,1);
 SELECT *, (SELECT x FROM subselect2_baz EXCEPT SELECT c FROM subselect2_bar WHERE d = a) FROM subselect2_foo;
  a | b | x
 ---+---+---
  1 | 1 |
  1 | 2 |
 (2 rows)
	-- Test using an external table in a subquery.
	--
	-- We used to have a bug where the scan on the external table was not
	-- broadcast to all nodes, so each segment scanned only its own portion
	-- of the external table, when the scan was in a subquery. In that case,
	-- the count(*) calculated for each value below was 1, but it should be
	-- equal to the number of segments, because this external table produces
	-- the same rows on every segment.
	CREATE EXTERNAL WEB TABLE echotable (c1 int, c2 int, c3 int) EXECUTE
	'echo "1,2,3"; echo "4,5,6";' FORMAT 'TEXT' (DELIMITER ',');
	create table test_ext_foo (c1 int, c2 int4);
	NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table.
	HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
	insert into test_ext_foo select g, g from generate_series(1, 20) g;
	-- This should return 2 and 5, as the two rows are duplicated in
	-- every segment (assuming you have at least two segments in your
	-- cluster).
	select c2 from echotable group by c2 having count(*) >= 2;
	c2
	----
	2
	5
	(2 rows)

	select * from test_ext_foo as o
	where (select count(*) from echotable as i where i.c2 = o.c2) >= 2;
	c1 \| c2
	----+----
	5 \| 5
	2 \| 2
	(2 rows)

	-- Planner test to make sure the initplan is not removed for function scan
	explain select * from generate_series(1,2) s1 join pg_class on true where s1=(select pg_backend_pid());
	QUERY PLAN
	------------------------------------------------------------------------------
	Nested Loop (cost=10000000000.01..10000000027.63 rows=456 width=206)
	InitPlan 1 (returns $0)
	-> Result (cost=0.00..0.01 rows=1 width=0)
	-> Function Scan on generate_series s1 (cost=0.00..12.50 rows=1 width=4)
	Filter: (s1 = $0)
	-> Seq Scan on pg_class (cost=0.00..10.56 rows=456 width=202)
	Optimizer: Postgres query optimizer
	(7 rows)

	-- Planner test: constant folding in subplan testexpr produces no error
	create table subselect_t1 (a int, b int, c int) distributed by (a);
	create table subselect_t2 (a int, b int, c int) distributed by (a);
	insert into subselect_t1 values (1,1,1);
	insert into subselect_t2 values (1,1,1);
	select * from subselect_t1 where NULL in (select c from subselect_t2);
	a \| b \| c
	---+---+---
	(0 rows)

	select * from subselect_t1 where NULL in (select c from subselect_t2) and exists (select generate_series(1,2));
	a \| b \| c
	---+---+---
	(0 rows)

	-- Planner test to make sure initplan is removed when no param is used
	select * from subselect_t2 where false and exists (select generate_series(1,2));
	a \| b \| c
	---+---+---
	(0 rows)

	--
	-- Test running Init Plans in a query that runs in a function in a QE.
	-- Init Plans get special treatment in the QD, for queries that are
	-- dispatched. The point of this test is to make sure the Init Plans work
	-- correctly when they don't need the special treatment, in local queries
	-- in QEs.
	--
	create temp table datetab (start timestamp, stop timestamp);
	HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
	NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'start' as the Apache Cloudberry data distribution key for this table.
	insert into datetab values ('2019-01-01', '2019-01-10');
	-- A function, that contains a query with a subquery that can be turned into
	-- an Init Plan.
	create or replace function number_of_days(start timestamp, stop timestamp) returns text
	as $$
	declare
	result text;
	begin
	result := 'full days: ' \|\| (select count(g) from generate_series(start, stop, '1 day') g)::text;

	return result;
	end;
	$$ language plpgsql;
	-- Run the function in QEs.
	select number_of_days(start, stop) from datetab;
	number_of_days
	----------------
	full days: 10
	(1 row)

	-- Check delay eager free in squelch functions
	CREATE TABLE subselect2_foo (a int, b int);
	CREATE TABLE subselect2_bar (c int, d int);
	CREATE TABLE subselect2_baz (x int, y int);
	INSERT INTO subselect2_foo VALUES (1,1), (1,2);
	INSERT INTO subselect2_bar VALUES (1,1);
	SELECT *, (SELECT x FROM subselect2_baz EXCEPT SELECT c FROM subselect2_bar WHERE d = a) FROM subselect2_foo;
	a \| b \| x
	---+---+---
	1 \| 1 \|
	1 \| 2 \|
	(2 rows)