src/test/regress/expected/qp_subquery_optimizer.out - cloudberry - Git at Google

 create schema qp_subquery;
 set search_path to qp_subquery;
 set optimizer_trace_fallback to on;
 begin;
 CREATE TABLE SUBSELECT_TBL1 (f1 integer, f2 integer, f3 float);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 INSERT INTO SUBSELECT_TBL1 VALUES (1, 2, 3);
 INSERT INTO SUBSELECT_TBL1 VALUES (2, 3, 4);
 INSERT INTO SUBSELECT_TBL1 VALUES (3, 4, 5);
 INSERT INTO SUBSELECT_TBL1 VALUES (1, 1, 1);
 INSERT INTO SUBSELECT_TBL1 VALUES (2, 2, 2);
 INSERT INTO SUBSELECT_TBL1 VALUES (3, 3, 3);
 INSERT INTO SUBSELECT_TBL1 VALUES (6, 7, 8);
 INSERT INTO SUBSELECT_TBL1 VALUES (8, 9, NULL);
 commit;
 SELECT '' AS eight, * FROM SUBSELECT_TBL1 ORDER BY 2,3,4;
  eight | f1 | f2 | f3
 -------+----+----+----
        |  1 |  1 |  1
        |  1 |  2 |  3
        |  2 |  2 |  2
        |  2 |  3 |  4
        |  3 |  3 |  3
        |  3 |  4 |  5
        |  6 |  7 |  8
        |  8 |  9 |
 (8 rows)


 SELECT '' AS two, f1 AS "Constant Select" FROM SUBSELECT_TBL1
  					 WHERE f1 IN (SELECT 1) ORDER BY 2;
  two | Constant Select
 -----+-----------------
      |               1
      |               1
 (2 rows)


 -- order 2
 SELECT '' AS six, f1 AS "Uncorrelated Field" FROM SUBSELECT_TBL1
 					  WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL1) ORDER BY 2;
  six | Uncorrelated Field
 -----+--------------------
      |                  1
      |                  1
      |                  2
      |                  2
      |                  3
      |                  3
 (6 rows)


 -- order 2
 SELECT '' AS six, f1 AS "Uncorrelated Field" FROM SUBSELECT_TBL1
 				 WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL1 WHERE
 				   f2 IN (SELECT f1 FROM SUBSELECT_TBL1)) ORDER BY 2;
  six | Uncorrelated Field
 -----+--------------------
      |                  1
      |                  1
      |                  2
      |                  2
      |                  3
      |                  3
 (6 rows)


 -- order 2,3
 SELECT '' AS three, f1, f2
   				FROM SUBSELECT_TBL1
   				WHERE (f1, f2) NOT IN (SELECT f2, CAST(f3 AS int4) FROM SUBSELECT_TBL1
                          	WHERE f3 IS NOT NULL) ORDER BY 2,3;
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  three | f1 | f2
 -------+----+----
        |  1 |  2
        |  6 |  7
        |  8 |  9
 (3 rows)


 SELECT 1 AS one WHERE 1 IN (SELECT 1);
  one
 -----
    1
 (1 row)


 SELECT 1 AS zero WHERE 1 IN (SELECT 2);
  zero
 ------
 (0 rows)


 SELECT 1 AS zero WHERE 1 NOT IN (SELECT 1);
  zero
 ------
 (0 rows)


 SELECT '' AS six, f1 AS "Correlated Field", f2 AS "Second Field"
                                 FROM SUBSELECT_TBL1 upper
                                 WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL1 WHERE f1 = upper.f1);
  six | Correlated Field | Second Field
 -----+------------------+--------------
      |                1 |            2
      |                2 |            3
      |                1 |            1
      |                2 |            2
      |                3 |            4
      |                3 |            3
 (6 rows)

 SELECT '' AS six, f1 AS "Correlated Field", f3 AS "Second Field"
                                 FROM SUBSELECT_TBL1 upper
                                 WHERE f1 IN
                                 (SELECT f2 FROM SUBSELECT_TBL1 WHERE CAST(upper.f2 AS float) = f3);
  six | Correlated Field | Second Field
 -----+------------------+--------------
      |                2 |            4
      |                1 |            1
      |                2 |            2
      |                3 |            5
      |                3 |            3
 (5 rows)

 SELECT '' AS six, f1 AS "Correlated Field", f3 AS "Second Field"
                                 FROM SUBSELECT_TBL1 upper
                                 WHERE f3 IN (SELECT upper.f1 + f2 FROM SUBSELECT_TBL1
                                 WHERE f2 = CAST(f3 AS integer));
  six | Correlated Field | Second Field
 -----+------------------+--------------
      |                1 |            3
      |                2 |            4
      |                3 |            5
      |                6 |            8
 (4 rows)

 SELECT '' AS five, f1 AS "Correlated Field"
                                 FROM SUBSELECT_TBL1
                                 WHERE (f1, f2) IN (SELECT f2, CAST(f3 AS int4) FROM SUBSELECT_TBL1
                                 WHERE f3 IS NOT NULL);
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  five | Correlated Field
 ------+------------------
       |                3
       |                3
       |                2
       |                1
       |                2
 (5 rows)

 begin;
 create table join_tab1 ( i integer, j integer, t text);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 INSERT INTO join_tab1 VALUES (1, 4, 'one');
 ANALYZE join_tab1;
 INSERT INTO join_tab1 VALUES (2, 3, 'two');
 INSERT INTO join_tab1 VALUES (3, 2, 'three');
 INSERT INTO join_tab1 VALUES (4, 1, 'four');
 INSERT INTO join_tab1 VALUES (5, 0, 'five');
 INSERT INTO join_tab1 VALUES (6, 6, 'six');
 INSERT INTO join_tab1  VALUES (7, 7, 'seven');
 INSERT INTO join_tab1 VALUES (8, 8, 'eight');
 INSERT INTO join_tab1 VALUES (0, NULL, 'zero');
 INSERT INTO join_tab1 VALUES (NULL, NULL, 'null');
 INSERT INTO join_tab1 VALUES (NULL, 0, 'zero');
 create table join_tab2 ( i integer, k integer);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 INSERT INTO join_tab2 VALUES (1, -1);
 ANALYZE join_tab2;
 INSERT INTO join_tab2 VALUES (2, 2);
 INSERT INTO join_tab2 VALUES (3, -3);
 INSERT INTO join_tab2 VALUES (2, 4);
 INSERT INTO join_tab2 VALUES (5, -5);
 INSERT INTO join_tab2 VALUES (5, -5);
 INSERT INTO join_tab2 VALUES (0, NULL);
 INSERT INTO join_tab2 VALUES (NULL, NULL);
 INSERT INTO join_tab2 VALUES (NULL, 0);
 commit;
 select * from ( SELECT '' AS "col", * FROM join_tab1 AS tx)A;
  col | i | j |   t
 -----+---+---+-------
      | 1 | 4 | one
      | 2 | 3 | two
      | 0 |   | zero
      | 3 | 2 | three
      | 4 | 1 | four
      | 5 | 0 | five
      | 6 | 6 | six
      | 7 | 7 | seven
      |   |   | null
      |   | 0 | zero
      | 8 | 8 | eight
 (11 rows)


 select * from ( SELECT '' AS "col", * FROM join_tab1 AS tx) AS A;
  col | i | j |   t
 -----+---+---+-------
      | 8 | 8 | eight
      | 1 | 4 | one
      | 2 | 3 | two
      | 0 |   | zero
      | 3 | 2 | three
      | 4 | 1 | four
      | 5 | 0 | five
      | 6 | 6 | six
      | 7 | 7 | seven
      |   |   | null
      |   | 0 | zero
 (11 rows)


 select * from(SELECT '' AS "col", * FROM join_tab1 AS tx) as A(a,b,c);
  a | b | c |   t
 ---+---+---+-------
    | 3 | 2 | three
    | 4 | 1 | four
    | 5 | 0 | five
    | 6 | 6 | six
    | 7 | 7 | seven
    |   |   | null
    |   | 0 | zero
    | 8 | 8 | eight
    | 1 | 4 | one
    | 2 | 3 | two
    | 0 |   | zero
 (11 rows)


 select * from(SELECT '' AS "col", t1.a, t2.e FROM join_tab1 t1 (a, b, c), join_tab2 t2 (d, e)
 				WHERE t1.a = t2.d)as A;
  col | a | e
 -----+---+----
      | 3 | -3
      | 5 | -5
      | 5 | -5
      | 1 | -1
      | 2 |  2
      | 2 |  4
      | 0 |
 (7 rows)


 select * from join_tab1 where exists(select * from join_tab2 where join_tab1.i=join_tab2.i);
  i | j |   t
 ---+---+-------
  3 | 2 | three
  5 | 0 | five
  1 | 4 | one
  2 | 3 | two
  0 |   | zero
 (5 rows)


 select * from join_tab1 where not exists(select * from join_tab2 where join_tab1.i=join_tab2.i) order by i,j;
  i | j |   t
 ---+---+-------
  4 | 1 | four
  6 | 6 | six
  7 | 7 | seven
  8 | 8 | eight
    | 0 | zero
    |   | null
 (6 rows)


 select 25 = any ('{1,2,3,4}');
  ?column?
 ----------
  f
 (1 row)


 select 25 = any ('{1,2,25}');
  ?column?
 ----------
  t
 (1 row)


 select 'abc' = any('{abc,d,e}');
  ?column?
 ----------
  t
 (1 row)


 create table subq_abc(a int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 insert into subq_abc values(1);
 insert into subq_abc values(9);
 insert into subq_abc values(3);
 insert into subq_abc values(6);
 SELECT 9 = any (select * from subq_abc);
  ?column?
 ----------
  t
 (1 row)

 select null::int >= any ('{}');
  ?column?
 ----------
  f
 (1 row)

 select 'abc' = any('{" "}');
  ?column?
 ----------
  f
 (1 row)

 select 33.4 = any (array[1,2,3]);
  ?column?
 ----------
  f
 (1 row)

 select 40 = all ('{3,4,40,10}');
  ?column?
 ----------
  f
 (1 row)

 select 55 >= all ('{1,2,55}');
  ?column?
 ----------
  t
 (1 row)

 select 25 = all ('{25,25,25}');
  ?column?
 ----------
  t
 (1 row)

 select 'abc' = all('{abc}');
  ?column?
 ----------
  t
 (1 row)

 select 'abc' = all('{abc,d,e}');
  ?column?
 ----------
  f
 (1 row)

 select 'abc' = all('{"abc"}');
  ?column?
 ----------
  t
 (1 row)

 select 'abc' = all('{" "}');
  ?column?
 ----------
  f
 (1 row)

 select null::int >= all ('{1,2,33}');
  ?column?
 ----------

 (1 row)

 select null::int >= all ('{}');
  ?column?
 ----------
  t
 (1 row)

 select 33.4 > all (array[1,2,3]);
  ?column?
 ----------
  t
 (1 row)

 create table emp_list(empid int,name char(20),sal float);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'empid' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 insert into emp_list values(1,'empone',1000);
 insert into emp_list values(2,'emptwo',2000);
 insert into emp_list values(3,'empthree',3000);
 insert into emp_list values(4,'empfour',4000);
 insert into emp_list values(5,'empfive',4000);
 select name from emp_list where sal=(select max(sal) from emp_list);
          name
 ----------------------
  empfour
  empfive
 (2 rows)


 select name from emp_list where sal=(select min(sal) from emp_list);
          name
 ----------------------
  empone
 (1 row)


 select name from emp_list where sal>(select avg(sal) from emp_list);
          name
 ----------------------
  empthree
  empfour
  empfive
 (3 rows)


 select name from emp_list where sal<(select avg(sal) from emp_list);
          name
 ----------------------
  empone
  emptwo
 (2 rows)


 CREATE TABLE subq_test1 (s1 INT, s2 CHAR(5), s3 FLOAT);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 's1' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 INSERT INTO subq_test1 VALUES (1,'1',1.0);
 INSERT INTO subq_test1 VALUES (2,'2',2.0);
 INSERT INTO subq_test1 VALUES (3,'3',3.0);
 INSERT INTO subq_test1 VALUES (4,'4',4.0);
 SELECT sb1,sb2,sb3 FROM (SELECT s1 AS sb1, s2 AS sb2, s3*2 AS sb3 FROM subq_test1) AS sb WHERE sb1 > 1;
  sb1 |  sb2  | sb3
 -----+-------+-----
    2 | 2     |   4
    3 | 3     |   6
    4 | 4     |   8
 (3 rows)


 select to_char(Avg(sum_col1),'9999999.9999999') from (select sum(s1) as sum_col1 from subq_test1 group by s1) as tab1;
      to_char
 ------------------
         2.5000000
 (1 row)


 select g2,count(*) from (select I, count(*) as g2 from join_tab1 group by I) as vtable group by g2;
  g2 | count
 ----+-------
   1 |     9
   2 |     1
 (2 rows)

 begin;
 create table join_tab4 ( i integer, j integer, t text);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 insert into join_tab4 values (1,7,'sunday');
 insert into join_tab4 values (2,6,'monday');
 insert into join_tab4 values (3,5,'tueday');
 insert into join_tab4 values (4,4,'wedday');
 insert into join_tab4 values (5,3,'thuday');
 insert into join_tab4 values (6,2,'friday');
 insert into join_tab4 values (7,1,'satday');
 commit;
 select i,j,t from (select * from (select i,j,t from join_tab1)as dtab1
 				UNION select * from(select i,j,t from join_tab4) as dtab2 )as mtab;
  i | j |   t
 ---+---+--------
  1 | 7 | sunday
  3 | 5 | tueday
  4 | 1 | four
  6 | 2 | friday
    |   | null
  1 | 4 | one
  2 | 6 | monday
  5 | 0 | five
  5 | 3 | thuday
  6 | 6 | six
  7 | 7 | seven
  8 | 8 | eight
    | 0 | zero
  0 |   | zero
  2 | 3 | two
  3 | 2 | three
  4 | 4 | wedday
  7 | 1 | satday
 (18 rows)


 select * from join_tab1 where i = (select i from join_tab4 where t='satday');
  i | j |   t
 ---+---+-------
  7 | 7 | seven
 (1 row)


 select * from join_tab1 where i = (select i from join_tab4);
 ERROR:  one or more assertions failed
 DETAIL:  Expected no more than one row to be returned by expression
 --
 -- Test references to outer query in join quals
 --
 -- Single var
 explain (costs off)
 SELECT (SELECT join_tab1.i - join_tab2.i
         FROM join_tab1, join_tab2 WHERE join_tab1.i = join_tab2.i and out.f1 > 0
         LIMIT 1) as x
 FROM subselect_tbl1 out;
                                       QUERY PLAN
 --------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
    ->  Seq Scan on subselect_tbl1 "out"
          SubPlan 1
            ->  Result
                  ->  Limit
                        ->  Result
                              One-Time Filter: ("out".f1 > 0)
                              ->  Materialize
                                    ->  Broadcast Motion 3:3  (slice2; segments: 3)
                                          ->  Hash Join
                                                Hash Cond: (join_tab1.i = join_tab2.i)
                                                ->  Seq Scan on join_tab1
                                                ->  Hash
                                                      ->  Seq Scan on join_tab2
  Optimizer: Pivotal Optimizer (GPORCA)
 (15 rows)

 SELECT (SELECT join_tab1.i - join_tab2.i
         FROM join_tab1, join_tab2 WHERE join_tab1.i = join_tab2.i and out.f1 > 0
         LIMIT 1) as x
 FROM subselect_tbl1 out;
  x
 ---
  0
  0
  0
  0
  0
  0
  0
  0
 (8 rows)

 -- Two outer vars
 explain (costs off)
 SELECT (SELECT join_tab1.i - join_tab2.i
         FROM join_tab1, join_tab2 WHERE join_tab1.i = join_tab2.i and out1.i = out2.i
         LIMIT 1) as x
 FROM join_tab1 out1, join_tab2 out2;
                                         QUERY PLAN
 -------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
    ->  Nested Loop
          Join Filter: true
          ->  Broadcast Motion 3:3  (slice2; segments: 3)
                ->  Seq Scan on join_tab1 out1
          ->  Seq Scan on join_tab2 out2
          SubPlan 1
            ->  Result
                  ->  Limit
                        ->  Result
                              One-Time Filter: (out1.i = out2.i)
                              ->  Materialize
                                    ->  Broadcast Motion 3:3  (slice3; segments: 3)
                                          ->  Hash Join
                                                Hash Cond: (join_tab1.i = join_tab2.i)
                                                ->  Seq Scan on join_tab1
                                                ->  Hash
                                                      ->  Seq Scan on join_tab2
  Optimizer: Pivotal Optimizer (GPORCA)
 (19 rows)

 SELECT (SELECT join_tab1.i - join_tab2.i
         FROM join_tab1, join_tab2 WHERE join_tab1.i = join_tab2.i and out1.i = out2.i
         LIMIT 1) as x
 FROM join_tab1 out1, join_tab2 out2;
  x
 ---


  0
  0


  0


  0


  0

  0


  0


 (99 rows)

 -- Same, in an outer join
 --
 -- NOTE: The order that the rows come out from the subquery is not
 -- deterministic, so we have to use a dummy coalesce() expression that
 -- returns the same result regardless.
 --
 explain (costs off)
 SELECT (SELECT coalesce(join_tab1.i + join_tab2.i, 0) >= 0
         FROM join_tab1 LEFT JOIN join_tab2 ON join_tab1.i = join_tab2.i and out.f1 > 0
         LIMIT 1) as x
 FROM subselect_tbl1 out;
                                           QUERY PLAN
 ----------------------------------------------------------------------------------------------
  Result
    ->  Gather Motion 3:1  (slice1; segments: 3)
          ->  Seq Scan on subselect_tbl1 "out"
    SubPlan 1
      ->  Result
            ->  Limit
                  ->  Nested Loop Left Join
                        Join Filter: ((join_tab1.i = join_tab2.i) AND ("out".f1 > 0))
                        ->  Materialize
                              ->  Gather Motion 3:1  (slice3; segments: 3)
                                    ->  Seq Scan on join_tab1
                        ->  Materialize
                              ->  Gather Motion 3:1  (slice2; segments: 3)
                                    ->  Seq Scan on join_tab2
  Optimizer: Pivotal Optimizer (GPORCA)
 (15 rows)

 SELECT (SELECT coalesce(join_tab1.i + join_tab2.i, 0) >= 0
         FROM join_tab1 LEFT JOIN join_tab2 ON join_tab1.i = join_tab2.i and out.f1 > 0
         LIMIT 1) as x
 FROM subselect_tbl1 out;
  x
 ---
  t
  t
  t
  t
  t
  t
  t
  t
 (8 rows)

 explain (costs off)
 SELECT (SELECT coalesce(join_tab1.i + join_tab2.i, 0) >= 0
         FROM join_tab1 LEFT JOIN join_tab2 ON join_tab1.i = join_tab2.i and out1.i = out2.i
         LIMIT 1) as x
 FROM join_tab1 out1, join_tab2 out2;
                                               QUERY PLAN
 ------------------------------------------------------------------------------------------------------
  Result
    ->  Gather Motion 3:1  (slice1; segments: 3)
          ->  Nested Loop
                Join Filter: true
                ->  Broadcast Motion 3:3  (slice2; segments: 3)
                      ->  Seq Scan on join_tab1 out1
                ->  Seq Scan on join_tab2 out2
    SubPlan 1
      ->  Result
            ->  Limit
                  ->  Nested Loop Left Join
                        Join Filter: ((join_tab1.i = join_tab2.i) AND (out1.i = out2.i))
                        ->  Materialize
                              ->  Gather Motion 3:1  (slice4; segments: 3)
                                    ->  Seq Scan on join_tab1
                        ->  Materialize
                              ->  Gather Motion 3:1  (slice3; segments: 3)
                                    ->  Seq Scan on join_tab2
  Optimizer: Pivotal Optimizer (GPORCA)
 (19 rows)

 SELECT (SELECT coalesce(join_tab1.i + join_tab2.i, 0) >= 0
         FROM join_tab1 LEFT JOIN join_tab2 ON join_tab1.i = join_tab2.i and out1.i = out2.i
         LIMIT 1) as x
 FROM join_tab1 out1, join_tab2 out2;
  x
 ---
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
  t
 (99 rows)

 --
 -- Testing NOT-IN Subquery
 --
 create table Tbl8352_t1(a int, b int) distributed by (a);
 create table Tbl8352_t2(a int, b int) distributed by (a);
 insert into Tbl8352_t1 values(1,null),(null,1),(1,1),(null,null);
 insert into Tbl8352_t2 values(1,1);
 select * from Tbl8352_t1 where (Tbl8352_t1.a,Tbl8352_t1.b) not in (select Tbl8352_t2.a,Tbl8352_t2.b from Tbl8352_t2);
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
 (0 rows)

 create table Tbl8352_t1a(a int, b int) distributed by (a);
 create table Tbl8352_t2a(a int, b int) distributed by (a);
 insert into Tbl8352_t1a values(1,2),(3,null),(null,4),(null,null);
 insert into Tbl8352_t2a values(1,2);
 select * from Tbl8352_t1a where (Tbl8352_t1a.a,Tbl8352_t1a.b) not in (select Tbl8352_t2a.a,Tbl8352_t2a.b from Tbl8352_t2a) order by 1,2;
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  3 |
    | 4
 (2 rows)

 select (1,null::int) not in (select 1,1);
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  ?column?
 ----------

 (1 row)

 select (3,null::int) not in (select 1,1);
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  ?column?
 ----------
  t
 (1 row)

 begin;
 create table t1(a int, b int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 create table t2(a int, b int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 create table t3(a int, b int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 create table t4(a int, b int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 insert into t1 values(1,2);
 analyze t1;
 insert into t1 values(3,4);
 insert into t1 values(5,6);
 insert into t2 values(1,2);
 analyze t2;
 insert into t2 values(3,4);
 insert into t2 values(7,8);
 insert into t3 values(1,2);
 insert into t3 values(3,4);
 insert into t4 values(1,2);
 create table i1(a int, b int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 create table i2(a int, b int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 insert into i1 values(1,2);
 analyze i1;
 commit;
 --
 -- not in subquery involving vars from different rels with inner join
 --
 select t1.a, t2.b from t1, t2 where t1.a=t2.a and ((t1.a,t2.b) not in (select i1.a,i1.b from i1));
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  3 | 4
 (1 row)

 select t1.a, t2.b from t1 inner join t2 on  (t1.a=t2.a and ((t1.a,t2.b) not in (select i1.a,i1.b from i1)));
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  3 | 4
 (1 row)

 select t1.a, t2.b from t1 inner join t2 on  (t1.a=t2.a) where ((t1.a,t2.b) not in (select i1.a,i1.b from i1));
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  3 | 4
 (1 row)

 -- unsupported case
 explain select t1.a, t2.b from t1, t2 where t1.a=t2.a or ((t1.a,t2.b) not in (select i1.a,i1.b from i1));
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
                                            QUERY PLAN
 ------------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)  (cost=10000000000.00..10000000002.10 rows=3 width=8)
    ->  Nested Loop  (cost=10000000000.00..10000000002.05 rows=1 width=8)
          Join Filter: ((t1.a = t2.a) OR (NOT (hashed SubPlan 1)))
          ->  Broadcast Motion 3:3  (slice2; segments: 3)  (cost=0.00..1.03 rows=1 width=4)
                ->  Seq Scan on t1  (cost=0.00..1.01 rows=1 width=4)
          ->  Seq Scan on t2  (cost=0.00..1.01 rows=1 width=8)
          SubPlan 1
            ->  Broadcast Motion 3:3  (slice3; segments: 3)  (cost=0.00..1.01 rows=1 width=8)
                  ->  Seq Scan on i1  (cost=0.00..1.01 rows=1 width=8)
  Optimizer: Postgres query optimizer
 (10 rows)

 --
 -- not in subquery involving vars from different rels with left join.
 --
 select t1.a, t2.b from t1 left join t2 on  (t1.a=t2.a) where ((t1.a,t2.b) not in (select i1.a,i1.b from i1));
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  3 | 4
  5 |
 (2 rows)

 select t1.a, t2.b from t1 left join t2 on  (t1.a=t2.a and ((t1.a,t2.b) not in (select i1.a,i1.b from i1)));
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  1 |
  3 | 4
  5 |
 (3 rows)

 --
 -- not in subquery involving vars from different rels with outer join
 --
 select t1.a, t2.b from t1 full outer join t2 on  (t1.a=t2.a) where ((t1.a,t2.b) not in (select i1.a,i1.b from i1));
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  3 | 4
  5 |
    | 8
 (3 rows)

 -- not in subquery with a row var in FULL JOIN condition
 select t1.a, t2.b from t1 full outer join t2 on  (t1.a=t2.a and ((t1.a,t2.b) not in (select i1.a,i1.b from i1)));
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  3 | 4
  5 |
    | 8
  1 |
    | 2
 (5 rows)

 --
 -- more complex case
 --
 select t1.a,t2.b from t1 left join (t2 inner join t3 on (t3.a not in (select t4.a from t4))) on (t1.a=t2.a);
  a | b
 ---+---
  1 | 2
  3 | 4
  5 |
 (3 rows)

 begin;
 create table Tbl01(a int, b int, c int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 insert into Tbl01 values(1,2,3);
 insert into Tbl01 values(4,5,6);
 insert into Tbl01 values(7,8,9);
 insert into Tbl01 values(null,11,12);
 create table Tbl03(a int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 insert into Tbl03 values(1),(4);
 create or replace function foo(int) returns int as $$
 	select case when $1 is null then 13::int
 	       	    else null::int
 	       end;
 $$ language sql immutable;
 commit;
 select Tbl01.*,foo(Tbl01.a) as foo from Tbl01; -- showing foo values
  a | b  | c  | foo
 ---+----+----+-----
  1 |  2 |  3 |
  4 |  5 |  6 |
  7 |  8 |  9 |
    | 11 | 12 |  13
 (4 rows)

 select Tbl01.* from Tbl01 where foo(Tbl01.a) not in (select a from Tbl03);
  a | b  | c
 ---+----+----
    | 11 | 12
 (1 row)

 create table Tbl02 as select Tbl01.*,foo(Tbl01.a) as foo from Tbl01;
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 select Tbl02.* from Tbl02 where foo not in (select a from Tbl03);
  a | b  | c  | foo
 ---+----+----+-----
    | 11 | 12 |  13
 (1 row)

 begin;
 create table Tbl04(a int, b int);
 insert into Tbl04 values(1,2),(3,4),(5,6);
 create table Tbl05(a int, b int);
 insert into Tbl05 values(1,2);
 create table Tbl06(a int, b int);
 insert into Tbl06 values(1,2),(3,4);
 create table i3(a int not null, b int not null);
 insert into i3 values(1,2);
 create table Tbl07(a int, b int);
 insert into Tbl07 values(1,2),(3,4),(null,null);
 create table Tbl08(a int, b int);
 insert into Tbl08 values(1,2),(3,4),(null,null);
 create table Tbl09(a int, b int);
 insert into Tbl09 values(1,2),(5,null),(null,8);
 analyze Tbl04;
 analyze Tbl05;
 analyze Tbl06;
 analyze i3;
 analyze Tbl07;
 analyze Tbl08;
 analyze Tbl09;
 commit;
 --
 -- Positive cases: We should be inferring non-nullability of the not-in subquery. This should result in HLASJ.
 --
 -- non-nullability due to inner join
 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl05.a,Tbl05.b from Tbl05,Tbl06 where Tbl05.a=Tbl06.a and Tbl05.b < 10); -- expected: (3,4),(5,6)
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  3 | 4
  5 | 6
 (2 rows)

 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl07.a,Tbl07.b from Tbl07 inner join Tbl08 on (Tbl07.a=Tbl08.a and Tbl07.b=Tbl08.b) inner join i3 on (i3.a=Tbl08.a and i3.b=Tbl08.b)); -- expected:(3,4), (5,6)
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  3 | 4
  5 | 6
 (2 rows)

 -- non-nullability due to where clause condition
 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl05.a,Tbl05.b from Tbl05 where Tbl05.a < 2 and Tbl05.b < 10); -- expected: (3,4), (5,6)
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  3 | 4
  5 | 6
 (2 rows)

 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl07.a,Tbl07.b from Tbl07 left join Tbl08 on (Tbl07.a=Tbl08.a) where Tbl07.a = 1 and Tbl07.b = 2); -- expected: (3,4),(5,6)
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  3 | 4
  5 | 6
 (2 rows)

 -- not null condition in the where clause
 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl07.a,Tbl07.b from Tbl07 full outer join Tbl08 on (Tbl07.a=Tbl08.a) where Tbl07.a is not null and Tbl07.b is not null); -- (5,6)
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  5 | 6
 (1 row)

 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl07.a,Tbl07.b from Tbl07 left join Tbl08 on (Tbl07.a=Tbl08.a) where Tbl07.a is not null and Tbl07.b is not null); -- (5,6)
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  5 | 6
 (1 row)

 -- or clauses that should lead to non-nullability
 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl05.a,Tbl05.b from Tbl05 where (Tbl05.a < 2 or Tbl05.a > 100) AND (Tbl05.b < 4 or Tbl05.b > 100)); -- expected: (3,4), (5,6)
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  3 | 4
  5 | 6
 (2 rows)

 -- base-table constraints
 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select i3.a,i3.b from i3); -- expected: (3,4),(5,6)
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  3 | 4
  5 | 6
 (2 rows)

 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl05.a,Tbl05.b from Tbl05,i3 where	Tbl05.a = i3.a and	Tbl05.b = i3.b);
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  3 | 4
  5 | 6
 (2 rows)

 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl05.a,Tbl05.b from Tbl05,i3 where Tbl05.a < i3.a and Tbl05.b > i3.b);
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  3 | 4
  5 | 6
  1 | 2
 (3 rows)

 -- non-null constant values
 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select 1,2); -- (3,4),(5,6)
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  3 | 4
  5 | 6
 (2 rows)

 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in ((1,2));
  a | b
 ---+---
  3 | 4
  5 | 6
 (2 rows)

 -- multiple NOT-IN expressions
 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl06.a,Tbl06.b from Tbl06) and (Tbl04.a,Tbl04.b) not in (select i3.a, i3.b from i3); -- expected: (5,6)
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  5 | 6
 (1 row)

 explain (costs off)
 select Tbl04.* from Tbl04 where not ((Tbl04.a,Tbl04.b) in (select Tbl06.a,Tbl06.b from Tbl06) or (Tbl04.a,Tbl04.b) in (select i3.a, i3.b from i3));
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
                                 QUERY PLAN
 --------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
    ->  Nested Loop Left Anti Semi (Not-In) Join
          Join Filter: ((tbl04.a = i3.a) AND (tbl04.b = i3.b))
          ->  Nested Loop Left Anti Semi (Not-In) Join
                Join Filter: ((tbl04.a = tbl06.a) AND (tbl04.b = tbl06.b))
                ->  Seq Scan on tbl04
                ->  Materialize
                      ->  Broadcast Motion 3:3  (slice2; segments: 3)
                            ->  Seq Scan on tbl06
          ->  Materialize
                ->  Broadcast Motion 3:3  (slice3; segments: 3)
                      ->  Seq Scan on i3
  Optimizer: Postgres query optimizer
 (13 rows)

 select Tbl04.* from Tbl04 where not ((Tbl04.a,Tbl04.b) in (select Tbl06.a,Tbl06.b from Tbl06) or (Tbl04.a,Tbl04.b) in (select i3.a, i3.b from i3)); -- expected: (5,6)
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  5 | 6
 (1 row)

 -- single column in the target list should always give a HLASJ
 select Tbl04.* from Tbl04 where Tbl04.a NOT IN (select Tbl09.a from Tbl09 where Tbl09.b is null); -- (1,2) (3,4)
  a | b
 ---+---
  3 | 4
  1 | 2
 (2 rows)

 select Tbl04.* from Tbl04 where Tbl04.a NOT IN (select i3.a from i3);
  a | b
 ---+---
  3 | 4
  5 | 6
 (2 rows)

 select Tbl04.* from Tbl04 where Tbl04.a NOT IN (select Tbl05.a from	Tbl05 left	join i3	on (Tbl05.a=i3.a));
  a | b
 ---+---
  3 | 4
  5 | 6
 (2 rows)

 --
 -- Negative tests: we should not be inferring non-nullability in these cases. Therefore, we should see NLASJ.
 --
 -- No where clause
 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl05.a,Tbl05.b from Tbl05); -- expected: (3,4), (5,6)
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  3 | 4
  5 | 6
 (2 rows)

 -- INDF in the where clause
 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl07.a,Tbl07.b from Tbl07,Tbl08 where Tbl07.a is not distinct from Tbl08.a and Tbl07.b is not distinct from Tbl08.b); -- no rows
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
 (0 rows)

 -- null conditions in the where clause
 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl07.a,Tbl07.b from Tbl07 left join Tbl08 on (Tbl07.a=Tbl08.a and Tbl07.b=Tbl08.b) where Tbl07.a is null and Tbl07.b is null); -- no rows
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
 (0 rows)

 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl07.a,Tbl07.b from Tbl07 full outer join Tbl08 on (Tbl07.a=Tbl08.a and Tbl07.b=Tbl08.b) where Tbl07.a is null and Tbl07.b is null); -- no rows
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
 (0 rows)

 -- OR clauses that should not lead to non-nullability
 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl07.a,Tbl07.b from Tbl07,Tbl08 where Tbl07.a is not distinct from Tbl08.a or Tbl07.a=1); -- no rows
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
 (0 rows)

 -- values list: we don't support it yet. not worth the effort.
 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (values(1,2),(3,4)); -- (3,4),(5,6)
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  5 | 6
 (1 row)

 -- functions/ops in the target list of the subquery
 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select i3.a+2,i3.b+2 from i3); -- expected: (5,6)
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  1 | 2
  5 | 6
 (2 rows)

 -- group by does not guarantee removal of nulls.
 select Tbl09.a, Tbl09.b from Tbl09;
  a | b
 ---+---
  1 | 2
  5 |
    | 8
 (3 rows)

 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl09.a,Tbl09.b from Tbl09); -- expected: (3,4)
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  3 | 4
 (1 row)

 select Tbl09.a, Tbl09.b from Tbl09 group by Tbl09.a, Tbl09.b;
  a | b
 ---+---
  5 |
    | 8
  1 | 2
 (3 rows)

 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl09.a, Tbl09.b from Tbl09 group by Tbl09.a, Tbl09.b); -- expected: (3,4)
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  3 | 4
 (1 row)

 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select sum(i3.b),i3.a from i3 group by i3.a); -- (1,2),(3,4),(5,6)
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  1 | 2
  3 | 4
  5 | 6
 (3 rows)

 -- infering not-nullability for only one of the columns
 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select i3.a,Tbl05.b from i3,Tbl05 where	i3.a=Tbl05.a); -- (3,4),(5,6)
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  3 | 4
  5 | 6
 (2 rows)

 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) NOT IN (select i3.a,i3.b from Tbl07 left join i3 on (i3.a=Tbl07.a and i3.b=Tbl07.b) where i3.a > 2);
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  3 | 4
  5 | 6
  1 | 2
 (3 rows)

 --
 -- Unsupported test: These tests are meant to illustrate NOT-IN subqueries we do not support
 -- Started supporting since RIO
 --
 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select i3.a,i3.b from i3 union select Tbl07.a, Tbl07.b from Tbl07); -- nulls in the inner side, should not return any rows
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
 (0 rows)

 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select i3.a,i3.b from i3 union all select Tbl07.a, Tbl07.b from Tbl07); -- nulls in the innder side, should not return any rows
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
 (0 rows)

 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select 1,2 union select 3,4); --(5,6)
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  5 | 6
 (1 row)

 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select i3.a,i3.b from i3) or (Tbl04.a,Tbl04.b) not in (select Tbl07.a, Tbl07.b from Tbl07);
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  3 | 4
  5 | 6
 (2 rows)

 -- Cases where the planner "should have" determined not-nullabitlity
 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select i3.a,i3.b from i3 left join Tbl07 on (i3.a=Tbl07.a and i3.b=Tbl07.b));
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  3 | 4
  5 | 6
 (2 rows)

 select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl05.a,Tbl05.b from Tbl05 where (Tbl05.a IN (select i3.a from i3)) AND (Tbl05.b IN (select i3.b from i3)));
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  a | b
 ---+---
  3 | 4
  5 | 6
 (2 rows)

 -- additional queries
 drop table if exists Tbl04;
 create table Tbl04(x int, y int);
 insert into Tbl04 values(1,2);
 insert into Tbl04 values(3,4);
 create table Tbl10(x int, y int);
 insert into Tbl10 values(1,null);
 select * from Tbl04 where (x,y) not in (select x,y from Tbl10);
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  x | y
 ---+---
  3 | 4
 (1 row)

 select * from Tbl04 where (x,y) not in (select 1,y from Tbl10);
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  x | y
 ---+---
  3 | 4
 (1 row)

 select * from tbl10 where y not in (select 1 where false);
  x | y
 ---+---
  1 |
 (1 row)

 alter table Tbl10 alter column x set not null;
 select * from Tbl04 where (x,y) not in (select x,y from Tbl10);
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
  x | y
 ---+---
  3 | 4
 (1 row)

 begin;
 create table TblText1(a text, b text);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 create table TblText2(a text, b text);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 create table TblText3(a text, b text);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 insert into TblText1 values('rob', 'klopp');
 insert into TblText1 values('florian','waas');
 insert into TblText1 values('oak','barrett');
 insert into TblText1 values('tushar','pednekar');
 insert into TblText2 select * from TblText1;
 insert into TblText3 values('florian','waas');
 insert into TblText3 values('oak','barrett');
 commit;
 SELECT TblText1.a, TblText2.b FROM TblText1 JOIN TblText2 ON TblText1.a = TblText2.a WHERE ((NOT (TblText1.a, TblText2.b) IN (SELECT TblText3.a, TblText3.b FROM TblText3)));
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
    a    |    b
 --------+----------
  tushar | pednekar
  rob    | klopp
 (2 rows)

 SELECT TblText1.a, TblText2.b FROM TblText1 JOIN TblText2 ON TblText1.a = TblText2.a WHERE (( (TblText1.a, TblText2.b) IN (SELECT TblText3.a, TblText3.b FROM TblText3)));
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
     a    |    b
 ---------+---------
  florian | waas
  oak     | barrett
 (2 rows)

 --
 -- Delete
 --
 begin;
 create table TabDel1(a int, b int);
 insert into TabDel1 values(1,2),(3,4),(5,6);
 create table TabDel2 as select * from TabDel1;
 create table TabDel3(a int, b int);
 insert into TabDel3 values(1,2);
 create table TabDel4(a int not null, b int not null);
 insert into TabDel4 values(1,2);
 commit;
 explain delete from TabDel1 where TabDel1.a not in (select a from TabDel3); -- do not support this because we produce NLASJ
                                             QUERY PLAN
 ---------------------------------------------------------------------------------------------------
  Delete on tabdel1  (cost=0.00..862.04 rows=1 width=1)
    ->  Hash Left Anti Semi (Not-In) Join  (cost=0.00..862.00 rows=1 width=18)
          Hash Cond: (tabdel1.a = tabdel3.a)
          ->  Seq Scan on tabdel1  (cost=0.00..431.00 rows=1 width=18)
          ->  Hash  (cost=431.00..431.00 rows=1 width=4)
                ->  Broadcast Motion 3:3  (slice1; segments: 3)  (cost=0.00..431.00 rows=1 width=4)
                      ->  Seq Scan on tabdel3  (cost=0.00..431.00 rows=1 width=4)
  Optimizer: Pivotal Optimizer (GPORCA)
 (8 rows)

 explain delete from TabDel2 where TabDel2.a not in (select a from TabDel4); -- support this
                                             QUERY PLAN
 ---------------------------------------------------------------------------------------------------
  Delete on tabdel2  (cost=0.00..862.03 rows=1 width=1)
    ->  Hash Left Anti Semi (Not-In) Join  (cost=0.00..862.00 rows=1 width=18)
          Hash Cond: (tabdel2.a = tabdel4.a)
          ->  Seq Scan on tabdel2  (cost=0.00..431.00 rows=1 width=18)
          ->  Hash  (cost=431.00..431.00 rows=3 width=4)
                ->  Broadcast Motion 3:3  (slice1; segments: 3)  (cost=0.00..431.00 rows=3 width=4)
                      ->  Seq Scan on tabdel4  (cost=0.00..431.00 rows=1 width=4)
  Optimizer: Pivotal Optimizer (GPORCA)
 (8 rows)

 delete from TabDel2 where TabDel2.a not in (select a from TabDel4);
 select * from TabDel2;
  a | b
 ---+---
  1 | 2
 (1 row)

 --
 -- Update
 --
 begin;
 create table TblUp1(a int, b int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 insert into TblUp1 values(1,2),(3,4),(5,6);
 create table TblUp2 as select * from TblUp1;
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
 create table TblUp3(a int, b int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 insert into TblUp3 values(1,2);
 create table TblUp4(a int not null, b int not null);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 insert into TblUp4 values(1,2);
 commit;
 -- planner does not support updates on distribution keys
 update TblUp1 set a=100 where a not in (select a from TblUp3);
 select * from TblUp1;
   a  | b
 -----+---
  100 | 4
  100 | 6
    1 | 2
 (3 rows)

 update TblUp2 set a=100 where a not in (select a from TblUp4);
 select * from TblUp2;
   a  | b
 -----+---
  100 | 4
    1 | 2
  100 | 6
 (3 rows)

 --
 -- Check for correct results for subqueries nested inside a scalar expression
 --
 begin;
 create table subselect_tab1 (a int, b text, c int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 create table subselect_tab2 (a int, b int, c int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 create table subselect_tab3 (a int, b text, c int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 insert into subselect_tab1 VALUES (100, 'false', 1);
 insert into subselect_tab1 VALUES (200, 'true', 2);
 insert into subselect_tab2 VALUES (2,2,2);
 insert into subselect_tab3 VALUES (200, 'falseg', 1);
 commit;
 -- scalar subquery in a null test expression
 select * from subselect_tab1 where (select b from subselect_tab2) is null;
  a | b | c
 ---+---+---
 (0 rows)

 -- ANY subquery nested in a scalar comparison expression
 select * from subselect_tab1 where b::bool = ( c = any(select c from subselect_tab2));
   a  |   b   | c
 -----+-------+---
  200 | true  | 2
  100 | false | 1
 (2 rows)

 -- ALL subquery deeply nested in a scalar expression
 select * from subselect_tab3 where b = ( a < all(select c from subselect_tab2) || 'g');
   a  |   b    | c
 -----+--------+---
  200 | falseg | 1
 (1 row)

 -- EXISTS subquery nested in a boolean expression
 select * from subselect_tab1 where b::bool = (exists(select c from subselect_tab2) and not exists (select c from subselect_tab3));
   a  |   b   | c
 -----+-------+---
  100 | false | 1
 (1 row)

 -- ALL and EXISTS nested in a CASE-WHEN-THEN expression
 select * from subselect_tab1 where case when b is not null then (subselect_tab1.c < all(select c from subselect_tab2 where exists (select * from subselect_tab3))) else false end;
   a  |   b   | c
 -----+-------+---
  100 | false | 1
 (1 row)

 -- EXISTS subquery nested in a scalar comparison expression
 select * from subselect_tab1 where b::bool = exists(select c from subselect_tab2);
   a  |  b   | c
 -----+------+---
  200 | true | 2
 (1 row)

 -- a few more complex combinations..
 SELECT * FROM subselect_tab3 WHERE (EXISTS(SELECT c FROM subselect_tab2) AND NOT EXISTS (SELECT c from subselect_tab3)) IN (SELECT b::BOOL from subselect_tab1);
   a  |   b    | c
 -----+--------+---
  200 | falseg | 1
 (1 row)

 SELECT * FROM subselect_tab3 WHERE (NOT EXISTS(SELECT c FROM subselect_tab2)) IN (SELECT b::boolean  from subselect_tab1);
   a  |   b    | c
 -----+--------+---
  200 | falseg | 1
 (1 row)

 -- Test to verify that planner for subqueries, generates different copy of SubPlans referring to the same initplan
 -- and does not Assert on the subplan's being same
 create table append_rel(att1 int, att2 int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'att1' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 create table append_rel1(att3 int) INHERITS (append_rel);
 NOTICE:  table has parent, setting distribution columns to match parent table
 create table append_rel2(att4 int) INHERITS(append_rel);
 NOTICE:  table has parent, setting distribution columns to match parent table
 insert into append_rel values(1,10),(2,20),(3,30);
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Inherited tables
 analyze append_rel;
 explain with test as (select * from (select * from append_rel) p where att1 in (select att1 from append_rel where att2 >= 19) ) select att2 from append_rel where att1 in (select att1 from test where att2 <= 21);
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Inherited tables
                                                           QUERY PLAN
 ------------------------------------------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)  (cost=2296.15..4165.55 rows=51935 width=4)
    ->  Hash Join  (cost=2296.15..3473.08 rows=17312 width=4)
          Hash Cond: (append_rel.att1 = test.att1)
          ->  Append  (cost=0.00..848.02 rows=51934 width=8)
                ->  Seq Scan on append_rel append_rel_1  (cost=0.00..1.01 rows=1 width=8)
                ->  Seq Scan on append_rel1 append_rel_2  (cost=0.00..293.67 rows=25967 width=8)
                ->  Seq Scan on append_rel2 append_rel_3  (cost=0.00..293.67 rows=25967 width=8)
          ->  Hash  (cost=2283.65..2283.65 rows=1000 width=4)
                ->  HashAggregate  (cost=2280.31..2283.65 rows=1000 width=4)
                      Group Key: test.att1
                      ->  Subquery Scan on test  (cost=1021.14..2063.92 rows=17312 width=4)
                            ->  Hash Semi Join  (cost=1021.14..2063.92 rows=17312 width=8)
                                  Hash Cond: (append_rel_4.att1 = append_rel_5.att1)
                                  ->  Append  (cost=0.00..804.74 rows=17312 width=8)
                                        ->  Seq Scan on append_rel append_rel_6  (cost=0.00..1.01 rows=1 width=8)
                                              Filter: (att2 <= 21)
                                        ->  Seq Scan on append_rel1 append_rel_7  (cost=0.00..358.58 rows=8656 width=8)
                                              Filter: (att2 <= 21)
                                        ->  Seq Scan on append_rel2 append_rel_8  (cost=0.00..358.58 rows=8656 width=8)
                                              Filter: (att2 <= 21)
                                  ->  Hash  (cost=804.74..804.74 rows=17312 width=4)
                                        ->  Append  (cost=0.00..804.74 rows=17312 width=4)
                                              ->  Seq Scan on append_rel append_rel_9  (cost=0.00..1.01 rows=1 width=4)
                                                    Filter: (att2 >= 19)
                                              ->  Seq Scan on append_rel1 append_rel_10  (cost=0.00..358.58 rows=8656 width=4)
                                                    Filter: (att2 >= 19)
                                              ->  Seq Scan on append_rel2 append_rel_11  (cost=0.00..358.58 rows=8656 width=4)
                                                    Filter: (att2 >= 19)
  Optimizer: Postgres query optimizer
 (29 rows)

 with test as (select * from (select * from append_rel) p where att1 in (select att1 from append_rel where att2 >= 19) ) select att2 from append_rel where att1 in (select att1 from test where att2 <= 21);
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Inherited tables
  att2
 ------
    20
 (1 row)

 -- Check correct results for subqueries in the target list
 begin;
 drop table if exists temp_a, temp_b, temp_c;
 NOTICE:  table "temp_a" does not exist, skipping
 NOTICE:  table "temp_b" does not exist, skipping
 NOTICE:  table "temp_c" does not exist, skipping
 create table temp_a (a int ,b int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 create table temp_b (b int ,c int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'b' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 create table temp_c (c int, d int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 insert into temp_a values (1,2), (2,3), (3,4), (4,7), (5,19), (6,13), (7,23), (7,17);
 insert into temp_b values (1,2), (2,2), (3,2), (4,2), (5,3), (6,3), (7,3), (8,3), (10,4);
 insert into temp_c values (NULL, 2), (2, 2), (4, NULL), (NULL, 3), (1, 3), (8, NULL), (7, 2), (NULL, NULL);
 commit;
 select sum(case when b in (select b from temp_b where t.a>c) then 1 else 0 end),sum(case when not( b in (select b from temp_b where t.a>c)) then 1 else 0 end) from temp_a t;
  sum | sum
 -----+-----
    2 |   6
 (1 row)

 select sum(case when b in (select b from temp_b where EXISTS (select sum(d) from temp_c where t.a > d)) then 1 else 0 end),sum(case when not( b in (select b from temp_b where t.a>c)) then 1 else 0 end) from temp_a t;
  sum | sum
 -----+-----
    4 |   6
 (1 row)

 select sum(case when b in (select b from temp_b where EXISTS (select sum(d) from temp_c where t.a > d or t.a > temp_b.c)) then 1 else 0 end),sum(case when not( b in (select b from temp_b, temp_c where t.a>temp_b.c or t.a > temp_c.d)) then 1 else 0 end) from temp_a t;
  sum | sum
 -----+-----
    4 |   6
 (1 row)

 -- Check that predicate with set-returning function is not pushed down
 create table table_with_array_column (an_array_column double precision[]);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'an_array_column' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 insert into table_with_array_column values (array[1.1, 2.2]);
 explain (costs off)
 select *
 from (
   select unnest(t1.an_array_column) unnested_array_column
   from table_with_array_column t1, table_with_array_column t2) zz
 where unnested_array_column is not null;
                                       QUERY PLAN
 ---------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
    ->  Result
          Filter: (NOT ((unnest(t1.an_array_column)) IS NULL))
          ->  ProjectSet
                ->  Nested Loop
                      Join Filter: true
                      ->  Seq Scan on table_with_array_column t1
                      ->  Materialize
                            ->  Broadcast Motion 3:3  (slice2; segments: 3)
                                  ->  Seq Scan on table_with_array_column t2
  Optimizer: Pivotal Optimizer (GPORCA)
 (11 rows)

 select *
 from (
   select unnest(t1.an_array_column) unnested_array_column
   from table_with_array_column t1, table_with_array_column t2) zz
 where unnested_array_column is not null;
  unnested_array_column
 -----------------------
                    1.1
                    2.2
 (2 rows)

 -- check that predicate is not pushed through a projected non-correlated subquery
 create table subquery_nonpush_through_1(a int, b int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 create table subquery_nonpush_through_2(a int, b int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 explain (costs off)
 select *
 from(
   select (subquery_nonpush_through_1.a in (select a from subquery_nonpush_through_2))::text as xx, subquery_nonpush_through_1.b
   from subquery_nonpush_through_1,subquery_nonpush_through_2) t
 where xx='dd';
                                                                           QUERY PLAN
 ---------------------------------------------------------------------------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
    ->  Result
          Filter: ((((hashed SubPlan 1))::text) = 'dd'::text)
          ->  Nested Loop
                Join Filter: true
                ->  Seq Scan on subquery_nonpush_through_1
                ->  Materialize
                      ->  Broadcast Motion 3:3  (slice2; segments: 3)
                            ->  Seq Scan on subquery_nonpush_through_2
                SubPlan 1
                  ->  Result
                        ->  Result
                              ->  Materialize
                                    ->  Broadcast Motion 3:3  (slice3; segments: 3)
                                          ->  Seq Scan on subquery_nonpush_through_2 subquery_nonpush_through_2_1
  Optimizer: Pivotal Optimizer (GPORCA)
 (19 rows)

 select *
 from(
   select (subquery_nonpush_through_1.a in (select a from subquery_nonpush_through_2))::text as xx, subquery_nonpush_through_1.b
   from subquery_nonpush_through_1,subquery_nonpush_through_2) t
 where xx='dd';
  xx | b
 ----+---
 (0 rows)

 -- Ensure we produce a hashed subplan when there are no outer references
 CREATE TABLE a1 AS (
     SELECT * FROM generate_series(1, 5) AS a1)
     WITH data distributed replicated;
 CREATE TABLE a2 AS (
     SELECT * FROM generate_series(1, 10) AS a1)
     WITH data distributed BY (a1);
 CREATE TABLE a3 AS (
 	SELECT a1, row_to_json(a2) AS rj FROM a2)
 	WITH data distributed BY (a1);
 INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
 DETAIL:  Falling back to Postgres-based planner because GPORCA does not support the following feature: Whole-row variable
 -- explain "verbose" is needed to show that the subplan is hashed
 explain (verbose, costs off) select a1,case when a2 in (select a1::text from a1 where a1 is not null) then 'true' else 'false' end as checkcol
 from (
       select a1,rj->>'a1'::text as a2
       from a3
       )t;
                                           QUERY PLAN
 ----------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
    Output: a3.a1, (CASE WHEN (hashed SubPlan 1) THEN 'true'::text ELSE 'false'::text END)
    ->  Seq Scan on qp_subquery.a3
          Output: a3.a1, CASE WHEN (hashed SubPlan 1) THEN 'true'::text ELSE 'false'::text END
          SubPlan 1
            ->  Result
                  Output: ((a1.a1)::text)
                  ->  Result
                        Output: (a1.a1)::text, true
                        ->  Seq Scan on qp_subquery.a1
                              Output: a1.a1
                              Filter: (NOT (a1.a1 IS NULL))
  Optimizer: GPORCA
 (13 rows)

 select a1,case when a2 in (select a1::text from a1 where a1 is not null) then 'true' else 'false' end as checkcol
 from (
       select a1,rj->>'a1'::text as a2
       from a3
       )t;
  a1 | checkcol
 ----+----------
   2 | true
   3 | true
   4 | true
   7 | false
   8 | false
   1 | true
   5 | true
   6 | false
   9 | false
  10 | false
 (10 rows)

 -- check various [NOT] EXISTS subqueries on materialized views
 create table t (a int, b int) distributed by (a);
 insert into t values (1, 1), (2, NULL), (NULL, 3);
 create materialized view v as select a, b from t distributed randomly;
 select * from v where exists (select a from v);
  a | b
 ---+---
  2 |
    | 3
  1 | 1
 (3 rows)

 select * from v where exists (select a from v limit 0);
  a | b
 ---+---
 (0 rows)

 select * from v where exists (select a from v where a=2);
  a | b
 ---+---
  2 |
    | 3
  1 | 1
 (3 rows)

 select * from v where exists (select a from v where a<>2);
  a | b
 ---+---
  2 |
    | 3
  1 | 1
 (3 rows)

 select * from v where not exists (select a from v);
  a | b
 ---+---
 (0 rows)

 select * from v where not exists (select a from v limit 0);
  a | b
 ---+---
  2 |
    | 3
  1 | 1
 (3 rows)

 select * from v where not exists (select a from v where a=2);
  a | b
 ---+---
 (0 rows)

 select * from v where not exists (select a from v where a<>2);
  a | b
 ---+---
 (0 rows)

 select * from v where exists (select b from v);
  a | b
 ---+---
  2 |
    | 3
  1 | 1
 (3 rows)

 select * from v where exists (select b from v limit 0);
  a | b
 ---+---
 (0 rows)

 select * from v where exists (select b from v where b=2);
  a | b
 ---+---
 (0 rows)

 select * from v where exists (select b from v where b<>2);
  a | b
 ---+---
  2 |
    | 3
  1 | 1
 (3 rows)

 select * from v where not exists (select b from v);
  a | b
 ---+---
 (0 rows)

 select * from v where not exists (select b from v limit 0);
  a | b
 ---+---
  2 |
    | 3
  1 | 1
 (3 rows)

 select * from v where not exists (select b from v where b=2);
  a | b
 ---+---
  2 |
    | 3
  1 | 1
 (3 rows)

 select * from v where not exists (select b from v where b<>2);
  a | b
 ---+---
 (0 rows)

 -- Check that a query having pattern of Select-Project-NaryJoin,
 -- also containing a Select predicate condition with the same pattern nested in a subquery runs
 CREATE TABLE tab1(a TEXT, b TEXT) DISTRIBUTED RANDOMLY;
 INSERT INTO tab1 SELECT i,i FROM GENERATE_SERIES(1,3)i;
 SELECT * FROM (SELECT BTRIM(p1.b) AS param FROM tab1 p1 JOIN tab1 p2 USING(a)) t1
 WHERE EXISTS
           (SELECT 1 FROM
               (SELECT BTRIM(p1.b) AS param FROM tab1 p1 JOIN tab1 p2 USING(a)) t2
            WHERE t2.param = t1.param);
  param
 -------
  3
  1
  2
 (3 rows)

 EXPLAIN (COSTS OFF) SELECT * FROM (SELECT BTRIM(p1.b) AS param FROM tab1 p1 JOIN tab1 p2 USING(a)) t1
 WHERE EXISTS
           (SELECT 1 FROM
               (SELECT BTRIM(p1.b) AS param FROM tab1 p1 JOIN tab1 p2 USING(a)) t2
            WHERE t2.param = t1.param);
                                    QUERY PLAN
 ---------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
    ->  Hash Semi Join
          Hash Cond: (btrim(p1.b) = btrim(p1_1.b))
          ->  Hash Join
                Hash Cond: (p1.a = p2.a)
                ->  Seq Scan on tab1 p1
                      Filter: (NOT (btrim(b) IS NULL))
                ->  Hash
                      ->  Broadcast Motion 3:3  (slice2; segments: 3)
                            ->  Seq Scan on tab1 p2
          ->  Hash
                ->  Broadcast Motion 3:3  (slice3; segments: 3)
                      ->  Hash Join
                            Hash Cond: (p1_1.a = p2_1.a)
                            ->  Seq Scan on tab1 p1_1
                            ->  Hash
                                  ->  Broadcast Motion 3:3  (slice4; segments: 3)
                                        ->  Seq Scan on tab1 p2_1
  Optimizer: GPORCA
 (19 rows)

 -- Check that a query having pattern of Select-Project-NaryJoin,
 -- also containing a Select predicate condition with the same pattern nested in a subquery runs when subplan is enforced
 SET optimizer_enforce_subplans TO on;
 SELECT * FROM (SELECT BTRIM(p1.b) AS param FROM tab1 p1 JOIN tab1 p2 USING(a)) t1
 WHERE EXISTS
           (SELECT 1 FROM
               (SELECT BTRIM(p1.b) AS param FROM tab1 p1 JOIN tab1 p2 USING(a)) t2
            WHERE t2.param = t1.param);
  param
 -------
  2
  1
  3
 (3 rows)

 EXPLAIN (COSTS OFF) SELECT * FROM (SELECT BTRIM(p1.b) AS param FROM tab1 p1 JOIN tab1 p2 USING(a)) t1
 WHERE EXISTS
           (SELECT 1 FROM
               (SELECT BTRIM(p1.b) AS param FROM tab1 p1 JOIN tab1 p2 USING(a)) t2
            WHERE t2.param = t1.param);
                                        QUERY PLAN
 -----------------------------------------------------------------------------------------
  Hash Join
    Hash Cond: (p1.a = p2_1.a)
    ->  Result
          Filter: (SubPlan 1)
          ->  Gather Motion 3:1  (slice1; segments: 3)
                ->  Seq Scan on tab1 p1
                      Filter: (NOT (btrim(b) IS NULL))
          SubPlan 1
            ->  Result
                  Filter: (btrim(p1_1.b) = btrim(p1.b))
                  ->  Materialize
                        ->  Gather Motion 3:1  (slice2; segments: 3)
                              ->  Hash Join
                                    Hash Cond: (p1_1.a = p2.a)
                                    ->  Seq Scan on tab1 p1_1
                                    ->  Hash
                                          ->  Broadcast Motion 3:3  (slice3; segments: 3)
                                                ->  Seq Scan on tab1 p2
    ->  Hash
          ->  Gather Motion 3:1  (slice4; segments: 3)
                ->  Seq Scan on tab1 p2_1
  Optimizer: GPORCA
 (22 rows)

 reset optimizer_enforce_subplans;
 set client_min_messages='warning';
 drop schema qp_subquery cascade;
 reset optimizer_trace_fallback;