src/test/regress/expected/subselect_gp_indexes.out - cloudberry - Git at Google

 --
 -- Test correlated subquery in subplan with motion chooses correct scan type
 --
 -- Given I have two distributed tables
 create table choose_seqscan_t1(id1 int,id2 int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id1' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 create table choose_seqscan_t2(id1 int,id2 int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id1' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 -- and they have some data
 insert into choose_seqscan_t1 select i+1,i from generate_series(1,50)i;
 insert into choose_seqscan_t2 select i+1,i from generate_series(1,50)i;
 -- and one of the tables has an index on a column which is not the distribution column
 create index bidx2 on choose_seqscan_t2(id2);
 -- and the statistics reflect the newly inserted data
 analyze choose_seqscan_t1; analyze choose_seqscan_t2;
 -- making an indexscan cheaper with this GUC is only necessary with this small dataset
 -- if you insert more data, you can still ensure an indexscan is considered
 set random_page_cost=1;
 set seq_page_cost=5;
 -- and I query the table with the index from inside a subquery which will be pulled up inside of a subquery that will stay a subplan
 select (select id1 from (select * from choose_seqscan_t2) foo where id2=choose_seqscan_t1.id2) from choose_seqscan_t1 order by id1;
  id1
 -----
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
 (50 rows)

 explain select (select id1 from (select * from choose_seqscan_t2) foo where id2=choose_seqscan_t1.id2) from choose_seqscan_t1;
                                                QUERY PLAN
 ---------------------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice2; segments: 3)  (cost=0.00..184.75 rows=50 width=4)
    ->  Seq Scan on choose_seqscan_t1  (cost=0.00..184.75 rows=17 width=4)
          SubPlan 1
            ->  Result  (cost=0.00..3.63 rows=1 width=4)
                  Filter: (choose_seqscan_t2.id2 = choose_seqscan_t1.id2)
                  ->  Materialize  (cost=0.00..3.63 rows=1 width=4)
                        ->  Broadcast Motion 3:3  (slice1; segments: 3)  (cost=0.00..3.62 rows=1 width=4)
                              ->  Seq Scan on choose_seqscan_t2  (cost=0.00..3.62 rows=1 width=4)
  Optimizer: Postgres query optimizer
 (9 rows)

 -- then, a sequential scan is chosen because I need a motion to move choose_seqscan_t2
 -- Index Scan can be used on quals that don't depend on the correlation vars, however.
 select t1.id1, (select count(*) from choose_seqscan_t2 t2 where t2.id1 = t1.id1 and t2.id2 = 1) from choose_seqscan_t1 t1 where t1.id1 < 10;
  id1 | count
 -----+-------
    5 |     0
    6 |     0
    9 |     0
    2 |     1
    3 |     0
    4 |     0
    7 |     0
    8 |     0
 (8 rows)

 explain select t1.id1, (select count(*) from choose_seqscan_t2 t2 where t2.id1 = t1.id1 and t2.id2 = 1) from choose_seqscan_t1 t1 where t1.id1 < 10;
                                                        QUERY PLAN
 ------------------------------------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..21.62 rows=9 width=12)
    ->  Seq Scan on choose_seqscan_t1 t1  (cost=0.00..21.62 rows=3 width=12)
          Filter: (id1 < 10)
          SubPlan 1
            ->  Aggregate  (cost=2.19..2.20 rows=1 width=8)
                  ->  Result  (cost=0.14..2.19 rows=1 width=0)
                        Filter: (t2.id1 = t1.id1)
                        ->  Materialize  (cost=0.14..2.18 rows=1 width=4)
                              ->  Broadcast Motion 3:3  (slice2; segments: 3)  (cost=0.14..2.18 rows=1 width=4)
                                    ->  Index Scan using bidx2 on choose_seqscan_t2 t2  (cost=0.14..2.16 rows=1 width=4)
                                          Index Cond: (id2 = 1)
  Optimizer: Postgres query optimizer
 (12 rows)

 -- Test using a join within the subplan. It could perhaps use an Nested Loop Join +
 -- Index Scan to do the join, but at the moment, the planner doesn't consider distributing
 -- the Function Scan.
 select t1.id1, (select count(*) from generate_series(1,5) g, choose_seqscan_t2 t2 where t1.id1 = t2.id1 and t2.id2 = g) from choose_seqscan_t1 t1 where t1.id1 < 10;
  id1 | count
 -----+-------
    2 |     1
    3 |     1
    4 |     1
    7 |     0
    8 |     0
    5 |     1
    6 |     1
    9 |     0
 (8 rows)

 explain select t1.id1, (select count(*) from generate_series(1,5) g, choose_seqscan_t2 t2 where t1.id1 = t2.id1 and t2.id2 = g) from choose_seqscan_t1 t1 where t1.id1 < 10;
                                                       QUERY PLAN
 ----------------------------------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..43.72 rows=8 width=12)
    ->  Seq Scan on choose_seqscan_t1 t1  (cost=0.00..43.57 rows=3 width=12)
          Filter: (id1 < 10)
          SubPlan 1
            ->  Aggregate  (cost=5.57..5.58 rows=1 width=8)
                  ->  Hash Join  (cost=0.12..5.55 rows=2 width=0)
                        Hash Cond: (t2.id2 = g.g)
                        ->  Result  (cost=0.00..5.25 rows=17 width=8)
                              Filter: (t1.id1 = t2.id1)
                              ->  Materialize  (cost=0.00..4.75 rows=17 width=8)
                                    ->  Broadcast Motion 3:3  (slice2; segments: 3)  (cost=0.00..4.50 rows=17 width=8)
                                          ->  Seq Scan on choose_seqscan_t2 t2  (cost=0.00..3.50 rows=17 width=8)
                        ->  Hash  (cost=0.05..0.05 rows=2 width=4)
                              ->  Function Scan on generate_series g  (cost=0.00..0.05 rows=2 width=4)
  Optimizer: Postgres query optimizer
 (15 rows)

 -- Similar, but use a real table. One possible plan for the subplan here would be to do the join
 -- first, and then filter the join result based on the correlation qual "t1.id1 = t2.id1". But
 -- the planner isn't smart enough to generate that plan, currently.
 create table choose_seqscan_t3(id1 int,id2 int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id1' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 create index bidx3 on choose_seqscan_t3(id1);
 insert into choose_seqscan_t3 select i+1,i from generate_series(1,50)i;
 analyze choose_seqscan_t3;
 select t1.id1, (select count(*) from choose_seqscan_t3 t3, choose_seqscan_t2 t2 where t1.id1 = t2.id1 and t3.id1 = t2.id1) from choose_seqscan_t1 t1 where t1.id1 < 10;
  id1 | count
 -----+-------
    5 |     1
    6 |     1
    9 |     1
    2 |     1
    3 |     1
    4 |     1
    7 |     1
    8 |     1
 (8 rows)

 explain select t1.id1, (select count(*) from choose_seqscan_t3 t3, choose_seqscan_t2 t2 where t1.id1 = t2.id1 and t3.id1 = t2.id1) from choose_seqscan_t1 t1 where t1.id1 < 10;
                                                          QUERY PLAN
 ----------------------------------------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..81632653449.62 rows=9 width=8)
    ->  Seq Scan on choose_seqscan_t1 t1  (cost=0.00..81632653449.62 rows=3 width=8)
          Filter: (id1 < 10)
          SubPlan 1
            ->  Aggregate  (cost=10000000047.12..10000000047.14 rows=1 width=8)
                  ->  Nested Loop  (cost=10000000000.00..10000000040.88 rows=2500 width=0)
                        ->  Result  (cost=0.00..4.75 rows=50 width=4)
                              Filter: (t3.id1 = t1.id1)
                              ->  Materialize  (cost=0.00..4.75 rows=50 width=4)
                                    ->  Broadcast Motion 3:3  (slice2; segments: 3)  (cost=0.00..4.50 rows=17 width=4)
                                          ->  Seq Scan on choose_seqscan_t3 t3  (cost=0.00..3.50 rows=17 width=4)
                        ->  Materialize  (cost=0.00..5.00 rows=50 width=4)
                              ->  Result  (cost=0.00..4.75 rows=50 width=4)
                                    Filter: (t2.id1 = t1.id1)
                                    ->  Materialize  (cost=0.00..4.75 rows=50 width=4)
                                          ->  Broadcast Motion 3:3  (slice3; segments: 3)  (cost=0.00..4.50 rows=17 width=4)
                                                ->  Seq Scan on choose_seqscan_t2 t2  (cost=0.00..3.50 rows=17 width=4)
  Optimizer: Postgres query optimizer
 (18 rows)

 -- start_ignore
 drop table if exists choose_seqscan_t1;
 drop table if exists choose_seqscan_t2;
 -- end_ignore
 -- Given I have one replicated table
 create table choose_indexscan_t1(id1 int, id2 int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id1' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 create table choose_indexscan_t2(id1 int, id2 int) distributed replicated;
 -- and it has data
 insert into choose_indexscan_t1 select i+1, i from generate_series(1,20)i;
 insert into choose_indexscan_t2 select i+1, i from generate_series(1,100)i;
 -- and the replicated table has an index on a column which is not the distribution key
 create index choose_indexscan_t2_idx on choose_indexscan_t2(id2);
 -- and the statistics reflect the newly inserted data
 analyze choose_indexscan_t1; analyze choose_indexscan_t2;
 -- making an indexscan cheaper with this GUC is only necessary with this small dataset
 -- if you insert more data, you can still ensure an indexscan is considered
 set random_page_cost=1;
 -- and I query the table with the index from inside a subquery which will be pulled up inside of a subquery that will stay a subplan
 select (select id1 from (select * from choose_indexscan_t2) foo where id2=choose_indexscan_t1.id2) from choose_indexscan_t1 order by id1;
  id1
 -----
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
 (20 rows)

 -- start_ignore
 -- GPDB_96_MERGE_FIXME: we no longer choose an index scan here, because the planner cannot
 -- easily determine that it's safe. See https://github.com/greenplum-db/gpdb/issues/8648.
 -- I hope we can get that back with the 9.6 upper planner pathification.
 -- end_ignore
 explain select (select id1 from (select * from choose_indexscan_t2) foo where id2=choose_indexscan_t1.id2) from choose_indexscan_t1;
                                                 QUERY PLAN
 -----------------------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..93.20 rows=20 width=4)
    ->  Seq Scan on choose_indexscan_t1  (cost=0.00..93.20 rows=7 width=4)
          SubPlan 1
            ->  Result  (cost=0.00..4.50 rows=100 width=8)
                  Filter: (choose_indexscan_t2.id2 = choose_indexscan_t1.id2)
                  ->  Materialize  (cost=0.00..4.50 rows=100 width=8)
                        ->  Broadcast Motion 1:3  (slice2; segments: 1)  (cost=0.00..4.00 rows=100 width=8)
                              ->  Seq Scan on choose_indexscan_t2  (cost=0.00..2.00 rows=100 width=8)
  Optimizer: Postgres query optimizer
 (9 rows)

 -- then an indexscan is chosen because it is correct to do this on a replicated table since no motion is required
 -- Test that Motions are added when you mix replicated tables and catalog
 -- tables in the same query. A replicated table is available on all segments,
 -- but *not* on the QD node, so we need a motion for this, because the catalog
 -- table is scanned in the QD. (Catalog tables are present with same contents
 -- on all segments, too, so we could alternatively perform scan the catalog
 -- table oon one of the segments.)
 -- https://github.com/greenplum-db/gpdb/issues/8648
 create table mytables (tablename text, explanation text) distributed replicated;
 insert into mytables values ('pg_class', 'contains all relations');
 create index on mytables(tablename);
 select c.relname, (select explanation from mytables mt where mt.tablename=c.relname ) from pg_class c where relname = 'pg_class';
  relname  |      explanation
 ----------+------------------------
  pg_class | contains all relations
 (1 row)

 set enable_seqscan=off;
 explain select c.relname, (select explanation from mytables mt where mt.tablename=c.relname ) from pg_class c where relname = 'pg_class';
                                                      QUERY PLAN
 ---------------------------------------------------------------------------------------------------------------------
  Index Only Scan using pg_class_relname_nsp_index on pg_class c  (cost=0.15..10000000002.20 rows=1 width=64)
    Index Cond: (relname = 'pg_class'::name)
    SubPlan 1
      ->  Result  (cost=10000000000.00..10000000001.03 rows=1 width=32)
            Filter: (mt.tablename = (c.relname)::text)
            ->  Materialize  (cost=10000000000.00..10000000001.03 rows=1 width=32)
                  ->  Gather Motion 1:1  (slice1; segments: 1)  (cost=10000000000.00..10000000001.03 rows=1 width=32)
                        ->  Seq Scan on mytables mt  (cost=10000000000.00..10000000001.01 rows=1 width=32)
  Optimizer: Postgres query optimizer
 (9 rows)

 select c.relname, (select explanation from mytables mt where mt.tablename=c.relname ) from pg_class c where relname = 'pg_class';
  relname  |      explanation
 ----------+------------------------
  pg_class | contains all relations
 (1 row)

 reset enable_seqscan;
 -- start_ignore
 drop table if exists choose_indexscan_t1;
 drop table if exists choose_indexscan_t2;
 -- end_ignore
	--
	-- Test correlated subquery in subplan with motion chooses correct scan type
	--
	-- Given I have two distributed tables
	create table choose_seqscan_t1(id1 int,id2 int);
	NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id1' as the Apache Cloudberry data distribution key for this table.
	HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
	create table choose_seqscan_t2(id1 int,id2 int);
	NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id1' as the Apache Cloudberry data distribution key for this table.
	HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
	-- and they have some data
	insert into choose_seqscan_t1 select i+1,i from generate_series(1,50)i;
	insert into choose_seqscan_t2 select i+1,i from generate_series(1,50)i;
	-- and one of the tables has an index on a column which is not the distribution column
	create index bidx2 on choose_seqscan_t2(id2);
	-- and the statistics reflect the newly inserted data
	analyze choose_seqscan_t1; analyze choose_seqscan_t2;
	-- making an indexscan cheaper with this GUC is only necessary with this small dataset
	-- if you insert more data, you can still ensure an indexscan is considered
	set random_page_cost=1;
	set seq_page_cost=5;
	-- and I query the table with the index from inside a subquery which will be pulled up inside of a subquery that will stay a subplan
	select (select id1 from (select * from choose_seqscan_t2) foo where id2=choose_seqscan_t1.id2) from choose_seqscan_t1 order by id1;
	id1
	-----
	2
	3
	4
	5
	6
	7
	8
	9
	10
	11
	12
	13
	14
	15
	16
	17
	18
	19
	20
	21
	22
	23
	24
	25
	26
	27
	28
	29
	30
	31
	32
	33
	34
	35
	36
	37
	38
	39
	40
	41
	42
	43
	44
	45
	46
	47
	48
	49
	50
	51
	(50 rows)

	explain select (select id1 from (select * from choose_seqscan_t2) foo where id2=choose_seqscan_t1.id2) from choose_seqscan_t1;
	QUERY PLAN
	---------------------------------------------------------------------------------------------------------
	Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..184.75 rows=50 width=4)
	-> Seq Scan on choose_seqscan_t1 (cost=0.00..184.75 rows=17 width=4)
	SubPlan 1
	-> Result (cost=0.00..3.63 rows=1 width=4)
	Filter: (choose_seqscan_t2.id2 = choose_seqscan_t1.id2)
	-> Materialize (cost=0.00..3.63 rows=1 width=4)
	-> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..3.62 rows=1 width=4)
	-> Seq Scan on choose_seqscan_t2 (cost=0.00..3.62 rows=1 width=4)
	Optimizer: Postgres query optimizer
	(9 rows)

	-- then, a sequential scan is chosen because I need a motion to move choose_seqscan_t2
	-- Index Scan can be used on quals that don't depend on the correlation vars, however.
	select t1.id1, (select count(*) from choose_seqscan_t2 t2 where t2.id1 = t1.id1 and t2.id2 = 1) from choose_seqscan_t1 t1 where t1.id1 < 10;
	id1 \| count
	-----+-------
	5 \| 0
	6 \| 0
	9 \| 0
	2 \| 1
	3 \| 0
	4 \| 0
	7 \| 0
	8 \| 0
	(8 rows)

	explain select t1.id1, (select count(*) from choose_seqscan_t2 t2 where t2.id1 = t1.id1 and t2.id2 = 1) from choose_seqscan_t1 t1 where t1.id1 < 10;
	QUERY PLAN
	------------------------------------------------------------------------------------------------------------------------
	Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..21.62 rows=9 width=12)
	-> Seq Scan on choose_seqscan_t1 t1 (cost=0.00..21.62 rows=3 width=12)
	Filter: (id1 < 10)
	SubPlan 1
	-> Aggregate (cost=2.19..2.20 rows=1 width=8)
	-> Result (cost=0.14..2.19 rows=1 width=0)
	Filter: (t2.id1 = t1.id1)
	-> Materialize (cost=0.14..2.18 rows=1 width=4)
	-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.14..2.18 rows=1 width=4)
	-> Index Scan using bidx2 on choose_seqscan_t2 t2 (cost=0.14..2.16 rows=1 width=4)
	Index Cond: (id2 = 1)
	Optimizer: Postgres query optimizer
	(12 rows)

	-- Test using a join within the subplan. It could perhaps use an Nested Loop Join +
	-- Index Scan to do the join, but at the moment, the planner doesn't consider distributing
	-- the Function Scan.
	select t1.id1, (select count(*) from generate_series(1,5) g, choose_seqscan_t2 t2 where t1.id1 = t2.id1 and t2.id2 = g) from choose_seqscan_t1 t1 where t1.id1 < 10;
	id1 \| count
	-----+-------
	2 \| 1
	3 \| 1
	4 \| 1
	7 \| 0
	8 \| 0
	5 \| 1
	6 \| 1
	9 \| 0
	(8 rows)

	explain select t1.id1, (select count(*) from generate_series(1,5) g, choose_seqscan_t2 t2 where t1.id1 = t2.id1 and t2.id2 = g) from choose_seqscan_t1 t1 where t1.id1 < 10;
	QUERY PLAN
	----------------------------------------------------------------------------------------------------------------------
	Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..43.72 rows=8 width=12)
	-> Seq Scan on choose_seqscan_t1 t1 (cost=0.00..43.57 rows=3 width=12)
	Filter: (id1 < 10)
	SubPlan 1
	-> Aggregate (cost=5.57..5.58 rows=1 width=8)
	-> Hash Join (cost=0.12..5.55 rows=2 width=0)
	Hash Cond: (t2.id2 = g.g)
	-> Result (cost=0.00..5.25 rows=17 width=8)
	Filter: (t1.id1 = t2.id1)
	-> Materialize (cost=0.00..4.75 rows=17 width=8)
	-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..4.50 rows=17 width=8)
	-> Seq Scan on choose_seqscan_t2 t2 (cost=0.00..3.50 rows=17 width=8)
	-> Hash (cost=0.05..0.05 rows=2 width=4)
	-> Function Scan on generate_series g (cost=0.00..0.05 rows=2 width=4)
	Optimizer: Postgres query optimizer
	(15 rows)

	-- Similar, but use a real table. One possible plan for the subplan here would be to do the join
	-- first, and then filter the join result based on the correlation qual "t1.id1 = t2.id1". But
	-- the planner isn't smart enough to generate that plan, currently.
	create table choose_seqscan_t3(id1 int,id2 int);
	NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id1' as the Apache Cloudberry data distribution key for this table.
	HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
	create index bidx3 on choose_seqscan_t3(id1);
	insert into choose_seqscan_t3 select i+1,i from generate_series(1,50)i;
	analyze choose_seqscan_t3;
	select t1.id1, (select count(*) from choose_seqscan_t3 t3, choose_seqscan_t2 t2 where t1.id1 = t2.id1 and t3.id1 = t2.id1) from choose_seqscan_t1 t1 where t1.id1 < 10;
	id1 \| count
	-----+-------
	5 \| 1
	6 \| 1
	9 \| 1
	2 \| 1
	3 \| 1
	4 \| 1
	7 \| 1
	8 \| 1
	(8 rows)

	explain select t1.id1, (select count(*) from choose_seqscan_t3 t3, choose_seqscan_t2 t2 where t1.id1 = t2.id1 and t3.id1 = t2.id1) from choose_seqscan_t1 t1 where t1.id1 < 10;
	QUERY PLAN
	----------------------------------------------------------------------------------------------------------------------------
	Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..81632653449.62 rows=9 width=8)
	-> Seq Scan on choose_seqscan_t1 t1 (cost=0.00..81632653449.62 rows=3 width=8)
	Filter: (id1 < 10)
	SubPlan 1
	-> Aggregate (cost=10000000047.12..10000000047.14 rows=1 width=8)
	-> Nested Loop (cost=10000000000.00..10000000040.88 rows=2500 width=0)
	-> Result (cost=0.00..4.75 rows=50 width=4)
	Filter: (t3.id1 = t1.id1)
	-> Materialize (cost=0.00..4.75 rows=50 width=4)
	-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..4.50 rows=17 width=4)
	-> Seq Scan on choose_seqscan_t3 t3 (cost=0.00..3.50 rows=17 width=4)
	-> Materialize (cost=0.00..5.00 rows=50 width=4)
	-> Result (cost=0.00..4.75 rows=50 width=4)
	Filter: (t2.id1 = t1.id1)
	-> Materialize (cost=0.00..4.75 rows=50 width=4)
	-> Broadcast Motion 3:3 (slice3; segments: 3) (cost=0.00..4.50 rows=17 width=4)
	-> Seq Scan on choose_seqscan_t2 t2 (cost=0.00..3.50 rows=17 width=4)
	Optimizer: Postgres query optimizer
	(18 rows)

	-- start_ignore
	drop table if exists choose_seqscan_t1;
	drop table if exists choose_seqscan_t2;
	-- end_ignore
	-- Given I have one replicated table
	create table choose_indexscan_t1(id1 int, id2 int);
	NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id1' as the Apache Cloudberry data distribution key for this table.
	HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
	create table choose_indexscan_t2(id1 int, id2 int) distributed replicated;
	-- and it has data
	insert into choose_indexscan_t1 select i+1, i from generate_series(1,20)i;
	insert into choose_indexscan_t2 select i+1, i from generate_series(1,100)i;
	-- and the replicated table has an index on a column which is not the distribution key
	create index choose_indexscan_t2_idx on choose_indexscan_t2(id2);
	-- and the statistics reflect the newly inserted data
	analyze choose_indexscan_t1; analyze choose_indexscan_t2;
	-- making an indexscan cheaper with this GUC is only necessary with this small dataset
	-- if you insert more data, you can still ensure an indexscan is considered
	set random_page_cost=1;
	-- and I query the table with the index from inside a subquery which will be pulled up inside of a subquery that will stay a subplan
	select (select id1 from (select * from choose_indexscan_t2) foo where id2=choose_indexscan_t1.id2) from choose_indexscan_t1 order by id1;
	id1
	-----
	2
	3
	4
	5
	6
	7
	8
	9
	10
	11
	12
	13
	14
	15
	16
	17
	18
	19
	20
	21
	(20 rows)

	-- start_ignore
	-- GPDB_96_MERGE_FIXME: we no longer choose an index scan here, because the planner cannot
	-- easily determine that it's safe. See https://github.com/greenplum-db/gpdb/issues/8648.
	-- I hope we can get that back with the 9.6 upper planner pathification.
	-- end_ignore
	explain select (select id1 from (select * from choose_indexscan_t2) foo where id2=choose_indexscan_t1.id2) from choose_indexscan_t1;
	QUERY PLAN
	-----------------------------------------------------------------------------------------------------------
	Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..93.20 rows=20 width=4)
	-> Seq Scan on choose_indexscan_t1 (cost=0.00..93.20 rows=7 width=4)
	SubPlan 1
	-> Result (cost=0.00..4.50 rows=100 width=8)
	Filter: (choose_indexscan_t2.id2 = choose_indexscan_t1.id2)
	-> Materialize (cost=0.00..4.50 rows=100 width=8)
	-> Broadcast Motion 1:3 (slice2; segments: 1) (cost=0.00..4.00 rows=100 width=8)
	-> Seq Scan on choose_indexscan_t2 (cost=0.00..2.00 rows=100 width=8)
	Optimizer: Postgres query optimizer
	(9 rows)

	-- then an indexscan is chosen because it is correct to do this on a replicated table since no motion is required
	-- Test that Motions are added when you mix replicated tables and catalog
	-- tables in the same query. A replicated table is available on all segments,
	-- but not on the QD node, so we need a motion for this, because the catalog
	-- table is scanned in the QD. (Catalog tables are present with same contents
	-- on all segments, too, so we could alternatively perform scan the catalog
	-- table oon one of the segments.)
	-- https://github.com/greenplum-db/gpdb/issues/8648
	create table mytables (tablename text, explanation text) distributed replicated;
	insert into mytables values ('pg_class', 'contains all relations');
	create index on mytables(tablename);
	select c.relname, (select explanation from mytables mt where mt.tablename=c.relname ) from pg_class c where relname = 'pg_class';
	relname \| explanation
	----------+------------------------
	pg_class \| contains all relations
	(1 row)

	set enable_seqscan=off;
	explain select c.relname, (select explanation from mytables mt where mt.tablename=c.relname ) from pg_class c where relname = 'pg_class';
	QUERY PLAN
	---------------------------------------------------------------------------------------------------------------------
	Index Only Scan using pg_class_relname_nsp_index on pg_class c (cost=0.15..10000000002.20 rows=1 width=64)
	Index Cond: (relname = 'pg_class'::name)
	SubPlan 1
	-> Result (cost=10000000000.00..10000000001.03 rows=1 width=32)
	Filter: (mt.tablename = (c.relname)::text)
	-> Materialize (cost=10000000000.00..10000000001.03 rows=1 width=32)
	-> Gather Motion 1:1 (slice1; segments: 1) (cost=10000000000.00..10000000001.03 rows=1 width=32)
	-> Seq Scan on mytables mt (cost=10000000000.00..10000000001.01 rows=1 width=32)
	Optimizer: Postgres query optimizer
	(9 rows)

	select c.relname, (select explanation from mytables mt where mt.tablename=c.relname ) from pg_class c where relname = 'pg_class';
	relname \| explanation
	----------+------------------------
	pg_class \| contains all relations
	(1 row)

	reset enable_seqscan;
	-- start_ignore
	drop table if exists choose_indexscan_t1;
	drop table if exists choose_indexscan_t2;
	-- end_ignore