src/test/regress/expected/subselect_gp_indexes_optimizer.out - cloudberry - Git at Google

 --
 -- Test correlated subquery in subplan with motion chooses correct scan type
 --
 -- Given I have two distributed tables
 create table choose_seqscan_t1(id1 int,id2 int);
 create table choose_seqscan_t2(id1 int,id2 int);
 -- and they have some data
 insert into choose_seqscan_t1 select i+1,i from generate_series(1,50)i;
 insert into choose_seqscan_t2 select i+1,i from generate_series(1,50)i;
 -- and one of the tables has an index on a column which is not the distribution column
 create index bidx2 on choose_seqscan_t2(id2);
 -- and the statistics reflect the newly inserted data
 analyze choose_seqscan_t1; analyze choose_seqscan_t2;
 -- making an indexscan cheaper with this GUC is only necessary with this small dataset
 -- if you insert more data, you can still ensure an indexscan is considered
 set random_page_cost=1;
 set seq_page_cost=5;
 -- and I query the table with the index from inside a subquery which will be pulled up inside of a subquery that will stay a subplan
 select (select id1 from (select * from choose_seqscan_t2) foo where id2=choose_seqscan_t1.id2) from choose_seqscan_t1 order by id1;
  id1
 -----
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
 (50 rows)

 explain select (select id1 from (select * from choose_seqscan_t2) foo where id2=choose_seqscan_t1.id2) from choose_seqscan_t1;
                                                  QUERY PLAN
 ------------------------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice2; segments: 3)  (cost=0.00..1324216.68 rows=50 width=4)
    ->  Seq Scan on choose_seqscan_t1  (cost=0.00..1324216.68 rows=334 width=4)
          SubPlan 1
            ->  Result  (cost=0.00..431.17 rows=1 width=4)
                  Filter: (choose_seqscan_t2.id2 = choose_seqscan_t1.id2)
                  ->  Materialize  (cost=0.00..431.01 rows=50 width=8)
                        ->  Broadcast Motion 3:3  (slice1; segments: 3)  (cost=0.00..431.01 rows=50 width=8)
                              ->  Seq Scan on choose_seqscan_t2  (cost=0.00..431.00 rows=17 width=8)
  Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
 (9 rows)

 -- then, a sequential scan is chosen because I need a motion to move choose_seqscan_t2
 -- Index Scan can be used on quals that don't depend on the correlation vars, however.
 select t1.id1, (select count(*) from choose_seqscan_t2 t2 where t2.id1 = t1.id1 and t2.id2 = 1) from choose_seqscan_t1 t1 where t1.id1 < 10;
  id1 | count
 -----+-------
    5 |     0
    6 |     0
    9 |     0
    2 |     1
    3 |     0
    4 |     0
    7 |     0
    8 |     0
 (8 rows)

 explain select t1.id1, (select count(*) from choose_seqscan_t2 t2 where t2.id1 = t1.id1 and t2.id2 = 1) from choose_seqscan_t1 t1 where t1.id1 < 10;
                                                  QUERY PLAN
 -------------------------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..437.00 rows=9 width=12)
    ->  Hash Left Join  (cost=0.00..437.00 rows=3 width=12)
          Hash Cond: (t1.id1 = t2.id1)
          ->  Seq Scan on choose_seqscan_t1 t1  (cost=0.00..431.00 rows=3 width=4)
                Filter: (id1 < 10)
          ->  Hash  (cost=6.00..6.00 rows=1 width=12)
                ->  GroupAggregate  (cost=0.00..6.00 rows=1 width=12)
                      Group Key: t2.id1
                      ->  Sort  (cost=0.00..6.00 rows=1 width=4)
                            Sort Key: t2.id1
                            ->  Index Scan using bidx2 on choose_seqscan_t2 t2  (cost=0.00..6.00 rows=1 width=4)
                                  Index Cond: (id2 = 1)
  Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
 (13 rows)

 -- Test using a join within the subplan. It could perhaps use an Nested Loop Join +
 -- Index Scan to do the join, but at the moment, the planner doesn't consider distributing
 -- the Function Scan.
 select t1.id1, (select count(*) from generate_series(1,5) g, choose_seqscan_t2 t2 where t1.id1 = t2.id1 and t2.id2 = g) from choose_seqscan_t1 t1 where t1.id1 < 10;
  id1 | count
 -----+-------
    2 |     1
    3 |     1
    4 |     1
    7 |     0
    8 |     0
    5 |     1
    6 |     1
    9 |     0
 (8 rows)

 explain select t1.id1, (select count(*) from generate_series(1,5) g, choose_seqscan_t2 t2 where t1.id1 = t2.id1 and t2.id2 = g) from choose_seqscan_t1 t1 where t1.id1 < 10;
                                                                 QUERY PLAN
 ------------------------------------------------------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..862.12 rows=9 width=12)
    ->  Hash Left Join  (cost=0.00..862.12 rows=3 width=12)
          Hash Cond: (t1.id1 = t2.id1)
          ->  Seq Scan on choose_seqscan_t1 t1  (cost=0.00..431.00 rows=3 width=4)
                Filter: (id1 < 10)
          ->  Hash  (cost=431.12..431.12 rows=3 width=12)
                ->  Finalize GroupAggregate  (cost=0.00..431.12 rows=3 width=12)
                      Group Key: t2.id1
                      ->  Sort  (cost=0.00..431.12 rows=3 width=12)
                            Sort Key: t2.id1
                            ->  Redistribute Motion 3:3  (slice2; segments: 3)  (cost=0.00..431.12 rows=3 width=12)
                                  Hash Key: t2.id1
                                  ->  Streaming Partial HashAggregate  (cost=0.00..431.12 rows=3 width=12)
                                        Group Key: t2.id1
                                        ->  Hash Join  (cost=0.00..431.08 rows=334 width=4)
                                              Hash Cond: (generate_series.generate_series = t2.id2)
                                              ->  Result  (cost=0.00..0.01 rows=334 width=4)
                                                    ->  Function Scan on generate_series  (cost=0.00..0.00 rows=334 width=4)
                                              ->  Hash  (cost=431.00..431.00 rows=3 width=8)
                                                    ->  Redistribute Motion 3:3  (slice3; segments: 3)  (cost=0.00..431.00 rows=3 width=8)
                                                          Hash Key: t2.id2
                                                          ->  Seq Scan on choose_seqscan_t2 t2  (cost=0.00..431.00 rows=3 width=8)
                                                                Filter: (id1 < 10)
  Optimizer: Pivotal Optimizer (GPORCA)
 (24 rows)

 -- Similar, but use a real table. One possible plan for the subplan here would be to do the join
 -- first, and then filter the join result based on the correlation qual "t1.id1 = t2.id1". But
 -- the planner isn't smart enough to generate that plan, currently.
 create table choose_seqscan_t3(id1 int,id2 int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id1' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 create index bidx3 on choose_seqscan_t3(id1);
 insert into choose_seqscan_t3 select i+1,i from generate_series(1,50)i;
 analyze choose_seqscan_t3;
 select t1.id1, (select count(*) from choose_seqscan_t3 t3, choose_seqscan_t2 t2 where t1.id1 = t2.id1 and t3.id1 = t2.id1) from choose_seqscan_t1 t1 where t1.id1 < 10;
  id1 | count
 -----+-------
    5 |     1
    6 |     1
    9 |     1
    2 |     1
    3 |     1
    4 |     1
    7 |     1
    8 |     1
 (8 rows)

 explain select t1.id1, (select count(*) from choose_seqscan_t3 t3, choose_seqscan_t2 t2 where t1.id1 = t2.id1 and t3.id1 = t2.id1) from choose_seqscan_t1 t1 where t1.id1 < 10;
                                                     QUERY PLAN
 -------------------------------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..868.01 rows=9 width=12)
    ->  Hash Left Join  (cost=0.00..868.01 rows=3 width=12)
          Hash Cond: (t1.id1 = t2.id1)
          ->  Seq Scan on choose_seqscan_t1 t1  (cost=0.00..431.00 rows=3 width=4)
                Filter: (id1 < 10)
          ->  Hash  (cost=437.00..437.00 rows=3 width=12)
                ->  GroupAggregate  (cost=0.00..437.00 rows=3 width=12)
                      Group Key: t2.id1
                      ->  Sort  (cost=0.00..437.00 rows=3 width=4)
                            Sort Key: t2.id1
                            ->  Hash Join  (cost=0.00..437.00 rows=3 width=4)
                                  Hash Cond: (t3.id1 = t2.id1)
                                  ->  Index Scan using bidx3 on choose_seqscan_t3 t3  (cost=0.00..6.00 rows=3 width=4)
                                        Index Cond: (id1 < 10)
                                  ->  Hash  (cost=431.00..431.00 rows=3 width=4)
                                        ->  Seq Scan on choose_seqscan_t2 t2  (cost=0.00..431.00 rows=3 width=4)
                                              Filter: (id1 < 10)
  Optimizer: Pivotal Optimizer (GPORCA)
 (18 rows)

 -- start_ignore
 drop table if exists choose_seqscan_t1;
 drop table if exists choose_seqscan_t2;
 -- end_ignore
 -- Given I have one replicated table
 create table choose_indexscan_t1(id1 int, id2 int);
 create table choose_indexscan_t2(id1 int, id2 int) distributed replicated;
 -- and it has data
 insert into choose_indexscan_t1 select i+1, i from generate_series(1,20)i;
 insert into choose_indexscan_t2 select i+1, i from generate_series(1,100)i;
 -- and the replicated table has an index on a column which is not the distribution key
 create index choose_indexscan_t2_idx on choose_indexscan_t2(id2);
 -- and the statistics reflect the newly inserted data
 analyze choose_indexscan_t1; analyze choose_indexscan_t2;
 -- making an indexscan cheaper with this GUC is only necessary with this small dataset
 -- if you insert more data, you can still ensure an indexscan is considered
 set random_page_cost=1;
 -- and I query the table with the index from inside a subquery which will be pulled up inside of a subquery that will stay a subplan
 select (select id1 from (select * from choose_indexscan_t2) foo where id2=choose_indexscan_t1.id2) from choose_indexscan_t1 order by id1;
  id1
 -----
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
 (20 rows)

 explain select (select id1 from (select * from choose_indexscan_t2) foo where id2=choose_indexscan_t1.id2) from choose_indexscan_t1;
                                     QUERY PLAN
 -----------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..1324176.46 rows=20 width=4)
    ->  Seq Scan on choose_indexscan_t1  (cost=0.00..1324176.46 rows=334 width=4)
          SubPlan 1
            ->  Seq Scan on choose_indexscan_t2  (cost=0.00..431.13 rows=1 width=4)
                  Filter: (id2 = choose_indexscan_t1.id2)
  Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
 (6 rows)

 -- then an indexscan is chosen because it is correct to do this on a replicated table since no motion is required
 -- Test that Motions are added when you mix replicated tables and catalog
 -- tables in the same query. A replicated table is available on all segments,
 -- but *not* on the QD node, so we need a motion for this, because the catalog
 -- table is scanned in the QD. (Catalog tables are present with same contents
 -- on all segments, too, so we could alternatively perform scan the catalog
 -- table oon one of the segments.)
 -- https://github.com/greenplum-db/gpdb/issues/8648
 create table mytables (tablename text, explanation text) distributed replicated;
 insert into mytables values ('pg_class', 'contains all relations');
 create index on mytables(tablename);
 select c.relname, (select explanation from mytables mt where mt.tablename=c.relname ) from pg_class c where relname = 'pg_class';
  relname  |      explanation
 ----------+------------------------
  pg_class | contains all relations
 (1 row)

 set enable_seqscan=off;
 explain select c.relname, (select explanation from mytables mt where mt.tablename=c.relname ) from pg_class c where relname = 'pg_class';
                                                      QUERY PLAN
 ---------------------------------------------------------------------------------------------------------------------
  Index Only Scan using pg_class_relname_nsp_index on pg_class c  (cost=0.15..10000000002.20 rows=1 width=64)
    Index Cond: (relname = 'pg_class'::name)
    SubPlan 1
      ->  Result  (cost=10000000000.00..10000000001.03 rows=1 width=32)
            Filter: (mt.tablename = (c.relname)::text)
            ->  Materialize  (cost=10000000000.00..10000000001.03 rows=1 width=32)
                  ->  Gather Motion 1:1  (slice1; segments: 1)  (cost=10000000000.00..10000000001.03 rows=1 width=32)
                        ->  Seq Scan on mytables mt  (cost=10000000000.00..10000000001.01 rows=1 width=32)
  Optimizer: Postgres query optimizer
 (9 rows)

 select c.relname, (select explanation from mytables mt where mt.tablename=c.relname ) from pg_class c where relname = 'pg_class';
  relname  |      explanation
 ----------+------------------------
  pg_class | contains all relations
 (1 row)

 reset enable_seqscan;
 -- start_ignore
 drop table if exists choose_indexscan_t1;
 drop table if exists choose_indexscan_t2;
 -- end_ignore
	--
	-- Test correlated subquery in subplan with motion chooses correct scan type
	--
	-- Given I have two distributed tables
	create table choose_seqscan_t1(id1 int,id2 int);
	create table choose_seqscan_t2(id1 int,id2 int);
	-- and they have some data
	insert into choose_seqscan_t1 select i+1,i from generate_series(1,50)i;
	insert into choose_seqscan_t2 select i+1,i from generate_series(1,50)i;
	-- and one of the tables has an index on a column which is not the distribution column
	create index bidx2 on choose_seqscan_t2(id2);
	-- and the statistics reflect the newly inserted data
	analyze choose_seqscan_t1; analyze choose_seqscan_t2;
	-- making an indexscan cheaper with this GUC is only necessary with this small dataset
	-- if you insert more data, you can still ensure an indexscan is considered
	set random_page_cost=1;
	set seq_page_cost=5;
	-- and I query the table with the index from inside a subquery which will be pulled up inside of a subquery that will stay a subplan
	select (select id1 from (select * from choose_seqscan_t2) foo where id2=choose_seqscan_t1.id2) from choose_seqscan_t1 order by id1;
	id1
	-----
	2
	3
	4
	5
	6
	7
	8
	9
	10
	11
	12
	13
	14
	15
	16
	17
	18
	19
	20
	21
	22
	23
	24
	25
	26
	27
	28
	29
	30
	31
	32
	33
	34
	35
	36
	37
	38
	39
	40
	41
	42
	43
	44
	45
	46
	47
	48
	49
	50
	51
	(50 rows)

	explain select (select id1 from (select * from choose_seqscan_t2) foo where id2=choose_seqscan_t1.id2) from choose_seqscan_t1;
	QUERY PLAN
	------------------------------------------------------------------------------------------------------------
	Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..1324216.68 rows=50 width=4)
	-> Seq Scan on choose_seqscan_t1 (cost=0.00..1324216.68 rows=334 width=4)
	SubPlan 1
	-> Result (cost=0.00..431.17 rows=1 width=4)
	Filter: (choose_seqscan_t2.id2 = choose_seqscan_t1.id2)
	-> Materialize (cost=0.00..431.01 rows=50 width=8)
	-> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..431.01 rows=50 width=8)
	-> Seq Scan on choose_seqscan_t2 (cost=0.00..431.00 rows=17 width=8)
	Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
	(9 rows)

	-- then, a sequential scan is chosen because I need a motion to move choose_seqscan_t2
	-- Index Scan can be used on quals that don't depend on the correlation vars, however.
	select t1.id1, (select count(*) from choose_seqscan_t2 t2 where t2.id1 = t1.id1 and t2.id2 = 1) from choose_seqscan_t1 t1 where t1.id1 < 10;
	id1 \| count
	-----+-------
	5 \| 0
	6 \| 0
	9 \| 0
	2 \| 1
	3 \| 0
	4 \| 0
	7 \| 0
	8 \| 0
	(8 rows)

	explain select t1.id1, (select count(*) from choose_seqscan_t2 t2 where t2.id1 = t1.id1 and t2.id2 = 1) from choose_seqscan_t1 t1 where t1.id1 < 10;
	QUERY PLAN
	-------------------------------------------------------------------------------------------------------------
	Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..437.00 rows=9 width=12)
	-> Hash Left Join (cost=0.00..437.00 rows=3 width=12)
	Hash Cond: (t1.id1 = t2.id1)
	-> Seq Scan on choose_seqscan_t1 t1 (cost=0.00..431.00 rows=3 width=4)
	Filter: (id1 < 10)
	-> Hash (cost=6.00..6.00 rows=1 width=12)
	-> GroupAggregate (cost=0.00..6.00 rows=1 width=12)
	Group Key: t2.id1
	-> Sort (cost=0.00..6.00 rows=1 width=4)
	Sort Key: t2.id1
	-> Index Scan using bidx2 on choose_seqscan_t2 t2 (cost=0.00..6.00 rows=1 width=4)
	Index Cond: (id2 = 1)
	Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
	(13 rows)

	-- Test using a join within the subplan. It could perhaps use an Nested Loop Join +
	-- Index Scan to do the join, but at the moment, the planner doesn't consider distributing
	-- the Function Scan.
	select t1.id1, (select count(*) from generate_series(1,5) g, choose_seqscan_t2 t2 where t1.id1 = t2.id1 and t2.id2 = g) from choose_seqscan_t1 t1 where t1.id1 < 10;
	id1 \| count
	-----+-------
	2 \| 1
	3 \| 1
	4 \| 1
	7 \| 0
	8 \| 0
	5 \| 1
	6 \| 1
	9 \| 0
	(8 rows)

	explain select t1.id1, (select count(*) from generate_series(1,5) g, choose_seqscan_t2 t2 where t1.id1 = t2.id1 and t2.id2 = g) from choose_seqscan_t1 t1 where t1.id1 < 10;
	QUERY PLAN
	------------------------------------------------------------------------------------------------------------------------------------------
	Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.12 rows=9 width=12)
	-> Hash Left Join (cost=0.00..862.12 rows=3 width=12)
	Hash Cond: (t1.id1 = t2.id1)
	-> Seq Scan on choose_seqscan_t1 t1 (cost=0.00..431.00 rows=3 width=4)
	Filter: (id1 < 10)
	-> Hash (cost=431.12..431.12 rows=3 width=12)
	-> Finalize GroupAggregate (cost=0.00..431.12 rows=3 width=12)
	Group Key: t2.id1
	-> Sort (cost=0.00..431.12 rows=3 width=12)
	Sort Key: t2.id1
	-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.12 rows=3 width=12)
	Hash Key: t2.id1
	-> Streaming Partial HashAggregate (cost=0.00..431.12 rows=3 width=12)
	Group Key: t2.id1
	-> Hash Join (cost=0.00..431.08 rows=334 width=4)
	Hash Cond: (generate_series.generate_series = t2.id2)
	-> Result (cost=0.00..0.01 rows=334 width=4)
	-> Function Scan on generate_series (cost=0.00..0.00 rows=334 width=4)
	-> Hash (cost=431.00..431.00 rows=3 width=8)
	-> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..431.00 rows=3 width=8)
	Hash Key: t2.id2
	-> Seq Scan on choose_seqscan_t2 t2 (cost=0.00..431.00 rows=3 width=8)
	Filter: (id1 < 10)
	Optimizer: Pivotal Optimizer (GPORCA)
	(24 rows)

	-- Similar, but use a real table. One possible plan for the subplan here would be to do the join
	-- first, and then filter the join result based on the correlation qual "t1.id1 = t2.id1". But
	-- the planner isn't smart enough to generate that plan, currently.
	create table choose_seqscan_t3(id1 int,id2 int);
	NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id1' as the Apache Cloudberry data distribution key for this table.
	HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
	create index bidx3 on choose_seqscan_t3(id1);
	insert into choose_seqscan_t3 select i+1,i from generate_series(1,50)i;
	analyze choose_seqscan_t3;
	select t1.id1, (select count(*) from choose_seqscan_t3 t3, choose_seqscan_t2 t2 where t1.id1 = t2.id1 and t3.id1 = t2.id1) from choose_seqscan_t1 t1 where t1.id1 < 10;
	id1 \| count
	-----+-------
	5 \| 1
	6 \| 1
	9 \| 1
	2 \| 1
	3 \| 1
	4 \| 1
	7 \| 1
	8 \| 1
	(8 rows)

	explain select t1.id1, (select count(*) from choose_seqscan_t3 t3, choose_seqscan_t2 t2 where t1.id1 = t2.id1 and t3.id1 = t2.id1) from choose_seqscan_t1 t1 where t1.id1 < 10;
	QUERY PLAN
	-------------------------------------------------------------------------------------------------------------------
	Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..868.01 rows=9 width=12)
	-> Hash Left Join (cost=0.00..868.01 rows=3 width=12)
	Hash Cond: (t1.id1 = t2.id1)
	-> Seq Scan on choose_seqscan_t1 t1 (cost=0.00..431.00 rows=3 width=4)
	Filter: (id1 < 10)
	-> Hash (cost=437.00..437.00 rows=3 width=12)
	-> GroupAggregate (cost=0.00..437.00 rows=3 width=12)
	Group Key: t2.id1
	-> Sort (cost=0.00..437.00 rows=3 width=4)
	Sort Key: t2.id1
	-> Hash Join (cost=0.00..437.00 rows=3 width=4)
	Hash Cond: (t3.id1 = t2.id1)
	-> Index Scan using bidx3 on choose_seqscan_t3 t3 (cost=0.00..6.00 rows=3 width=4)
	Index Cond: (id1 < 10)
	-> Hash (cost=431.00..431.00 rows=3 width=4)
	-> Seq Scan on choose_seqscan_t2 t2 (cost=0.00..431.00 rows=3 width=4)
	Filter: (id1 < 10)
	Optimizer: Pivotal Optimizer (GPORCA)
	(18 rows)

	-- start_ignore
	drop table if exists choose_seqscan_t1;
	drop table if exists choose_seqscan_t2;
	-- end_ignore
	-- Given I have one replicated table
	create table choose_indexscan_t1(id1 int, id2 int);
	create table choose_indexscan_t2(id1 int, id2 int) distributed replicated;
	-- and it has data
	insert into choose_indexscan_t1 select i+1, i from generate_series(1,20)i;
	insert into choose_indexscan_t2 select i+1, i from generate_series(1,100)i;
	-- and the replicated table has an index on a column which is not the distribution key
	create index choose_indexscan_t2_idx on choose_indexscan_t2(id2);
	-- and the statistics reflect the newly inserted data
	analyze choose_indexscan_t1; analyze choose_indexscan_t2;
	-- making an indexscan cheaper with this GUC is only necessary with this small dataset
	-- if you insert more data, you can still ensure an indexscan is considered
	set random_page_cost=1;
	-- and I query the table with the index from inside a subquery which will be pulled up inside of a subquery that will stay a subplan
	select (select id1 from (select * from choose_indexscan_t2) foo where id2=choose_indexscan_t1.id2) from choose_indexscan_t1 order by id1;
	id1
	-----
	2
	3
	4
	5
	6
	7
	8
	9
	10
	11
	12
	13
	14
	15
	16
	17
	18
	19
	20
	21
	(20 rows)

	explain select (select id1 from (select * from choose_indexscan_t2) foo where id2=choose_indexscan_t1.id2) from choose_indexscan_t1;
	QUERY PLAN
	-----------------------------------------------------------------------------------
	Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324176.46 rows=20 width=4)
	-> Seq Scan on choose_indexscan_t1 (cost=0.00..1324176.46 rows=334 width=4)
	SubPlan 1
	-> Seq Scan on choose_indexscan_t2 (cost=0.00..431.13 rows=1 width=4)
	Filter: (id2 = choose_indexscan_t1.id2)
	Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
	(6 rows)

	-- then an indexscan is chosen because it is correct to do this on a replicated table since no motion is required
	-- Test that Motions are added when you mix replicated tables and catalog
	-- tables in the same query. A replicated table is available on all segments,
	-- but not on the QD node, so we need a motion for this, because the catalog
	-- table is scanned in the QD. (Catalog tables are present with same contents
	-- on all segments, too, so we could alternatively perform scan the catalog
	-- table oon one of the segments.)
	-- https://github.com/greenplum-db/gpdb/issues/8648
	create table mytables (tablename text, explanation text) distributed replicated;
	insert into mytables values ('pg_class', 'contains all relations');
	create index on mytables(tablename);
	select c.relname, (select explanation from mytables mt where mt.tablename=c.relname ) from pg_class c where relname = 'pg_class';
	relname \| explanation
	----------+------------------------
	pg_class \| contains all relations
	(1 row)

	set enable_seqscan=off;
	explain select c.relname, (select explanation from mytables mt where mt.tablename=c.relname ) from pg_class c where relname = 'pg_class';
	QUERY PLAN
	---------------------------------------------------------------------------------------------------------------------
	Index Only Scan using pg_class_relname_nsp_index on pg_class c (cost=0.15..10000000002.20 rows=1 width=64)
	Index Cond: (relname = 'pg_class'::name)
	SubPlan 1
	-> Result (cost=10000000000.00..10000000001.03 rows=1 width=32)
	Filter: (mt.tablename = (c.relname)::text)
	-> Materialize (cost=10000000000.00..10000000001.03 rows=1 width=32)
	-> Gather Motion 1:1 (slice1; segments: 1) (cost=10000000000.00..10000000001.03 rows=1 width=32)
	-> Seq Scan on mytables mt (cost=10000000000.00..10000000001.01 rows=1 width=32)
	Optimizer: Postgres query optimizer
	(9 rows)

	select c.relname, (select explanation from mytables mt where mt.tablename=c.relname ) from pg_class c where relname = 'pg_class';
	relname \| explanation
	----------+------------------------
	pg_class \| contains all relations
	(1 row)

	reset enable_seqscan;
	-- start_ignore
	drop table if exists choose_indexscan_t1;
	drop table if exists choose_indexscan_t2;
	-- end_ignore