src/test/regress/expected/gp_index_costing_optimizer.out - cloudberry - Git at Google

 --
 -- Test Cases to check Index scan, Index Only scan,
 -- Dynamic Index scan & Dynamic Index only scan costing
 --
 ---------------------------------------------------------------------
 -- Scenario1: Verify impact of unused index column in predicate,
 -- which helps in selecting best index in a multiple index scenario.
 ---------------------------------------------------------------------
     -- Case A : Index - idx_ab, Columns used in predicate : a
     -- Case B : Index - idx_ab, Columns used in predicate : b
     -- In the above two cases, the index scan using idx_ab should be higher
     -- in CASE B, compared for CASE A, as the in Case-B, index column
     -- of higher significance is unused  i.e. col 'a'
 drop table if exists foo;
 NOTICE:  table "foo" does not exist, skipping
 create table foo (a int , b int, c int) distributed by (a);
 insert into foo select i,i,i from generate_series(1,5)i;
 -- Adding 1,1,1 in the table so that we have different values for NDV and table rows.
 insert into foo select 1,1,1*i/i from generate_series(1,5)i;
 -- 1.1 Test case for index scans
 CREATE INDEX idx_foo_ab ON foo USING btree(a,b);
 CREATE INDEX idx_foo_ba ON foo USING btree(b,a);
 analyze foo;
     -- index idx_foo_ab should be selected
 explain select * from foo where a=1;
                                  QUERY PLAN
 -----------------------------------------------------------------------------
  Gather Motion 1:1  (slice1; segments: 1)  (cost=0.00..6.00 rows=6 width=12)
    ->  Index Scan using idx_foo_ab on foo  (cost=0.00..6.00 rows=2 width=12)
          Index Cond: (a = 1)
  Optimizer: GPORCA
 (4 rows)

     -- Index idx_foo_ba should be selected
 explain select * from foo where b=1;
                                  QUERY PLAN
 -----------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..6.00 rows=6 width=12)
    ->  Index Scan using idx_foo_ba on foo  (cost=0.00..6.00 rows=2 width=12)
          Index Cond: (b = 1)
  Optimizer: GPORCA
 (4 rows)

 drop index idx_foo_ab;
 drop index idx_foo_ba;
 -- 1.2 Test case for index only scans
 CREATE INDEX idx_foo_abc ON foo USING btree(a,b,c);
 CREATE INDEX idx_foo_cba ON foo USING btree(c,b,a);
 vacuum analyze foo;
     --  Index idx_foo_abc should be selected
 explain select * from foo where a=1;
                                     QUERY PLAN
 -----------------------------------------------------------------------------------
  Gather Motion 1:1  (slice1; segments: 1)  (cost=0.00..6.00 rows=6 width=12)
    ->  Index Only Scan using idx_foo_abc on foo  (cost=0.00..6.00 rows=2 width=12)
          Index Cond: (a = 1)
  Optimizer: GPORCA
 (4 rows)

     --  Index idx_foo_cba should be selected
 explain select * from foo where c=1;
                                     QUERY PLAN
 -----------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..6.00 rows=6 width=12)
    ->  Index Only Scan using idx_foo_cba on foo  (cost=0.00..6.00 rows=2 width=12)
          Index Cond: (c = 1)
  Optimizer: GPORCA
 (4 rows)

 drop index idx_foo_abc;
 drop index idx_foo_cba;
 -- 1.3 Test case for dynamic index scans
 drop table if exists foo;
 drop table if exists bar_PT;
 NOTICE:  table "bar_pt" does not exist, skipping
 create table foo (a int , b int, c int) distributed by (a);
 insert into foo select i,i,i from generate_series(1,5)i;
 analyze foo;
     -- Partitioned Table
 create table bar_PT (a int, b int, c int) partition by range(a) (start (1) inclusive end (12) every (2)) distributed by (a);
 insert into bar_PT select i,i,i from generate_series(1,11)i;
 CREATE INDEX idx_bar_PT_ab ON bar_PT USING btree(a,b);
 CREATE INDEX idx_bar_PT_ba ON bar_PT USING btree(b,a);
 analyze bar_PT;
     --  Index idx_bar_PT_ab should be selected
 explain select * from bar_PT join foo on bar_PT.a =foo.a;
                                           QUERY PLAN
 -----------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..461.00 rows=5 width=24)
    ->  Nested Loop  (cost=0.00..461.00 rows=2 width=24)
          Join Filter: true
          ->  Seq Scan on foo  (cost=0.00..431.00 rows=2 width=12)
          ->  Dynamic Index Scan on idx_bar_pt_ab on bar_pt  (cost=0.00..30.00 rows=1 width=12)
                Index Cond: (a = foo.a)
                Number of partitions to scan: 6 (out of 6)
  Optimizer: GPORCA
 (8 rows)

     --  Index idx_bar_PT_ba should be selected
 explain select * from bar_PT join foo on bar_PT.b =foo.b;
                                           QUERY PLAN
 -----------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..461.00 rows=5 width=24)
    ->  Nested Loop  (cost=0.00..461.00 rows=2 width=24)
          Join Filter: true
          ->  Broadcast Motion 3:3  (slice2; segments: 3)  (cost=0.00..431.00 rows=5 width=12)
                ->  Seq Scan on foo  (cost=0.00..431.00 rows=2 width=12)
          ->  Dynamic Index Scan on idx_bar_pt_ba on bar_pt  (cost=0.00..30.00 rows=1 width=12)
                Index Cond: (b = foo.b)
                Number of partitions to scan: 6 (out of 6)
  Optimizer: GPORCA
 (9 rows)

 drop index idx_bar_PT_ab;
 drop index idx_bar_PT_ba;
 -- 1.4 Test case for dynamic index only scans
 CREATE INDEX idx_bar_PT_abc ON bar_PT USING btree(a,b,c);
 CREATE INDEX idx_bar_PT_cba ON bar_PT USING btree(c,b,a);
 vacuum analyze bar_PT;
     --  Index idx_bar_PT_abc should be selected
 explain select * from bar_PT join foo on bar_PT.a =foo.a;
                                              QUERY PLAN
 -----------------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..461.00 rows=5 width=24)
    ->  Nested Loop  (cost=0.00..461.00 rows=2 width=24)
          Join Filter: true
          ->  Seq Scan on foo  (cost=0.00..431.00 rows=2 width=12)
          ->  Dynamic Index Only Scan on idx_bar_pt_abc on bar_pt  (cost=0.00..30.00 rows=1 width=12)
                Index Cond: (a = foo.a)
                Number of partitions to scan: 6 (out of 6)
  Optimizer: GPORCA
 (8 rows)

     --  Index idx_bar_PT_cba should be selected
 explain select * from bar_PT join foo on bar_PT.c =foo.c;
                                              QUERY PLAN
 -----------------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..461.00 rows=5 width=24)
    ->  Nested Loop  (cost=0.00..461.00 rows=2 width=24)
          Join Filter: true
          ->  Broadcast Motion 3:3  (slice2; segments: 3)  (cost=0.00..431.00 rows=5 width=12)
                ->  Seq Scan on foo  (cost=0.00..431.00 rows=2 width=12)
          ->  Dynamic Index Only Scan on idx_bar_pt_cba on bar_pt  (cost=0.00..30.00 rows=1 width=12)
                Index Cond: (c = foo.c)
                Number of partitions to scan: 6 (out of 6)
  Optimizer: GPORCA
 (9 rows)

 drop table if exists foo;
 drop table if exists bar_PT;
 ------------------------------------------------
 -- Scenario2: Unindexed predicate column in index.
 ------------------------------------------------
 -- If the index does not cover all the columns of the predicate, than a scan cost
 -- using it should be higher compared to an index which covers all the predicate columns.
 drop table if exists foo;
 NOTICE:  table "foo" does not exist, skipping
 create table foo (a int , b int, c int) distributed by (a);
 insert into foo select i,i,i from generate_series(1,5)i;
 -- Adding 1,1,1 in the table so that we have different values for NDV and table rows.
 insert into foo select 1,1,1*i/i from generate_series(1,5)i;
 -- 2.1 Test case for index scans
 CREATE INDEX idx_foo_a ON foo USING btree(a);
 CREATE INDEX idx_foo_abc ON foo USING btree(a,b,c);
 vacuum analyze foo;
 -- Query1 - Index idx_foo_a should be selected.
 explain select * from foo where a=1;
                                  QUERY PLAN
 -----------------------------------------------------------------------------
  Gather Motion 1:1  (slice1; segments: 1)  (cost=0.00..6.00 rows=6 width=12)
    ->  Index Scan using idx_foo_a on foo  (cost=0.00..6.00 rows=2 width=12)
          Index Cond: (a = 1)
  Optimizer: GPORCA
 (4 rows)

 -- Query2 - Index idx_foo_abc should be selected.
 explain select * from foo where a=1 and b=1 and c=1;
                                     QUERY PLAN
 -----------------------------------------------------------------------------------
  Gather Motion 1:1  (slice1; segments: 1)  (cost=0.00..6.00 rows=6 width=12)
    ->  Index Only Scan using idx_foo_abc on foo  (cost=0.00..6.00 rows=2 width=12)
          Index Cond: ((a = 1) AND (b = 1) AND (c = 1))
  Optimizer: GPORCA
 (4 rows)

 drop index idx_foo_a;
 drop index idx_foo_abc;
 -- 2.2 Test case for dynamic index scans
 drop table if exists foo;
 drop table if exists bar_PT;
 NOTICE:  table "bar_pt" does not exist, skipping
 create table foo (a int , b int, c int) distributed by (a);
 insert into foo select i,i,i from generate_series(1,5)i;
 analyze foo;
     -- Partitioned Table
 create table bar_PT (a int, b int, c int) partition by range(a) (start (1) inclusive end (12) every (2)) distributed by (a);
 insert into bar_PT select i,i,i from generate_series(1,11)i;
 CREATE INDEX idx_bar_PT_a ON bar_PT USING btree(a);
 CREATE INDEX idx_bar_PT_abc ON bar_PT USING btree(a,b,c);
 vacuum analyze bar_PT;
 -- Query1: Index idx_bar_PT_a should be selected.
 explain select * from bar_PT join foo on bar_PT.a =foo.a ;
                                           QUERY PLAN
 ----------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..461.00 rows=5 width=24)
    ->  Nested Loop  (cost=0.00..461.00 rows=2 width=24)
          Join Filter: true
          ->  Seq Scan on foo  (cost=0.00..431.00 rows=2 width=12)
          ->  Dynamic Index Scan on idx_bar_pt_a on bar_pt  (cost=0.00..30.00 rows=1 width=12)
                Index Cond: (a = foo.a)
                Number of partitions to scan: 6 (out of 6)
  Optimizer: GPORCA
 (8 rows)

 -- Query2 : Index idx_bar_PT_abc should be selected.
 explain select * from bar_PT join foo on bar_PT.a =foo.a and bar_PT.b =foo.b and bar_PT.c =foo.c;
                                              QUERY PLAN
 -----------------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..461.00 rows=5 width=24)
    ->  Nested Loop  (cost=0.00..461.00 rows=2 width=24)
          Join Filter: true
          ->  Seq Scan on foo  (cost=0.00..431.00 rows=2 width=12)
          ->  Dynamic Index Only Scan on idx_bar_pt_abc on bar_pt  (cost=0.00..30.00 rows=1 width=12)
                Index Cond: ((a = foo.a) AND (b = foo.b) AND (c = foo.c))
                Number of partitions to scan: 6 (out of 6)
  Optimizer: GPORCA
 (8 rows)

 drop table if exists foo;
 drop table if exists bar_PT;
	--
	-- Test Cases to check Index scan, Index Only scan,
	-- Dynamic Index scan & Dynamic Index only scan costing
	--
	---------------------------------------------------------------------
	-- Scenario1: Verify impact of unused index column in predicate,
	-- which helps in selecting best index in a multiple index scenario.
	---------------------------------------------------------------------
	-- Case A : Index - idx_ab, Columns used in predicate : a
	-- Case B : Index - idx_ab, Columns used in predicate : b
	-- In the above two cases, the index scan using idx_ab should be higher
	-- in CASE B, compared for CASE A, as the in Case-B, index column
	-- of higher significance is unused i.e. col 'a'
	drop table if exists foo;
	NOTICE: table "foo" does not exist, skipping
	create table foo (a int , b int, c int) distributed by (a);
	insert into foo select i,i,i from generate_series(1,5)i;
	-- Adding 1,1,1 in the table so that we have different values for NDV and table rows.
	insert into foo select 1,1,1*i/i from generate_series(1,5)i;
	-- 1.1 Test case for index scans
	CREATE INDEX idx_foo_ab ON foo USING btree(a,b);
	CREATE INDEX idx_foo_ba ON foo USING btree(b,a);
	analyze foo;
	-- index idx_foo_ab should be selected
	explain select * from foo where a=1;
	QUERY PLAN
	-----------------------------------------------------------------------------
	Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..6.00 rows=6 width=12)
	-> Index Scan using idx_foo_ab on foo (cost=0.00..6.00 rows=2 width=12)
	Index Cond: (a = 1)
	Optimizer: GPORCA
	(4 rows)

	-- Index idx_foo_ba should be selected
	explain select * from foo where b=1;
	QUERY PLAN
	-----------------------------------------------------------------------------
	Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..6.00 rows=6 width=12)
	-> Index Scan using idx_foo_ba on foo (cost=0.00..6.00 rows=2 width=12)
	Index Cond: (b = 1)
	Optimizer: GPORCA
	(4 rows)

	drop index idx_foo_ab;
	drop index idx_foo_ba;
	-- 1.2 Test case for index only scans
	CREATE INDEX idx_foo_abc ON foo USING btree(a,b,c);
	CREATE INDEX idx_foo_cba ON foo USING btree(c,b,a);
	vacuum analyze foo;
	-- Index idx_foo_abc should be selected
	explain select * from foo where a=1;
	QUERY PLAN
	-----------------------------------------------------------------------------------
	Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..6.00 rows=6 width=12)
	-> Index Only Scan using idx_foo_abc on foo (cost=0.00..6.00 rows=2 width=12)
	Index Cond: (a = 1)
	Optimizer: GPORCA
	(4 rows)

	-- Index idx_foo_cba should be selected
	explain select * from foo where c=1;
	QUERY PLAN
	-----------------------------------------------------------------------------------
	Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..6.00 rows=6 width=12)
	-> Index Only Scan using idx_foo_cba on foo (cost=0.00..6.00 rows=2 width=12)
	Index Cond: (c = 1)
	Optimizer: GPORCA
	(4 rows)

	drop index idx_foo_abc;
	drop index idx_foo_cba;
	-- 1.3 Test case for dynamic index scans
	drop table if exists foo;
	drop table if exists bar_PT;
	NOTICE: table "bar_pt" does not exist, skipping
	create table foo (a int , b int, c int) distributed by (a);
	insert into foo select i,i,i from generate_series(1,5)i;
	analyze foo;
	-- Partitioned Table
	create table bar_PT (a int, b int, c int) partition by range(a) (start (1) inclusive end (12) every (2)) distributed by (a);
	insert into bar_PT select i,i,i from generate_series(1,11)i;
	CREATE INDEX idx_bar_PT_ab ON bar_PT USING btree(a,b);
	CREATE INDEX idx_bar_PT_ba ON bar_PT USING btree(b,a);
	analyze bar_PT;
	-- Index idx_bar_PT_ab should be selected
	explain select * from bar_PT join foo on bar_PT.a =foo.a;
	QUERY PLAN
	-----------------------------------------------------------------------------------------------
	Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..461.00 rows=5 width=24)
	-> Nested Loop (cost=0.00..461.00 rows=2 width=24)
	Join Filter: true
	-> Seq Scan on foo (cost=0.00..431.00 rows=2 width=12)
	-> Dynamic Index Scan on idx_bar_pt_ab on bar_pt (cost=0.00..30.00 rows=1 width=12)
	Index Cond: (a = foo.a)
	Number of partitions to scan: 6 (out of 6)
	Optimizer: GPORCA
	(8 rows)

	-- Index idx_bar_PT_ba should be selected
	explain select * from bar_PT join foo on bar_PT.b =foo.b;
	QUERY PLAN
	-----------------------------------------------------------------------------------------------
	Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..461.00 rows=5 width=24)
	-> Nested Loop (cost=0.00..461.00 rows=2 width=24)
	Join Filter: true
	-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=5 width=12)
	-> Seq Scan on foo (cost=0.00..431.00 rows=2 width=12)
	-> Dynamic Index Scan on idx_bar_pt_ba on bar_pt (cost=0.00..30.00 rows=1 width=12)
	Index Cond: (b = foo.b)
	Number of partitions to scan: 6 (out of 6)
	Optimizer: GPORCA
	(9 rows)

	drop index idx_bar_PT_ab;
	drop index idx_bar_PT_ba;
	-- 1.4 Test case for dynamic index only scans
	CREATE INDEX idx_bar_PT_abc ON bar_PT USING btree(a,b,c);
	CREATE INDEX idx_bar_PT_cba ON bar_PT USING btree(c,b,a);
	vacuum analyze bar_PT;
	-- Index idx_bar_PT_abc should be selected
	explain select * from bar_PT join foo on bar_PT.a =foo.a;
	QUERY PLAN
	-----------------------------------------------------------------------------------------------------
	Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..461.00 rows=5 width=24)
	-> Nested Loop (cost=0.00..461.00 rows=2 width=24)
	Join Filter: true
	-> Seq Scan on foo (cost=0.00..431.00 rows=2 width=12)
	-> Dynamic Index Only Scan on idx_bar_pt_abc on bar_pt (cost=0.00..30.00 rows=1 width=12)
	Index Cond: (a = foo.a)
	Number of partitions to scan: 6 (out of 6)
	Optimizer: GPORCA
	(8 rows)

	-- Index idx_bar_PT_cba should be selected
	explain select * from bar_PT join foo on bar_PT.c =foo.c;
	QUERY PLAN
	-----------------------------------------------------------------------------------------------------
	Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..461.00 rows=5 width=24)
	-> Nested Loop (cost=0.00..461.00 rows=2 width=24)
	Join Filter: true
	-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=5 width=12)
	-> Seq Scan on foo (cost=0.00..431.00 rows=2 width=12)
	-> Dynamic Index Only Scan on idx_bar_pt_cba on bar_pt (cost=0.00..30.00 rows=1 width=12)
	Index Cond: (c = foo.c)
	Number of partitions to scan: 6 (out of 6)
	Optimizer: GPORCA
	(9 rows)

	drop table if exists foo;
	drop table if exists bar_PT;
	------------------------------------------------
	-- Scenario2: Unindexed predicate column in index.
	------------------------------------------------
	-- If the index does not cover all the columns of the predicate, than a scan cost
	-- using it should be higher compared to an index which covers all the predicate columns.
	drop table if exists foo;
	NOTICE: table "foo" does not exist, skipping
	create table foo (a int , b int, c int) distributed by (a);
	insert into foo select i,i,i from generate_series(1,5)i;
	-- Adding 1,1,1 in the table so that we have different values for NDV and table rows.
	insert into foo select 1,1,1*i/i from generate_series(1,5)i;
	-- 2.1 Test case for index scans
	CREATE INDEX idx_foo_a ON foo USING btree(a);
	CREATE INDEX idx_foo_abc ON foo USING btree(a,b,c);
	vacuum analyze foo;
	-- Query1 - Index idx_foo_a should be selected.
	explain select * from foo where a=1;
	QUERY PLAN
	-----------------------------------------------------------------------------
	Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..6.00 rows=6 width=12)
	-> Index Scan using idx_foo_a on foo (cost=0.00..6.00 rows=2 width=12)
	Index Cond: (a = 1)
	Optimizer: GPORCA
	(4 rows)

	-- Query2 - Index idx_foo_abc should be selected.
	explain select * from foo where a=1 and b=1 and c=1;
	QUERY PLAN
	-----------------------------------------------------------------------------------
	Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..6.00 rows=6 width=12)
	-> Index Only Scan using idx_foo_abc on foo (cost=0.00..6.00 rows=2 width=12)
	Index Cond: ((a = 1) AND (b = 1) AND (c = 1))
	Optimizer: GPORCA
	(4 rows)

	drop index idx_foo_a;
	drop index idx_foo_abc;
	-- 2.2 Test case for dynamic index scans
	drop table if exists foo;
	drop table if exists bar_PT;
	NOTICE: table "bar_pt" does not exist, skipping
	create table foo (a int , b int, c int) distributed by (a);
	insert into foo select i,i,i from generate_series(1,5)i;
	analyze foo;
	-- Partitioned Table
	create table bar_PT (a int, b int, c int) partition by range(a) (start (1) inclusive end (12) every (2)) distributed by (a);
	insert into bar_PT select i,i,i from generate_series(1,11)i;
	CREATE INDEX idx_bar_PT_a ON bar_PT USING btree(a);
	CREATE INDEX idx_bar_PT_abc ON bar_PT USING btree(a,b,c);
	vacuum analyze bar_PT;
	-- Query1: Index idx_bar_PT_a should be selected.
	explain select * from bar_PT join foo on bar_PT.a =foo.a ;
	QUERY PLAN
	----------------------------------------------------------------------------------------------
	Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..461.00 rows=5 width=24)
	-> Nested Loop (cost=0.00..461.00 rows=2 width=24)
	Join Filter: true
	-> Seq Scan on foo (cost=0.00..431.00 rows=2 width=12)
	-> Dynamic Index Scan on idx_bar_pt_a on bar_pt (cost=0.00..30.00 rows=1 width=12)
	Index Cond: (a = foo.a)
	Number of partitions to scan: 6 (out of 6)
	Optimizer: GPORCA
	(8 rows)

	-- Query2 : Index idx_bar_PT_abc should be selected.
	explain select * from bar_PT join foo on bar_PT.a =foo.a and bar_PT.b =foo.b and bar_PT.c =foo.c;
	QUERY PLAN
	-----------------------------------------------------------------------------------------------------
	Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..461.00 rows=5 width=24)
	-> Nested Loop (cost=0.00..461.00 rows=2 width=24)
	Join Filter: true
	-> Seq Scan on foo (cost=0.00..431.00 rows=2 width=12)
	-> Dynamic Index Only Scan on idx_bar_pt_abc on bar_pt (cost=0.00..30.00 rows=1 width=12)
	Index Cond: ((a = foo.a) AND (b = foo.b) AND (c = foo.c))
	Number of partitions to scan: 6 (out of 6)
	Optimizer: GPORCA
	(8 rows)

	drop table if exists foo;
	drop table if exists bar_PT;