| create extension if not exists gp_inject_fault; |
| SET enable_seqscan = OFF; |
| SET enable_indexscan = ON; |
| SET enable_bitmapscan = ON; |
| create table bm_test (i int, t text); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| insert into bm_test select i % 10, (i % 10)::text from generate_series(1, 100) i; |
| create index bm_test_idx on bm_test using bitmap (i); |
| select count(*) from bm_test where i=1; |
| count |
| ------- |
| 10 |
| (1 row) |
| |
| select count(*) from bm_test where i in(1, 3); |
| count |
| ------- |
| 20 |
| (1 row) |
| |
| -- this sql should confirm that the tuple with i=1 |
| -- and the tuple with i=5 are on different segments |
| select count(distinct gp_segment_id) from bm_test where i in (1, 5); |
| count |
| ------- |
| 2 |
| (1 row) |
| |
| select count(*) from bm_test where i in(1, 5); |
| count |
| ------- |
| 20 |
| (1 row) |
| |
| select * from bm_test where i > 10; |
| i | t |
| ---+--- |
| (0 rows) |
| |
| reindex index bm_test_idx; |
| select count(*) from bm_test where i in(1, 3); |
| count |
| ------- |
| 20 |
| (1 row) |
| |
| drop index bm_test_idx; |
| create index bm_test_coll_idx on bm_test using bitmap(i, t COLLATE "C"); |
| select * from bm_test where i=5 and t='5'; |
| i | t |
| ---+--- |
| 5 | 5 |
| 5 | 5 |
| 5 | 5 |
| 5 | 5 |
| 5 | 5 |
| 5 | 5 |
| 5 | 5 |
| 5 | 5 |
| 5 | 5 |
| 5 | 5 |
| (10 rows) |
| |
| select * from bm_test where i=5 or t='6'; |
| i | t |
| ---+--- |
| 5 | 5 |
| 5 | 5 |
| 5 | 5 |
| 5 | 5 |
| 5 | 5 |
| 5 | 5 |
| 5 | 5 |
| 5 | 5 |
| 5 | 5 |
| 5 | 5 |
| 6 | 6 |
| 6 | 6 |
| 6 | 6 |
| 6 | 6 |
| 6 | 6 |
| 6 | 6 |
| 6 | 6 |
| 6 | 6 |
| 6 | 6 |
| 6 | 6 |
| (20 rows) |
| |
| -- this sql should confirm that the tuple with i=5 |
| -- and the tuple with t='1' are on different segments |
| select count(distinct gp_segment_id) from bm_test where i=5 or t='1'; |
| count |
| ------- |
| 2 |
| (1 row) |
| |
| select * from bm_test where i=5 or t='1'; |
| i | t |
| ---+--- |
| 1 | 1 |
| 1 | 1 |
| 1 | 1 |
| 1 | 1 |
| 1 | 1 |
| 1 | 1 |
| 1 | 1 |
| 1 | 1 |
| 1 | 1 |
| 1 | 1 |
| 5 | 5 |
| 5 | 5 |
| 5 | 5 |
| 5 | 5 |
| 5 | 5 |
| 5 | 5 |
| 5 | 5 |
| 5 | 5 |
| 5 | 5 |
| 5 | 5 |
| (20 rows) |
| |
| select * from bm_test where i between 1 and 10 and i::text = t; |
| i | t |
| ---+--- |
| 2 | 2 |
| 4 | 4 |
| 6 | 6 |
| 8 | 8 |
| 2 | 2 |
| 4 | 4 |
| 6 | 6 |
| 8 | 8 |
| 2 | 2 |
| 4 | 4 |
| 6 | 6 |
| 8 | 8 |
| 2 | 2 |
| 4 | 4 |
| 6 | 6 |
| 8 | 8 |
| 2 | 2 |
| 4 | 4 |
| 6 | 6 |
| 8 | 8 |
| 2 | 2 |
| 4 | 4 |
| 6 | 6 |
| 8 | 8 |
| 2 | 2 |
| 4 | 4 |
| 6 | 6 |
| 8 | 8 |
| 2 | 2 |
| 4 | 4 |
| 6 | 6 |
| 8 | 8 |
| 2 | 2 |
| 4 | 4 |
| 6 | 6 |
| 8 | 8 |
| 2 | 2 |
| 4 | 4 |
| 6 | 6 |
| 8 | 8 |
| 1 | 1 |
| 3 | 3 |
| 5 | 5 |
| 7 | 7 |
| 9 | 9 |
| 1 | 1 |
| 3 | 3 |
| 5 | 5 |
| 7 | 7 |
| 9 | 9 |
| 1 | 1 |
| 3 | 3 |
| 5 | 5 |
| 7 | 7 |
| 9 | 9 |
| 1 | 1 |
| 3 | 3 |
| 5 | 5 |
| 7 | 7 |
| 9 | 9 |
| 1 | 1 |
| 3 | 3 |
| 5 | 5 |
| 7 | 7 |
| 9 | 9 |
| 1 | 1 |
| 3 | 3 |
| 5 | 5 |
| 7 | 7 |
| 9 | 9 |
| 1 | 1 |
| 3 | 3 |
| 5 | 5 |
| 7 | 7 |
| 9 | 9 |
| 1 | 1 |
| 3 | 3 |
| 5 | 5 |
| 7 | 7 |
| 9 | 9 |
| 1 | 1 |
| 3 | 3 |
| 5 | 5 |
| 7 | 7 |
| 9 | 9 |
| 1 | 1 |
| 3 | 3 |
| 5 | 5 |
| 7 | 7 |
| 9 | 9 |
| (90 rows) |
| |
| drop table bm_test; |
| -- test a bunch of different data types |
| create table bm_test (i2 int2, i4 int4, i8 int8, f4 float4, f8 float8, |
| n numeric(10, 3), t1 varchar(3), t2 char(3), t3 text, a int[2], |
| ip inet, b bytea, t timestamp, d date, g bool); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i2' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| insert into bm_test values(1, 1, 1, 1.0, 1.0, 1000.333, '1', '1', '1', |
| array[1, 3], '127.0.0.1', E'\001', '2007-01-01 01:01:01', |
| '2007-01-01', 't'); |
| insert into bm_test values(2, 2, 2, 2.0, 2.0, 2000.333, '2', '2', 'foo', |
| array[2, 6], '127.0.0.2', E'\002', '2007-01-02 01:01:01', |
| '2007-01-02', 'f'); |
| insert into bm_test default values; -- test nulls |
| create index bm_i2_idx on bm_test using bitmap(i2); |
| create index bm_i4_idx on bm_test using bitmap(i4); |
| create index bm_i8_idx on bm_test using bitmap(i8); |
| create index bm_f4_idx on bm_test using bitmap(f4); |
| create index bm_f8_idx on bm_test using bitmap(f8); |
| create index bm_n_idx on bm_test using bitmap(n); |
| create index bm_t1_idx on bm_test using bitmap(t1); |
| create index bm_t2_idx on bm_test using bitmap(t2); |
| create index bm_t3_idx on bm_test using bitmap(t3); |
| create index bm_a_idx on bm_test using bitmap(a); |
| create index bm_ip_idx on bm_test using bitmap(ip); |
| create index bm_b_idx on bm_test using bitmap(b); |
| create index bm_t_idx on bm_test using bitmap(t); |
| create index bm_d_idx on bm_test using bitmap(d); |
| create index bm_g_idx on bm_test using bitmap(g); |
| create index bm_t3_upper_idx on bm_test using bitmap(upper(t3)); |
| create index bm_n_null_idx on bm_test using bitmap(n) WHERE n ISNULL; |
| -- Try some cross type stuff |
| select a.t from bm_test a, bm_test b where a.i2 = b.i2; |
| t |
| -------------------------- |
| Tue Jan 02 01:01:01 2007 |
| Mon Jan 01 01:01:01 2007 |
| (2 rows) |
| |
| select a.t from bm_test a, bm_test b where a.i2 = b.i4; |
| t |
| -------------------------- |
| Mon Jan 01 01:01:01 2007 |
| Tue Jan 02 01:01:01 2007 |
| (2 rows) |
| |
| select a.t from bm_test a, bm_test b where a.i2 = b.i8; |
| t |
| -------------------------- |
| Mon Jan 01 01:01:01 2007 |
| Tue Jan 02 01:01:01 2007 |
| (2 rows) |
| |
| select a.t from bm_test a, bm_test b where b.f4 = a.f8 and a.f8 = '2.0'; |
| t |
| -------------------------- |
| Tue Jan 02 01:01:01 2007 |
| (1 row) |
| |
| -- some range queries |
| select a.t from bm_test a, bm_test b where a.n < b.n; |
| t |
| -------------------------- |
| Mon Jan 01 01:01:01 2007 |
| (1 row) |
| |
| select a.t from bm_test a, bm_test b where a.ip < b.ip; |
| t |
| -------------------------- |
| Mon Jan 01 01:01:01 2007 |
| (1 row) |
| |
| -- or queries |
| select a.t from bm_test a, bm_test b where a.ip=b.ip OR a.b = b.b; |
| t |
| -------------------------- |
| Mon Jan 01 01:01:01 2007 |
| Tue Jan 02 01:01:01 2007 |
| (2 rows) |
| |
| -- and |
| select a.t from bm_test a, bm_test b where a.ip=b.ip and a.b = b.b and a.i2=1; |
| t |
| -------------------------- |
| Mon Jan 01 01:01:01 2007 |
| (1 row) |
| |
| -- subquery |
| select a.t from bm_test a where d in(select d from bm_test b where a.g=b.g); |
| t |
| -------------------------- |
| Mon Jan 01 01:01:01 2007 |
| Tue Jan 02 01:01:01 2007 |
| (2 rows) |
| |
| -- functional and predicate indexes |
| select t from bm_test where upper(t3) = 'FOO'; |
| t |
| -------------------------- |
| Tue Jan 02 01:01:01 2007 |
| (1 row) |
| |
| select t from bm_test where n ISNULL; |
| t |
| --- |
| |
| (1 row) |
| |
| -- test updates |
| update bm_test set i4 = 3; |
| -- should return nothing |
| select * from bm_test where i4 = 1; |
| i2 | i4 | i8 | f4 | f8 | n | t1 | t2 | t3 | a | ip | b | t | d | g |
| ----+----+----+----+----+---+----+----+----+---+----+---+---+---+--- |
| (0 rows) |
| |
| -- should return all |
| select * from bm_test where i4=3; |
| i2 | i4 | i8 | f4 | f8 | n | t1 | t2 | t3 | a | ip | b | t | d | g |
| ----+----+----+----+----+----------+----+-----+-----+-------+-----------+------+--------------------------+------------+--- |
| 1 | 3 | 1 | 1 | 1 | 1000.333 | 1 | 1 | 1 | {1,3} | 127.0.0.1 | \x01 | Mon Jan 01 01:01:01 2007 | 01-01-2007 | t |
| | 3 | | | | | | | | | | | | | |
| 2 | 3 | 2 | 2 | 2 | 2000.333 | 2 | 2 | foo | {2,6} | 127.0.0.2 | \x02 | Tue Jan 02 01:01:01 2007 | 01-02-2007 | f |
| (3 rows) |
| |
| -- should return one row |
| select * from bm_test where i2=1; |
| i2 | i4 | i8 | f4 | f8 | n | t1 | t2 | t3 | a | ip | b | t | d | g |
| ----+----+----+----+----+----------+----+-----+----+-------+-----------+------+--------------------------+------------+--- |
| 1 | 3 | 1 | 1 | 1 | 1000.333 | 1 | 1 | 1 | {1,3} | 127.0.0.1 | \x01 | Mon Jan 01 01:01:01 2007 | 01-01-2007 | t |
| (1 row) |
| |
| -- test splitting of words |
| -- We distribute by k and only insert a single distinct value in that |
| -- field so that we can be guaranteed of behaviour. We're not testing |
| -- the parallel mechanism here so it's fine to harass a single backend |
| create table bm_test2 (i int, j int, k int) distributed by (k); |
| create index bm_test2_i_idx on bm_test2 using bitmap(i); |
| insert into bm_test2 select 1, |
| case when (i % (16 * 16 + 8)) = 0 then 2 else 1 end, 1 |
| from generate_series(1, 16 * 16 * 16) i; |
| select count(*) from bm_test2 where i = 1; |
| count |
| ------- |
| 4096 |
| (1 row) |
| |
| select count(*) from bm_test2 where j = 2; |
| count |
| ------- |
| 15 |
| (1 row) |
| |
| -- break some compressed words |
| update bm_test2 set i = 2 where j = 2; |
| select count(*) from bm_test2 where i = 1; |
| count |
| ------- |
| 4081 |
| (1 row) |
| |
| select count(*) from bm_test2 where i = 2; |
| count |
| ------- |
| 15 |
| (1 row) |
| |
| update bm_test2 set i = 3 where i = 1; |
| select count(*) from bm_test2 where i = 1; |
| count |
| ------- |
| 0 |
| (1 row) |
| |
| select count(*) from bm_test2 where i = 2; |
| count |
| ------- |
| 15 |
| (1 row) |
| |
| select count(*) from bm_test2 where i = 3; |
| count |
| ------- |
| 4081 |
| (1 row) |
| |
| -- now try and break a whole page |
| -- bitmap words are 16 bits so, with no compression we get about |
| -- 16500 words per 32K page. So, what we want to do is, insert |
| -- 8250 uncompressed words, then a compressed word, then more uncompressed |
| -- words until the page is full. After this, we can break the compressed word |
| -- and there by test the word spliting system |
| create table bm_test3 (i int, j int, k int) distributed by (k); |
| create index bm_test3_i_idx on bm_test3 using bitmap(i); |
| insert into bm_test3 select i, 1, 1 from |
| generate_series(1, 8250 * 8) g, generate_series(1, 2) i; |
| insert into bm_test3 |
| select 17, 1, 1 from generate_series(1, 16 * 16) i; |
| insert into bm_test3 values(17, 2, 1); |
| insert into bm_test3 |
| select 17, 1, 1 from generate_series(1, 16 * 16) i; |
| insert into bm_test3 select i, 1, 1 from |
| generate_series(1, 8250 * 8) g, generate_series(1, 2) i; |
| select count(*) from bm_test3 where i = 1; |
| count |
| -------- |
| 132000 |
| (1 row) |
| |
| select count(*) from bm_test3 where i = 17; |
| count |
| ------- |
| 513 |
| (1 row) |
| |
| select count(*) from bm_test3 where i = 17 and j = 2; |
| count |
| ------- |
| 1 |
| (1 row) |
| |
| update bm_test3 set i = 18 where i = 17 and j = 2; |
| select count(*) from bm_test3 where i = 1; |
| count |
| -------- |
| 132000 |
| (1 row) |
| |
| select count(*) from bm_test3 where i = 2; |
| count |
| -------- |
| 132000 |
| (1 row) |
| |
| select count(*) from bm_test3 where i = 17; |
| count |
| ------- |
| 512 |
| (1 row) |
| |
| select count(*) from bm_test3 where i = 18; |
| count |
| ------- |
| 1 |
| (1 row) |
| |
| drop table bm_test; |
| drop table bm_test2; |
| drop table bm_test3; |
| create table bm_test (i int, j int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| insert into bm_test values (0, 0), (0, 0), (0, 1), (1,0), (1,0), (1,1); |
| analyze bm_test; |
| create index bm_test_j on bm_test using bitmap(j); |
| delete from bm_test where j =1; |
| vacuum bm_test; |
| insert into bm_test values (0, 0), (1,0); |
| set enable_seqscan=off; |
| set enable_bitmapscan=off; |
| set optimizer_enable_bitmapscan=off; |
| -- start_ignore |
| -- Known_opt_diff: MPP-19808 |
| -- end_ignore |
| explain select * from bm_test where j = 1; |
| QUERY PLAN |
| ----------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..2.03 rows=1 width=8) |
| -> Seq Scan on bm_test (cost=0.00..1.03 rows=1 width=8) |
| Filter: j = 1 |
| Settings: enable_bitmapscan=off; enable_indexscan=on; enable_seqscan=off; optimizer=on |
| (4 rows) |
| |
| select * from bm_test where j = 1; |
| i | j |
| ---+--- |
| (0 rows) |
| |
| drop table bm_test; |
| -- MPP-3232 |
| create table bm_test (i int,j int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| insert into bm_test values (1, 1), (1, 2); |
| create index bm_test_j on bm_test using bitmap(j); |
| update bm_test set j=20 where j=1; |
| vacuum bm_test; |
| drop table bm_test; |
| -- unique index with null value tests |
| drop table if exists ijk; |
| NOTICE: table "ijk" does not exist, skipping |
| create table ijk(i int, j int, k int) distributed by (i); |
| insert into ijk values (1, 1, 3); |
| insert into ijk values (1, 2, 4); |
| insert into ijk values (1, 3, NULL); |
| insert into ijk values (1, 3, NULL); |
| insert into ijk values (1, NULL, NULL); |
| insert into ijk values (1, NULL, NULL); |
| -- should fail. |
| create unique index ijk_i on ijk(i); |
| ERROR: could not create unique index "ijk_i" |
| DETAIL: Key (i)=(1) is duplicated. |
| create unique index ijk_ij on ijk(i,j); |
| ERROR: could not create unique index "ijk_ij" |
| DETAIL: Key (i, j)=(1, 3) is duplicated. |
| -- should OK. |
| create unique index ijk_ijk on ijk(i,j,k); |
| drop table if exists ijk; |
| create table ijk(i int, j int, k int) distributed by (i); |
| insert into ijk values (1, 1, 3); |
| insert into ijk values (1, 2, 4); |
| insert into ijk values (1, 3, NULL); |
| insert into ijk values (1, 3, NULL); |
| insert into ijk values (1, NULL, NULL); |
| insert into ijk values (1, NULL, NULL); |
| -- should fail. |
| create unique index ijk_i on ijk(i); |
| ERROR: could not create unique index "ijk_i" |
| DETAIL: Key (i)=(1) is duplicated. |
| create unique index ijk_ij on ijk(i,j); |
| ERROR: could not create unique index "ijk_ij" |
| DETAIL: Key (i, j)=(1, 3) is duplicated. |
| -- should OK. |
| create unique index ijk_ijk on ijk(i,j,k); |
| drop table ijk; |
| -- |
| -- test bitmaps with NULL and non-NULL values (MPP-8461) |
| -- |
| create table bmap_test (x int, y int, z int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| insert into bmap_test values (1,NULL,NULL); |
| insert into bmap_test values (NULL,1,NULL); |
| insert into bmap_test values (NULL,NULL,1); |
| insert into bmap_test values (1,NULL,NULL); |
| insert into bmap_test values (NULL,1,NULL); |
| insert into bmap_test values (NULL,NULL,1); |
| insert into bmap_test values (1,NULL,5); |
| insert into bmap_test values (NULL,1,NULL); |
| insert into bmap_test values (NULL,NULL,1); |
| insert into bmap_test select a from generate_series(1,10*1000) as s(a); |
| create index bmap_test_idx_1 on bmap_test using bitmap (x,y,z); |
| analyze bmap_test; |
| select * from bmap_test where x = 1 order by x,y,z; |
| x | y | z |
| ---+---+--- |
| 1 | | 5 |
| 1 | | |
| 1 | | |
| 1 | | |
| (4 rows) |
| |
| drop table bmap_test; |
| -- |
| -- Test over-sized values |
| -- |
| create table oversize_test (c1 text); |
| CREATE INDEX oversize_test_idx ON oversize_test USING BITMAP (c1); |
| insert into oversize_test values ('a'); |
| select * from oversize_test; |
| c1 |
| ---- |
| a |
| (1 row) |
| |
| -- this fails, because the value is too large |
| insert into oversize_test values (array_to_string(array(select generate_series(1, 10000)), '123456789')); |
| ERROR: row is too big: size 33256, maximum size 32736 (seg2 127.0.0.1:40002 pid=5270) |
| set enable_seqscan=off; |
| select * from oversize_test where c1 < 'z'; |
| c1 |
| ---- |
| a |
| (1 row) |
| |
| -- Drop the index, insert the row, and then try creating the index again. This is essentially |
| -- the same test, but now the failure happens during CREATE INDEX rather than INSERT. |
| drop index oversize_test_idx; |
| insert into oversize_test values (array_to_string(array(select generate_series(1, 10000)), '123456789')); |
| CREATE INDEX oversize_test_idx ON oversize_test USING BITMAP (c1); |
| ERROR: row is too big: size 33256, maximum size 32736 (seg2 172.17.0.2:25434 pid=391078) |
| -- |
| -- Test Index Only Scans. |
| -- |
| -- Bitmap indexes don't really support index only scans at the moment. But the |
| -- planner can still choose an Index Only Scan, if the query doesn't need any |
| -- of the attributes from the index. |
| create table bm_indexonly_test (i int, t text); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| insert into bm_indexonly_test select g, 'foo' || g from generate_series(1, 5) g; |
| create index on bm_indexonly_test using bitmap (i, t); |
| set enable_seqscan=off; |
| set enable_bitmapscan=off; |
| set enable_indexscan=on; |
| set enable_indexonlyscan=on; |
| -- if bitmap indexes supported Index Only Scans, properly, this could use one. |
| explain (costs off) select i, t from bm_indexonly_test where i = 1; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 1:1 (slice1; segments: 1) |
| -> Seq Scan on bm_indexonly_test |
| Filter: (i = 1) |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (4 rows) |
| |
| select i, t from bm_indexonly_test where i = 1; |
| i | t |
| ---+------ |
| 1 | foo1 |
| (1 row) |
| |
| -- even without proper support, the planner chooses an Index Only Scan for this. |
| explain (costs off) select 'foobar' from bm_indexonly_test; |
| QUERY PLAN |
| ------------------------------------------------ |
| Result |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Seq Scan on bm_indexonly_test |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (4 rows) |
| |
| select 'foobar' from bm_indexonly_test; |
| ?column? |
| ---------- |
| foobar |
| foobar |
| foobar |
| foobar |
| foobar |
| (5 rows) |
| |
| -- |
| -- Test unlogged table |
| -- |
| set enable_seqscan=off; |
| set enable_indexscan=on; |
| set optimizer_enable_bitmapscan=on; |
| create unlogged table unlogged_test(c1 int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| insert into unlogged_test select * from generate_series(1,1000); |
| CREATE INDEX unlogged_test_idx ON unlogged_test USING BITMAP (c1); |
| analyze unlogged_test; |
| explain select * from unlogged_test where c1 = 100; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------- |
| Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..204.38 rows=1 width=4) |
| -> Bitmap Heap Scan on unlogged_test (cost=0.00..204.38 rows=1 width=4) |
| Recheck Cond: c1 = 100 |
| -> Bitmap Index Scan on unlogged_test_idx (cost=0.00..0.00 rows=0 width=0) |
| Index Cond: c1 = 100 |
| Optimizer: Pivotal Optimizer (GPORCA) version 2.70.0 |
| (6 rows) |
| |
| select * from unlogged_test where c1 = 100; |
| c1 |
| ----- |
| 100 |
| (1 row) |
| |
| drop table unlogged_test; |
| -- |
| -- Test crash recovery |
| -- |
| -- |
| -- disable fault-tolerance service (FTS) probing to ensure |
| -- the mirror does not accidentally get promoted |
| -- |
| SELECT gp_inject_fault_infinite('fts_probe', 'skip', dbid) FROM gp_segment_configuration WHERE role = 'p' and content = -1; |
| gp_inject_fault_infinite |
| -------------------------- |
| Success: |
| (1 row) |
| |
| CREATE TABLE bm_test_insert(a int) DISTRIBUTED BY (a); |
| CREATE INDEX bm_a_idx ON bm_test_insert USING bitmap(a); |
| CREATE TABLE bm_test_update(a int, b int) DISTRIBUTED BY (a); |
| CREATE INDEX bm_b_idx ON bm_test_update USING bitmap(b); |
| INSERT INTO bm_test_update SELECT i,i FROM generate_series (1, 10000) i; |
| -- flush the data to disk |
| CHECKPOINT; |
| -- skip all further checkpoint |
| SELECT gp_inject_fault_infinite('checkpoint', 'skip', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content > -1; |
| gp_inject_fault_infinite |
| -------------------------- |
| Success: |
| Success: |
| Success: |
| (3 rows) |
| |
| INSERT INTO bm_test_insert SELECT generate_series (1, 10000); |
| UPDATE bm_test_update SET b=b+1; |
| -- trigger recovery on primaries |
| SELECT gp_inject_fault_infinite('finish_prepared_after_record_commit_prepared', 'panic', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content > -1; |
| HINT: Inject an infinite 'skip' into the 'fts_probe' fault to disable FTS probing. |
| WARNING: consider disabling FTS probes while injecting a panic. |
| gp_inject_fault_infinite |
| -------------------------- |
| Success: |
| Success: |
| Success: |
| (3 rows) |
| |
| SET client_min_messages='ERROR'; |
| CREATE TABLE trigger_recovery_on_primaries(c int); |
| RESET client_min_messages; |
| -- reconnect to the database after restart |
| \c |
| SELECT gp_inject_fault('checkpoint', 'reset', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content > -1; |
| gp_inject_fault |
| ----------------- |
| Success: |
| Success: |
| Success: |
| (3 rows) |
| |
| SELECT gp_inject_fault('finish_prepared_after_record_commit_prepared', 'reset', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content > -1; |
| gp_inject_fault |
| ----------------- |
| Success: |
| Success: |
| Success: |
| (3 rows) |
| |
| SET enable_seqscan=off; |
| SET enable_indexscan=off; |
| SELECT * FROM bm_test_insert WHERE a=1; |
| a |
| --- |
| 1 |
| (1 row) |
| |
| SELECT * FROM bm_test_update WHERE b=1; |
| a | b |
| ---+--- |
| (0 rows) |
| |
| DROP TABLE trigger_recovery_on_primaries; |
| DROP TABLE bm_test_insert; |
| DROP TABLE bm_test_update; |
| -- |
| -- re-enable fault-tolerance service (FTS) probing after recovery completed. |
| -- |
| SELECT gp_inject_fault('fts_probe', 'reset', dbid) FROM gp_segment_configuration WHERE role = 'p' and content = -1; |
| gp_inject_fault |
| ----------------- |
| Success: |
| (1 row) |
| |
| -- If the table is AO table, it need generate some fake tuple pointer, |
| -- this pointer is a little different from the heap tables pointer, |
| -- If the Offset in pointer is 0(If the row number is 32768, the Offset |
| -- should be 0), we set the 16th bit of the Offsert to be 1, so we |
| -- do not forget to remove the flag when we use it, otherwise we will |
| -- get an wrong value. |
| CREATE TABLE bm_test_reindex(c1 int, c2 int) WITH (appendonly=true); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| CREATE INDEX bm_test_reindex_idx ON bm_test_reindex USING bitmap(c2); |
| INSERT INTO bm_test_reindex SELECT 1,i FROM generate_series(1, 65537)i; |
| REINDEX INDEX bm_test_reindex_idx; |
| SET enable_bitmapscan to on; |
| SET enable_seqscan to off; |
| SELECT * from bm_test_reindex where c2 = 32767; |
| c1 | c2 |
| ----+------- |
| 1 | 32767 |
| (1 row) |
| |
| SELECT * from bm_test_reindex where c2 = 32768; |
| c1 | c2 |
| ----+------- |
| 1 | 32768 |
| (1 row) |
| |
| SELECT * from bm_test_reindex where c2 = 32769; |
| c1 | c2 |
| ----+------- |
| 1 | 32769 |
| (1 row) |
| |
| SELECT * from bm_test_reindex where c2 = 65536; |
| c1 | c2 |
| ----+------- |
| 1 | 65536 |
| (1 row) |
| |
| SET enable_seqscan = ON; |
| SET enable_indexscan = ON; |
| -- |
| -- correct cost estimate to avoid bm index scan for wrong result |
| -- |
| CREATE TABLE test_bmselec(id int, type int, msg text) distributed by (id); |
| INSERT INTO test_bmselec (id, type, msg) SELECT g, g % 10000, md5(g::text) FROM generate_series(1,100000) as g; |
| CREATE INDEX ON test_bmselec USING bitmap(type); |
| ANALYZE test_bmselec; |
| -- it used to choose bitmap index over seq scan, which not right. |
| explain (analyze, verbose) select * from test_bmselec where type < 500; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..434.28 rows=4943 width=41) (actual time=0.435..7.910 rows=5000 loops=1) |
| Output: id, type, msg |
| -> Seq Scan on public.test_bmselec (cost=0.00..433.52 rows=1648 width=41) (actual time=0.030..5.213 rows=1693 loops=1) |
| Output: id, type, msg |
| Filter: (test_bmselec.type < 500) |
| Rows Removed by Filter: 31769 |
| Planning Time: 49.134 ms |
| (slice0) Executor memory: 36K bytes. |
| (slice1) Executor memory: 36K bytes avg x 3 workers, 36K bytes max (seg0). |
| Memory used: 128000kB |
| Optimizer: Pivotal Optimizer (GPORCA) |
| Settings: enable_bitmapscan=on, enable_indexscan=on, enable_seqscan=on |
| Execution Time: 8.972 ms |
| (13 rows) |
| |
| SET enable_seqscan = OFF; |
| SET enable_bitmapscan = OFF; |
| -- we can see the bitmap index scan is much more slower |
| explain (analyze, verbose) select * from test_bmselec where type < 500; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..434.28 rows=4943 width=41) (actual time=0.439..8.396 rows=5000 loops=1) |
| Output: id, type, msg |
| -> Seq Scan on public.test_bmselec (cost=0.00..433.52 rows=1648 width=41) (actual time=0.032..6.750 rows=1693 loops=1) |
| Output: id, type, msg |
| Filter: (test_bmselec.type < 500) |
| Rows Removed by Filter: 31769 |
| Planning Time: 4.239 ms |
| (slice0) Executor memory: 36K bytes. |
| (slice1) Executor memory: 36K bytes avg x 3 workers, 36K bytes max (seg0). |
| Memory used: 128000kB |
| Optimizer: Pivotal Optimizer (GPORCA) |
| Settings: enable_bitmapscan=off, enable_indexscan=on, enable_seqscan=off |
| Execution Time: 9.222 ms |
| (13 rows) |
| |
| DROP TABLE test_bmselec; |
| SET enable_seqscan = ON; |
| SET enable_bitmapscan = ON; |
| -- for sparse bitmap index |
| create table test_bmsparse(id int, type int, msg text) distributed by (id); |
| INSERT INTO test_bmsparse (id, type, msg) SELECT g, g % 10000, md5(g::text) FROM generate_series(1,10000) as g; |
| INSERT INTO test_bmsparse (id, type, msg) SELECT g, g % 200, md5(g::text) FROM generate_series(1,80000) as g; |
| INSERT INTO test_bmsparse (id, type, msg) SELECT g, g % 10000, md5(g::text) FROM generate_series(1,10000) as g; |
| CREATE INDEX ON test_bmsparse USING bitmap(type); |
| ANALYZE test_bmsparse; |
| -- select lots of rows but on small part of distinct values, should use seq scan |
| explain (analyze, verbose) select * from test_bmsparse where type < 200; |
| QUERY PLAN |
| ---------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..447.65 rows=79956 width=41) (actual time=0.888..24.753 rows=80400 loops=1) |
| Output: id, type, msg |
| -> Seq Scan on public.test_bmsparse (cost=0.00..435.43 rows=26652 width=41) (actual time=0.037..7.395 rows=26975 loops=1) |
| Output: id, type, msg |
| Filter: (test_bmsparse.type < 200) |
| Rows Removed by Filter: 6596 |
| Planning Time: 161.423 ms |
| (slice0) Executor memory: 36K bytes. |
| (slice1) Executor memory: 36K bytes avg x 3 workers, 36K bytes max (seg0). |
| Memory used: 128000kB |
| Optimizer: Pivotal Optimizer (GPORCA) |
| Settings: enable_bitmapscan=on, enable_indexscan=on, enable_seqscan=on |
| Execution Time: 29.185 ms |
| (13 rows) |
| |
| SET enable_seqscan = OFF; |
| SET enable_bitmapscan = OFF; |
| explain (analyze, verbose) select * from test_bmsparse where type < 200; |
| QUERY PLAN |
| ---------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..447.65 rows=79956 width=41) (actual time=0.866..24.050 rows=80400 loops=1) |
| Output: id, type, msg |
| -> Seq Scan on public.test_bmsparse (cost=0.00..435.43 rows=26652 width=41) (actual time=0.029..7.666 rows=26975 loops=1) |
| Output: id, type, msg |
| Filter: (test_bmsparse.type < 200) |
| Rows Removed by Filter: 6596 |
| Planning Time: 5.059 ms |
| (slice0) Executor memory: 36K bytes. |
| (slice1) Executor memory: 36K bytes avg x 3 workers, 36K bytes max (seg0). |
| Memory used: 128000kB |
| Optimizer: Pivotal Optimizer (GPORCA) |
| Settings: enable_bitmapscan=off, enable_indexscan=on, enable_seqscan=off |
| Execution Time: 28.480 ms |
| (13 rows) |
| |
| SET enable_seqscan = ON; |
| SET enable_bitmapscan = ON; |
| -- select small part of table but on lots of distinct values, should use seq scan |
| explain (analyze, verbose) select * from test_bmsparse where type > 500; |
| QUERY PLAN |
| ---------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..436.80 rows=19101 width=41) (actual time=0.391..12.640 rows=18998 loops=1) |
| Output: id, type, msg |
| -> Seq Scan on public.test_bmsparse (cost=0.00..433.88 rows=6367 width=41) (actual time=0.049..5.123 rows=6448 loops=1) |
| Output: id, type, msg |
| Filter: (test_bmsparse.type > 500) |
| Rows Removed by Filter: 26979 |
| Planning Time: 5.442 ms |
| (slice0) Executor memory: 36K bytes. |
| (slice1) Executor memory: 36K bytes avg x 3 workers, 36K bytes max (seg0). |
| Memory used: 128000kB |
| Optimizer: Pivotal Optimizer (GPORCA) |
| Settings: enable_bitmapscan=on, enable_indexscan=on, enable_seqscan=on |
| Execution Time: 14.206 ms |
| (13 rows) |
| |
| SET enable_seqscan = OFF; |
| SET enable_bitmapscan = OFF; |
| explain (analyze, verbose) select * from test_bmsparse where type > 500; |
| QUERY PLAN |
| --------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..436.80 rows=19101 width=41) (actual time=0.352..9.098 rows=18998 loops=1) |
| Output: id, type, msg |
| -> Seq Scan on public.test_bmsparse (cost=0.00..433.88 rows=6367 width=41) (actual time=0.049..5.299 rows=6448 loops=1) |
| Output: id, type, msg |
| Filter: (test_bmsparse.type > 500) |
| Rows Removed by Filter: 26979 |
| Planning Time: 5.703 ms |
| (slice0) Executor memory: 36K bytes. |
| (slice1) Executor memory: 36K bytes avg x 3 workers, 36K bytes max (seg0). |
| Memory used: 128000kB |
| Optimizer: Pivotal Optimizer (GPORCA) |
| Settings: enable_bitmapscan=off, enable_indexscan=on, enable_seqscan=off |
| Execution Time: 10.390 ms |
| (13 rows) |
| |
| DROP TABLE test_bmsparse; |
| -- test bitmap index scan when using NULL array-condition as index key |
| create table foo(a int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| create index foo_i on foo using bitmap(a); |
| explain (verbose on, costs off) select * from foo where a = any(null::int[]); |
| QUERY PLAN |
| ----------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: a |
| -> Bitmap Heap Scan on public.foo |
| Output: a |
| Recheck Cond: (foo.a = ANY (NULL::integer[])) |
| -> Bitmap Index Scan on foo_i |
| Index Cond: (foo.a = ANY (NULL::integer[])) |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (8 rows) |
| |
| select * from foo where a = any(null::int[]); |
| a |
| --- |
| (0 rows) |
| |
| insert into foo values(1); |
| select * from foo where a = 1 and a = any(null::int[]); |
| a |
| --- |
| (0 rows) |
| |
| select * from foo where a = 1 or a = any(null::int[]); |
| a |
| --- |
| 1 |
| (1 row) |
| |
| drop table foo; |
| -- test for compressed bitmap index ; see https://github.com/apache/cloudberry/pull/679 |
| SET enable_seqscan = OFF; |
| SET enable_indexscan = ON; |
| SET enable_bitmapscan = OFF; |
| create table bm_test_ao (i int, j int, k int) WITH (appendonly=true) distributed by (k) ; |
| create index bm_test_ao_i_idx on bm_test_ao using bitmap(i); |
| insert into bm_test_ao select i, 1, 1 from |
| generate_series(1, 65535) g, generate_series(1, 4) i; |
| explain select count(*) from bm_test_ao where i =2; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------- |
| Aggregate (cost=0.00..391.30 rows=1 width=8) |
| -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..391.30 rows=1 width=1) |
| -> Bitmap Heap Scan on bm_test_ao (cost=0.00..391.30 rows=1 width=1) |
| Recheck Cond: (i = 2) |
| -> Bitmap Index Scan on bm_test_ao_i_idx (cost=0.00..0.00 rows=0 width=0) |
| Index Cond: (i = 2) |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (7 rows) |
| |
| select count(*) from bm_test_ao where i = 2; |
| count |
| ------- |
| 65535 |
| (1 row) |
| |
| DROP TABLE bm_test_ao; |
| -- |
| -- test union bitmap batch words for multivalues index scan like where x in (x1, x2) or x > v |
| -- which creates bitmapand plan on two bitmap indexs that match multiple keys by using in in where clause |
| -- |
| create table bmunion (a int, b int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| insert into bmunion |
| select (r%53), (r%59) |
| from generate_series(1,70000) r; |
| create index bmu_i_bmtest2_a on bmunion using bitmap(a); |
| create index bmu_i_bmtest2_b on bmunion using bitmap(b); |
| insert into bmunion select 53, 1 from generate_series(1, 1000); |
| analyze bmunion; |
| set optimizer_enable_tablescan=off; |
| set optimizer_enable_dynamictablescan=off; |
| -- inject fault for planner so that it could produce bitmapand plan node. |
| select gp_inject_fault('simulate_bitmap_and', 'skip', dbid) from gp_segment_configuration where role = 'p' and content = -1; |
| gp_inject_fault |
| ----------------- |
| Success: |
| (1 row) |
| |
| explain (costs off) select count(*) from bmunion where a = 53 and b < 3; |
| QUERY PLAN |
| -------------------------------------------------------------------- |
| Finalize Aggregate |
| -> Gather Motion 1:1 (slice1; segments: 1) |
| -> Partial Aggregate |
| -> Bitmap Heap Scan on bmunion |
| Recheck Cond: ((a = 53) AND (b < 3)) |
| -> BitmapAnd |
| -> Bitmap Index Scan on bmu_i_bmtest2_a |
| Index Cond: (a = 53) |
| -> Bitmap Index Scan on bmu_i_bmtest2_b |
| Index Cond: (b < 3) |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (11 rows) |
| |
| select gp_inject_fault('simulate_bitmap_and', 'reset', dbid) from gp_segment_configuration where role = 'p' and content = -1; |
| gp_inject_fault |
| ----------------- |
| Success: |
| (1 row) |
| |
| select gp_inject_fault('simulate_bitmap_and', 'skip', dbid) from gp_segment_configuration where role = 'p' and content = -1; |
| gp_inject_fault |
| ----------------- |
| Success: |
| (1 row) |
| |
| select count(*) from bmunion where a = 53 and b < 3; |
| count |
| ------- |
| 1000 |
| (1 row) |
| |
| select gp_inject_fault('simulate_bitmap_and', 'reset', dbid) from gp_segment_configuration where role = 'p' and content = -1; |
| gp_inject_fault |
| ----------------- |
| Success: |
| (1 row) |
| |
| reset optimizer_enable_tablescan; |
| reset optimizer_enable_dynamictablescan; |
| drop table bmunion; |
| -- test create bitmap index and there have HOT chains. |
| drop table if exists bm_test; |
| NOTICE: table "bm_test" does not exist, skipping |
| create table bm_test(a int, b int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| -- insert some data into a one segment |
| insert into bm_test values (1, 1); |
| insert into bm_test values (1, 2); |
| insert into bm_test values (1, 3); |
| insert into bm_test values (12, 1); |
| -- update the first tuple using HOT, since this page |
| -- just have 4 tuples, there have full free space to |
| -- use HOT update. |
| update bm_test set b = 1 where a = 1 and b = 1; |
| -- After the update, the tids that the value of b is equal to 1 |
| -- we scanned will not be in order, due to HOT. |
| create index idx_bm_test on bm_test using bitmap(b); |
| select * from bm_test where b = 1; |
| a | b |
| ----+--- |
| 1 | 1 |
| 12 | 1 |
| (2 rows) |
| |
| -- clean up |
| drop table bm_test; |
| -- test the scenario that we need read the same batch words many times |
| -- more detials can be found at https://github.com/greenplum-db/gpdb/issues/13446 |
| SET enable_seqscan = OFF; |
| SET enable_bitmapscan = OFF; |
| create table foo_13446(a int, b int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| create index idx_13446 on foo_13446 using bitmap(b); |
| insert into foo_13446 select 1, 1 from generate_series(0, 16384); |
| -- At current implementation, BMIterateResult can only store 16*1024=16384 TIDs, |
| -- if we have 13685 TIDs to read, it must scan same batch words twice, that's what we want |
| select count(*) from foo_13446 where b = 1; |
| count |
| ------- |
| 16385 |
| (1 row) |
| |
| drop table foo_13446; |
| SET enable_seqscan = ON; |
| SET enable_bitmapscan = ON; |