| -- |
| -- Concurrent scan on bitmap index when there's insert running in the backend |
| -- may cause the bitmap scan read wrong tid. |
| -- If a LOV has multiple bitmap pages, and the index insert tries to insert a tid |
| -- into a compressed word on a full bitmap page(Let's call the page `PAGE_FULL`). |
| -- Then it'll try to find free space on next bitmap page(Let's call the page `PAGE_NEXT`) |
| -- and rearrange the words and copy extra words into the next bitmap page. |
| -- So when the above insertion happens, imagine below case: |
| -- 1. Query on bitmap: A query starts and reads all bitmap pages to `PAGE_FULL`, increase |
| -- next tid to fetch, release lock after reading each page. |
| -- 2. Concurrent insert: insert a tid into `PAGE_FULL` cause expand compressed words to |
| -- new words, and rearrange words into `PAGE_NEXT`. |
| -- 3. Query on bitmap: fetch `PAGE_NEXT` and expect the first tid in it should equal the |
| -- saved next tid. But actually `PAGE_NEXT` now contains words used to belong in `PAGE_FULL`. |
| -- This causes the real next tid less than the expected next tid. But our scan keeps increasing |
| -- the wrong tid. And then this leads to a wrong result. |
| -- This related to issue: https://github.com/greenplum-db/gpdb/issues/11308 |
| -- |
| |
| -- Here we use append optimized table to speed up create full bitmap pages |
| -- since each transaction use different seg file number. And ao table's AOTupleId |
| -- is composed of (seg file number, row number). So this will lead to lots of |
| -- compressed words in the first bitmap page. |
| -- With the below transacions in each session, on seg0, the bitmap for id=97 |
| -- will generate two bitmap pages, and the first page is a full page. |
| -- Use heap table, delete tuples and then vacuum should be the same. But it needs huge tuples. |
| CREATE TABLE bmupdate (id int) with(appendonly = true) DISTRIBUTED BY (id); |
| CREATE |
| |
| 1: begin; |
| BEGIN |
| 2: begin; |
| BEGIN |
| 3: begin; |
| BEGIN |
| 4: begin; |
| BEGIN |
| 5: begin; |
| BEGIN |
| 6: begin; |
| BEGIN |
| 7: begin; |
| BEGIN |
| 8: begin; |
| BEGIN |
| 9: begin; |
| BEGIN |
| 10: begin; |
| BEGIN |
| 11: begin; |
| BEGIN |
| 12: begin; |
| BEGIN |
| 13: begin; |
| BEGIN |
| 14: begin; |
| BEGIN |
| 15: begin; |
| BEGIN |
| 16: begin; |
| BEGIN |
| 17: begin; |
| BEGIN |
| 18: begin; |
| BEGIN |
| 19: begin; |
| BEGIN |
| 20: begin; |
| BEGIN |
| 21: begin; |
| BEGIN |
| 22: begin; |
| BEGIN |
| |
| 1: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i; |
| INSERT 1000000 |
| 2: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i; |
| INSERT 1000000 |
| 3: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i; |
| INSERT 1000000 |
| 4: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i; |
| INSERT 1000000 |
| 5: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i; |
| INSERT 1000000 |
| 6: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i; |
| INSERT 1000000 |
| 7: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i; |
| INSERT 1000000 |
| 8: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i; |
| INSERT 1000000 |
| 9: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i; |
| INSERT 1000000 |
| 10: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i; |
| INSERT 1000000 |
| 11: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i; |
| INSERT 1000000 |
| 12: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i; |
| INSERT 1000000 |
| 13: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i; |
| INSERT 1000000 |
| 14: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i; |
| INSERT 1000000 |
| 15: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i; |
| INSERT 1000000 |
| 16: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i; |
| INSERT 1000000 |
| 17: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i; |
| INSERT 1000000 |
| 18: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i; |
| INSERT 1000000 |
| 19: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i; |
| INSERT 1000000 |
| 20: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i; |
| INSERT 1000000 |
| 21: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i; |
| INSERT 1000000 |
| 22: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i; |
| INSERT 1000000 |
| |
| 1: commit; |
| COMMIT |
| 2: commit; |
| COMMIT |
| 3: commit; |
| COMMIT |
| 4: commit; |
| COMMIT |
| 5: commit; |
| COMMIT |
| 6: commit; |
| COMMIT |
| 7: commit; |
| COMMIT |
| 8: commit; |
| COMMIT |
| 9: commit; |
| COMMIT |
| 10: commit; |
| COMMIT |
| 11: commit; |
| COMMIT |
| 12: commit; |
| COMMIT |
| 13: commit; |
| COMMIT |
| 14: commit; |
| COMMIT |
| 15: commit; |
| COMMIT |
| 16: commit; |
| COMMIT |
| 17: commit; |
| COMMIT |
| 18: commit; |
| COMMIT |
| 19: commit; |
| COMMIT |
| 20: commit; |
| COMMIT |
| 21: commit; |
| COMMIT |
| 22: commit; |
| COMMIT |
| |
| -- Let's check the total tuple count with id=97 without bitmap index. |
| SELECT count(*) FROM bmupdate WHERE id = 97; |
| count |
| ------- |
| 2200 |
| (1 row) |
| |
| CREATE INDEX idx_bmupdate__id ON bmupdate USING bitmap (id); |
| CREATE |
| |
| -- |
| -- Test 1, run Bitmap Heap Scan on the bitmap index when there's |
| -- backend insert running. |
| -- |
| -- Inject fault after read the first bitmap page when query the table. |
| SELECT gp_inject_fault_infinite('after_read_one_bitmap_idx_page', 'suspend', dbid) FROM gp_segment_configuration where role = 'p' and content = 0; |
| gp_inject_fault_infinite |
| -------------------------- |
| Success: |
| (1 row) |
| |
| -- Inject fault when insert new tid cause rearrange words from current |
| -- bitmap page to next bitmap page. |
| SELECT gp_inject_fault_infinite('rearrange_word_to_next_bitmap_page', 'skip', dbid) FROM gp_segment_configuration where role = 'p' and content = 0; |
| gp_inject_fault_infinite |
| -------------------------- |
| Success: |
| (1 row) |
| |
| 1: set optimizer = off; |
| SET |
| 1: set enable_seqscan=off; |
| SET |
| -- Should generate Bitmap Heap Scan on the bitmap index. |
| 1: EXPLAIN (COSTS OFF) SELECT * FROM bmupdate WHERE id = 97; |
| QUERY PLAN |
| --------------------------------------------------- |
| Gather Motion 1:1 (slice1; segments: 1) |
| -> Bitmap Heap Scan on bmupdate |
| Recheck Cond: (id = 97) |
| -> Bitmap Index Scan on idx_bmupdate__id |
| Index Cond: (id = 97) |
| Optimizer: Postgres query optimizer |
| (6 rows) |
| -- Query should suspend on the first fault injection which finish read the first bitmap page. |
| 1&: SELECT count(*) FROM bmupdate WHERE id = 97; <waiting ...> |
| |
| -- Insert will insert new tid in the first bitmap page and cause the word expand |
| -- and rearrange exceed words to next bitmap page. |
| -- The reason it not insert at the end of bitmap LOV is because right now only one |
| -- transaction doing the insert, and it'll insert to small seg file number. |
| 2: INSERT INTO bmupdate VALUES (97); |
| INSERT 1 |
| |
| -- Query should read the first page(buffer lock released), and then INSERT insert to |
| -- the first page which will trigger rearrange words. |
| SELECT gp_wait_until_triggered_fault('rearrange_word_to_next_bitmap_page', 1, dbid) FROM gp_segment_configuration where role = 'p' and content = 0; |
| gp_wait_until_triggered_fault |
| ------------------------------- |
| Success: |
| (1 row) |
| SELECT gp_inject_fault('rearrange_word_to_next_bitmap_page', 'reset', dbid) FROM gp_segment_configuration where role = 'p' and content = 0; |
| gp_inject_fault |
| ----------------- |
| Success: |
| (1 row) |
| |
| -- Insert triggered rearrange |
| SELECT gp_wait_until_triggered_fault('after_read_one_bitmap_idx_page', 1, dbid) FROM gp_segment_configuration where role = 'p' and content = 0; |
| gp_wait_until_triggered_fault |
| ------------------------------- |
| Success: |
| (1 row) |
| SELECT gp_inject_fault('after_read_one_bitmap_idx_page', 'reset', dbid) FROM gp_segment_configuration where role = 'p' and content = 0; |
| gp_inject_fault |
| ----------------- |
| Success: |
| (1 row) |
| |
| -- Should return the correct tuple count with id=97. It used to raise assertion failure for |
| -- AO tables. This is because the wrong tid transform to an invalud AOTupleId. |
| 1<: <... completed> |
| count |
| ------- |
| 2200 |
| (1 row) |
| |
| -- Let's check the total tuple count after the test. |
| SELECT count(*) FROM bmupdate WHERE id = 97; |
| count |
| ------- |
| 2201 |
| (1 row) |
| |
| -- |
| -- Test 2, run Index Scan on the bitmap index when there's backend |
| -- insert running. |
| -- |
| -- Inject fault after read the first bitmap page when query the table. |
| SELECT gp_inject_fault_infinite('after_read_one_bitmap_idx_page', 'suspend', dbid) FROM gp_segment_configuration where role = 'p' and content = 0; |
| gp_inject_fault_infinite |
| -------------------------- |
| Success: |
| (1 row) |
| |
| -- Inject fault when insert new tid cause rearrange words from current |
| -- bitmap page to next bitmap page. |
| SELECT gp_inject_fault_infinite('rearrange_word_to_next_bitmap_page', 'skip', dbid) FROM gp_segment_configuration where role = 'p' and content = 0; |
| gp_inject_fault_infinite |
| -------------------------- |
| Success: |
| (1 row) |
| |
| 1: set enable_bitmapscan=off; |
| SET |
| -- Should generate Index Scan on the bitmap index. |
| 1: EXPLAIN (COSTS OFF) SELECT * FROM bmupdate WHERE id = 97; |
| QUERY PLAN |
| ----------------------------------------------------- |
| Gather Motion 1:1 (slice1; segments: 1) |
| -> Index Scan using idx_bmupdate__id on bmupdate |
| Index Cond: (id = 97) |
| Optimizer: Postgres query optimizer |
| (4 rows) |
| -- Query should suspend on the first fault injection which finish read the first bitmap page. |
| 1&: SELECT count(*) FROM bmupdate WHERE id = 97; <waiting ...> |
| |
| -- Insert will insert new tid in the first bitmap page and cause the word expand |
| -- and rearrange exceed words to next bitmap page. |
| -- The reason it not insert at the end of bitmap LOV is because right now only one |
| -- transaction doing the insert, and it'll insert to small seg file number. |
| 2: INSERT INTO bmupdate VALUES (97); |
| INSERT 1 |
| |
| -- Query should read the first page(buffer lock released), and then INSERT insert to |
| -- the first page which will trigger rearrange words. |
| SELECT gp_wait_until_triggered_fault('rearrange_word_to_next_bitmap_page', 1, dbid) FROM gp_segment_configuration where role = 'p' and content = 0; |
| gp_wait_until_triggered_fault |
| ------------------------------- |
| Success: |
| (1 row) |
| SELECT gp_inject_fault('rearrange_word_to_next_bitmap_page', 'reset', dbid) FROM gp_segment_configuration where role = 'p' and content = 0; |
| gp_inject_fault |
| ----------------- |
| Success: |
| (1 row) |
| |
| -- Insert triggered rearrange |
| SELECT gp_wait_until_triggered_fault('after_read_one_bitmap_idx_page', 1, dbid) FROM gp_segment_configuration where role = 'p' and content = 0; |
| gp_wait_until_triggered_fault |
| ------------------------------- |
| Success: |
| (1 row) |
| SELECT gp_inject_fault('after_read_one_bitmap_idx_page', 'reset', dbid) FROM gp_segment_configuration where role = 'p' and content = 0; |
| gp_inject_fault |
| ----------------- |
| Success: |
| (1 row) |
| |
| -- Should return the correct tuple count with id=97. It used to raise assertion failure for |
| -- AO tables. This is because the wrong tid transform to an invalud AOTupleId. |
| 1<: <... completed> |
| count |
| ------- |
| 2201 |
| (1 row) |
| |
| -- |
| -- Test 3, run Index Scan on the bitmap index that match multiple keys when there's backend |
| -- insert running. |
| -- |
| -- Let's check the total tuple count before the test. |
| SELECT count(*) FROM bmupdate WHERE id >= 97 and id <= 99 and gp_segment_id = 0; |
| count |
| ------- |
| 4402 |
| (1 row) |
| |
| -- Inject fault after read the first bitmap page when query the table. |
| SELECT gp_inject_fault_infinite('after_read_one_bitmap_idx_page', 'suspend', dbid) FROM gp_segment_configuration where role = 'p' and content = 0; |
| gp_inject_fault_infinite |
| -------------------------- |
| Success: |
| (1 row) |
| |
| -- Inject fault when insert new tid cause rearrange words from current |
| -- bitmap page to next bitmap page. |
| SELECT gp_inject_fault_infinite('rearrange_word_to_next_bitmap_page', 'skip', dbid) FROM gp_segment_configuration where role = 'p' and content = 0; |
| gp_inject_fault_infinite |
| -------------------------- |
| Success: |
| (1 row) |
| |
| -- Should generate Index Scan on the bitmap index that match multiple keys. |
| 1: EXPLAIN (COSTS OFF) SELECT * FROM bmupdate WHERE id >= 97 and id <= 99 and gp_segment_id = 0; |
| QUERY PLAN |
| ----------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Index Scan using idx_bmupdate__id on bmupdate |
| Index Cond: ((id >= 97) AND (id <= 99)) |
| Filter: (gp_segment_id = 0) |
| Optimizer: Postgres query optimizer |
| (5 rows) |
| -- Query should suspend on the first fault injection which finish read the first bitmap page. |
| 1&: SELECT count(*) FROM bmupdate WHERE id >= 97 and id <= 99 and gp_segment_id = 0; <waiting ...> |
| |
| -- Insert will insert new tid in the first bitmap page and cause the word expand |
| -- and rearrange exceed words to next bitmap page. |
| -- The reason it not insert at the end of bitmap LOV is because right now only one |
| -- transaction doing the insert, and it'll insert to small seg file number. |
| -- Here insert both values to make sure update on full bitmap happens for one LOV. |
| 2: INSERT INTO bmupdate VALUES (97); |
| INSERT 1 |
| 2: INSERT INTO bmupdate VALUES (99); |
| INSERT 1 |
| |
| -- Query should read the first page(buffer lock released), and then INSERT insert to |
| -- the first page which will trigger rearrange words. |
| SELECT gp_wait_until_triggered_fault('rearrange_word_to_next_bitmap_page', 1, dbid) FROM gp_segment_configuration where role = 'p' and content = 0; |
| gp_wait_until_triggered_fault |
| ------------------------------- |
| Success: |
| (1 row) |
| SELECT gp_inject_fault('rearrange_word_to_next_bitmap_page', 'reset', dbid) FROM gp_segment_configuration where role = 'p' and content = 0; |
| gp_inject_fault |
| ----------------- |
| Success: |
| (1 row) |
| |
| -- Insert triggered rearrange |
| SELECT gp_wait_until_triggered_fault('after_read_one_bitmap_idx_page', 1, dbid) FROM gp_segment_configuration where role = 'p' and content = 0; |
| gp_wait_until_triggered_fault |
| ------------------------------- |
| Success: |
| (1 row) |
| SELECT gp_inject_fault('after_read_one_bitmap_idx_page', 'reset', dbid) FROM gp_segment_configuration where role = 'p' and content = 0; |
| gp_inject_fault |
| ----------------- |
| Success: |
| (1 row) |
| |
| -- Should return the correct tuple count with id=97. It used to raise assertion failure for |
| -- AO tables. This is because the wrong tid transform to an invalud AOTupleId. |
| 1<: <... completed> |
| count |
| ------- |
| 4402 |
| (1 row) |
| |
| -- Let's check the total tuple count after the test. |
| SELECT count(*) FROM bmupdate WHERE id >= 97 and id <= 99 and gp_segment_id = 0; |
| count |
| ------- |
| 4404 |
| (1 row) |
| |
| -- |
| -- Test 4, run Bitmap Heap Scan on the bitmap index that match multiple keys when there's backend |
| -- insert running. |
| -- |
| |
| -- Inject fault after read the first bitmap page when query the table. |
| SELECT gp_inject_fault_infinite('after_read_one_bitmap_idx_page', 'suspend', dbid) FROM gp_segment_configuration where role = 'p' and content = 0; |
| gp_inject_fault_infinite |
| -------------------------- |
| Success: |
| (1 row) |
| |
| -- Inject fault when insert new tid cause rearrange words from current |
| -- bitmap page to next bitmap page. |
| SELECT gp_inject_fault_infinite('rearrange_word_to_next_bitmap_page', 'skip', dbid) FROM gp_segment_configuration where role = 'p' and content = 0; |
| gp_inject_fault_infinite |
| -------------------------- |
| Success: |
| (1 row) |
| |
| 1: set enable_bitmapscan=on; |
| SET |
| -- Should generate Bitmap HEAP Scan on the bitmap index that match multiple keys. |
| 1: EXPLAIN (COSTS OFF) SELECT * FROM bmupdate WHERE id >= 97 and id <= 99 and gp_segment_id = 0; |
| QUERY PLAN |
| ------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Bitmap Heap Scan on bmupdate |
| Recheck Cond: ((id >= 97) AND (id <= 99)) |
| Filter: (gp_segment_id = 0) |
| -> Bitmap Index Scan on idx_bmupdate__id |
| Index Cond: ((id >= 97) AND (id <= 99)) |
| Optimizer: Postgres query optimizer |
| (7 rows) |
| -- Query should suspend on the first fault injection which finish read the first bitmap page. |
| 1&: SELECT count(*) FROM bmupdate WHERE id >= 97 and id <= 99 and gp_segment_id = 0; <waiting ...> |
| |
| -- Insert will insert new tid in the first bitmap page and cause the word expand |
| -- and rearrange exceed words to next bitmap page. |
| -- The reason it not insert at the end of bitmap LOV is because right now only one |
| -- transaction doing the insert, and it'll insert to small seg file number. |
| -- Here insert both values to make sure update on full bitmap happens for one LOV. |
| 2: INSERT INTO bmupdate SELECT 97 FROM generate_series(1, 1000); |
| INSERT 1000 |
| 2: INSERT INTO bmupdate SELECT 99 FROM generate_series(1, 1000); |
| INSERT 1000 |
| |
| -- Query should read the first page(buffer lock released), and then INSERT insert to |
| -- the first page which will trigger rearrange words. |
| SELECT gp_wait_until_triggered_fault('rearrange_word_to_next_bitmap_page', 1, dbid) FROM gp_segment_configuration where role = 'p' and content = 0; |
| gp_wait_until_triggered_fault |
| ------------------------------- |
| Success: |
| (1 row) |
| SELECT gp_inject_fault('rearrange_word_to_next_bitmap_page', 'reset', dbid) FROM gp_segment_configuration where role = 'p' and content = 0; |
| gp_inject_fault |
| ----------------- |
| Success: |
| (1 row) |
| |
| -- Insert triggered rearrange |
| SELECT gp_wait_until_triggered_fault('after_read_one_bitmap_idx_page', 1, dbid) FROM gp_segment_configuration where role = 'p' and content = 0; |
| gp_wait_until_triggered_fault |
| ------------------------------- |
| Success: |
| (1 row) |
| SELECT gp_inject_fault('after_read_one_bitmap_idx_page', 'reset', dbid) FROM gp_segment_configuration where role = 'p' and content = 0; |
| gp_inject_fault |
| ----------------- |
| Success: |
| (1 row) |
| |
| -- Should return the correct tuple count with id=97. It used to raise assertion failure for |
| -- AO tables. This is because the wrong tid transform to an invalud AOTupleId. |
| 1<: <... completed> |
| count |
| ------- |
| 4404 |
| (1 row) |
| |
| -- Let's check the total tuple count after the test. |
| SELECT count(*) FROM bmupdate WHERE id >= 97 and id <= 99 and gp_segment_id = 0; |
| count |
| ------- |
| 6404 |
| (1 row) |
| |
| DROP TABLE bmupdate; |
| DROP |
| |