blob: 2a7d0e8a0e3ee0cffc979382f0427f8352959ee1 [file] [log] [blame]
--
-- Concurrent scan on bitmap index when there's insert running in the backend
-- may cause the bitmap scan read wrong tid.
-- If a LOV has multiple bitmap pages, and the index insert tries to insert a tid
-- into a compressed word on a full bitmap page(Let's call the page `PAGE_FULL`).
-- Then it'll try to find free space on next bitmap page(Let's call the page `PAGE_NEXT`)
-- and rearrange the words and copy extra words into the next bitmap page.
-- So when the above insertion happens, imagine below case:
-- 1. Query on bitmap: A query starts and reads all bitmap pages to `PAGE_FULL`, increase
-- next tid to fetch, release lock after reading each page.
-- 2. Concurrent insert: insert a tid into `PAGE_FULL` cause expand compressed words to
-- new words, and rearrange words into `PAGE_NEXT`.
-- 3. Query on bitmap: fetch `PAGE_NEXT` and expect the first tid in it should equal the
-- saved next tid. But actually `PAGE_NEXT` now contains words used to belong in `PAGE_FULL`.
-- This causes the real next tid less than the expected next tid. But our scan keeps increasing
-- the wrong tid. And then this leads to a wrong result.
-- This related to issue: https://github.com/greenplum-db/gpdb/issues/11308
--
-- Here we use append optimized table to speed up create full bitmap pages
-- since each transaction use different seg file number. And ao table's AOTupleId
-- is composed of (seg file number, row number). So this will lead to lots of
-- compressed words in the first bitmap page.
-- With the below transacions in each session, on seg0, the bitmap for id=97
-- will generate two bitmap pages, and the first page is a full page.
-- Use heap table, delete tuples and then vacuum should be the same. But it needs huge tuples.
CREATE TABLE bmupdate (id int) with(appendonly = true) DISTRIBUTED BY (id);
CREATE
1: begin;
BEGIN
2: begin;
BEGIN
3: begin;
BEGIN
4: begin;
BEGIN
5: begin;
BEGIN
6: begin;
BEGIN
7: begin;
BEGIN
8: begin;
BEGIN
9: begin;
BEGIN
10: begin;
BEGIN
11: begin;
BEGIN
12: begin;
BEGIN
13: begin;
BEGIN
14: begin;
BEGIN
15: begin;
BEGIN
16: begin;
BEGIN
17: begin;
BEGIN
18: begin;
BEGIN
19: begin;
BEGIN
20: begin;
BEGIN
21: begin;
BEGIN
22: begin;
BEGIN
1: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i;
INSERT 1000000
2: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i;
INSERT 1000000
3: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i;
INSERT 1000000
4: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i;
INSERT 1000000
5: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i;
INSERT 1000000
6: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i;
INSERT 1000000
7: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i;
INSERT 1000000
8: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i;
INSERT 1000000
9: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i;
INSERT 1000000
10: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i;
INSERT 1000000
11: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i;
INSERT 1000000
12: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i;
INSERT 1000000
13: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i;
INSERT 1000000
14: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i;
INSERT 1000000
15: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i;
INSERT 1000000
16: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i;
INSERT 1000000
17: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i;
INSERT 1000000
18: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i;
INSERT 1000000
19: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i;
INSERT 1000000
20: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i;
INSERT 1000000
21: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i;
INSERT 1000000
22: INSERT INTO bmupdate SELECT i%10000 FROM generate_series(1, 1000000) AS i;
INSERT 1000000
1: commit;
COMMIT
2: commit;
COMMIT
3: commit;
COMMIT
4: commit;
COMMIT
5: commit;
COMMIT
6: commit;
COMMIT
7: commit;
COMMIT
8: commit;
COMMIT
9: commit;
COMMIT
10: commit;
COMMIT
11: commit;
COMMIT
12: commit;
COMMIT
13: commit;
COMMIT
14: commit;
COMMIT
15: commit;
COMMIT
16: commit;
COMMIT
17: commit;
COMMIT
18: commit;
COMMIT
19: commit;
COMMIT
20: commit;
COMMIT
21: commit;
COMMIT
22: commit;
COMMIT
-- Let's check the total tuple count with id=97 without bitmap index.
SELECT count(*) FROM bmupdate WHERE id = 97;
count
-------
2200
(1 row)
CREATE INDEX idx_bmupdate__id ON bmupdate USING bitmap (id);
CREATE
--
-- Test 1, run Bitmap Heap Scan on the bitmap index when there's
-- backend insert running.
--
-- Inject fault after read the first bitmap page when query the table.
SELECT gp_inject_fault_infinite('after_read_one_bitmap_idx_page', 'suspend', dbid) FROM gp_segment_configuration where role = 'p' and content = 0;
gp_inject_fault_infinite
--------------------------
Success:
(1 row)
-- Inject fault when insert new tid cause rearrange words from current
-- bitmap page to next bitmap page.
SELECT gp_inject_fault_infinite('rearrange_word_to_next_bitmap_page', 'skip', dbid) FROM gp_segment_configuration where role = 'p' and content = 0;
gp_inject_fault_infinite
--------------------------
Success:
(1 row)
1: set optimizer = off;
SET
1: set enable_seqscan=off;
SET
-- Should generate Bitmap Heap Scan on the bitmap index.
1: EXPLAIN (COSTS OFF) SELECT * FROM bmupdate WHERE id = 97;
QUERY PLAN
---------------------------------------------------
Gather Motion 1:1 (slice1; segments: 1)
-> Bitmap Heap Scan on bmupdate
Recheck Cond: (id = 97)
-> Bitmap Index Scan on idx_bmupdate__id
Index Cond: (id = 97)
Optimizer: Postgres query optimizer
(6 rows)
-- Query should suspend on the first fault injection which finish read the first bitmap page.
1&: SELECT count(*) FROM bmupdate WHERE id = 97; <waiting ...>
-- Insert will insert new tid in the first bitmap page and cause the word expand
-- and rearrange exceed words to next bitmap page.
-- The reason it not insert at the end of bitmap LOV is because right now only one
-- transaction doing the insert, and it'll insert to small seg file number.
2: INSERT INTO bmupdate VALUES (97);
INSERT 1
-- Query should read the first page(buffer lock released), and then INSERT insert to
-- the first page which will trigger rearrange words.
SELECT gp_wait_until_triggered_fault('rearrange_word_to_next_bitmap_page', 1, dbid) FROM gp_segment_configuration where role = 'p' and content = 0;
gp_wait_until_triggered_fault
-------------------------------
Success:
(1 row)
SELECT gp_inject_fault('rearrange_word_to_next_bitmap_page', 'reset', dbid) FROM gp_segment_configuration where role = 'p' and content = 0;
gp_inject_fault
-----------------
Success:
(1 row)
-- Insert triggered rearrange
SELECT gp_wait_until_triggered_fault('after_read_one_bitmap_idx_page', 1, dbid) FROM gp_segment_configuration where role = 'p' and content = 0;
gp_wait_until_triggered_fault
-------------------------------
Success:
(1 row)
SELECT gp_inject_fault('after_read_one_bitmap_idx_page', 'reset', dbid) FROM gp_segment_configuration where role = 'p' and content = 0;
gp_inject_fault
-----------------
Success:
(1 row)
-- Should return the correct tuple count with id=97. It used to raise assertion failure for
-- AO tables. This is because the wrong tid transform to an invalud AOTupleId.
1<: <... completed>
count
-------
2200
(1 row)
-- Let's check the total tuple count after the test.
SELECT count(*) FROM bmupdate WHERE id = 97;
count
-------
2201
(1 row)
--
-- Test 2, run Index Scan on the bitmap index when there's backend
-- insert running.
--
-- Inject fault after read the first bitmap page when query the table.
SELECT gp_inject_fault_infinite('after_read_one_bitmap_idx_page', 'suspend', dbid) FROM gp_segment_configuration where role = 'p' and content = 0;
gp_inject_fault_infinite
--------------------------
Success:
(1 row)
-- Inject fault when insert new tid cause rearrange words from current
-- bitmap page to next bitmap page.
SELECT gp_inject_fault_infinite('rearrange_word_to_next_bitmap_page', 'skip', dbid) FROM gp_segment_configuration where role = 'p' and content = 0;
gp_inject_fault_infinite
--------------------------
Success:
(1 row)
1: set enable_bitmapscan=off;
SET
-- Should generate Index Scan on the bitmap index.
1: EXPLAIN (COSTS OFF) SELECT * FROM bmupdate WHERE id = 97;
QUERY PLAN
-----------------------------------------------------
Gather Motion 1:1 (slice1; segments: 1)
-> Index Scan using idx_bmupdate__id on bmupdate
Index Cond: (id = 97)
Optimizer: Postgres query optimizer
(4 rows)
-- Query should suspend on the first fault injection which finish read the first bitmap page.
1&: SELECT count(*) FROM bmupdate WHERE id = 97; <waiting ...>
-- Insert will insert new tid in the first bitmap page and cause the word expand
-- and rearrange exceed words to next bitmap page.
-- The reason it not insert at the end of bitmap LOV is because right now only one
-- transaction doing the insert, and it'll insert to small seg file number.
2: INSERT INTO bmupdate VALUES (97);
INSERT 1
-- Query should read the first page(buffer lock released), and then INSERT insert to
-- the first page which will trigger rearrange words.
SELECT gp_wait_until_triggered_fault('rearrange_word_to_next_bitmap_page', 1, dbid) FROM gp_segment_configuration where role = 'p' and content = 0;
gp_wait_until_triggered_fault
-------------------------------
Success:
(1 row)
SELECT gp_inject_fault('rearrange_word_to_next_bitmap_page', 'reset', dbid) FROM gp_segment_configuration where role = 'p' and content = 0;
gp_inject_fault
-----------------
Success:
(1 row)
-- Insert triggered rearrange
SELECT gp_wait_until_triggered_fault('after_read_one_bitmap_idx_page', 1, dbid) FROM gp_segment_configuration where role = 'p' and content = 0;
gp_wait_until_triggered_fault
-------------------------------
Success:
(1 row)
SELECT gp_inject_fault('after_read_one_bitmap_idx_page', 'reset', dbid) FROM gp_segment_configuration where role = 'p' and content = 0;
gp_inject_fault
-----------------
Success:
(1 row)
-- Should return the correct tuple count with id=97. It used to raise assertion failure for
-- AO tables. This is because the wrong tid transform to an invalud AOTupleId.
1<: <... completed>
count
-------
2201
(1 row)
--
-- Test 3, run Index Scan on the bitmap index that match multiple keys when there's backend
-- insert running.
--
-- Let's check the total tuple count before the test.
SELECT count(*) FROM bmupdate WHERE id >= 97 and id <= 99 and gp_segment_id = 0;
count
-------
4402
(1 row)
-- Inject fault after read the first bitmap page when query the table.
SELECT gp_inject_fault_infinite('after_read_one_bitmap_idx_page', 'suspend', dbid) FROM gp_segment_configuration where role = 'p' and content = 0;
gp_inject_fault_infinite
--------------------------
Success:
(1 row)
-- Inject fault when insert new tid cause rearrange words from current
-- bitmap page to next bitmap page.
SELECT gp_inject_fault_infinite('rearrange_word_to_next_bitmap_page', 'skip', dbid) FROM gp_segment_configuration where role = 'p' and content = 0;
gp_inject_fault_infinite
--------------------------
Success:
(1 row)
-- Should generate Index Scan on the bitmap index that match multiple keys.
1: EXPLAIN (COSTS OFF) SELECT * FROM bmupdate WHERE id >= 97 and id <= 99 and gp_segment_id = 0;
QUERY PLAN
-----------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Index Scan using idx_bmupdate__id on bmupdate
Index Cond: ((id >= 97) AND (id <= 99))
Filter: (gp_segment_id = 0)
Optimizer: Postgres query optimizer
(5 rows)
-- Query should suspend on the first fault injection which finish read the first bitmap page.
1&: SELECT count(*) FROM bmupdate WHERE id >= 97 and id <= 99 and gp_segment_id = 0; <waiting ...>
-- Insert will insert new tid in the first bitmap page and cause the word expand
-- and rearrange exceed words to next bitmap page.
-- The reason it not insert at the end of bitmap LOV is because right now only one
-- transaction doing the insert, and it'll insert to small seg file number.
-- Here insert both values to make sure update on full bitmap happens for one LOV.
2: INSERT INTO bmupdate VALUES (97);
INSERT 1
2: INSERT INTO bmupdate VALUES (99);
INSERT 1
-- Query should read the first page(buffer lock released), and then INSERT insert to
-- the first page which will trigger rearrange words.
SELECT gp_wait_until_triggered_fault('rearrange_word_to_next_bitmap_page', 1, dbid) FROM gp_segment_configuration where role = 'p' and content = 0;
gp_wait_until_triggered_fault
-------------------------------
Success:
(1 row)
SELECT gp_inject_fault('rearrange_word_to_next_bitmap_page', 'reset', dbid) FROM gp_segment_configuration where role = 'p' and content = 0;
gp_inject_fault
-----------------
Success:
(1 row)
-- Insert triggered rearrange
SELECT gp_wait_until_triggered_fault('after_read_one_bitmap_idx_page', 1, dbid) FROM gp_segment_configuration where role = 'p' and content = 0;
gp_wait_until_triggered_fault
-------------------------------
Success:
(1 row)
SELECT gp_inject_fault('after_read_one_bitmap_idx_page', 'reset', dbid) FROM gp_segment_configuration where role = 'p' and content = 0;
gp_inject_fault
-----------------
Success:
(1 row)
-- Should return the correct tuple count with id=97. It used to raise assertion failure for
-- AO tables. This is because the wrong tid transform to an invalud AOTupleId.
1<: <... completed>
count
-------
4402
(1 row)
-- Let's check the total tuple count after the test.
SELECT count(*) FROM bmupdate WHERE id >= 97 and id <= 99 and gp_segment_id = 0;
count
-------
4404
(1 row)
--
-- Test 4, run Bitmap Heap Scan on the bitmap index that match multiple keys when there's backend
-- insert running.
--
-- Inject fault after read the first bitmap page when query the table.
SELECT gp_inject_fault_infinite('after_read_one_bitmap_idx_page', 'suspend', dbid) FROM gp_segment_configuration where role = 'p' and content = 0;
gp_inject_fault_infinite
--------------------------
Success:
(1 row)
-- Inject fault when insert new tid cause rearrange words from current
-- bitmap page to next bitmap page.
SELECT gp_inject_fault_infinite('rearrange_word_to_next_bitmap_page', 'skip', dbid) FROM gp_segment_configuration where role = 'p' and content = 0;
gp_inject_fault_infinite
--------------------------
Success:
(1 row)
1: set enable_bitmapscan=on;
SET
-- Should generate Bitmap HEAP Scan on the bitmap index that match multiple keys.
1: EXPLAIN (COSTS OFF) SELECT * FROM bmupdate WHERE id >= 97 and id <= 99 and gp_segment_id = 0;
QUERY PLAN
-------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Bitmap Heap Scan on bmupdate
Recheck Cond: ((id >= 97) AND (id <= 99))
Filter: (gp_segment_id = 0)
-> Bitmap Index Scan on idx_bmupdate__id
Index Cond: ((id >= 97) AND (id <= 99))
Optimizer: Postgres query optimizer
(7 rows)
-- Query should suspend on the first fault injection which finish read the first bitmap page.
1&: SELECT count(*) FROM bmupdate WHERE id >= 97 and id <= 99 and gp_segment_id = 0; <waiting ...>
-- Insert will insert new tid in the first bitmap page and cause the word expand
-- and rearrange exceed words to next bitmap page.
-- The reason it not insert at the end of bitmap LOV is because right now only one
-- transaction doing the insert, and it'll insert to small seg file number.
-- Here insert both values to make sure update on full bitmap happens for one LOV.
2: INSERT INTO bmupdate SELECT 97 FROM generate_series(1, 1000);
INSERT 1000
2: INSERT INTO bmupdate SELECT 99 FROM generate_series(1, 1000);
INSERT 1000
-- Query should read the first page(buffer lock released), and then INSERT insert to
-- the first page which will trigger rearrange words.
SELECT gp_wait_until_triggered_fault('rearrange_word_to_next_bitmap_page', 1, dbid) FROM gp_segment_configuration where role = 'p' and content = 0;
gp_wait_until_triggered_fault
-------------------------------
Success:
(1 row)
SELECT gp_inject_fault('rearrange_word_to_next_bitmap_page', 'reset', dbid) FROM gp_segment_configuration where role = 'p' and content = 0;
gp_inject_fault
-----------------
Success:
(1 row)
-- Insert triggered rearrange
SELECT gp_wait_until_triggered_fault('after_read_one_bitmap_idx_page', 1, dbid) FROM gp_segment_configuration where role = 'p' and content = 0;
gp_wait_until_triggered_fault
-------------------------------
Success:
(1 row)
SELECT gp_inject_fault('after_read_one_bitmap_idx_page', 'reset', dbid) FROM gp_segment_configuration where role = 'p' and content = 0;
gp_inject_fault
-----------------
Success:
(1 row)
-- Should return the correct tuple count with id=97. It used to raise assertion failure for
-- AO tables. This is because the wrong tid transform to an invalud AOTupleId.
1<: <... completed>
count
-------
4404
(1 row)
-- Let's check the total tuple count after the test.
SELECT count(*) FROM bmupdate WHERE id >= 97 and id <= 99 and gp_segment_id = 0;
count
-------
6404
(1 row)
DROP TABLE bmupdate;
DROP