| /*------------------------------------------------------------------------- |
| * |
| * bitmapinsert.c |
| * Tuple insertion in the on-disk bitmap index. |
| * |
| * Portions Copyright (c) 2007-2010 Greenplum Inc |
| * Portions Copyright (c) 2010-2012 EMC Corporation |
| * Portions Copyright (c) 2012-Present VMware, Inc. or its affiliates. |
| * Portions Copyright (c) 2006-2008, PostgreSQL Global Development Group |
| * |
| * |
| * IDENTIFICATION |
| * src/backend/access/bitmap/bitmapinsert.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| |
| #include "postgres.h" |
| #include "miscadmin.h" |
| |
| #include "access/genam.h" |
| #include "access/tupdesc.h" |
| #include "access/heapam.h" |
| #include "access/bitmap.h" |
| #include "access/bitmap_private.h" |
| #include "access/bitmap_xlog.h" |
| #include "access/transam.h" |
| #include "parser/parse_oper.h" |
| #include "storage/bufmgr.h" |
| #include "utils/builtins.h" |
| #include "utils/datum.h" |
| #include "utils/guc.h" |
| #include "utils/lsyscache.h" |
| #include "utils/snapmgr.h" |
| #include "utils/faultinjector.h" |
| |
| /* |
| * The following structure along with BMTIDBuffer are used to buffer |
| * words for tids * during index create -- bmbuild(). |
| */ |
| |
| /* |
| * BMTIDLOVBuffer represents those bitmap vectors whose LOV item would be |
| * stored on the specified lov_block. The array bufs stores the TIDs for |
| * a distinct vector (see above). The index of the array we're up to tell |
| * us the offset number of the LOV item on the lov_block. |
| */ |
| |
| typedef struct BMTIDLOVBuffer |
| { |
| BlockNumber lov_block; |
| BMTIDBuffer *bufs[BM_MAX_LOVITEMS_PER_PAGE]; |
| } BMTIDLOVBuffer; |
| |
| static void _bitmap_write_new_bitmapwords(Relation rel, |
| Buffer lovBuffer, OffsetNumber lovOffset, |
| BMTIDBuffer* buf, bool use_wal); |
| static uint64 _bitmap_write_bitmapwords_on_page(Page bitmapPage, |
| BMTIDBuffer *buf, int startWordNo, |
| xl_bm_bitmapwords_perpage *xlrec_perpage); |
| static void create_lovitem(Relation rel, Buffer metabuf, uint64 tidnum, |
| TupleDesc tupDesc, |
| Datum *attdata, bool *nulls, |
| Relation lovHeap, Relation lovIndex, |
| BlockNumber *lovBlockP, |
| OffsetNumber *lovOffsetP, bool use_wal); |
| static void build_inserttuple(Relation rel, uint64 tidnum, |
| ItemPointerData ht_ctid, TupleDesc tupDesc, |
| Datum *attdata, bool *nulls, BMBuildState *state); |
| static void inserttuple(Relation rel, Buffer metabuf, |
| uint64 tidnum, ItemPointerData ht_ctid, |
| TupleDesc tupDesc, Datum* attdata, |
| bool *nulls, Relation lovHeap, |
| Relation lovIndex, ScanKey scanKey, |
| IndexScanDesc scanDesc, bool use_wal); |
| static void updatesetbit(Relation rel, |
| Buffer lovBuffer, OffsetNumber lovOffset, |
| uint64 tidnum, bool use_wal); |
| static void updatesetbit_inword(BM_HRL_WORD word, uint64 updateBitLoc, |
| uint64 firstTid, BMTIDBuffer* buf); |
| static void updatesetbit_inpage(Relation rel, uint64 tidnum, |
| Buffer lovBuffer, OffsetNumber lovOffset, |
| Buffer bitmapBuffer, uint64 firstTidNumber, |
| bool use_wal); |
| static void insertsetbit(Relation rel, BlockNumber lovBlock, OffsetNumber lovOffset, |
| uint64 tidnum, BMTIDBuffer *buf, bool use_wal); |
| static void findbitmappage(Relation rel, BMLOVItem lovitem, |
| uint64 tidnum, |
| Buffer *bitmapBufferP, uint64 *firstTidNumberP); |
| static void shift_header_bits(BM_HRL_WORD *words, uint32 numOfBits, |
| uint32 maxNumOfWords, uint32 startLoc, |
| uint32 numOfShiftingBits); |
| static void rshift_header_bits(BM_HRL_WORD *words, uint64 nwords, |
| uint32 bits); |
| static void lshift_header_bits(BM_HRL_WORD *words, uint64 nwords, |
| uint32 bits); |
| static void insert_newwords(BMTIDBuffer* words, uint32 insertPos, |
| BMTIDBuffer* new_words, BMTIDBuffer* words_left); |
| static int16 mergewords(BMTIDBuffer* buf, bool lastWordFill); |
| static void buf_make_space(Relation rel, |
| BMTidBuildBuf *tidLocsBuffer, bool use_wal); |
| static void verify_bitmappages(Relation rel, BMLOVItem lovitem); |
| static int16 buf_add_tid_with_fill(Relation rel, BMTIDBuffer *buf, |
| Buffer lovBuffer, OffsetNumber off, |
| uint64 tidnum, bool use_wal); |
| static uint16 buf_extend(BMTIDBuffer *buf); |
| static uint16 buf_ensure_head_space(Relation rel, BMTIDBuffer *buf, |
| Buffer lovBuffer, OffsetNumber off, |
| bool use_wal); |
| static uint16 buf_free_mem_block(Relation rel, BMTIDBuffer *buf, |
| Buffer lovBuffer, OffsetNumber off, |
| bool use_wal); |
| static uint16 buf_free_mem(Relation rel, BMTIDBuffer *buf, |
| BlockNumber lov_block, OffsetNumber off, |
| bool use_wal); |
| static uint16 _bitmap_free_tidbuf(BMTIDBuffer* buf); |
| |
| #define BUF_INIT_WORDS 8 /* as good a point as any */ |
| |
| /* |
| * updatesetbit() -- update a set bit in a bitmap. |
| * |
| * This function finds the bit in a given bitmap vector whose bit location is |
| * equal to tidnum, and changes this bit to 1. |
| * |
| * If this bit is already 1, then we are done. Otherwise, there are |
| * two possibilities: |
| * (1) This bit appears in a literal word. In this case, we simply change |
| * it to 1. |
| * (2) This bit appears in a fill word with bit 0. In this case, this word |
| * may generate two or three words after changing the corresponding bit |
| * to 1, depending on the position of this bit. |
| * |
| * Case (2) will make the corresponding bitmap page to grow. The words after |
| * this affected word in this bitmap page are shifted right to accommodate |
| * the newly generated words. If this bitmap page does not have enough space |
| * to hold all these words, the last few words will be shifted out of this |
| * page. In this case, the next bitmap page is checked to see if there are |
| * enough space for these extra words. If so, these extra words are inserted |
| * into the next page. Otherwise, we create a new bitmap page to hold |
| * these extra words. |
| */ |
| static void |
| updatesetbit(Relation rel, Buffer lovBuffer, OffsetNumber lovOffset, |
| uint64 tidnum, bool use_wal) |
| { |
| Page lovPage; |
| BMLOVItem lovItem; |
| |
| uint64 tidLocation; |
| uint16 insertingPos; |
| |
| uint64 firstTidNumber = 1; |
| Buffer bitmapBuffer = InvalidBuffer; |
| |
| lovPage = BufferGetPage(lovBuffer); |
| lovItem = (BMLOVItem) PageGetItem(lovPage, |
| PageGetItemId(lovPage, lovOffset)); |
| |
| /* Calculate the tid location in the last bitmap page. */ |
| tidLocation = lovItem->bm_last_tid_location; |
| if (BM_LAST_COMPWORD_IS_FILL(lovItem)) |
| tidLocation -= (FILL_LENGTH(lovItem->bm_last_compword) * |
| BM_HRL_WORD_SIZE); |
| else |
| tidLocation -= BM_HRL_WORD_SIZE; |
| |
| /* |
| * If tidnum is in either bm_last_compword or bm_last_word, |
| * and this does not generate any new words, we simply |
| * need to update the lov item. |
| */ |
| if ((tidnum > lovItem->bm_last_tid_location) || |
| ((tidnum > tidLocation) && |
| ((lovItem->lov_words_header == 0) || |
| (FILL_LENGTH(lovItem->bm_last_compword) == 1)))) |
| { |
| START_CRIT_SECTION(); |
| |
| MarkBufferDirty(lovBuffer); |
| |
| if (tidnum > lovItem->bm_last_tid_location) /* bm_last_word */ |
| { |
| insertingPos = (tidnum-1)%BM_HRL_WORD_SIZE; |
| lovItem->bm_last_word |= (((BM_HRL_WORD)1)<<insertingPos); |
| |
| if (Debug_bitmap_print_insert) |
| elog(LOG, "Bitmap Insert: updated a set bit in lovItem->bm_last_word" |
| " pos %d" |
| ", lovBlock=%d, lovOffset=%d" |
| ", tidnum=" INT64_FORMAT, |
| insertingPos, |
| BufferGetBlockNumber(lovBuffer), |
| lovOffset, |
| tidnum); |
| } |
| else /* bm_last_compword */ |
| { |
| if (BM_LAST_COMPWORD_IS_FILL(lovItem)) |
| { |
| if (GET_FILL_BIT(lovItem->bm_last_compword) == 1) |
| lovItem->bm_last_compword = LITERAL_ALL_ONE; |
| else |
| lovItem->bm_last_compword = 0; |
| } |
| |
| insertingPos = (tidnum - 1) % BM_HRL_WORD_SIZE; |
| lovItem->bm_last_compword |= (((BM_HRL_WORD)1) << insertingPos); |
| if (lovItem->bm_last_compword == LITERAL_ALL_ONE) |
| { |
| lovItem->lov_words_header = 2; |
| lovItem->bm_last_compword = BM_MAKE_FILL_WORD(1, 1); |
| } |
| else |
| lovItem->lov_words_header = 0; |
| |
| if (Debug_bitmap_print_insert) |
| elog(LOG, "Bitmap Insert: updated a set bit in lovItem->bm_last_compword" |
| " pos %d" |
| ", lovBlock=%d, lovOffset=%d" |
| ", tidnum=" INT64_FORMAT, |
| insertingPos, |
| BufferGetBlockNumber(lovBuffer), |
| lovOffset, |
| tidnum); |
| } |
| |
| if (use_wal) |
| _bitmap_log_bitmap_lastwords(rel, lovBuffer, lovOffset, lovItem); |
| |
| END_CRIT_SECTION(); |
| |
| return; |
| } |
| |
| /* |
| * Here, if tidnum is still in bm_last_compword, we know that |
| * bm_last_compword is a fill zero words with fill length greater |
| * than 1. This update will generate new words, we insert new words |
| * into the last bitmap page and update the lov item. |
| */ |
| if ((tidnum > tidLocation) && (lovItem->lov_words_header >= 2)) |
| { |
| /* |
| * We know that bm_last_compwords will be split into two |
| * or three words, depending on the splitting position. |
| */ |
| BMTIDBuffer buf; |
| MemSet(&buf, 0, sizeof(buf)); |
| buf_extend(&buf); |
| |
| updatesetbit_inword(lovItem->bm_last_compword, |
| tidnum - tidLocation - 1, |
| tidLocation + 1, &buf); |
| |
| /* set the last_compword and last_word */ |
| buf.last_compword = buf.cwords[buf.curword-1]; |
| buf.is_last_compword_fill = IS_FILL_WORD(buf.hwords, buf.curword-1); |
| buf.curword--; |
| buf.last_word = lovItem->bm_last_word; |
| buf.last_tid = lovItem->bm_last_setbit; |
| _bitmap_write_new_bitmapwords(rel, lovBuffer, lovOffset, |
| &buf, use_wal); |
| |
| if (Debug_bitmap_print_insert) |
| verify_bitmappages(rel, lovItem); |
| |
| _bitmap_free_tidbuf(&buf); |
| |
| return; |
| } |
| |
| /* |
| * Now, tidnum is in the middle of the bitmap vector. |
| * We try to find the bitmap page that contains this bit, |
| * and update the bit. |
| */ |
| /* find the page that contains this bit. */ |
| findbitmappage(rel, lovItem, tidnum, |
| &bitmapBuffer, &firstTidNumber); |
| |
| updatesetbit_inpage(rel, tidnum, lovBuffer, lovOffset, |
| bitmapBuffer, firstTidNumber, use_wal); |
| |
| _bitmap_relbuf(bitmapBuffer); |
| |
| if (Debug_bitmap_print_insert) |
| verify_bitmappages(rel, lovItem); |
| } |
| |
| /* |
| * updatesetbit_inword() -- update the given bit to 1 in a given |
| * word. |
| * |
| * The given word will generate at most three new words, depending on |
| * the position of the given bit to be updated. Make sure that the |
| * array 'words' has the size of 3 when you call this function. All new |
| * words will be put in this array, and the final number of new words is |
| * stored in '*numWordsP'. The bit location 'updateBitLoc' is relative to |
| * the beginning of the given word, starting from 0. |
| * |
| * We assume that word is a fill zero word. |
| */ |
| static void |
| updatesetbit_inword(BM_HRL_WORD word, uint64 updateBitLoc, |
| uint64 firstTid, BMTIDBuffer *buf) |
| { |
| uint64 numBits, usedNumBits; |
| uint16 insertingPos; |
| |
| Assert(updateBitLoc < BM_HRL_WORD_SIZE*FILL_LENGTH(word)); |
| |
| numBits = FILL_LENGTH(word) * BM_HRL_WORD_SIZE; |
| usedNumBits = 0; |
| if (updateBitLoc >= BM_HRL_WORD_SIZE) |
| { |
| firstTid += (updateBitLoc/BM_HRL_WORD_SIZE) * BM_HRL_WORD_SIZE; |
| buf->cwords[buf->curword] = |
| BM_MAKE_FILL_WORD(0, updateBitLoc/BM_HRL_WORD_SIZE); |
| buf->last_tids[buf->curword] = firstTid - 1; |
| buf->curword++; |
| buf_extend(buf); |
| buf->hwords[(buf->curword-1)/BM_HRL_WORD_SIZE] |= |
| (((BM_HRL_WORD)1)<<(BM_HRL_WORD_SIZE - buf->curword)); |
| usedNumBits += (updateBitLoc/BM_HRL_WORD_SIZE) * BM_HRL_WORD_SIZE; |
| } |
| |
| /* construct the literal word */ |
| insertingPos = updateBitLoc - usedNumBits; |
| firstTid += BM_HRL_WORD_SIZE; |
| buf->cwords[buf->curword] = |
| ((BM_HRL_WORD)0) | (((BM_HRL_WORD)1) << insertingPos); |
| buf->last_tids[buf->curword] = firstTid - 1; |
| buf->curword++; |
| buf_extend(buf); |
| usedNumBits += BM_HRL_WORD_SIZE; |
| |
| if (numBits > usedNumBits) |
| { |
| BM_HRL_WORD fill_length; |
| Assert((numBits - usedNumBits) % BM_HRL_WORD_SIZE == 0); |
| fill_length = (numBits - usedNumBits) / BM_HRL_WORD_SIZE; |
| |
| firstTid += fill_length * BM_HRL_WORD_SIZE; |
| buf->cwords[buf->curword] = BM_MAKE_FILL_WORD(0, fill_length); |
| buf->last_tids[buf->curword] = firstTid -1; |
| buf->curword++; |
| buf_extend(buf); |
| buf->hwords[(buf->curword-1)/BM_HRL_WORD_SIZE] |= |
| (((BM_HRL_WORD)1) << (BM_HRL_WORD_SIZE - buf->curword)); |
| } |
| } |
| |
| /* |
| * rshift_header_bits() -- 'in-place' right-shift bits in given words |
| * 'bits' bits. |
| * |
| * Assume that 'bits' is smaller than BM_HRL_WORD_SIZE. The right-most |
| * 'bits' bits will be ignored. |
| */ |
| static void |
| rshift_header_bits(BM_HRL_WORD* words, uint64 nwords, |
| uint32 bits) |
| { |
| BM_HRL_WORD shifting_bits = 0; |
| uint32 word_no; |
| |
| Assert(bits < BM_HRL_WORD_SIZE); |
| |
| for (word_no = 0; word_no < nwords; word_no++) |
| { |
| BM_HRL_WORD new_shifting_bits = |
| ((BM_HRL_WORD)words[word_no]) << (BM_HRL_WORD_SIZE - bits); |
| words[word_no] = (words[word_no] >> bits) | shifting_bits; |
| |
| shifting_bits = new_shifting_bits; |
| } |
| } |
| |
| /* |
| * lshift_header_bits() -- 'in-place' left-shift bits in given words |
| * 'bits' bits. |
| * |
| * Assume that 'bits' is smaller than BM_HRL_WORD_SIZE. The left-most |
| * 'bits' bits will be ignored. |
| */ |
| static void |
| lshift_header_bits(BM_HRL_WORD* words, uint64 nwords, |
| uint32 bits) |
| { |
| uint32 word_no; |
| Assert(bits < BM_HRL_WORD_SIZE); |
| |
| for (word_no = 0; word_no < nwords; word_no++) |
| { |
| BM_HRL_WORD shifting_bits = |
| words[word_no] >> (BM_HRL_WORD_SIZE - bits); |
| words[word_no] = ((BM_HRL_WORD)words[word_no]) << bits; |
| |
| if (word_no != 0) |
| words[word_no - 1] |= shifting_bits; |
| } |
| } |
| |
| /* |
| * shift_header_bits() -- right-shift bits after 'startLoc' for |
| * 'numofShiftingBits' bits. |
| * |
| * These bits are stored in an array of words with the word size of |
| * BM_HRL_WORD_SIZE. This shift is done in-place. The maximum number of |
| * words in this array is given. If the shifting causes the array not to |
| * have enough space for all bits, the right-most overflow bits will be |
| * discarded. The value 'startLoc' starts with 0. |
| */ |
| static void |
| shift_header_bits(BM_HRL_WORD* words, uint32 numOfBits, |
| uint32 maxNumOfWords, uint32 startLoc, |
| uint32 numOfShiftingBits) |
| { |
| uint32 startWordNo; |
| uint32 endWordNo; |
| uint32 wordNo; |
| uint32 numOfFinalShiftingBits; |
| BM_HRL_WORD tmpWord; |
| |
| Assert(startLoc <= numOfBits); |
| Assert((numOfBits-1)/BM_HRL_WORD_SIZE < maxNumOfWords); |
| |
| startWordNo = startLoc/BM_HRL_WORD_SIZE; |
| endWordNo = (numOfBits-1)/BM_HRL_WORD_SIZE; |
| |
| for (wordNo = endWordNo; wordNo > startWordNo; wordNo--) |
| { |
| /* |
| * obtain the last 'numOfShiftingBits' bits in the words[wordNo], |
| * and store them in the high-end of a word. |
| */ |
| tmpWord = (((BM_HRL_WORD)words[wordNo])<< |
| (BM_HRL_WORD_SIZE-numOfShiftingBits)); |
| |
| /* right-shift the original word 'numOfShiftingBits' bits. */ |
| words[wordNo] = (((BM_HRL_WORD)words[wordNo])>>numOfShiftingBits); |
| |
| /* OR those shifted bits into the next word in the array. */ |
| if (wordNo < maxNumOfWords-1) |
| words[wordNo + 1] |= tmpWord; |
| |
| } |
| |
| /* obtain bits after 'startLoc'.*/ |
| tmpWord = ((BM_HRL_WORD)(words[startWordNo]<< |
| (startLoc%BM_HRL_WORD_SIZE)))>>(startLoc%BM_HRL_WORD_SIZE); |
| |
| /* |
| * When startLoc%BM_HRL_WORD_SIZE is 0, we want to shift all 64 bits. |
| * There is no way to use the bit-shifting to shift all 64 bits out |
| * of a 64-bit integer. So just simply set the word to 0. |
| * Otherwise, use bit-shifting to shift out the bits after 'startLoc'. |
| */ |
| if (startLoc%BM_HRL_WORD_SIZE > 0) |
| { |
| words[startWordNo] = ((BM_HRL_WORD)(words[startWordNo]>> |
| (BM_HRL_WORD_SIZE-startLoc%BM_HRL_WORD_SIZE)))<< |
| (BM_HRL_WORD_SIZE-startLoc%BM_HRL_WORD_SIZE); |
| } |
| else |
| { |
| words[startWordNo] = 0; |
| } |
| |
| numOfFinalShiftingBits = numOfShiftingBits; |
| if (BM_HRL_WORD_SIZE - startLoc % BM_HRL_WORD_SIZE < numOfShiftingBits) |
| numOfFinalShiftingBits = BM_HRL_WORD_SIZE - startLoc % BM_HRL_WORD_SIZE; |
| |
| words[startWordNo] |= (tmpWord>>numOfFinalShiftingBits); |
| |
| if (startWordNo < maxNumOfWords-1) |
| { |
| tmpWord = ((BM_HRL_WORD)(tmpWord << (BM_HRL_WORD_SIZE - numOfFinalShiftingBits)))>> |
| (numOfShiftingBits - numOfFinalShiftingBits); |
| words[startWordNo+1] |= tmpWord; |
| } |
| } |
| |
| /* |
| * insert_newwords() -- insert a buffer of new words into a given buffer of |
| * words at a specified position. |
| * |
| * The new words will be inserted into the positions starting from |
| * 'insertPos'(>=0). The original words from 'insertPos' will be shifted |
| * to the right. If the given array does not have enough space to |
| * hold all words, the last '(*numWordsP+numNewWords-maxNumWords)' words |
| * will be stored in the buffer 'words_left', for which the caller should set |
| * the enough space to hold these left words. |
| * |
| * All three buffers are specified as BMTIDBuffer objects, in which the following |
| * fields are used: |
| * curword -- the number of content words in this buffer. |
| * num_cwords -- the maximum number of content words that are allowed. |
| * hwords -- the header words |
| * cwords -- the content words |
| * |
| * This function assumes that the number of new words is not greater than BM_HRL_WORD_SIZE. |
| */ |
| static void |
| insert_newwords(BMTIDBuffer* words, uint32 insertPos, |
| BMTIDBuffer* new_words, BMTIDBuffer* words_left) |
| { |
| int32 wordNo; |
| uint16 bitLoc; |
| |
| Assert(new_words->curword <= BM_HRL_WORD_SIZE); |
| Assert(insertPos <= words->num_cwords); |
| |
| words_left->curword = 0; |
| |
| /* if there are no words in the original buffer, we simply copy the new words. */ |
| if (words->curword == 0) |
| { |
| memcpy(words->cwords, new_words->cwords, new_words->curword*sizeof(BM_HRL_WORD)); |
| memcpy(words->hwords, new_words->hwords, |
| BM_CALC_H_WORDS(new_words->curword) * sizeof(BM_HRL_WORD)); |
| words->curword = new_words->curword; |
| |
| return; |
| } |
| |
| /* |
| * if insertPos is pointing to the position after the maximum position |
| * in this word, we simply copy the new words to leftContentWords. |
| */ |
| if (insertPos == words->num_cwords) |
| { |
| memcpy(words_left->cwords, new_words->cwords, |
| new_words->curword * sizeof(BM_HRL_WORD)); |
| memcpy(words_left->hwords, new_words->hwords, |
| BM_CALC_H_WORDS(new_words->curword) * sizeof(BM_HRL_WORD)); |
| words_left->curword = new_words->curword; |
| |
| return; |
| } |
| |
| Assert(words->curword > 0); |
| |
| /* Calculate how many words left after this insert. */ |
| if (words->curword + new_words->curword > words->num_cwords) |
| words_left->curword = words->curword + new_words->curword - words->num_cwords; |
| MemSet(words_left->hwords, 0, BM_NUM_OF_HEADER_WORDS * sizeof(BM_HRL_WORD)); |
| |
| /* |
| * Walk from the last word in the array back to 'insertPos'. |
| * If the word no + new_words->curword is greater than words->num_cwords, |
| * we store these words in words_left. |
| */ |
| for (wordNo=words->curword-1; wordNo>=0 && wordNo>=insertPos; wordNo--) |
| { |
| if (wordNo + new_words->curword >= words->num_cwords) |
| { |
| words_left->cwords[wordNo+new_words->curword-words->num_cwords] = |
| words->cwords[wordNo]; |
| if (IS_FILL_WORD(words->hwords, wordNo)) |
| { |
| uint32 o = (int)wordNo/BM_HRL_WORD_SIZE; |
| uint32 n = wordNo + new_words->curword - words->num_cwords; |
| |
| words_left->hwords[0] |= WORDNO_GET_HEADER_BIT(n); |
| words->hwords[o] &= ~(WORDNO_GET_HEADER_BIT(wordNo)); |
| } |
| } |
| else |
| words->cwords[wordNo + new_words->curword] = words->cwords[wordNo]; |
| } |
| |
| /* insert new words */ |
| for (wordNo=0; wordNo<new_words->curword; wordNo++) |
| { |
| if (insertPos+wordNo>= words->num_cwords) |
| { |
| uint32 n = insertPos + wordNo - words->num_cwords; |
| |
| words_left->cwords[n] = new_words->cwords[wordNo]; |
| if (IS_FILL_WORD(new_words->hwords, wordNo)) |
| words_left->hwords[0] |= WORDNO_GET_HEADER_BIT(n); |
| } |
| else |
| words->cwords[insertPos+wordNo] = new_words->cwords[wordNo]; |
| } |
| |
| /* right-shift the bits in the header words */ |
| shift_header_bits(words->hwords, words->curword, |
| BM_NUM_OF_HEADER_WORDS, insertPos, |
| new_words->curword); |
| |
| /* set the newWords header bits */ |
| for (bitLoc = insertPos; |
| bitLoc < insertPos + new_words->curword && bitLoc < words->num_cwords; |
| bitLoc++) |
| { |
| if (IS_FILL_WORD(new_words->hwords, bitLoc-insertPos)) |
| { |
| uint32 off = (uint32)bitLoc/BM_HRL_WORD_SIZE; |
| |
| words->hwords[off] |= WORDNO_GET_HEADER_BIT(bitLoc); |
| } |
| } |
| |
| words->curword += (new_words->curword - words_left->curword); |
| } |
| |
| /* |
| * updatesetbit_inpage() -- update the given bit to 1 in a given |
| * bitmap page. |
| * |
| * The argument 'firstTidNumber' indicates the first tid location of |
| * the bits stored in this page. This is necessary for locating the bit |
| * of 'tidnum'. |
| * |
| * This update may generate new words that cause this page to overflow. |
| * In this case, we will first check the next bitmap page have enough |
| * space for these new words. If so, we update these two pages. Otherwise, |
| * a new bitmap page is created. |
| */ |
| static void |
| updatesetbit_inpage(Relation rel, uint64 tidnum, |
| Buffer lovBuffer, OffsetNumber lovOffset, |
| Buffer bitmapBuffer, uint64 firstTidNumber, |
| bool use_wal) |
| { |
| Page bitmapPage; |
| BMBitmapOpaque bitmapOpaque; |
| BMBitmap bitmap; |
| Buffer nextBuffer; |
| Page nextPage; |
| BMBitmapOpaque nextOpaque; |
| BMBitmap nextBitmap; |
| |
| uint64 bitNo = 0; |
| uint32 wordNo; |
| uint32 free_words; |
| BM_HRL_WORD word = 0; |
| bool found = false; |
| |
| BMTIDBuffer words; |
| BMTIDBuffer new_words; |
| BMTIDBuffer words_left; |
| |
| bool new_page; |
| bool new_lastpage; |
| int word_no; |
| |
| bitmapPage = BufferGetPage(bitmapBuffer); |
| bitmapOpaque = (BMBitmapOpaque)PageGetSpecialPointer(bitmapPage); |
| |
| bitmap = (BMBitmap) PageGetContentsMaxAligned(bitmapPage); |
| bitNo = 0; |
| |
| if (Debug_bitmap_print_insert) |
| elog(LOG, "Bitmap Insert: updating a set bit in bitmap block %d, " |
| "lovBlock=%d, lovOffset=%d" |
| ", firstTidNumber=" INT64_FORMAT |
| ", bm_last_tid_location=" INT64_FORMAT |
| ", tidnum=" INT64_FORMAT |
| ", idxrelid=%u", |
| BufferGetBlockNumber(bitmapBuffer), |
| BufferGetBlockNumber(lovBuffer), |
| lovOffset, |
| firstTidNumber, |
| bitmapOpaque->bm_last_tid_location, |
| tidnum, |
| RelationGetRelid(rel)); |
| |
| /* Find the word that contains the bit of tidnum. */ |
| for (wordNo = 0; wordNo < bitmapOpaque->bm_hrl_words_used; wordNo++) |
| { |
| word = bitmap->cwords[wordNo]; |
| if (IS_FILL_WORD(bitmap->hwords, wordNo)) |
| bitNo += FILL_LENGTH(word) * BM_HRL_WORD_SIZE; |
| else |
| bitNo += BM_HRL_WORD_SIZE; |
| |
| if (firstTidNumber + bitNo - 1 >= tidnum) |
| { |
| found = true; |
| break; /* find the word */ |
| } |
| } |
| |
| if (!found) |
| { |
| ereport(ERROR, |
| (errcode(ERRCODE_INDEX_CORRUPTED), |
| errmsg("could not find bitmap word for tid " INT64_FORMAT |
| " in bitmap index %u" |
| " (relfilenode %u/%u/%u, bitmap block %d, LOV block %d, LOV offset %d)", |
| tidnum, RelationGetRelid(rel), |
| rel->rd_locator.spcOid, rel->rd_locator.dbOid, rel->rd_locator.relNumber, |
| BufferGetBlockNumber(bitmapBuffer), |
| BufferGetBlockNumber(lovBuffer), lovOffset), |
| errhint("Reindex bitmap index \"%s\".", RelationGetRelationName(rel)))); |
| } |
| |
| Assert (wordNo <= bitmapOpaque->bm_hrl_words_used); |
| |
| /* |
| * If the word containing the updating bit is a literal word, |
| * we simply update the word, and return. |
| */ |
| if (!IS_FILL_WORD(bitmap->hwords, wordNo)) |
| { |
| uint16 insertingPos = (tidnum - 1) % BM_HRL_WORD_SIZE; |
| |
| START_CRIT_SECTION(); |
| |
| MarkBufferDirty(bitmapBuffer); |
| |
| bitmap->cwords[wordNo] |= (((BM_HRL_WORD)1)<<insertingPos); |
| |
| if (use_wal) |
| _bitmap_log_updateword(rel, bitmapBuffer, wordNo); |
| |
| END_CRIT_SECTION(); |
| |
| if (Debug_bitmap_print_insert) |
| elog(LOG, "Bitmap Insert: updated a set bit in bitmap block %d, " |
| "lovBlock=%d, lovOffset=%d" |
| ", firstTidNumber=" INT64_FORMAT |
| ", bm_last_tid_location=" INT64_FORMAT |
| ", tidnum=" INT64_FORMAT |
| ", wordNo=%d" |
| ", idxrelid=%u", |
| BufferGetBlockNumber(bitmapBuffer), |
| BufferGetBlockNumber(lovBuffer), |
| lovOffset, |
| firstTidNumber, |
| bitmapOpaque->bm_last_tid_location, |
| tidnum, |
| wordNo, |
| RelationGetRelid(rel)); |
| |
| return; |
| } |
| |
| /* If this bit is already 1, then simply return. */ |
| if (GET_FILL_BIT(word) == 1) |
| { |
| if (Debug_bitmap_print_insert) |
| elog(LOG, "Bitmap Insert: no update is needed in bitmap block %d, " |
| "lovBlock=%d, lovOffset=%d" |
| ", firstTidNumber=" INT64_FORMAT |
| ", bm_last_tid_location=" INT64_FORMAT |
| ", tidnum=" INT64_FORMAT |
| ", wordNo=%d" |
| ", idxrelid=%u", |
| BufferGetBlockNumber(bitmapBuffer), |
| BufferGetBlockNumber(lovBuffer), |
| lovOffset, |
| firstTidNumber, |
| bitmapOpaque->bm_last_tid_location, |
| tidnum, |
| wordNo, |
| RelationGetRelid(rel)); |
| |
| return; |
| } |
| |
| firstTidNumber = firstTidNumber + bitNo - |
| FILL_LENGTH(word) * BM_HRL_WORD_SIZE; |
| |
| Assert(tidnum >= firstTidNumber); |
| |
| MemSet(&new_words, 0, sizeof(new_words)); |
| buf_extend(&new_words); |
| updatesetbit_inword(word, tidnum - firstTidNumber, firstTidNumber, |
| &new_words); |
| |
| /* Make sure that there are at most 3 new words. */ |
| Assert(new_words.curword <= 3); |
| |
| if (new_words.curword == 1) |
| { |
| uint32 off = wordNo/BM_HRL_WORD_SIZE; |
| |
| START_CRIT_SECTION(); |
| |
| MarkBufferDirty(bitmapBuffer); |
| |
| bitmap->cwords[wordNo] = new_words.cwords[0]; |
| bitmap->hwords[off] &= (BM_HRL_WORD)(~WORDNO_GET_HEADER_BIT(wordNo)); |
| |
| if (IS_FILL_WORD(new_words.hwords, 0)) |
| ereport(ERROR, |
| (errcode(ERRCODE_INDEX_CORRUPTED), |
| errmsg("incorrect header bit found in bitmap index %u; expected header bit 1" |
| " (relfilenode %u/%u/%u, bitmap block %d, LOV block %d, LOV offset %d)", |
| RelationGetRelid(rel), |
| rel->rd_locator.spcOid, rel->rd_locator.spcOid, rel->rd_locator.spcOid, |
| BufferGetBlockNumber(bitmapBuffer), |
| BufferGetBlockNumber(lovBuffer), lovOffset))); |
| |
| Assert(IS_FILL_WORD(new_words.hwords, 0) == |
| IS_FILL_WORD(bitmap->hwords, wordNo)); |
| |
| if (use_wal) |
| _bitmap_log_updateword(rel, bitmapBuffer, wordNo); |
| |
| END_CRIT_SECTION(); |
| |
| if (Debug_bitmap_print_insert) |
| elog(LOG, "Bitmap Insert: updated a set bit in bitmap block %d, " |
| "lovBlock=%d, lovOffset=%d" |
| ", firstTidNumber=" INT64_FORMAT |
| ", bm_last_tid_location=" INT64_FORMAT |
| ", tidnum=" INT64_FORMAT |
| ", wordNo=%d, header bit=%d, idxrelid=%u", |
| BufferGetBlockNumber(bitmapBuffer), |
| BufferGetBlockNumber(lovBuffer), |
| lovOffset, |
| firstTidNumber, |
| bitmapOpaque->bm_last_tid_location, |
| tidnum, |
| wordNo, |
| IS_FILL_WORD(bitmap->hwords, off), |
| RelationGetRelid(rel)); |
| return; |
| } |
| |
| /* |
| * Check if this page has enough space for all new words. If so, |
| * replace this word with new words. Otherwise, |
| * we first check if the next page has enough space for all new words. |
| * If so, insert new words to the next page, otherwise, |
| * create a new page. |
| */ |
| free_words = BM_NUM_OF_HRL_WORDS_PER_PAGE - |
| bitmapOpaque->bm_hrl_words_used; |
| |
| new_page = false; |
| new_lastpage = false; |
| nextBuffer = InvalidBuffer; |
| |
| if (free_words < new_words.curword - 1) |
| { |
| if (bitmapOpaque->bm_bitmap_next != InvalidBlockNumber) |
| { |
| nextBuffer = _bitmap_getbuf(rel, bitmapOpaque->bm_bitmap_next, |
| BM_WRITE); |
| nextPage = BufferGetPage(nextBuffer); |
| nextOpaque = (BMBitmapOpaque)PageGetSpecialPointer(nextPage); |
| free_words = BM_NUM_OF_HRL_WORDS_PER_PAGE - |
| nextOpaque->bm_hrl_words_used; |
| } else |
| { |
| new_lastpage = true; |
| } |
| } |
| |
| if (free_words < new_words.curword - 1) |
| { |
| if (BufferIsValid(nextBuffer)) |
| _bitmap_relbuf(nextBuffer); |
| |
| nextBuffer = _bitmap_getbuf(rel, P_NEW, BM_WRITE); |
| _bitmap_init_bitmappage(BufferGetPage(nextBuffer)); |
| new_page = true; |
| free_words = BM_NUM_OF_HRL_WORDS_PER_PAGE; |
| } |
| |
| START_CRIT_SECTION(); |
| |
| MarkBufferDirty(bitmapBuffer); |
| if (BufferIsValid(nextBuffer)) |
| MarkBufferDirty(nextBuffer); |
| |
| if (new_lastpage) |
| { |
| Page lovPage; |
| BMLOVItem lovItem; |
| MarkBufferDirty(lovBuffer); |
| |
| lovPage = BufferGetPage(lovBuffer); |
| lovItem = (BMLOVItem) PageGetItem(lovPage, |
| PageGetItemId(lovPage, lovOffset)); |
| lovItem->bm_lov_tail = BufferGetBlockNumber(nextBuffer); |
| } |
| |
| if (Debug_bitmap_print_insert) |
| elog(LOG, "Bitmap Insert: updated a set bit in bitmap block %d, " |
| "lovBlock=%d, lovOffset=%d" |
| ", firstTidNumber=" INT64_FORMAT |
| ", bm_last_tid_location=" INT64_FORMAT |
| ", tidnum=" INT64_FORMAT |
| ", generate %d new words, %d new bitmap page" |
| ", idxrelid=%u", |
| BufferGetBlockNumber(bitmapBuffer), |
| BufferGetBlockNumber(lovBuffer), |
| lovOffset, |
| firstTidNumber, |
| bitmapOpaque->bm_last_tid_location, |
| tidnum, |
| new_words.curword, |
| new_page, |
| RelationGetRelid(rel)); |
| |
| bitmap->cwords[wordNo] = new_words.cwords[0]; |
| if (tidnum - firstTidNumber + 1 <= BM_HRL_WORD_SIZE) |
| { |
| uint32 off = wordNo/BM_HRL_WORD_SIZE; |
| |
| bitmap->hwords[off] &= (BM_HRL_WORD)(~WORDNO_GET_HEADER_BIT(wordNo)); |
| if (IS_FILL_WORD(new_words.hwords, 0)) |
| ereport(ERROR, |
| (errcode(ERRCODE_INDEX_CORRUPTED), |
| errmsg("incorrect header bit found in bitmap index %u; expected header bit 1" |
| " (relfilenode %u/%u/%u, bitmap block %d, LOV block %d, LOV offset %d)", |
| RelationGetRelid(rel), |
| rel->rd_locator.spcOid, rel->rd_locator.spcOid, rel->rd_locator.spcOid, |
| BufferGetBlockNumber(bitmapBuffer), |
| BufferGetBlockNumber(lovBuffer), lovOffset))); |
| } |
| |
| /* ignore the first word in new_words.cwords. */ |
| new_words.hwords[0] = ((BM_HRL_WORD)new_words.hwords[0]) << 1; |
| for (word_no = 0; word_no < new_words.curword - 1; word_no++) |
| new_words.cwords[word_no] = new_words.cwords[word_no+1]; |
| new_words.curword--; |
| |
| /* Create the buffer for the original words */ |
| MemSet(&words, 0, sizeof(words)); |
| words.cwords = bitmap->cwords; |
| memcpy(words.hwords, bitmap->hwords, |
| BM_CALC_H_WORDS(bitmapOpaque->bm_hrl_words_used) * sizeof(BM_HRL_WORD)); |
| words.num_cwords = BM_NUM_OF_HRL_WORDS_PER_PAGE; |
| words.curword = bitmapOpaque->bm_hrl_words_used; |
| |
| MemSet(&words_left, 0, sizeof(words_left)); |
| buf_extend(&words_left); |
| |
| insert_newwords(&words, wordNo + 1, &new_words, &words_left); |
| |
| /* |
| * We have to copy header words back to the page, and set the correct |
| * number of words in the page. |
| */ |
| bitmapOpaque->bm_hrl_words_used = words.curword; |
| memcpy(bitmap->hwords, words.hwords, |
| BM_CALC_H_WORDS(bitmapOpaque->bm_hrl_words_used) * sizeof(BM_HRL_WORD)); |
| |
| if (new_page) |
| { |
| nextPage = BufferGetPage(nextBuffer); |
| nextOpaque = (BMBitmapOpaque)PageGetSpecialPointer(nextPage); |
| nextBitmap = (BMBitmap)PageGetContentsMaxAligned(nextPage); |
| |
| nextOpaque->bm_last_tid_location = bitmapOpaque->bm_last_tid_location; |
| nextOpaque->bm_bitmap_next = bitmapOpaque->bm_bitmap_next; |
| bitmapOpaque->bm_bitmap_next = BufferGetBlockNumber(nextBuffer); |
| } |
| |
| bitmapOpaque->bm_last_tid_location -= |
| GET_NUM_BITS(words_left.cwords, words_left.hwords, words_left.curword); |
| |
| if (words_left.curword > 0) |
| { |
| nextPage = BufferGetPage(nextBuffer); |
| nextOpaque = (BMBitmapOpaque)PageGetSpecialPointer(nextPage); |
| nextBitmap = (BMBitmap)PageGetContentsMaxAligned(nextPage); |
| |
| /* Create the buffer for the original words */ |
| MemSet(&words, 0, sizeof(words)); |
| words.cwords = nextBitmap->cwords; |
| memcpy(words.hwords, nextBitmap->hwords, |
| BM_CALC_H_WORDS(nextOpaque->bm_hrl_words_used) * sizeof(BM_HRL_WORD)); |
| words.num_cwords = BM_NUM_OF_HRL_WORDS_PER_PAGE; |
| words.curword = nextOpaque->bm_hrl_words_used; |
| |
| MemSet(&new_words, 0, sizeof(new_words)); |
| |
| insert_newwords(&words, 0, &words_left, &new_words); |
| |
| /* |
| * We have to copy header words back to the page, and set the correct |
| * number of words in the page. |
| */ |
| nextOpaque->bm_hrl_words_used = words.curword; |
| memcpy(nextBitmap->hwords, words.hwords, |
| BM_CALC_H_WORDS(nextOpaque->bm_hrl_words_used) * sizeof(BM_HRL_WORD)); |
| |
| SIMPLE_FAULT_INJECTOR("rearrange_word_to_next_bitmap_page"); |
| Assert(new_words.curword == 0); |
| } |
| |
| if (use_wal) |
| _bitmap_log_updatewords(rel, lovBuffer, lovOffset, |
| bitmapBuffer, nextBuffer, new_lastpage); |
| |
| END_CRIT_SECTION(); |
| |
| if (BufferIsValid(nextBuffer)) |
| _bitmap_relbuf(nextBuffer); |
| _bitmap_free_tidbuf(&new_words); |
| _bitmap_free_tidbuf(&words_left); |
| } |
| |
| /* |
| * findbitmappage() -- find the bitmap page that contains |
| * the given tid location, and obtain the first tid location |
| * in this page. |
| * |
| * We assume that this tid location is not in bm_last_compword or |
| * bm_last_word of its LOVItem. |
| * |
| * We will have write lock on the bitmap page we find. |
| */ |
| static void |
| findbitmappage(Relation rel, BMLOVItem lovitem, |
| uint64 tidnum, |
| Buffer* bitmapBufferP, uint64* firstTidNumberP) |
| { |
| BlockNumber nextBlockNo = lovitem->bm_lov_head; |
| |
| *firstTidNumberP = 1; |
| |
| while (BlockNumberIsValid(nextBlockNo)) |
| { |
| Page bitmapPage; |
| BMBitmapOpaque bitmapOpaque; |
| |
| CHECK_FOR_INTERRUPTS(); |
| |
| *bitmapBufferP = _bitmap_getbuf(rel, nextBlockNo, BM_WRITE); |
| bitmapPage = BufferGetPage(*bitmapBufferP); |
| bitmapOpaque = (BMBitmapOpaque) |
| PageGetSpecialPointer(bitmapPage); |
| |
| if (bitmapOpaque->bm_last_tid_location >= tidnum) |
| return; /* find the page */ |
| |
| (*firstTidNumberP) = bitmapOpaque->bm_last_tid_location + 1; |
| nextBlockNo = bitmapOpaque->bm_bitmap_next; |
| |
| _bitmap_relbuf(*bitmapBufferP); |
| } |
| |
| /* |
| * We can't find such a page. This should not happen. |
| * So we error out. |
| */ |
| ereport(ERROR, |
| (errcode(ERRCODE_INDEX_CORRUPTED), |
| errmsg("could not find bitmap page containing tid " INT64_FORMAT |
| " in bitmap index %u (relfilenode %u/%u/%u" |
| ", next block %d, LOV head %d, LOV tail %d" |
| ", firstTidNumber " INT64_FORMAT |
| ", bm_last_tid_location " INT64_FORMAT |
| ", bm_last_setbit " INT64_FORMAT |
| ", bm_last_compword " INT64_FORMAT |
| ", bm_last_word " INT64_FORMAT |
| ", lov_words_header %d)", |
| tidnum, RelationGetRelid(rel), |
| rel->rd_locator.spcOid, rel->rd_locator.spcOid, rel->rd_locator.spcOid, |
| nextBlockNo, lovitem->bm_lov_head, |
| lovitem->bm_lov_tail, *firstTidNumberP, lovitem->bm_last_tid_location, |
| lovitem->bm_last_setbit, lovitem->bm_last_compword, |
| lovitem->bm_last_word, lovitem->lov_words_header), |
| errhint("Reindex bitmap index \"%s\".", RelationGetRelationName(rel)))); |
| } |
| |
| /* |
| * verify_bitmappages() -- verify if the bm_last_tid_location values |
| * are valid in all bitmap pages. Only used during debugging. |
| */ |
| static void |
| verify_bitmappages(Relation rel, BMLOVItem lovitem) |
| { |
| BlockNumber nextBlockNo = lovitem->bm_lov_head; |
| uint64 tidnum = 0; |
| |
| while (BlockNumberIsValid(nextBlockNo)) |
| { |
| Page bitmapPage; |
| BMBitmapOpaque bitmapOpaque; |
| Buffer bitmapBuffer; |
| uint32 wordNo; |
| BMBitmap bitmap; |
| |
| bitmapBuffer = _bitmap_getbuf(rel, nextBlockNo, BM_READ); |
| bitmapPage = BufferGetPage(bitmapBuffer); |
| bitmapOpaque = (BMBitmapOpaque) |
| PageGetSpecialPointer(bitmapPage); |
| bitmap = (BMBitmap) PageGetContentsMaxAligned(bitmapPage); |
| |
| for (wordNo = 0; wordNo < bitmapOpaque->bm_hrl_words_used; wordNo++) |
| { |
| BM_HRL_WORD word = bitmap->cwords[wordNo]; |
| if (IS_FILL_WORD(bitmap->hwords, wordNo)) |
| tidnum += FILL_LENGTH(word) * BM_HRL_WORD_SIZE; |
| else |
| tidnum += BM_HRL_WORD_SIZE; |
| |
| } |
| |
| if (bitmapOpaque->bm_last_tid_location != tidnum) |
| { |
| elog(ERROR, "unexpected bm_last_tid_location " INT64_FORMAT |
| " found for bitmap block %d in bitmap index %u (relfilenode %u/%u/%u);" |
| " expected tid " INT64_FORMAT, |
| bitmapOpaque->bm_last_tid_location, nextBlockNo, RelationGetRelid(rel), |
| rel->rd_locator.spcOid, rel->rd_locator.spcOid, rel->rd_locator.spcOid, tidnum); |
| } |
| |
| nextBlockNo = bitmapOpaque->bm_bitmap_next; |
| |
| _bitmap_relbuf(bitmapBuffer); |
| } |
| } |
| |
| /* |
| * mergewords() -- merge last two bitmap words based on the HRL compression |
| * scheme. If these two words can not be merged, the last complete |
| * word will be appended into the word array in the buffer. |
| * |
| * If the buffer is extended, this function returns the number |
| * of bytes used. |
| */ |
| static int16 |
| mergewords(BMTIDBuffer *buf, bool lastWordFill) |
| { |
| int16 bytes_used = 0; |
| |
| /* the last_tid in the complete word */ |
| uint64 last_tid = buf->last_tid - buf->last_tid%BM_HRL_WORD_SIZE; |
| |
| /* |
| * If last_compword is LITERAL_ALL_ONE, it is not set yet. |
| * We move last_word to it. |
| */ |
| if (buf->last_compword == LITERAL_ALL_ONE) |
| { |
| buf->last_compword = buf->last_word; |
| buf->is_last_compword_fill = lastWordFill; |
| if (lastWordFill) |
| last_tid = FILL_LENGTH(buf->last_word) * BM_HRL_WORD_SIZE; |
| else |
| last_tid = BM_HRL_WORD_SIZE; |
| |
| buf->last_word = 0; |
| buf->last_tid = last_tid; |
| |
| return bytes_used; |
| } |
| /* |
| * If both words are fill words, and have the same fill bit, |
| * we increment the fill length of the last complete word by |
| * the fill length stored in the last word. |
| */ |
| if (buf->is_last_compword_fill && lastWordFill && |
| (GET_FILL_BIT(buf->last_compword) == |
| GET_FILL_BIT(buf->last_word))) |
| { |
| BM_HRL_WORD lengthMerged; |
| |
| if (FILL_LENGTH(buf->last_compword) + FILL_LENGTH(buf->last_word) <= |
| MAX_FILL_LENGTH) |
| { |
| last_tid += FILL_LENGTH(buf->last_word)*BM_HRL_WORD_SIZE; |
| buf->last_compword += FILL_LENGTH(buf->last_word); |
| buf->last_word = LITERAL_ALL_ZERO; |
| buf->last_tid = last_tid; |
| |
| return bytes_used; |
| } |
| |
| lengthMerged = |
| MAX_FILL_LENGTH - FILL_LENGTH(buf->last_compword); |
| buf->last_word -= lengthMerged; |
| last_tid += lengthMerged * BM_HRL_WORD_SIZE; |
| buf->last_compword += lengthMerged; |
| } |
| |
| /* |
| * Here, these two words can not be merged together. We |
| * move the last complete word to the array, and set it to be the |
| * last word. |
| */ |
| |
| /* |
| * When there are not enough space in the array of new words, |
| * we re-allocate a bigger space. |
| */ |
| bytes_used += buf_extend(buf); |
| |
| buf->cwords[buf->curword] = buf->last_compword; |
| buf->last_tids[buf->curword] = last_tid; |
| |
| if (buf->is_last_compword_fill) |
| buf->hwords[buf->curword/BM_HRL_WORD_SIZE] |= |
| ((BM_HRL_WORD)1) << (BM_HRL_WORD_SIZE - 1 - |
| buf->curword % BM_HRL_WORD_SIZE); |
| |
| buf->curword ++; |
| |
| buf->last_compword = buf->last_word; |
| buf->is_last_compword_fill = lastWordFill; |
| if (buf->is_last_compword_fill) |
| last_tid += FILL_LENGTH(buf->last_compword) * BM_HRL_WORD_SIZE; |
| else |
| last_tid += BM_HRL_WORD_SIZE; |
| |
| buf->last_word = 0; |
| buf->last_tid = last_tid; |
| |
| return bytes_used; |
| } |
| |
| /* |
| * _bitmap_write_new_bitmapwords() -- write a given buffer of new bitmap words |
| * into the end of bitmap page(s). |
| * |
| * If the last bitmap page does not have enough space for all these new |
| * words, new pages will be allocated here. |
| * |
| * We consider a write to one bitmap page as one atomic-action WAL |
| * record. The WAL record for the write to the last bitmap page also |
| * includes updates on the lov item. Writes to the non-last |
| * bitmap page are not self-consistent. We need to do some fix-up |
| * during WAL logic replay. |
| */ |
| static void |
| _bitmap_write_new_bitmapwords(Relation rel, |
| Buffer lovBuffer, OffsetNumber lovOffset, |
| BMTIDBuffer* buf, bool use_wal) |
| { |
| Page lovPage; |
| BMLOVItem lovItem; |
| bool first_page_needs_init = false; |
| List *perpage_buffers = NIL; |
| List *perpage_xlrecs = NIL; |
| ListCell *lcb; |
| |
| lovPage = BufferGetPage(lovBuffer); |
| lovItem = (BMLOVItem) PageGetItem(lovPage, |
| PageGetItemId(lovPage, lovOffset)); |
| |
| /* |
| * Write changes to bitmap pages, if needed. (We might get away by |
| * updating just the last words stored on the LOV item.) |
| */ |
| if (buf->curword > 0) |
| { |
| ListCell *lcp; |
| BlockNumber first_blkno; |
| BlockNumber last_blkno; |
| List *perpage_tmppages = NIL; |
| bool is_first; |
| ListCell *buffer_cell; |
| int start_wordno; |
| |
| /* |
| * Write bitmap words, one page at a time, allocating new pages as |
| * required. |
| */ |
| is_first = true; |
| start_wordno = 0; |
| do |
| { |
| Buffer bitmapBuffer; |
| bool bitmapBufferNeedsInit = false; |
| Page bitmapPage; |
| BMBitmapOpaque bitmapPageOpaque; |
| uint32 numFreeWords; |
| uint32 words_written; |
| xl_bm_bitmapwords_perpage *xlrec_perpage; |
| Page tmppage = NULL; |
| |
| if (is_first && lovItem->bm_lov_head != InvalidBlockNumber) |
| { |
| bitmapBuffer = _bitmap_getbuf(rel, lovItem->bm_lov_tail, BM_WRITE); |
| |
| /* Append to an existing LOV page as much as fits */ |
| bitmapPage = BufferGetPage(bitmapBuffer); |
| bitmapPageOpaque = |
| (BMBitmapOpaque) PageGetSpecialPointer(bitmapPage); |
| |
| numFreeWords = BM_NUM_OF_HRL_WORDS_PER_PAGE - |
| bitmapPageOpaque->bm_hrl_words_used; |
| } |
| else |
| { |
| /* Allocate new page */ |
| bitmapBuffer = _bitmap_getbuf(rel, P_NEW, BM_WRITE); |
| bitmapBufferNeedsInit = true; |
| numFreeWords = BM_NUM_OF_HRL_WORDS_PER_PAGE; |
| } |
| |
| /* |
| * Remember information about the first page, needed |
| * for updating the LOV and for the WAL record. |
| */ |
| if (is_first) |
| { |
| first_blkno = BufferGetBlockNumber(bitmapBuffer); |
| first_page_needs_init = bitmapBufferNeedsInit; |
| } |
| |
| if (use_wal) |
| { |
| xlrec_perpage = palloc0(sizeof(xl_bm_bitmapwords_perpage)); |
| xlrec_perpage->bmp_blkno = BufferGetBlockNumber(bitmapBuffer); |
| } |
| else |
| xlrec_perpage = NULL; |
| perpage_buffers = lappend_int(perpage_buffers, bitmapBuffer); |
| perpage_xlrecs = lappend(perpage_xlrecs, xlrec_perpage); |
| |
| if (list_length(perpage_buffers) > MAX_BITMAP_PAGES_PER_INSERT) |
| elog(ERROR, "too many bitmap pages in one insert batch into bitmap index %u" |
| " (relfilenode %u/%u/%u, LOV block %d, LOV offset %d)", |
| RelationGetRelid(rel), |
| rel->rd_locator.spcOid, rel->rd_locator.spcOid, rel->rd_locator.spcOid, |
| BufferGetBlockNumber(lovBuffer), lovOffset); |
| |
| /* |
| * Allocate a new temporary page to operate on, in case we fail |
| * half-way through the updates (because of running out of memory |
| * or disk space, most likely). If this is the last bitmap page, |
| * i.e. we can fit all the remaining words on this bitmap page, |
| * though, we can skip that, and modify the page directly. |
| * |
| * If this is not the last page, we will need to allocate more |
| * pages. That in turn might fail, so we must not modify the |
| * existing pages yet. |
| */ |
| if (numFreeWords < buf->curword - start_wordno) |
| { |
| /* |
| * Does not fit, we will need to expand. |
| * |
| * Note: we don't write to the page until we're sure we get |
| * all of them. We do all the action on temp copies. |
| */ |
| if (bitmapBufferNeedsInit) |
| { |
| tmppage = palloc(BLCKSZ); |
| _bitmap_init_bitmappage(tmppage); |
| } |
| else |
| tmppage = PageGetTempPageCopy(BufferGetPage(bitmapBuffer)); |
| |
| bitmapPage = tmppage; |
| |
| perpage_tmppages = lappend(perpage_tmppages, tmppage); |
| } |
| else |
| { |
| /* |
| * This is the last page. Now that we have successfully |
| * fetched/allocated it, none of the things we do should |
| * ereport(), so we can make the changes directly to the |
| * buffer. |
| */ |
| bitmapPage = BufferGetPage(bitmapBuffer); |
| START_CRIT_SECTION(); |
| if (bitmapBufferNeedsInit) |
| _bitmap_init_bitmappage(bitmapPage); |
| } |
| |
| words_written = |
| _bitmap_write_bitmapwords_on_page(bitmapPage, buf, |
| start_wordno, xlrec_perpage); |
| Assert(is_first || words_written > 0); |
| start_wordno += words_written; |
| |
| if (bitmapPage != tmppage) |
| MarkBufferDirty(bitmapBuffer); |
| |
| last_blkno = BufferGetBlockNumber(bitmapBuffer); |
| is_first = false; |
| } while (buf->curword - start_wordno > 0); |
| Assert(start_wordno == buf->curword); |
| |
| /* |
| * Ok, we have locked all the pages we need. Apply any changes we had made on |
| * temporary pages. |
| * |
| * NOTE: there is one fewer temppage. |
| */ |
| buffer_cell = list_head(perpage_buffers); |
| foreach(lcp, perpage_tmppages) |
| { |
| Page tmppage = (Page) lfirst(lcp); |
| Buffer buffer = (Buffer) lfirst_int(buffer_cell); |
| Page page = BufferGetPage(buffer); |
| BlockNumber nextBlkNo; |
| BMBitmapOpaque bitmapPageOpaque; |
| |
| PageRestoreTempPage(tmppage, page); |
| MarkBufferDirty(buffer); |
| |
| /* Update the 'next' pointer on this page, before moving on */ |
| buffer_cell = lnext(perpage_buffers, buffer_cell); |
| Assert(buffer_cell); |
| nextBlkNo = BufferGetBlockNumber((Buffer) lfirst_int(buffer_cell)); |
| |
| bitmapPageOpaque = |
| (BMBitmapOpaque) PageGetSpecialPointer(page); |
| |
| bitmapPageOpaque->bm_bitmap_next = nextBlkNo; |
| } |
| list_free(perpage_tmppages); |
| |
| /* Update the bitmap page pointers in the LOV item */ |
| if (first_page_needs_init) |
| lovItem->bm_lov_head = first_blkno; |
| lovItem->bm_lov_tail = last_blkno; |
| } |
| else |
| { |
| START_CRIT_SECTION(); |
| } |
| |
| /* Update LOV item (lov_head/tail were updated above already) */ |
| lovItem->bm_last_compword = buf->last_compword; |
| lovItem->bm_last_word = buf->last_word; |
| lovItem->lov_words_header = (buf->is_last_compword_fill) ? 2 : 0; |
| lovItem->bm_last_setbit = buf->last_tid; |
| lovItem->bm_last_tid_location = buf->last_tid - buf->last_tid % BM_HRL_WORD_SIZE; |
| MarkBufferDirty(lovBuffer); |
| |
| /* Write WAL record */ |
| if (use_wal) |
| { |
| if (buf->curword > 0) |
| _bitmap_log_bitmapwords(rel, buf, |
| first_page_needs_init, perpage_xlrecs, perpage_buffers, |
| lovBuffer, lovOffset, buf->last_tid); |
| else |
| _bitmap_log_bitmap_lastwords(rel, lovBuffer, lovOffset, lovItem); |
| } |
| |
| END_CRIT_SECTION(); |
| |
| if (Debug_bitmap_print_insert) |
| elog(LOG, "Bitmap Insert: write bitmapwords: numwords=%d" |
| ", last_tid=" INT64_FORMAT |
| ", lov_blkno=%d, lov_offset=%d, lovItem->bm_last_setbit=" INT64_FORMAT |
| ", lovItem->bm_last_tid_location=" INT64_FORMAT |
| ", idxrelid=%u", |
| (int) buf->curword, buf->last_tid, |
| BufferGetBlockNumber(lovBuffer), lovOffset, |
| lovItem->bm_last_setbit, lovItem->bm_last_tid_location, |
| RelationGetRelid(rel)); |
| |
| /* release all bitmap buffers. */ |
| foreach(lcb, perpage_buffers) |
| { |
| UnlockReleaseBuffer((Buffer) lfirst_int(lcb)); |
| } |
| } |
| |
| |
| /* |
| * _bitmap_write_bitmapwords_on_page() -- Write an array of bitmap words into |
| * the given bitmap page. Returns the number of words that have been written |
| * in this call. |
| * |
| * The number of bitmap words writing to a given bitmap page is the maximum |
| * number of words that can be appended into the page. |
| */ |
| static uint64 |
| _bitmap_write_bitmapwords_on_page(Page bitmapPage, BMTIDBuffer *buf, int startWordNo, |
| xl_bm_bitmapwords_perpage *xlrec_perpage) |
| { |
| BMBitmapOpaque bitmapPageOpaque = (BMBitmapOpaque)PageGetSpecialPointer(bitmapPage); |
| BMBitmap bitmap = (BMBitmap) PageGetContentsMaxAligned(bitmapPage); |
| uint64 cwords; |
| uint64 words_written; |
| uint64 start_hword_no, end_hword_no; |
| uint64 final_start_hword_no, final_end_hword_no; |
| BM_HRL_WORD hwords[BM_MAX_NUM_OF_HEADER_WORDS + 1]; |
| uint64 num_hwords; |
| uint32 start_hword_bit, end_hword_bit, final_start_hword_bit; |
| |
| cwords = bitmapPageOpaque->bm_hrl_words_used; |
| |
| words_written = buf->curword - startWordNo; |
| if (words_written > BM_NUM_OF_HRL_WORDS_PER_PAGE - cwords) |
| words_written = BM_NUM_OF_HRL_WORDS_PER_PAGE - cwords; |
| |
| if (words_written == 0) |
| { |
| /* |
| * This page is full. We still include it in the WAL record, to keep this |
| * case the same as the general case. |
| */ |
| if (xlrec_perpage) |
| { |
| xlrec_perpage->bmp_last_tid = bitmapPageOpaque->bm_last_tid_location; |
| xlrec_perpage->bmp_start_hword_no = 0; |
| xlrec_perpage->bmp_num_hwords = 0; |
| xlrec_perpage->bmp_start_cword_no = cwords; |
| xlrec_perpage->bmp_num_cwords = 0; |
| } |
| return 0; |
| } |
| |
| /* Copy the content words */ |
| memcpy(bitmap->cwords + cwords, |
| buf->cwords + startWordNo, |
| words_written * sizeof(BM_HRL_WORD)); |
| /* and make note of what we changed for the WAL record */ |
| if (xlrec_perpage) |
| { |
| xlrec_perpage->bmp_start_cword_no = cwords; |
| xlrec_perpage->bmp_num_cwords = words_written; |
| } |
| |
| /* |
| * Shift the header words in 'words' to match with the bit positions |
| * in the header words in this page, and then copy them. |
| */ |
| start_hword_no = startWordNo/BM_HRL_WORD_SIZE; |
| end_hword_no = (startWordNo + words_written - 1) / BM_HRL_WORD_SIZE; |
| num_hwords = end_hword_no - start_hword_no + 1; |
| |
| Assert(num_hwords < BM_MAX_NUM_OF_HEADER_WORDS); |
| memcpy(hwords, buf->hwords + start_hword_no, |
| num_hwords * sizeof(BM_HRL_WORD)); |
| hwords[num_hwords] = 0; |
| |
| /* clean up the first and last header words */ |
| start_hword_bit = startWordNo % BM_HRL_WORD_SIZE; |
| end_hword_bit = (startWordNo + words_written - 1) % BM_HRL_WORD_SIZE; |
| |
| hwords[0] = ((BM_HRL_WORD)(hwords[0] << start_hword_bit)) >> |
| start_hword_bit; |
| hwords[num_hwords - 1] = (hwords[num_hwords - 1] >> |
| (BM_HRL_WORD_SIZE - end_hword_bit - 1)) << |
| (BM_HRL_WORD_SIZE - end_hword_bit - 1); |
| |
| final_start_hword_bit = cwords % BM_HRL_WORD_SIZE; |
| |
| if (final_start_hword_bit > start_hword_bit) |
| { |
| /* right-shift 'final-start_hword_bit - start_hword_bit' */ |
| rshift_header_bits(hwords, num_hwords + 1, |
| final_start_hword_bit - start_hword_bit); |
| } |
| |
| else if (final_start_hword_bit < start_hword_bit) |
| { |
| /* left-shift 'start_hword_bit - final_start_hword_bit' */ |
| lshift_header_bits(hwords, num_hwords, |
| start_hword_bit - final_start_hword_bit); |
| } |
| |
| /* copy the header bits */ |
| final_start_hword_no = cwords / BM_HRL_WORD_SIZE; |
| final_end_hword_no = (cwords + words_written - 1) / BM_HRL_WORD_SIZE; |
| |
| bitmap->hwords[final_start_hword_no] |= hwords[0]; |
| memcpy(bitmap->hwords + (final_start_hword_no + 1), |
| hwords + 1, |
| (final_end_hword_no - final_start_hword_no) * |
| sizeof(BM_HRL_WORD)); |
| /* and make note of what we changed for the WAL record */ |
| if (xlrec_perpage) |
| { |
| xlrec_perpage->bmp_start_hword_no = final_start_hword_no; |
| xlrec_perpage->bmp_num_hwords = (final_end_hword_no + 1) - final_start_hword_no; |
| } |
| |
| bitmapPageOpaque->bm_hrl_words_used += words_written; |
| |
| if (Debug_bitmap_print_insert) |
| { |
| elog(LOG, "Bitmap Insert: insert bitmapwords: " |
| ", old bm_last_tid_location=" INT64_FORMAT |
| ", new bm_last_tid_location=" INT64_FORMAT |
| ", first last_tid=" INT64_FORMAT |
| ", last_compword=" INT64_FORMAT |
| ", is_last_compword_fill=%s" |
| ", last_word=" INT64_FORMAT |
| ", last_setbit=" INT64_FORMAT, |
| bitmapPageOpaque->bm_last_tid_location, |
| buf->last_tids[startWordNo + words_written-1], |
| buf->last_tids[startWordNo], |
| buf->last_compword, |
| (buf->is_last_compword_fill ? "yes" : "no"), |
| buf->last_word, |
| buf->last_tid); |
| } |
| |
| Assert(bitmapPageOpaque->bm_last_tid_location <= |
| buf->last_tids[startWordNo + words_written-1]); |
| |
| bitmapPageOpaque->bm_last_tid_location = |
| buf->last_tids[startWordNo + words_written-1]; |
| if (xlrec_perpage) |
| xlrec_perpage->bmp_last_tid = bitmapPageOpaque->bm_last_tid_location; |
| |
| return words_written; |
| } |
| |
| /* |
| * create_lovitem() -- create a new LOV item. |
| * |
| * Create a new LOV item and append this item into the last LOV page. |
| * Each LOV item is associated with one distinct value for attributes |
| * to be indexed. This function also inserts this distinct value along |
| * with this new LOV item's block number and offsetnumber into the |
| * auxiliary heap and its b-tree of this bitmap index. |
| * |
| * This function returns the block number and offset number of this |
| * new LOV item. |
| * |
| * The caller should have an exclusive lock on metabuf. |
| */ |
| static void |
| create_lovitem(Relation rel, Buffer metabuf, uint64 tidnum, |
| TupleDesc tupDesc, Datum *attdata, bool *nulls, |
| Relation lovHeap, Relation lovIndex, BlockNumber *lovBlockP, |
| OffsetNumber *lovOffsetP, bool use_wal) |
| { |
| BMMetaPage metapage; |
| Buffer currLovBuffer; |
| Page currLovPage; |
| Datum* lovDatum; |
| bool* lovNulls; |
| OffsetNumber itemSize; |
| BMLOVItem lovitem; |
| int numOfAttrs; |
| bool is_new_lov_blkno = false; |
| |
| numOfAttrs = tupDesc->natts; |
| |
| /* Get the last LOV page. Meta page should be locked. */ |
| metapage = _bitmap_get_metapage_data(rel, metabuf); |
| *lovBlockP = metapage->bm_lov_lastpage; |
| |
| currLovBuffer = _bitmap_getbuf(rel, *lovBlockP, BM_WRITE); |
| currLovPage = BufferGetPage(currLovBuffer); |
| |
| lovitem = _bitmap_formitem(tidnum); |
| |
| *lovOffsetP = OffsetNumberNext(PageGetMaxOffsetNumber(currLovPage)); |
| itemSize = sizeof(BMLOVItemData); |
| |
| /* |
| * If there is not enough space in the last LOV page for |
| * a new item, create a new LOV page, and update the metapage. |
| */ |
| if (itemSize > PageGetFreeSpace(currLovPage)) |
| { |
| Buffer newLovBuffer; |
| |
| /* create a new LOV page */ |
| newLovBuffer = _bitmap_getbuf(rel, P_NEW, BM_WRITE); |
| _bitmap_init_lovpage(rel, newLovBuffer); |
| |
| _bitmap_relbuf(currLovBuffer); |
| |
| currLovBuffer = newLovBuffer; |
| currLovPage = BufferGetPage(currLovBuffer); |
| |
| is_new_lov_blkno = true; |
| } |
| |
| /* First create the LOV item. */ |
| START_CRIT_SECTION(); |
| |
| if (is_new_lov_blkno) |
| { |
| MarkBufferDirty(metabuf); |
| |
| metapage->bm_lov_lastpage = BufferGetBlockNumber(currLovBuffer); |
| |
| *lovOffsetP = OffsetNumberNext(PageGetMaxOffsetNumber(currLovPage)); |
| *lovBlockP = BufferGetBlockNumber(currLovBuffer); |
| } |
| |
| MarkBufferDirty(currLovBuffer); |
| |
| if (PageAddItem(currLovPage, (Item)lovitem, itemSize, *lovOffsetP, |
| false, false) == InvalidOffsetNumber) |
| ereport(ERROR, |
| (errcode(ERRCODE_INTERNAL_ERROR), |
| errmsg("failed to add LOV item in bitmap index \"%s\"" |
| " (relfilenode %u/%u/%u, LOV block %d, LOV offset %d)", |
| RelationGetRelationName(rel), |
| rel->rd_locator.spcOid, rel->rd_locator.spcOid, rel->rd_locator.spcOid, |
| *lovBlockP, *lovOffsetP))); |
| |
| if(use_wal) |
| _bitmap_log_lovitem(rel, MAIN_FORKNUM, currLovBuffer, *lovOffsetP, lovitem, |
| metabuf, is_new_lov_blkno); |
| |
| END_CRIT_SECTION(); |
| |
| /* |
| * .. and then create the entry in the auxiliary LOV heap and index for it. |
| * |
| * This could still fail for various reasons, e.g. if you run out of disk |
| * space. In that case, we'll leave behind an "orphan" LOV item, with no |
| * corresponding item in the LOV heap. That's a bit sloppy and leaky, but |
| * harmless; the orphaned LOV item won't be encountered by any scans. |
| */ |
| lovDatum = palloc0((numOfAttrs + 2) * sizeof(Datum)); |
| lovNulls = palloc0((numOfAttrs + 2) * sizeof(bool)); |
| memcpy(lovDatum, attdata, numOfAttrs * sizeof(Datum)); |
| memcpy(lovNulls, nulls, numOfAttrs * sizeof(bool)); |
| lovDatum[numOfAttrs] = Int32GetDatum(*lovBlockP); |
| lovNulls[numOfAttrs] = false; |
| lovDatum[numOfAttrs + 1] = Int16GetDatum(*lovOffsetP); |
| lovNulls[numOfAttrs + 1] = false; |
| |
| _bitmap_insert_lov(lovHeap, lovIndex, lovDatum, lovNulls, use_wal); |
| |
| _bitmap_relbuf(currLovBuffer); |
| |
| if (Debug_bitmap_print_insert) |
| elog(LOG, "Bitmap Insert: create a lov item: " |
| "lovBlock=%d, lovOffset=%d, is_new_lovblock=%d, idxrelid=%u", |
| *lovBlockP, *lovOffsetP, is_new_lov_blkno, RelationGetRelid(rel)); |
| |
| pfree(lovitem); |
| pfree(lovDatum); |
| pfree(lovNulls); |
| } |
| |
| /* |
| * When building an index we try and buffer calls to write tids to disk |
| * as it will result in lots of I/Os. |
| */ |
| |
| static void |
| buf_add_tid(Relation rel, BMTidBuildBuf *tids, uint64 tidnum, |
| BMBuildState *state, BlockNumber lov_block, OffsetNumber off) |
| { |
| BMTIDBuffer *buf; |
| BMTIDLOVBuffer *lov_buf = NULL; |
| |
| /* If we surpass maintenance_work_mem, free some space from the buffer */ |
| if (tids->byte_size >= maintenance_work_mem * 1024L) |
| buf_make_space(rel, tids, state->use_wal); |
| |
| /* |
| * tids is lazily initialized. If we do not have a current LOV block |
| * buffer, initialize one. |
| */ |
| if (!BlockNumberIsValid(tids->max_lov_block) || |
| tids->max_lov_block < lov_block) |
| { |
| /* |
| * XXX: We're currently not including the size of this data structure |
| * in out byte_size count... should we? |
| */ |
| lov_buf = palloc(sizeof(BMTIDLOVBuffer)); |
| lov_buf->lov_block = lov_block; |
| MemSet(lov_buf->bufs, 0, BM_MAX_LOVITEMS_PER_PAGE * sizeof(BMTIDBuffer *)); |
| tids->max_lov_block = lov_block; |
| |
| /* |
| * Add the new LOV buffer to the list head. It seems reasonable that |
| * future calls to this function will want this lov_block rather than |
| * older lov_blocks. |
| */ |
| tids->lov_blocks = lcons(lov_buf, tids->lov_blocks); |
| } |
| else |
| { |
| ListCell *cell; |
| |
| foreach(cell, tids->lov_blocks) |
| { |
| BMTIDLOVBuffer *tmp = lfirst(cell); |
| if(tmp->lov_block == lov_block) |
| { |
| lov_buf = tmp; |
| break; |
| } |
| } |
| } |
| |
| Assert(lov_buf); |
| Assert(off - 1 < BM_MAX_LOVITEMS_PER_PAGE); |
| |
| if (lov_buf->bufs[off - 1]) |
| { |
| |
| buf = lov_buf->bufs[off - 1]; |
| |
| Buffer lovbuf = _bitmap_getbuf(rel, lov_block, BM_WRITE); |
| |
| if (tidnum < buf->last_tid) |
| { |
| /* |
| * Usually, tidnum is greater than lovItem->bm_last_setbit. |
| * However, if we build bitmap index on a heap table, there could |
| * have HOT-chain, and it'll return the root tuple's tid, which could |
| * lead to the TIDs we scanned are not in order, so we need to scan |
| * through the bitmap vector, and update the bit in tidnum directly. |
| */ |
| _bitmap_write_new_bitmapwords(rel, lovbuf, off, buf, state->use_wal); |
| _bitmap_free_tidbuf(buf); |
| |
| updatesetbit(rel, lovbuf, off, tidnum, state->use_wal); |
| } |
| else |
| buf_add_tid_with_fill(rel, buf, lovbuf, off, tidnum, state->use_wal); |
| |
| _bitmap_relbuf(lovbuf); |
| } |
| else |
| { |
| /* no pre-existing buffer found, create a new one */ |
| Buffer lovbuf; |
| Page page; |
| BMLOVItem lovitem; |
| uint16 bytes_added; |
| |
| buf = (BMTIDBuffer *)palloc0(sizeof(BMTIDBuffer)); |
| |
| lovbuf = _bitmap_getbuf(rel, lov_block, BM_WRITE); |
| page = BufferGetPage(lovbuf); |
| lovitem = (BMLOVItem)PageGetItem(page, PageGetItemId(page, off)); |
| |
| buf->last_tid = lovitem->bm_last_setbit; |
| buf->last_compword = lovitem->bm_last_compword; |
| buf->last_word = lovitem->bm_last_word; |
| buf->is_last_compword_fill = (lovitem->lov_words_header == 2); |
| |
| MemSet(buf->hwords, 0, BM_NUM_OF_HEADER_WORDS * sizeof(BM_HRL_WORD)); |
| |
| bytes_added = buf_extend(buf); |
| |
| buf->curword = 0; |
| |
| buf_add_tid_with_fill(rel, buf, lovbuf, off, tidnum, state->use_wal); |
| |
| _bitmap_relbuf(lovbuf); |
| |
| lov_buf->bufs[off - 1] = buf; |
| tids->byte_size += bytes_added; |
| } |
| } |
| |
| /* |
| * buf_add_tid_with_fill() -- Worker for buf_add_tid(). |
| * |
| * Return how many bytes are used. Since we move words to disk when |
| * there is no space left for new header words, this returning number |
| * can be negative. |
| */ |
| static int16 |
| buf_add_tid_with_fill(Relation rel, BMTIDBuffer *buf, |
| Buffer lovBuffer, OffsetNumber off, |
| uint64 tidnum, bool use_wal) |
| { |
| int64 zeros; |
| uint16 inserting_pos; |
| int16 bytes_used = 0; |
| |
| /* |
| * Compute how many zeros between this set bit and the last inserted |
| * set bit. |
| */ |
| zeros = tidnum - buf->last_tid - 1; |
| |
| if (zeros > 0) |
| { |
| uint64 zerosNeeded; |
| uint64 numOfTotalFillWords; |
| |
| /* |
| * Calculate how many bits are needed to fill up the existing last |
| * bitmap word. |
| */ |
| zerosNeeded = |
| BM_HRL_WORD_SIZE - ((buf->last_tid - 1) % BM_HRL_WORD_SIZE) - 1; |
| |
| if (zerosNeeded > 0 && zeros >= zerosNeeded) |
| { |
| /* |
| * The last bitmap word is complete now. We merge it with the |
| * last bitmap complete word. |
| */ |
| bytes_used -= |
| buf_ensure_head_space(rel, buf, lovBuffer, off, use_wal); |
| |
| bytes_used += mergewords(buf, false); |
| zeros -= zerosNeeded; |
| } |
| |
| /* |
| * If the remaining zeros are more than BM_HRL_WORD_SIZE, |
| * We construct the last bitmap word to be a fill word, and merge it |
| * with the last complete bitmap word. |
| */ |
| numOfTotalFillWords = zeros/BM_HRL_WORD_SIZE; |
| |
| while (numOfTotalFillWords > 0) |
| { |
| BM_HRL_WORD numOfFillWords; |
| |
| CHECK_FOR_INTERRUPTS(); |
| |
| if (numOfTotalFillWords >= MAX_FILL_LENGTH) |
| numOfFillWords = MAX_FILL_LENGTH; |
| else |
| numOfFillWords = numOfTotalFillWords; |
| |
| buf->last_word = BM_MAKE_FILL_WORD(0, numOfFillWords); |
| |
| bytes_used -= |
| buf_ensure_head_space(rel, buf, lovBuffer, off, use_wal); |
| bytes_used += mergewords(buf, true); |
| |
| numOfTotalFillWords -= numOfFillWords; |
| zeros -= ((uint64)numOfFillWords * BM_HRL_WORD_SIZE); |
| } |
| } |
| |
| Assert((zeros >= 0) && (zeros<BM_HRL_WORD_SIZE)); |
| |
| inserting_pos = (tidnum-1)%BM_HRL_WORD_SIZE; |
| buf->last_word |= (((BM_HRL_WORD)1) << inserting_pos); |
| |
| if (tidnum % BM_HRL_WORD_SIZE == 0) |
| { |
| bool lastWordFill = false; |
| |
| if (buf->last_word == LITERAL_ALL_ZERO) |
| { |
| buf->last_word = BM_MAKE_FILL_WORD(0, 1); |
| lastWordFill = true; |
| } |
| |
| else if (buf->last_word == LITERAL_ALL_ONE) |
| { |
| buf->last_word = BM_MAKE_FILL_WORD(1, 1); |
| lastWordFill = true; |
| } |
| |
| bytes_used -= |
| buf_ensure_head_space(rel, buf, lovBuffer, off, use_wal); |
| bytes_used += mergewords(buf, lastWordFill); |
| } |
| |
| buf->last_tid = tidnum; |
| |
| return bytes_used; |
| } |
| |
| /* |
| * buf_ensure_head_space() -- If there is no space in the header words, |
| * move words in the given buffer to disk and free the existing space, |
| * and then allocate new space for future new words. |
| * |
| * The number of bytes freed are returned. |
| */ |
| static uint16 |
| buf_ensure_head_space(Relation rel, BMTIDBuffer *buf, |
| Buffer lovBuffer, OffsetNumber off, bool use_wal) |
| { |
| uint16 bytes_freed = 0; |
| |
| if (buf->curword >= (BM_NUM_OF_HEADER_WORDS * BM_HRL_WORD_SIZE)) |
| { |
| bytes_freed = buf_free_mem_block(rel, buf, lovBuffer, off, use_wal); |
| bytes_freed -= buf_extend(buf); |
| } |
| |
| return bytes_freed; |
| } |
| |
| /* |
| * buf_extend() -- Enlarge the memory allocated to a buffer. |
| * Return how many bytes are added to the buffer. |
| */ |
| static uint16 |
| buf_extend(BMTIDBuffer *buf) |
| { |
| uint16 bytes; |
| uint16 size; |
| |
| if (buf->num_cwords > 0 && buf->curword < buf->num_cwords - 1) |
| return 0; /* already large enough */ |
| |
| if(buf->num_cwords == 0) |
| { |
| size = BUF_INIT_WORDS; |
| buf->cwords = (BM_HRL_WORD *) |
| palloc0(BUF_INIT_WORDS * sizeof(BM_HRL_WORD)); |
| buf->last_tids = (uint64 *)palloc0(BUF_INIT_WORDS * sizeof(uint64)); |
| bytes = BUF_INIT_WORDS * sizeof(BM_HRL_WORD) + |
| BUF_INIT_WORDS * sizeof(uint64); |
| } |
| else |
| { |
| size = buf->num_cwords; |
| buf->cwords = repalloc(buf->cwords, 2 * size * sizeof(BM_HRL_WORD)); |
| MemSet(buf->cwords + size, 0, size * sizeof(BM_HRL_WORD)); |
| buf->last_tids = repalloc(buf->last_tids, 2 * size * sizeof(uint64)); |
| MemSet(buf->last_tids + size, 0, size * sizeof(uint64)); |
| bytes = 2 * size * sizeof(BM_HRL_WORD) + |
| 2 * size * sizeof(uint64); |
| } |
| buf->num_cwords += size; |
| return bytes; |
| } |
| |
| /* |
| * Spill some HRL compressed tids to disk |
| * |
| * Use buf_free_mem_block if the caller already holds a BM_WRITE lock |
| * on lovbuf (Buffer item). Else, buf_free_mem will get and lock lovBuffer |
| * by its block number, internally call buf_free_mem_block and finally |
| * release lovBuffer. |
| */ |
| |
| static uint16 |
| buf_free_mem(Relation rel, BMTIDBuffer *buf, BlockNumber lov_block, |
| OffsetNumber off, bool use_wal) |
| { |
| Buffer lovbuf; |
| uint16 bytes_freed; |
| |
| /* already done */ |
| if (buf->num_cwords == 0) |
| return 0; |
| |
| lovbuf = _bitmap_getbuf(rel, lov_block, BM_WRITE); |
| |
| bytes_freed = buf_free_mem_block(rel, buf, lovbuf, off, use_wal); |
| |
| _bitmap_relbuf(lovbuf); |
| |
| return bytes_freed; |
| } |
| |
| static uint16 |
| buf_free_mem_block(Relation rel, BMTIDBuffer *buf, Buffer lovbuf, |
| OffsetNumber off, bool use_wal) |
| { |
| /* already done */ |
| if (buf->num_cwords == 0) |
| return 0; |
| |
| _bitmap_write_new_bitmapwords(rel, lovbuf, off, buf, use_wal); |
| |
| return _bitmap_free_tidbuf(buf); |
| } |
| |
| /* |
| * Spill some data out of the buffer to free up space. |
| */ |
| static void |
| buf_make_space(Relation rel, BMTidBuildBuf *locbuf, bool use_wal) |
| { |
| ListCell *cell; |
| |
| /* |
| * Now, we could just pull the head of lov_blocks but there'd be no |
| * guarantee that we'd free up enough space. |
| */ |
| foreach(cell, locbuf->lov_blocks) |
| { |
| int i; |
| BMTIDLOVBuffer *lov_buf = (BMTIDLOVBuffer *)lfirst(cell); |
| BlockNumber lov_block = lov_buf->lov_block; |
| |
| for (i = 0; i < BM_MAX_LOVITEMS_PER_PAGE; i++) |
| { |
| BMTIDBuffer *buf = (BMTIDBuffer *)lov_buf->bufs[i]; |
| OffsetNumber off; |
| |
| /* return if we've freed enough space */ |
| #ifdef NOT_USED |
| if (locbuf->byte_size < (maintenance_work_mem * 1024L)) |
| return; |
| #endif |
| if (!buf || buf->num_cwords == 0) |
| continue; |
| |
| off = i + 1; |
| locbuf->byte_size -= buf_free_mem(rel, buf, lov_block, off, use_wal); |
| |
| if (QueryCancelPending || ProcDiePending) |
| return; |
| } |
| #ifdef NOT_USED |
| if (locbuf->byte_size < (maintenance_work_mem * 1024L)) |
| return; |
| #endif |
| } |
| } |
| |
| /* |
| * _bitmap_free_tidbuf() -- release the space. |
| */ |
| static uint16 |
| _bitmap_free_tidbuf(BMTIDBuffer* buf) |
| { |
| uint16 bytes_freed = 0; |
| |
| if (buf->last_tids) |
| pfree(buf->last_tids); |
| if (buf->cwords) |
| pfree(buf->cwords); |
| |
| bytes_freed = buf->num_cwords * sizeof(BM_HRL_WORD) + |
| buf->num_cwords * sizeof(uint64); |
| |
| buf->num_cwords = 0; |
| buf->curword = 0; |
| /* Paranoia */ |
| MemSet(buf->hwords, 0, sizeof(BM_HRL_WORD) * BM_NUM_OF_HEADER_WORDS); |
| |
| return bytes_freed; |
| } |
| |
| /* |
| * insertsetbit() -- insert a given set bit into a bitmap |
| * specified by lovBlock and lovOffset. |
| */ |
| static void |
| insertsetbit(Relation rel, BlockNumber lovBlock, OffsetNumber lovOffset, |
| uint64 tidnum, BMTIDBuffer *buf, bool use_wal) |
| { |
| Buffer lovBuffer; |
| Page lovPage; |
| BMLOVItem lovItem; |
| |
| lovBuffer = _bitmap_getbuf(rel, lovBlock, BM_WRITE); |
| lovPage = BufferGetPage(lovBuffer); |
| lovItem = (BMLOVItem) PageGetItem(lovPage, |
| PageGetItemId(lovPage, lovOffset)); |
| |
| buf->last_compword = lovItem->bm_last_compword; |
| buf->last_word = lovItem->bm_last_word; |
| buf->is_last_compword_fill = (lovItem->lov_words_header >= 2); |
| buf->last_tid = lovItem->bm_last_setbit; |
| MemSet(buf->hwords, 0, BM_NUM_OF_HEADER_WORDS * sizeof(BM_HRL_WORD)); |
| if (buf->cwords) |
| { |
| MemSet(buf->cwords, 0, |
| buf->num_cwords * sizeof(BM_HRL_WORD)); |
| } |
| if (buf->last_tids) |
| MemSet(buf->last_tids, 0, |
| buf->num_cwords * sizeof(uint64)); |
| buf->curword = 0; |
| |
| /* |
| * Usually, tidnum is greater than lovItem->bm_last_setbit. |
| * However, if this is not the case, this should be called while |
| * doing 'vacuum full' or doing insertion after 'vacuum'. In this |
| * case, we try to update this bit in the corresponding bitmap |
| * vector. |
| */ |
| if (tidnum <= lovItem->bm_last_setbit) |
| { |
| /* |
| * Scan through the bitmap vector, and update the bit in |
| * tidnum. |
| */ |
| updatesetbit(rel, lovBuffer, lovOffset, tidnum, use_wal); |
| |
| _bitmap_relbuf(lovBuffer); |
| return; |
| } |
| |
| /* |
| * To insert this new set bit, we also need to add all zeros between |
| * this set bit and last set bit. We construct all new words here. |
| */ |
| buf_add_tid_with_fill(rel, buf, lovBuffer, lovOffset, tidnum, use_wal); |
| |
| /* |
| * If there are only updates to the last bitmap complete word and |
| * last bitmp word, we simply needs to update the lov buffer. |
| */ |
| if (buf->num_cwords == 0) |
| { |
| START_CRIT_SECTION(); |
| |
| MarkBufferDirty(lovBuffer); |
| |
| lovItem->bm_last_compword = buf->last_compword; |
| lovItem->bm_last_word = buf->last_word; |
| lovItem->lov_words_header = |
| (buf->is_last_compword_fill) ? 2 : 0; |
| lovItem->bm_last_setbit = tidnum; |
| lovItem->bm_last_tid_location = tidnum - tidnum % BM_HRL_WORD_SIZE; |
| |
| if (use_wal) |
| _bitmap_log_bitmap_lastwords |
| (rel, lovBuffer, lovOffset, lovItem); |
| |
| END_CRIT_SECTION(); |
| |
| if (Debug_bitmap_print_insert) |
| elog(LOG, "Bitmap Insert: insert to last two words" |
| ", bm_last_setbit=" INT64_FORMAT |
| ", bm_last_tid_location=" INT64_FORMAT |
| ", idxrelid=%u", |
| lovItem->bm_last_setbit, |
| lovItem->bm_last_tid_location, |
| RelationGetRelid(rel)); |
| |
| _bitmap_relbuf(lovBuffer); |
| |
| return; |
| } |
| |
| /* |
| * Write bitmap words to bitmap pages. When there are no enough |
| * space for all these bitmap words, new bitmap pages are created. |
| */ |
| _bitmap_write_new_bitmapwords(rel, lovBuffer, lovOffset, |
| buf, use_wal); |
| _bitmap_relbuf(lovBuffer); |
| } |
| |
| /* |
| * _bitmap_write_alltids() -- write all tids in the given buffer into disk. |
| */ |
| void |
| _bitmap_write_alltids(Relation rel, BMTidBuildBuf *tids, |
| bool use_wal) |
| { |
| ListCell *cell; |
| |
| foreach(cell, tids->lov_blocks) |
| { |
| int i; |
| BMTIDLOVBuffer *lov_buf = (BMTIDLOVBuffer *)lfirst(cell); |
| BlockNumber lov_block = lov_buf->lov_block; |
| |
| for (i = 0; i < BM_MAX_LOVITEMS_PER_PAGE; i++) |
| { |
| BMTIDBuffer *buf = (BMTIDBuffer *)lov_buf->bufs[i]; |
| OffsetNumber off; |
| |
| CHECK_FOR_INTERRUPTS(); |
| |
| if (!buf || buf->num_cwords == 0) |
| continue; |
| |
| off = i + 1; |
| |
| buf_free_mem(rel, buf, lov_block, off, use_wal); |
| |
| pfree(buf); |
| |
| lov_buf->bufs[i] = NULL; |
| } |
| } |
| list_free_deep(tids->lov_blocks); |
| tids->lov_blocks = NIL; |
| tids->byte_size = 0; |
| } |
| |
| /* |
| * build_inserttuple() -- insert a new tuple into the bitmap index |
| * during the bitmap index construction. |
| * |
| * Each new tuple has an assigned number -- tidnum, called a |
| * tid location, which represents the bit location for this tuple in |
| * a bitmap vector. To speed up the construction, this function does not |
| * write this tid location into its bitmap vector immediately. We maintain |
| * a buffer -- BMTidBuildBuf to keep an array of tid locations |
| * for each distinct attribute value. |
| * |
| * If this insertion causes the buffer to overflow, we write tid locations |
| * for enough distinct values to disk to accommodate this new tuple. |
| */ |
| static void |
| build_inserttuple(Relation rel, uint64 tidnum, |
| ItemPointerData ht_ctid pg_attribute_unused(), TupleDesc tupDesc, |
| Datum *attdata, bool *nulls, BMBuildState *state) |
| { |
| Buffer metabuf; |
| BlockNumber lovBlock; |
| OffsetNumber lovOffset; |
| bool blockNull; |
| bool offsetNull; |
| BMTidBuildBuf *tidLocsBuffer; |
| int attno; |
| bool allNulls = true; |
| BMBuildHashKey *entry; |
| |
| CHECK_FOR_INTERRUPTS(); |
| |
| tidLocsBuffer = state->bm_tidLocsBuffer; |
| |
| /* Check if all attributes have value of NULL. */ |
| for (attno = 0; attno < tupDesc->natts; attno++) |
| { |
| if (!nulls[attno]) |
| { |
| allNulls = false; |
| break; |
| } |
| } |
| |
| metabuf = _bitmap_getbuf(rel, BM_METAPAGE, BM_WRITE); |
| |
| /* |
| * if the inserting tuple has the value of NULL, then |
| * the corresponding tid array is the first. |
| */ |
| if (allNulls) |
| { |
| lovBlock = BM_LOV_STARTPAGE; |
| lovOffset = 1; |
| } |
| else |
| { |
| bool found; |
| BMBuildLovData *lov; |
| |
| if (state->lovitem_hash) |
| { |
| BMBuildHashKey toLookup; |
| toLookup.attributeValueArr = attdata; |
| toLookup.isNullArr = nulls; |
| |
| /* look up the hash to see if we can find the lov data that way */ |
| entry = (BMBuildHashKey *)hash_search(state->lovitem_hash, |
| (void*)&toLookup, |
| HASH_ENTER, &found); |
| if (!found) |
| { |
| /* Copy the key values in case someone modifies them */ |
| for(attno = 0; attno < tupDesc->natts; attno++) |
| { |
| Form_pg_attribute at = TupleDescAttr(tupDesc, attno); |
| |
| if (entry->isNullArr[attno]) |
| entry->attributeValueArr[attno] = 0; |
| else |
| entry->attributeValueArr[attno] = datumCopy(entry->attributeValueArr[attno], at->attbyval, |
| at->attlen); |
| } |
| |
| /* |
| * If the inserting tuple has a new value, then we create a new |
| * LOV item. |
| */ |
| create_lovitem(rel, metabuf, tidnum, tupDesc, attdata, |
| nulls, state->bm_lov_heap, state->bm_lov_index, |
| &lovBlock, &lovOffset, state->use_wal); |
| |
| lov = (BMBuildLovData *) (((char*)entry) + state->lovitem_hashKeySize ); |
| lov->lov_block = lovBlock; |
| lov->lov_off = lovOffset; |
| } |
| |
| else |
| { |
| lov = (BMBuildLovData *) (((char*)entry) + state->lovitem_hashKeySize ); |
| lovBlock = lov->lov_block; |
| lovOffset = lov->lov_off; |
| } |
| } |
| |
| else { |
| /* |
| * Search the btree to find the right bitmap vector to append |
| * this bit. Here, we reset the scan key and call index_rescan. |
| */ |
| for (attno = 0; attno<tupDesc->natts; attno++) |
| { |
| ScanKey theScanKey = (ScanKey)(((char*)state->bm_lov_scanKeys) + |
| attno * sizeof(ScanKeyData)); |
| if (nulls[attno]) |
| { |
| theScanKey->sk_flags = SK_ISNULL; |
| theScanKey->sk_argument = attdata[attno]; |
| } |
| else |
| { |
| theScanKey->sk_flags = 0; |
| theScanKey->sk_argument = attdata[attno]; |
| } |
| } |
| |
| index_rescan(state->bm_lov_scanDesc, state->bm_lov_scanKeys, tupDesc->natts, NULL, 0); |
| |
| found = _bitmap_findvalue(state->bm_lov_heap, state->bm_lov_index, |
| state->bm_lov_scanKeys, state->bm_lov_scanDesc, |
| &lovBlock, &blockNull, &lovOffset, &offsetNull); |
| |
| if (!found) |
| { |
| /* |
| * If the inserting tuple has a new value, then we create a new |
| * LOV item. |
| */ |
| create_lovitem(rel, metabuf, tidnum, tupDesc, attdata, |
| nulls, state->bm_lov_heap, state->bm_lov_index, |
| &lovBlock, &lovOffset, state->use_wal); |
| } |
| } |
| } |
| |
| buf_add_tid(rel, tidLocsBuffer, tidnum, state, lovBlock, lovOffset); |
| _bitmap_wrtbuf(metabuf); |
| |
| CHECK_FOR_INTERRUPTS(); |
| } |
| |
| /* |
| * inserttuple() -- insert a new tuple into the bitmap index. |
| * |
| * This function finds the corresponding bitmap vector associated with |
| * the given attribute value, and inserts a set bit into this bitmap |
| * vector. Each distinct attribute value is stored as a LOV item, which |
| * is stored in a list of LOV pages. |
| * |
| * If there is no LOV item associated with the given attribute value, |
| * a new LOV item is created and appended into the last LOV page. |
| * |
| * For support the high-cardinality case for attributes to be indexed, |
| * we also maintain an auxiliary heap and a btree structure for all |
| * the distinct attribute values so that the search for the |
| * corresponding bitmap vector can be done faster. The heap |
| * contains all attributes to be indexed and 2 more attributes -- |
| * the block number of the offset number of the block that stores |
| * the corresponding LOV item. The b-tree index is on this new heap |
| * and the key contains all attributes to be indexed. |
| */ |
| static void |
| inserttuple(Relation rel, Buffer metabuf, uint64 tidnum, |
| ItemPointerData ht_ctid pg_attribute_unused(), TupleDesc tupDesc, Datum *attdata, |
| bool *nulls, Relation lovHeap, Relation lovIndex, ScanKey scanKey, |
| IndexScanDesc scanDesc, bool use_wal) |
| { |
| BlockNumber lovBlock; |
| OffsetNumber lovOffset; |
| bool blockNull, offsetNull; |
| bool allNulls = true; |
| int attno; |
| |
| BMTIDBuffer buf; |
| MemSet(&buf, 0, sizeof(buf)); |
| |
| /* Check if the values of given attributes are all NULL. */ |
| for (attno = 0; attno < tupDesc->natts; attno++) |
| { |
| if (!nulls[attno]) |
| { |
| allNulls = false; |
| break; |
| } |
| } |
| |
| /* |
| * if the inserting tuple has the value NULL, then the LOV item is |
| * the first item in the lovBuffer. |
| */ |
| if (allNulls) |
| { |
| lovBlock = BM_LOV_STARTPAGE; |
| lovOffset = 1; |
| } |
| else |
| { |
| bool res = false; |
| |
| /* |
| * Search through the lov heap and index to find the LOV item which |
| * has the same value as the inserting tuple. If such an item is |
| * not found, then we create a new LOV item, and insert it into the |
| * lov heap and index. |
| */ |
| |
| /* |
| * XXX: We lock the meta page to guarantee that only one writer |
| * will create a new lovItem at once. However, this does not |
| * guard against a race condition where a concurrent writer is |
| * inserting the same key as us. So we do another search with |
| * SnapshotDirty. If such a key is found, we have to wait for |
| * the other guy, and try again. However, this may cause |
| * distributed deadlock (see MPP-3155). The fix is to use |
| * FrozenTransactionId for tuples in the LOV heap so that |
| * all tuples are always visible to any transactions. |
| * |
| * The problem is, locking the metapage is pretty heavy handed |
| * because the read routines need a read lock on it. There are a |
| * few other things we could do instead: use a BM insert lock or |
| * wrap the code below in a PG_TRY and try and catch the unique |
| * constraint violation from the btree code. |
| */ |
| |
| LockBuffer(metabuf, BM_WRITE); |
| |
| res = _bitmap_findvalue(lovHeap, lovIndex, scanKey, scanDesc, &lovBlock, |
| &blockNull, &lovOffset, &offsetNull); |
| |
| if(!res) |
| { |
| create_lovitem(rel, metabuf, tidnum, tupDesc, |
| attdata, nulls, lovHeap, lovIndex, |
| &lovBlock, &lovOffset, use_wal); |
| } |
| LockBuffer(metabuf, BUFFER_LOCK_UNLOCK); |
| } |
| |
| /* |
| * Here, we have found the block number and offset number of the |
| * LOV item that points to the bitmap page, to which we will |
| * append the set bit. |
| */ |
| |
| insertsetbit(rel, lovBlock, lovOffset, tidnum, &buf, use_wal); |
| |
| _bitmap_free_tidbuf(&buf); |
| } |
| |
| /* |
| * _bitmap_buildinsert() -- insert an index tuple during index creation. |
| */ |
| void |
| _bitmap_buildinsert(Relation rel, ItemPointerData ht_ctid, Datum *attdata, |
| bool *nulls, BMBuildState *state) |
| { |
| TupleDesc tupDesc; |
| uint64 tidOffset; |
| |
| tidOffset = BM_IPTR_TO_INT(&ht_ctid); |
| |
| tupDesc = RelationGetDescr(rel); |
| |
| /* insert a new bit into the corresponding bitmap */ |
| build_inserttuple(rel, tidOffset, ht_ctid, |
| tupDesc, attdata, nulls, state); |
| } |
| |
| /* |
| * _bitmap_doinsert() -- insert an index tuple for a given tuple. |
| */ |
| void |
| _bitmap_doinsert(Relation rel, ItemPointerData ht_ctid, Datum *attdata, |
| bool *nulls) |
| { |
| uint64 tidOffset; |
| TupleDesc tupDesc; |
| Buffer metabuf; |
| BMMetaPage metapage; |
| Relation lovHeap, lovIndex; |
| ScanKey scanKeys; |
| IndexScanDesc scanDesc; |
| int attno; |
| |
| tupDesc = RelationGetDescr(rel); |
| if (tupDesc->natts <= 0) |
| return ; |
| |
| tidOffset = BM_IPTR_TO_INT(&ht_ctid); |
| |
| /* insert a new bit into the corresponding bitmap using the HRL scheme */ |
| metabuf = _bitmap_getbuf(rel, BM_METAPAGE, BM_READ); |
| metapage = _bitmap_get_metapage_data(rel, metabuf); |
| _bitmap_open_lov_heapandindex(rel, metapage, &lovHeap, &lovIndex, |
| RowExclusiveLock); |
| |
| LockBuffer(metabuf, BUFFER_LOCK_UNLOCK); |
| |
| scanKeys = (ScanKey) palloc0(tupDesc->natts * sizeof(ScanKeyData)); |
| |
| for (attno = 0; attno < tupDesc->natts; attno++) |
| { |
| Oid eq_opr; |
| RegProcedure opfuncid; |
| ScanKey scanKey; |
| |
| get_sort_group_operators(TupleDescAttr(tupDesc, attno)->atttypid, |
| false, true, false, |
| NULL, &eq_opr, NULL, NULL); |
| opfuncid = get_opcode(eq_opr); |
| |
| scanKey = (ScanKey) (((char *)scanKeys) + attno * sizeof(ScanKeyData)); |
| |
| ScanKeyEntryInitialize(scanKey, |
| nulls[attno] ? SK_ISNULL : 0, |
| attno + 1, |
| BTEqualStrategyNumber, |
| InvalidOid, |
| lovIndex->rd_indcollation[attno], |
| opfuncid, |
| attdata[attno]); |
| } |
| |
| scanDesc = index_beginscan(lovHeap, lovIndex, GetActiveSnapshot(), |
| tupDesc->natts, 0); |
| index_rescan(scanDesc, scanKeys, tupDesc->natts, NULL, 0); |
| |
| /* insert this new tuple into the bitmap index. */ |
| inserttuple(rel, metabuf, tidOffset, ht_ctid, tupDesc, attdata, nulls, |
| lovHeap, lovIndex, scanKeys, scanDesc, RelationNeedsWAL(rel)); |
| |
| index_endscan(scanDesc); |
| _bitmap_close_lov_heapandindex(lovHeap, lovIndex, RowExclusiveLock); |
| |
| ReleaseBuffer(metabuf); |
| pfree(scanKeys); |
| } |