| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| /* |
| * tuplesort_mkheap.c |
| * Multi level key heap. |
| * |
| */ |
| #include "postgres.h" |
| #include "utils/tuplesort.h" |
| #include "utils/tuplesort_mk.h" |
| #include "utils/datum.h" |
| #include "utils/builtins.h" |
| #include "miscadmin.h" |
| |
| #include "cdb/cdbvars.h" |
| |
| /* |
| * Multi-Key Heap. |
| * |
| * This is a heap, plus an array. The array (called lvtops) holds the first N keys of the top heap element. |
| * N is the entry's level |
| * For other entries, their level indicates for how many of those N keys they match the top heap element |
| * |
| * For example, a heap with 3 multi-key entries: |
| * ['bob','barker','impostor'] <== this is the top entry of the heap |
| * ['bob','barker','price is right'] |
| * ['bob','evans', 'sausage'] |
| * |
| * So, our lvtops could have: |
| * ['bob','barker'] |
| * |
| * And our entries would have the levels: |
| * 2: top entry of the heap so indicates # of values in lvtops |
| * 2: matches the top entry/lvtops on the first two keys ('bob' and 'barker') |
| * 1: matches the top entry/lvtops on first key ('bob') |
| * |
| * Note that lvtops could also have the following, with the given entries |
| * ['bob','barker','impostor'] |
| * 3: top entry of the heap so indicates # of values in lvtops |
| * 2: matches the top entry/lvtops on the first two keys ('bob' and 'barker') |
| * 1: matches the top entry/lvtops on first key ('bob') |
| * |
| * Therefore, level tells you something useful about the relationship between values. |
| * |
| * Note also that because we CANNOT insert values SMALLER than the top of the heap, |
| * the level of an entry (say, e) never goes down -- because that would require lvtops |
| * values to become smaller which is impossible without adding a smaller value! |
| * |
| * The purpose of this weird heap is that: |
| * 1) we do not do any unnecessary lv expansions -- meaning, many tuples in a multi-key comparison differ |
| * only in first few keys so no need to materialize the later keys and comparable keys. |
| * 2) the compare is cheap because once you've determined the level of a row then you can compare using only level |
| * |
| * Testing note: see attachment on MPP-7231 for some useful testing code |
| * |
| * |
| * |
| * NOTE ON UNIQUENESS CHECKING: |
| * |
| * When required to enforce uniqueness, by necessity we can only do that either when: |
| * a) a value is inserted and we detect that it equals the root then uniqueness is violated |
| * b) at any point, if one of the root's children exactly matches the root then uniqueness is violated |
| * |
| * However, when inserting a value that is greater than the root then we can't check whether it is |
| * equal to some other value in the heap. Therefore, the heap may contain duplicate values at |
| * any point in time. |
| * |
| * For this reason, we do NOT try to check uniqueness when building from array -- it would take time and |
| * we are assuming that duplicates inside the heaps will be later detected by case b) above. |
| */ |
| |
| |
| /* During merge phase, we check for interrupts every COMPARES_BETWEEN_INTERRUPT_CHECKS comparisons */ |
| #define COMPARES_BETWEEN_INTERRUPT_CHECKS 100000 |
| /* Counter to keep track of how many comparisons since last CHECK_FOR_INTERRUPTS */ |
| static uint32 compare_count = 0; |
| |
| static void mkheap_heapify(MKHeap *mkheap, bool doLv0); |
| static bool mke_has_duplicates_with_root(MKHeap *mkheap); |
| |
| static inline int LEFT(int n) |
| { |
| Assert(n>=0); |
| return (n << 1) + 1; |
| } |
| static inline int RIGHT(int n) |
| { |
| Assert(n>=0); |
| return (n << 1) + 2; |
| } |
| static inline int PARENT(int n) |
| { |
| Assert(n > 0); |
| return (n-1) >> 1; |
| } |
| static inline int SIBLING(int n) |
| { |
| Assert(n > 0); |
| return ((n-1) ^ 1) + 1; |
| } |
| static inline bool ISLEFT(int n) |
| { |
| Assert(n > 0); |
| return (n & 1) != 0; |
| } |
| static inline bool ISRIGHT(int n) |
| { |
| Assert(n > 0); |
| return (n & 1) == 0; |
| } |
| |
| /* |
| * Heap compare. |
| * Rule: |
| * 1. Empty entry is always bigger, so empty cells will be at the bottom |
| * 2. A entry is expanded at lv, means 0 - (lv-1) is the same as heaptop. |
| * 3. From 2, deeper level is smaller. |
| * 4. Smaller run is always smaller. Why we do this after 2. and 3.? |
| * When a entry is inserted, we don't know this entry belongs to |
| * current run on the next run yet. |
| * 5. Same run, same level, then we compare the null flag. Pg 8.2 (that is, |
| * what we are using, always use null last.) We do have a null first bit |
| * reserved. |
| * 6. Compare non null datum. |
| * |
| * These rules guarantee the heap top is minimal of current run. However, at |
| * deeper level, mkheap_compare return 0 does not mean the two entry are really |
| * equal. They are just equal up to a certain level. |
| * |
| * Step 1 - 5 are captured in compflags. |
| */ |
| static inline int32 mkheap_compare(MKHeap *heap, MKEntry *a, MKEntry *b) |
| { |
| |
| /* Check for interrupts every COMPARES_BETWEEN_INTERRUPT_CHECKS comparisons */ |
| if (compare_count++ % COMPARES_BETWEEN_INTERRUPT_CHECKS == 0) |
| { |
| CHECK_FOR_INTERRUPTS(); |
| } |
| |
| int32 ret = (a->compflags & (~MKE_CF_NULLBITS)) - |
| (b->compflags & (~MKE_CF_NULLBITS)); |
| |
| if (ret == 0) |
| { |
| int32 limitmask = heap->mkctxt->limitmask; |
| bool needcmp = false; |
| ret = a->compflags - b->compflags; |
| |
| /* need comparison if non-null values compare equal by compflags */ |
| needcmp = (ret == 0) && (!mke_is_null(a)); |
| |
| if (needcmp) |
| { |
| /* two entries were equal according to flags. This means the first lv entries are the same |
| * |
| * So now compare the actual Datums for the next level (which is level at index lv) |
| */ |
| int32 lv = mke_get_lv(a); |
| Assert(lv < heap->mkctxt->total_lv); |
| Assert(lv == mke_get_lv(b)); |
| ret = tupsort_compare_datum(a, b, heap->mkctxt->lvctxt+lv, heap->mkctxt); |
| } |
| |
| /* |
| * limitmask is either 0 or -1. If 0 then ~limitmask is all 1s, else it's all zeroes |
| * so this is equivalanet to uselimit ? -ret : ret |
| * |
| * So this causes a REVERSE sort -- which is what tuplesort_limit_sort wants and because for tuplesort_limit_insert |
| * we actually want to POP the HIGHER values, not the lower ones since we are trying to find the smallest elements |
| * |
| * There's no need to reverse the initial comparison of this function because it is considering only |
| * differences in levels and runs, which already represent the reversed order. |
| * |
| * todo: bug when ret is the min possible int val (so negating it leaves it unchanged). (and earlier comp does not guarantee -1,0,1 as results) |
| * todo: rename limitmask to reverseSortMask... |
| */ |
| ret = (ret & (~limitmask)) + ((-ret) & limitmask); |
| } |
| |
| return ret; |
| } |
| |
| #ifdef USE_ASSERT_CHECKING |
| extern void mkheap_verify_heap(MKHeap *heap, int top); |
| #else |
| #define mkheap_verify_heap(heap, top) |
| #endif |
| |
| /* |
| * Create a multi-key heap from an array of entries |
| * |
| * entries: the values to convert to a heap. This array will be under mkheap's ownership |
| * alloc_sz: the allocation size of entries: that is, how much room the array has. |
| * cnt: the number of elements in entries which should be used to build the heap |
| * mkctxt: description of the heap to build |
| * |
| * If alloc_sz is zero then entries must be NULL |
| */ |
| MKHeap *mkheap_from_array(MKEntry *entries, int alloc_sz, int cnt, MKContext *mkctxt) |
| { |
| MKHeap *heap = (MKHeap *) palloc(sizeof(MKHeap)); |
| |
| Assert(mkctxt); |
| Assert(alloc_sz >= cnt); |
| AssertEquivalent(entries!=NULL, cnt > 0); |
| AssertEquivalent(!entries, cnt == 0); |
| |
| heap->mkctxt = mkctxt; |
| heap->lvtops = palloc0(mkctxt->total_lv * sizeof(MKEntry)); |
| |
| heap->readers = NULL; |
| heap->nreader = 0; |
| |
| AssertImply(alloc_sz == 0, !entries); |
| Assert(cnt >= 0 && cnt <= alloc_sz); |
| |
| heap->p = entries; |
| heap->alloc_size = alloc_sz; |
| heap->count = cnt; |
| heap->maxentry = cnt; |
| |
| #ifdef USE_ASSERT_CHECKING |
| { |
| int i; |
| for(i=0; i<cnt; ++i) |
| { |
| Assert(mke_get_lv(entries+i) == 0); |
| Assert(mke_get_reader(entries+i) == 0); |
| } |
| } |
| #endif |
| |
| /* note: see NOTE ON UNIQUENESS CHECKING at the top of this file for information about why |
| * we don't check uniqueness here */ |
| |
| mk_prepare_array(entries, 0, cnt-1, 0, mkctxt); |
| mkheap_heapify(heap, true); |
| return heap; |
| } |
| |
| /* |
| * Create a mkheap from an array of readers, which are assumed to provide their |
| * values in sorted order. |
| * |
| * Invoking this causes each reader to be invoked once to get the first value. |
| */ |
| MKHeap *mkheap_from_reader(MKHeapReader *readers, int nreader, MKContext *ctxt) |
| { |
| int i, j; |
| MKHeap *heap = (MKHeap *) palloc(sizeof(MKHeap)); |
| |
| Assert(readers && nreader > 0); |
| Assert(ctxt); |
| |
| heap->mkctxt = ctxt; |
| heap->lvtops = palloc0(ctxt->total_lv * sizeof(MKEntry)); |
| heap->readers = readers; |
| heap->nreader = nreader; |
| |
| /* Create initial heap. */ |
| heap->p = palloc0(sizeof(MKEntry) * (nreader+1)); |
| heap->alloc_size = nreader+1; |
| |
| for(i=0, j=0; i<nreader; ++i) |
| { |
| MKEntry* e = heap->p + j; |
| bool fOK = readers[i].reader(readers[i].mkhr_ctxt, e); |
| |
| if(fOK) |
| { |
| mke_set_reader(e, i); |
| ++j; |
| } |
| } |
| |
| heap->count = j; |
| heap->maxentry = j; |
| |
| /* at least one reader had a value so convert the values to the heap */ |
| if (j>0) |
| { |
| mk_prepare_array(heap->p, 0, j-1, 0, ctxt); |
| mkheap_heapify(heap, true); |
| } |
| |
| return heap; |
| } |
| |
| /* |
| * Destroy mkheap. Free entry arrays at each level. |
| * |
| * Does NOT free the entries themselves. |
| */ |
| void mkheap_destroy(MKHeap *mkheap) |
| { |
| if(mkheap->p) |
| pfree(mkheap->p); |
| |
| if(mkheap->lvtops) |
| pfree(mkheap->lvtops); |
| |
| pfree(mkheap); |
| } |
| |
| /* |
| * MKHeap siftdown an entry, starting with a hole. |
| */ |
| static void mkheap_siftdown(MKHeap *heap, int hole, MKEntry *e) |
| { |
| int c; |
| int left; |
| int right; |
| int child; |
| |
| static int roundrobin = 0; |
| |
| Assert(hole >= 0 && hole < heap->maxentry); |
| |
| while(1) |
| { |
| left = LEFT(hole); |
| right = RIGHT(hole); |
| child = left; |
| |
| /* determine into which child to try sifting */ |
| if (right < heap->maxentry) |
| { |
| c = mkheap_compare(heap, heap->p+left, heap->p+right); |
| |
| if (c==0) |
| child = left + ((roundrobin++) & 1); |
| else if(c > 0) |
| child = right; |
| } |
| |
| /* sift there if possible */ |
| if (child < heap->maxentry) |
| { |
| c = mkheap_compare(heap, e, heap->p+child); |
| |
| if(c > 0) |
| { |
| heap->p[hole] = heap->p[child]; |
| hole = child; |
| continue; |
| } |
| } |
| |
| /* We are in right position */ |
| heap->p[hole] = *e; |
| return; |
| } |
| |
| Assert(!"Never here"); |
| } |
| |
| /* |
| * Prepare heap (expand lv) at postion cur. |
| */ |
| static void mkheap_prep_siftdown_lv(MKHeap *mkheap, int lv, int cur, MKContext *mkctxt) |
| { |
| bool expchild = false; |
| |
| int c; |
| |
| Assert(cur < mkheap->maxentry); |
| Assert(cur >= 0); |
| |
| /* If necessary, expand right */ |
| if(RIGHT(cur) < mkheap->maxentry) |
| { |
| c = mkheap_compare(mkheap, mkheap->p+cur, mkheap->p+RIGHT(cur)); |
| if(c==0) |
| { |
| mkheap_prep_siftdown_lv(mkheap, lv, RIGHT(cur), mkctxt); |
| expchild = true; |
| } |
| } |
| |
| /* If necessary, expand left */ |
| if(LEFT(cur) < mkheap->maxentry) |
| { |
| c = mkheap_compare(mkheap, mkheap->p+cur, mkheap->p+LEFT(cur)); |
| if(c==0) |
| { |
| mkheap_prep_siftdown_lv(mkheap, lv, LEFT(cur), mkctxt); |
| expchild = true; |
| } |
| } |
| |
| Assert(mke_get_lv(mkheap->p+cur) == lv-1); |
| if ( mkheap->mkctxt->fetchForPrep) |
| tupsort_prepare(mkheap->p+cur, mkctxt, lv); |
| |
| mke_set_lv(mkheap->p+cur, lv); |
| |
| if(expchild) |
| { |
| MKEntry tmp = mkheap->p[cur]; |
| mkheap_siftdown(mkheap, cur, &tmp); |
| } |
| } |
| |
| /* |
| * Update all lvtops entries. The lvtops entries whose array index is not greater |
| * than the level value stored in the current top element in the heap, update them |
| * with the value from the top element in the heap. This function also cleans up |
| * the remaining lvtops entries. |
| * |
| * This is needed after the current top entry in the heap is removed from the heap, since |
| * some lvtops' ptr may point to the tuple that could be freed when its associated entry |
| * is removed from the heap. |
| */ |
| static void mkheap_update_lvtops(MKHeap *mkheap) |
| { |
| int top_lv = mke_get_lv(mkheap->p); |
| |
| for (int lv = 0; lv < mkheap->mkctxt->total_lv; lv++) |
| { |
| MKEntry *lvEntry = mkheap->lvtops + lv; |
| MKEntry *srcEntry = NULL; |
| |
| if (lv <= top_lv) |
| { |
| srcEntry = mkheap->p; |
| |
| if ( mkheap->mkctxt->fetchForPrep) |
| tupsort_prepare(mkheap->p, mkheap->mkctxt, lv); |
| } |
| |
| (*mkheap->mkctxt->cpfr)(lvEntry, srcEntry, mkheap->mkctxt->lvctxt + lv); |
| |
| /* Set the correct level */ |
| mke_set_lv(lvEntry, lv); |
| } |
| } |
| |
| /** |
| * Save the value for mkheap's current top (at the level entered in the current top) into lvtops. |
| */ |
| static void mkheap_save_lvtop(MKHeap *mkheap) |
| { |
| int lv; |
| MKLvContext *lvctxt; |
| Assert(mkheap->count > 0); |
| |
| lv = mke_get_lv(mkheap->p); |
| lvctxt = mkheap->mkctxt->lvctxt + lv; |
| |
| Assert(lv < mkheap->mkctxt->total_lv); |
| |
| /* Free the old one and copy in the new one */ |
| (*mkheap->mkctxt->cpfr)(mkheap->lvtops + lv, mkheap->p, lvctxt); |
| } |
| |
| static inline bool mkheap_need_heapify(MKHeap *mkheap) |
| { |
| return ( |
| /* More than one entry */ |
| mkheap_cnt(mkheap) > 1 && |
| /* Not fully expanded */ |
| mke_get_lv(mkheap->p) < mkheap->mkctxt->total_lv-1 && |
| /* Cannot determine by comp flag */ |
| ( |
| (1 < mkheap->maxentry && mkheap->p[0].compflags == mkheap->p[1].compflags) |
| || |
| (2 < mkheap->maxentry && mkheap->p[0].compflags == mkheap->p[2].compflags) |
| ) |
| ); |
| } |
| |
| static void mkheap_heapify(MKHeap *mkheap, bool doLv0) |
| { |
| int i; |
| MKEntry tmp; |
| |
| /* Build heap, expanded already at lv 0 */ |
| if(doLv0) |
| { |
| for(i=PARENT(mkheap->maxentry); i >= 0; --i) |
| { |
| tmp = mkheap->p[i]; |
| mkheap_siftdown(mkheap, i, &tmp); |
| } |
| } |
| |
| /* Populate deeper levels */ |
| while(mke_get_lv(mkheap->p) < mkheap->mkctxt->total_lv - 1) |
| { |
| int nlv = mke_get_lv(mkheap->p) + 1; |
| |
| Assert(nlv < mkheap->mkctxt->total_lv); |
| |
| mkheap_save_lvtop(mkheap); |
| mkheap_prep_siftdown_lv(mkheap, nlv, 0, mkheap->mkctxt); |
| } |
| |
| #ifdef USE_ASSERT_CHECKING |
| if(gp_mk_sort_check) |
| mkheap_verify_heap(mkheap, 0); |
| #endif |
| } |
| |
| static bool |
| mke_has_duplicates_with_root(MKHeap *mkheap) |
| { |
| /** since the heap is heapified, only if the level is at the max can |
| * we have a duplicate |
| */ |
| if ( mke_get_lv(mkheap->p) + 1 != mkheap->mkctxt->total_lv ) |
| return false; |
| |
| /* see if we have children which exactly match the root */ |
| if(RIGHT(0) < mkheap->maxentry) |
| { |
| int cmp = mkheap_compare(mkheap, mkheap->p, mkheap->p+RIGHT(0)); |
| if(cmp==0) |
| return true; |
| } |
| |
| if(LEFT(0) < mkheap->maxentry) |
| { |
| int cmp = mkheap_compare(mkheap, mkheap->p, mkheap->p+LEFT(0)); |
| if(cmp==0) |
| return true; |
| } |
| return false; |
| } |
| |
| /* |
| * Insert an entry and perhaps return the top element of the heap in *e |
| * |
| * Comparison happens from the specified level to the end of levels, as needed: |
| * Return < 0 if smaller than heap top; *e is unchanged |
| * Return = 0 if eq to heap top ; *e is unchanged (but will have value equal to the heap top) |
| * Return > 0 if successfully inserted; *e is populated with the removed heap top |
| * |
| * If 0 would be returned but the heap is marked as needing uniqueness enforcement, error is generated instead |
| */ |
| static int mkheap_putAndGet_impl(MKHeap *mkheap, MKEntry *e) |
| { |
| int c = 0; |
| int toplv; |
| MKEntry tmp; |
| |
| /* can't put+get from an empty heap */ |
| Assert(mkheap->count > 0); |
| |
| if ( mkheap->mkctxt->enforceUnique && |
| mke_has_duplicates_with_root(mkheap)) |
| { |
| /** |
| * See NOTE ON UNIQUENESS CHECKING in the comment at the top of the file |
| * for information about why we check for duplicates here |
| */ |
| ERROR_UNIQUENESS_VIOLATED(); |
| } |
| |
| if(mke_is_empty(e)) |
| { |
| /* adding an empty (sentinel): just remove from count and fallthrough to where top is removed */ |
| --mkheap->count; |
| } |
| else if (mke_get_run(e) != mke_get_run(mkheap->p)) |
| { |
| /* this code assumes that the new one, with lower run, is LARGER than the top -- so it must be larger run */ |
| Assert(mke_get_run(e) > mke_get_run(mkheap->p)); |
| |
| /* when the runs differ it is because we attempted once with the runs equal. |
| * So if level is zero then: the level was zero AND validly prepared for the previous run -- and there is no need to prep again |
| */ |
| if ( mke_get_lv(e) != 0 ) |
| { |
| /* Not same run, at least prepare lv 0 */ |
| if ( mkheap->mkctxt->fetchForPrep) |
| tupsort_prepare(e, mkheap->mkctxt, 0); |
| mke_set_lv(e, 0); |
| } |
| |
| /* now fall through and let top be returned, new one is also inserted so no change to count */ |
| } |
| else |
| { |
| /* same run so figure out where it fits in relation to the heap top */ |
| int lv = 0; |
| |
| toplv = mke_get_lv(mkheap->p); |
| mke_set_lv(e, lv); |
| |
| /* populate level until we differ from the top element of the heap */ |
| while (lv < toplv) |
| { |
| if ( mkheap->mkctxt->fetchForPrep) |
| tupsort_prepare(e, mkheap->mkctxt, lv); |
| c = mkheap_compare(mkheap, e, mkheap->lvtops+lv); |
| if(c!=0) |
| break; |
| |
| mke_set_lv(e, ++lv); |
| } |
| |
| /* smaller than top */ |
| if(c<0) |
| return -1; |
| |
| /* we have not done e->lv == toplv yet since we increment at the end of the previous loop. Do it now. */ |
| Assert(mke_get_lv(e) == lv); |
| if(lv == toplv) |
| { |
| if ( mkheap->mkctxt->fetchForPrep) |
| tupsort_prepare(e, mkheap->mkctxt, lv); |
| c = mkheap_compare(mkheap, e, mkheap->p); |
| if(c < 0) |
| return -1; |
| } |
| |
| if(c==0) |
| { |
| /* |
| * Equal and at top level. |
| * |
| * This means that e is less-than/equal to all entries except the heap top. |
| */ |
| Assert(mke_get_lv(e) == lv); |
| Assert(lv == mke_get_lv(mkheap->p)); |
| |
| /* |
| * Expand more levels of lvtop in the current top and the new one until we detect a difference. |
| */ |
| while(lv < mkheap->mkctxt->total_lv - 1) |
| { |
| mkheap_save_lvtop(mkheap); |
| |
| ++lv; |
| |
| /* expand top */ |
| if ( mkheap->mkctxt->fetchForPrep) |
| tupsort_prepare(mkheap->p, mkheap->mkctxt, lv); |
| |
| /* expand new element */ |
| if ( mkheap->mkctxt->fetchForPrep) |
| tupsort_prepare(e, mkheap->mkctxt, lv); |
| |
| mke_set_lv(mkheap->p, lv); |
| mke_set_lv(e, lv); |
| |
| c = mkheap_compare(mkheap, e, mkheap->p); |
| if(c != 0) |
| break; |
| } |
| |
| if(c<=0) |
| { |
| /* if new one is less than current top then we just return that negative comparison */ |
| /* if new one equals the current top then we could do an insert and immediate removal -- but it |
| * won't matter so we simply return right away, leaving *e untouched */ |
| |
| /* enforce uniqueness first */ |
| if(c == 0 && mkheap->mkctxt->enforceUnique) |
| { |
| ERROR_UNIQUENESS_VIOLATED(); |
| } |
| |
| return c; |
| } |
| } |
| } |
| |
| /* Now, I am bigger than top but not definitely smaller/equal to all other entries |
| * |
| * So we will: |
| * return top as *e |
| * do heap shuffling to restore heap ordering |
| */ |
| tmp = *e; |
| *e = mkheap->p[0]; |
| |
| /* Sift down a hole to bottom of (current or next) run, depends on tmp.run */ |
| mkheap_siftdown(mkheap, 0, &tmp); |
| |
| if(mkheap_need_heapify(mkheap)) |
| mkheap_heapify(mkheap, false); |
| |
| if (mkheap->count > 0) |
| { |
| mkheap_update_lvtops(mkheap); |
| } |
| |
| #ifdef USE_ASSERT_CHECKING |
| if(gp_mk_sort_check) |
| mkheap_verify_heap(mkheap, 0); |
| #endif |
| |
| return 1; |
| } |
| |
| /* |
| * Insert with run. Runs provide a way for values SMALLER than |
| * the top element to be inserted -- they get pushed to |
| * the next run and then insert (and since 'run' will be part of |
| * their comparison then they will definitely be inserted). |
| * |
| * If the entry is greater than the top, we can |
| * insert it using the same run. |
| * |
| * If the entry is less than the heap top, we will |
| * put it to the next run, and insert it. |
| * |
| * Both non-equal cases will ends up with the entry |
| * inserted, and return a postive number. |
| * |
| * If the entry equal to the heaptop, we will NOT |
| * insert the element, but return 0. Caller need to |
| * handle this. In case of sort build runs, that means |
| * e can be written to current run immediately. |
| * |
| * Note: run must be passed in as equal to the run of the top element of the heap. |
| * Perhaps change the function interface so this is not required (make the |
| * function itself peek at the top run and return whether or not the top run |
| * of the heap was increased by this action |
| */ |
| int mkheap_putAndGet_run(MKHeap *mkheap, MKEntry *e, int16 run) |
| { |
| int ins; |
| |
| /* try insertion into specified run */ |
| Assert(!mke_is_empty(e)); |
| mke_set_run(e, run); |
| ins = mkheap_putAndGet_impl(mkheap, e); |
| |
| /* success! Greater than or Equal. */ |
| if(ins >= 0) |
| { |
| return ins; |
| } |
| |
| /* failed, put into next run */ |
| mke_set_run(e, run+1); |
| ins = mkheap_putAndGet_impl(mkheap, e); |
| |
| Assert(ins > 0); |
| return ins; |
| } |
| |
| int mkheap_putAndGet_reader(MKHeap *mkheap, MKEntry *out) |
| { |
| int n; |
| bool fOK; |
| int ins; |
| |
| /* First, check to see if the heap is empty */ |
| MKEntry *e = mkheap_peek(mkheap); |
| |
| if(!e) |
| { |
| mke_set_empty(out); |
| return -1; |
| } |
| |
| Assert(!mke_is_empty(e)); |
| |
| /* _reader code is not compatible with the run code */ |
| Assert(mke_get_run(e) == 0); |
| |
| /* Figure out the reader that provided the current top of the heap */ |
| n = mke_get_reader(e); |
| Assert(n>=0 && n<mkheap->nreader); |
| |
| /* Read in a new one that will replenish the current top of the heap */ |
| fOK = mkheap->readers[n].reader(mkheap->readers[n].mkhr_ctxt, out); |
| if(fOK) |
| { |
| mke_set_reader(out, n); |
| } |
| else |
| { |
| mke_set_empty(out); |
| } |
| Assert(mke_get_run(out) == 0); |
| |
| /* now insert it and pop the top of the heap or, if equal to the top, |
| * just don't do the insert |
| * |
| * So, on success (ins >= 0) then *out will refer to the element which |
| * is NOT in the heap (which could be either the result of mkheap_peek |
| * above OR the result of the .reader() call, depending) |
| */ |
| ins = mkheap_putAndGet_impl(mkheap, out); |
| |
| Assert(ins >= 0); |
| Assert(mke_get_reader(out) == n); |
| Assert(!mke_is_empty(out)); |
| |
| return ins; |
| } |
| |
| /* |
| * Get the next value. On success, a non-negative value is returned and *out is populated with the value |
| * that was on the top of the heap. |
| * |
| * if this is an array-backed heap then *out is inserted into the heap. If it's a reader-backed heap then |
| * *out is ignored on input. |
| */ |
| int mkheap_putAndGet(MKHeap *mkheap, MKEntry *out) |
| { |
| int ret = 0; |
| Assert(out); |
| |
| /* |
| * fetch from appropriate source |
| * |
| * note that these two cases don't behave the same in terms of how *out is treated. |
| * mkheap_putAndGet_reader should be called mkheap_get_reader -- it never puts the input value |
| * mkheap_putAndGet_impl will put *out if it's not empty, and then do the get. |
| */ |
| if(mkheap->nreader > 0) |
| ret = mkheap_putAndGet_reader(mkheap, out); |
| else |
| ret = mkheap_putAndGet_impl(mkheap, out); |
| |
| /* check: underlying call must have enforced uniquness */ |
| AssertImply(mkheap->mkctxt->enforceUnique, ret != 0); |
| |
| /* free *out */ |
| if(mkheap->mkctxt->cpfr) |
| (mkheap->mkctxt->cpfr)(out, NULL, mkheap->mkctxt->lvctxt + mke_get_lv(out)); |
| return ret; |
| } |
| |
| /** |
| * Return the top of the heap or NULL if the heap is empty |
| */ |
| MKEntry *mkheap_peek(MKHeap *mkheap) |
| { |
| if(mkheap->count == 0) |
| return NULL; |
| |
| Assert(!mke_is_empty(mkheap->p)); |
| return mkheap->p; |
| } |
| |
| #ifdef USE_ASSERT_CHECKING |
| void mkheap_verify_heap(MKHeap *heap, int top) |
| { |
| int empty_cnt = 0; |
| int i; |
| MKEntry e; |
| |
| if(heap->count == 0) |
| return; |
| |
| Assert(heap->count > 0); |
| Assert(heap->maxentry > 0); |
| Assert(heap->count <= heap->maxentry); |
| Assert(heap->maxentry <= heap->alloc_size); |
| |
| e = heap->p[0]; |
| mke_set_lv(&e, 0); |
| mke_clear_refc_copied(&e); |
| |
| /* Checking for lvtops */ |
| for(i=0; i<mke_get_lv(heap->p); ++i) |
| { |
| int c; |
| |
| /* Too much trouble dealing with ref counters. Just don't */ |
| /* |
| if(!mke_test_flag(heap->lvtops+i, MKE_RefCnt | MKE_Copied)) |
| { |
| */ |
| mke_set_lv(&e, i); |
| if ( heap->mkctxt->fetchForPrep) |
| tupsort_prepare(&e, heap->mkctxt, i); |
| |
| c = mkheap_compare(heap, &e, heap->lvtops+i); |
| Assert(c==0); |
| /* |
| } |
| */ |
| } |
| |
| /* Verify Heap property */ |
| for(i=top; i<heap->maxentry; ++i) |
| { |
| int left = LEFT(i); |
| int right = RIGHT(i); |
| int cl, cr; |
| |
| if(mke_is_empty(heap->p+i)) |
| ++empty_cnt; |
| |
| if(left >= heap->maxentry) |
| continue; |
| |
| cl = mkheap_compare(heap, heap->p+i, heap->p+left); |
| Assert(cl<=0); |
| if(i==0 && cl==0) |
| Assert(mke_get_lv(heap->p) == heap->mkctxt->total_lv-1); |
| |
| if(right >= heap->maxentry) |
| continue; |
| |
| cr = mkheap_compare(heap, heap->p+i, heap->p+right); |
| Assert(cr<=0); |
| if(i==0 && cr==0) |
| Assert(mke_get_lv(heap->p) == heap->mkctxt->total_lv-1); |
| } |
| } |
| |
| #endif |