| /*------------------------------------------------------------------------- |
| * |
| * nbtsearch.c |
| * Search code for postgres btrees. |
| * |
| * |
| * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * IDENTIFICATION |
| * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.107.2.1 2007/01/07 01:56:24 tgl Exp $ |
| * |
| *------------------------------------------------------------------------- |
| */ |
| |
| #include "postgres.h" |
| |
| #include "access/genam.h" |
| #include "access/nbtree.h" |
| #include "access/relscan.h" |
| #include "miscadmin.h" |
| #include "pgstat.h" |
| #include "storage/bufmgr.h" |
| #include "utils/lsyscache.h" |
| |
| |
| static bool _bt_readpage(IndexScanDesc scan, ScanDirection dir, |
| OffsetNumber offnum); |
| static bool _bt_steppage(IndexScanDesc scan, ScanDirection dir); |
| static Buffer _bt_walk_left(Relation rel, Buffer buf); |
| static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir); |
| |
| |
/*
 *	_bt_search() -- Search the tree for a particular scankey,
 *		or more precisely for the first leaf page it could be on.
 *
 * The passed scankey must be an insertion-type scankey (see nbtree/README),
 * but it can omit the rightmost column(s) of the index.
 *
 * When nextkey is false (the usual case), we are looking for the first
 * item >= scankey.  When nextkey is true, we are looking for the first
 * item strictly greater than scankey.
 *
 * Return value is a stack of parent-page pointers; the caller is
 * responsible for releasing it with _bt_freestack when done.  *bufP is
 * set to the address of the leaf-page buffer, which is read-locked and
 * pinned.  No locks are held on the parent pages, however!
 *
 * NOTE that the returned buffer is read-locked regardless of the access
 * parameter.  However, access = BT_WRITE will allow an empty root page
 * to be created and returned.  When access = BT_READ, an empty index
 * will result in *bufP being set to InvalidBuffer.
 */
BTStack
_bt_search(Relation rel, int keysz, ScanKey scankey, bool nextkey,
		   Buffer *bufP, int access)
{
	BTStack		stack_in = NULL;

	/* NOTE(review): presumably asserts the Greenplum mirrored-bufmgr lock
	 * is already held by the caller -- confirm against the macro's
	 * definition. */
	MIRROREDLOCK_BUFMGR_MUST_ALREADY_BE_HELD;

	/* Get the root page to start with */
	*bufP = _bt_getroot(rel, access);

	/* If index is empty and access = BT_READ, no root page is created. */
	if (!BufferIsValid(*bufP))
		return (BTStack) NULL;

	/* Loop iterates once per level descended in the tree */
	for (;;)
	{
		Page		page;
		BTPageOpaque opaque;
		OffsetNumber offnum;
		ItemId		itemid;
		IndexTuple	itup;
		BlockNumber blkno;
		BlockNumber par_blkno;
		BTStack		new_stack;

		/*
		 * Race -- the page we just grabbed may have split since we read its
		 * pointer in the parent (or metapage).  If it has, we may need to
		 * move right to its new sibling.  Do that.
		 */
		*bufP = _bt_moveright(rel, *bufP, keysz, scankey, nextkey, BT_READ);

		/* if this is a leaf page, we're done */
		page = BufferGetPage(*bufP);
		opaque = (BTPageOpaque) PageGetSpecialPointer(page);
		if (P_ISLEAF(opaque))
			break;

		/*
		 * Find the appropriate item on the internal page, and get the child
		 * page that it points to.
		 */
		offnum = _bt_binsrch(rel, *bufP, keysz, scankey, nextkey);
		itemid = PageGetItemId(page, offnum);
		itup = (IndexTuple) PageGetItem(page, itemid);
		blkno = ItemPointerGetBlockNumber(&(itup->t_tid));
		par_blkno = BufferGetBlockNumber(*bufP);

		/*
		 * We need to save the location of the index entry we chose in the
		 * parent page on a stack.  In case we split the tree, we'll use the
		 * stack to work back up to the parent page.  We also save the actual
		 * downlink (TID) to uniquely identify the index entry, in case it
		 * moves right while we're working lower in the tree.  See the paper
		 * by Lehman and Yao for how this is detected and handled.  (We use
		 * the child link to disambiguate duplicate keys in the index --
		 * Lehman and Yao disallow duplicate keys.)
		 */
		new_stack = (BTStack) palloc(sizeof(BTStackData));
		new_stack->bts_blkno = par_blkno;
		new_stack->bts_offset = offnum;
		memcpy(&new_stack->bts_btentry, itup, sizeof(IndexTupleData));
		new_stack->bts_parent = stack_in;

		/* drop the read lock on the parent page, acquire one on the child */
		*bufP = _bt_relandgetbuf(rel, *bufP, blkno, BT_READ);

		/* okay, all set to move down a level */
		stack_in = new_stack;
	}

	return stack_in;
}
| |
| /* |
| * _bt_moveright() -- move right in the btree if necessary. |
| * |
| * When we follow a pointer to reach a page, it is possible that |
| * the page has changed in the meanwhile. If this happens, we're |
| * guaranteed that the page has "split right" -- that is, that any |
| * data that appeared on the page originally is either on the page |
| * or strictly to the right of it. |
| * |
| * This routine decides whether or not we need to move right in the |
| * tree by examining the high key entry on the page. If that entry |
| * is strictly less than the scankey, or <= the scankey in the nextkey=true |
| * case, then we followed the wrong link and we need to move right. |
| * |
| * The passed scankey must be an insertion-type scankey (see nbtree/README), |
| * but it can omit the rightmost column(s) of the index. |
| * |
| * When nextkey is false (the usual case), we are looking for the first |
| * item >= scankey. When nextkey is true, we are looking for the first |
| * item strictly greater than scankey. |
| * |
| * On entry, we have the buffer pinned and a lock of the type specified by |
| * 'access'. If we move right, we release the buffer and lock and acquire |
| * the same on the right sibling. Return value is the buffer we stop at. |
| */ |
| Buffer |
| _bt_moveright(Relation rel, |
| Buffer buf, |
| int keysz, |
| ScanKey scankey, |
| bool nextkey, |
| int access) |
| { |
| Page page; |
| BTPageOpaque opaque; |
| int32 cmpval; |
| |
| MIRROREDLOCK_BUFMGR_MUST_ALREADY_BE_HELD; |
| |
| page = BufferGetPage(buf); |
| opaque = (BTPageOpaque) PageGetSpecialPointer(page); |
| |
| /* |
| * When nextkey = false (normal case): if the scan key that brought us to |
| * this page is > the high key stored on the page, then the page has split |
| * and we need to move right. (If the scan key is equal to the high key, |
| * we might or might not need to move right; have to scan the page first |
| * anyway.) |
| * |
| * When nextkey = true: move right if the scan key is >= page's high key. |
| * |
| * The page could even have split more than once, so scan as far as |
| * needed. |
| * |
| * We also have to move right if we followed a link that brought us to a |
| * dead page. |
| */ |
| cmpval = nextkey ? 0 : 1; |
| |
| while (!P_RIGHTMOST(opaque) && |
| (P_IGNORE(opaque) || |
| _bt_compare(rel, keysz, scankey, page, P_HIKEY) >= cmpval)) |
| { |
| /* step right one page */ |
| BlockNumber rblkno = opaque->btpo_next; |
| |
| buf = _bt_relandgetbuf(rel, buf, rblkno, access); |
| page = BufferGetPage(buf); |
| opaque = (BTPageOpaque) PageGetSpecialPointer(page); |
| } |
| |
| if (P_IGNORE(opaque)) |
| elog(ERROR, "fell off the end of index \"%s\"", |
| RelationGetRelationName(rel)); |
| |
| return buf; |
| } |
| |
| /* |
| * _bt_binsrch() -- Do a binary search for a key on a particular page. |
| * |
| * The passed scankey must be an insertion-type scankey (see nbtree/README), |
| * but it can omit the rightmost column(s) of the index. |
| * |
| * When nextkey is false (the usual case), we are looking for the first |
| * item >= scankey. When nextkey is true, we are looking for the first |
| * item strictly greater than scankey. |
| * |
| * On a leaf page, _bt_binsrch() returns the OffsetNumber of the first |
| * key >= given scankey, or > scankey if nextkey is true. (NOTE: in |
| * particular, this means it is possible to return a value 1 greater than the |
| * number of keys on the page, if the scankey is > all keys on the page.) |
| * |
| * On an internal (non-leaf) page, _bt_binsrch() returns the OffsetNumber |
| * of the last key < given scankey, or last key <= given scankey if nextkey |
| * is true. (Since _bt_compare treats the first data key of such a page as |
| * minus infinity, there will be at least one key < scankey, so the result |
| * always points at one of the keys on the page.) This key indicates the |
| * right place to descend to be sure we find all leaf keys >= given scankey |
| * (or leaf keys > given scankey when nextkey is true). |
| * |
| * This procedure is not responsible for walking right, it just examines |
| * the given page. _bt_binsrch() has no lock or refcount side effects |
| * on the buffer. |
| */ |
| OffsetNumber |
| _bt_binsrch(Relation rel, |
| Buffer buf, |
| int keysz, |
| ScanKey scankey, |
| bool nextkey) |
| { |
| Page page; |
| BTPageOpaque opaque; |
| OffsetNumber low, |
| high; |
| int32 result, |
| cmpval; |
| |
| MIRROREDLOCK_BUFMGR_MUST_ALREADY_BE_HELD; |
| |
| page = BufferGetPage(buf); |
| opaque = (BTPageOpaque) PageGetSpecialPointer(page); |
| |
| low = P_FIRSTDATAKEY(opaque); |
| high = PageGetMaxOffsetNumber(page); |
| |
| /* |
| * If there are no keys on the page, return the first available slot. Note |
| * this covers two cases: the page is really empty (no keys), or it |
| * contains only a high key. The latter case is possible after vacuuming. |
| * This can never happen on an internal page, however, since they are |
| * never empty (an internal page must have children). |
| */ |
| if (high < low) |
| return low; |
| |
| /* |
| * Binary search to find the first key on the page >= scan key, or first |
| * key > scankey when nextkey is true. |
| * |
| * For nextkey=false (cmpval=1), the loop invariant is: all slots before |
| * 'low' are < scan key, all slots at or after 'high' are >= scan key. |
| * |
| * For nextkey=true (cmpval=0), the loop invariant is: all slots before |
| * 'low' are <= scan key, all slots at or after 'high' are > scan key. |
| * |
| * We can fall out when high == low. |
| */ |
| high++; /* establish the loop invariant for high */ |
| |
| cmpval = nextkey ? 0 : 1; /* select comparison value */ |
| |
| while (high > low) |
| { |
| OffsetNumber mid = low + ((high - low) / 2); |
| |
| /* We have low <= mid < high, so mid points at a real slot */ |
| |
| result = _bt_compare(rel, keysz, scankey, page, mid); |
| |
| if (result >= cmpval) |
| low = mid + 1; |
| else |
| high = mid; |
| } |
| |
| /* |
| * At this point we have high == low, but be careful: they could point |
| * past the last slot on the page. |
| * |
| * On a leaf page, we always return the first key >= scan key (resp. > |
| * scan key), which could be the last slot + 1. |
| */ |
| if (P_ISLEAF(opaque)) |
| return low; |
| |
| /* |
| * On a non-leaf page, return the last key < scan key (resp. <= scan key). |
| * There must be one if _bt_compare() is playing by the rules. |
| */ |
| Assert(low > P_FIRSTDATAKEY(opaque)); |
| |
| return OffsetNumberPrev(low); |
| } |
| |
| /*---------- |
| * _bt_compare() -- Compare scankey to a particular tuple on the page. |
| * |
| * The passed scankey must be an insertion-type scankey (see nbtree/README), |
| * but it can omit the rightmost column(s) of the index. |
| * |
| * keysz: number of key conditions to be checked (might be less than the |
| * number of index columns!) |
| * page/offnum: location of btree item to be compared to. |
| * |
| * This routine returns: |
| * <0 if scankey < tuple at offnum; |
| * 0 if scankey == tuple at offnum; |
| * >0 if scankey > tuple at offnum. |
| * NULLs in the keys are treated as sortable values. Therefore |
| * "equality" does not necessarily mean that the item should be |
| * returned to the caller as a matching key! |
| * |
| * CRUCIAL NOTE: on a non-leaf page, the first data key is assumed to be |
| * "minus infinity": this routine will always claim it is less than the |
| * scankey. The actual key value stored (if any, which there probably isn't) |
| * does not matter. This convention allows us to implement the Lehman and |
| * Yao convention that the first down-link pointer is before the first key. |
| * See backend/access/nbtree/README for details. |
| *---------- |
| */ |
| int32 |
| _bt_compare(Relation rel, |
| int keysz, |
| ScanKey scankey, |
| Page page, |
| OffsetNumber offnum) |
| { |
| TupleDesc itupdesc = RelationGetDescr(rel); |
| BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); |
| IndexTuple itup; |
| int i; |
| |
| /* |
| * Force result ">" if target item is first data item on an internal page |
| * --- see NOTE above. |
| */ |
| if (!P_ISLEAF(opaque) && offnum == P_FIRSTDATAKEY(opaque)) |
| return 1; |
| |
| itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); |
| |
| /* |
| * The scan key is set up with the attribute number associated with each |
| * term in the key. It is important that, if the index is multi-key, the |
| * scan contain the first k key attributes, and that they be in order. If |
| * you think about how multi-key ordering works, you'll understand why |
| * this is. |
| * |
| * We don't test for violation of this condition here, however. The |
| * initial setup for the index scan had better have gotten it right (see |
| * _bt_first). |
| */ |
| |
| for (i = 1; i <= keysz; i++) |
| { |
| Datum datum; |
| bool isNull; |
| int32 result; |
| |
| datum = index_getattr(itup, scankey->sk_attno, itupdesc, &isNull); |
| |
| /* see comments about NULLs handling in btbuild */ |
| if (scankey->sk_flags & SK_ISNULL) /* key is NULL */ |
| { |
| if (isNull) |
| result = 0; /* NULL "=" NULL */ |
| else |
| result = 1; /* NULL ">" NOT_NULL */ |
| } |
| else if (isNull) /* key is NOT_NULL and item is NULL */ |
| { |
| result = -1; /* NOT_NULL "<" NULL */ |
| } |
| else |
| { |
| /* |
| * The sk_func needs to be passed the index value as left arg and |
| * the sk_argument as right arg (they might be of different |
| * types). Since it is convenient for callers to think of |
| * _bt_compare as comparing the scankey to the index item, we have |
| * to flip the sign of the comparison result. |
| * |
| * Note: curious-looking coding is to avoid overflow if comparison |
| * function returns INT_MIN. There is no risk of overflow for |
| * positive results. |
| */ |
| result = DatumGetInt32(FunctionCall2(&scankey->sk_func, |
| datum, |
| scankey->sk_argument)); |
| result = (result < 0) ? 1 : -result; |
| } |
| |
| /* if the keys are unequal, return the difference */ |
| if (result != 0) |
| return result; |
| |
| scankey++; |
| } |
| |
| /* if we get here, the keys are equal */ |
| return 0; |
| } |
| |
/*
 *	_bt_first() -- Find the first item in a scan.
 *
 *		We need to be clever about the direction of scan, the search
 *		conditions, and the tree ordering.  We find the first item (or,
 *		if backwards scan, the last item) in the tree that satisfies the
 *		qualifications in the scan key.  On success exit, the page containing
 *		the current index tuple is pinned but not locked, and data about
 *		the matching tuple(s) on the page has been loaded into so->currPos,
 *		and scan->xs_ctup.t_self is set to the heap TID of the current tuple.
 *
 * If there are no matching items in the index, we return FALSE, with no
 * pins or locks held.
 *
 * Note that scan->keyData[], and the so->keyData[] scankey built from it,
 * are both search-type scankeys (see nbtree/README for more about this).
 * Within this routine, we build a temporary insertion-type scankey to use
 * in locating the scan start position.
 */
bool
_bt_first(IndexScanDesc scan, ScanDirection dir)
{
	/* NOTE(review): presumably declares state for the Greenplum
	 * mirrored-bufmgr lock macros used below -- confirm against the
	 * macro definitions. */
	MIRROREDLOCK_BUFMGR_DECLARE;

	Relation	rel = scan->indexRelation;
	BTScanOpaque so = (BTScanOpaque) scan->opaque;
	Buffer		buf;
	BTStack		stack;
	OffsetNumber offnum;
	StrategyNumber strat;
	bool		nextkey;
	bool		goback;
	ScanKey		startKeys[INDEX_MAX_KEYS];
	ScanKeyData scankeys[INDEX_MAX_KEYS];
	int			keysCount = 0;
	int			i;
	StrategyNumber strat_total;

	/* count this index scan for the statistics collector */
	pgstat_count_index_scan(rel);

	/*
	 * Examine the scan keys and eliminate any redundant keys; also mark the
	 * keys that must be matched to continue the scan.
	 */
	_bt_preprocess_keys(scan);

	/*
	 * Quit now if _bt_preprocess_keys() discovered that the scan keys can
	 * never be satisfied (eg, x == 1 AND x > 2).
	 */
	if (!so->qual_ok)
		return false;

	/*----------
	 * Examine the scan keys to discover where we need to start the scan.
	 *
	 * We want to identify the keys that can be used as starting boundaries;
	 * these are =, >, or >= keys for a forward scan or =, <, <= keys for
	 * a backwards scan.  We can use keys for multiple attributes so long as
	 * the prior attributes had only =, >= (resp. =, <=) keys.  Once we accept
	 * a > or < boundary or find an attribute with no boundary (which can be
	 * thought of as the same as "> -infinity"), we can't use keys for any
	 * attributes to its right, because it would break our simplistic notion
	 * of what initial positioning strategy to use.
	 *
	 * When the scan keys include non-default operators, _bt_preprocess_keys
	 * may not be able to eliminate redundant keys; in such cases we will
	 * arbitrarily pick a usable one for each attribute.  This is correct
	 * but possibly not optimal behavior.  (For example, with keys like
	 * "x >= 4 AND x >= 5" we would elect to scan starting at x=4 when
	 * x=5 would be more efficient.)  Since the situation only arises in
	 * hokily-worded queries, live with it.
	 *
	 * When both equality and inequality keys appear for a single attribute
	 * (again, only possible when non-default operators appear), we *must*
	 * select one of the equality keys for the starting point, because
	 * _bt_checkkeys() will stop the scan as soon as an equality qual fails.
	 * For example, if we have keys like "x >= 4 AND x = 10" and we elect to
	 * start at x=4, we will fail and stop before reaching x=10.  If multiple
	 * equality quals survive preprocessing, however, it doesn't matter which
	 * one we use --- by definition, they are either redundant or
	 * contradictory.
	 *
	 * In this loop, row-comparison keys are treated the same as keys on their
	 * first (leftmost) columns.  We'll add on lower-order columns of the row
	 * comparison below, if possible.
	 *
	 * The selected scan keys (at most one per index column) are remembered by
	 * storing their addresses into the local startKeys[] array.
	 *----------
	 */
	strat_total = BTEqualStrategyNumber;
	if (so->numberOfKeys > 0)
	{
		AttrNumber	curattr;
		ScanKey		chosen;
		ScanKey		cur;

		/*
		 * chosen is the so-far-chosen key for the current attribute, if any.
		 * We don't cast the decision in stone until we reach keys for the
		 * next attribute.
		 */
		curattr = 1;
		chosen = NULL;

		/*
		 * Loop iterates from 0 to numberOfKeys inclusive; we use the last
		 * pass to handle after-last-key processing.  Actual exit from the
		 * loop is at one of the "break" statements below.
		 */
		for (cur = so->keyData, i = 0;; cur++, i++)
		{
			/* out of keys, or starting a new attribute: finalize curattr */
			if (i >= so->numberOfKeys || cur->sk_attno != curattr)
			{
				/*
				 * Done looking at keys for curattr.  If we didn't find a
				 * usable boundary key, quit; else save the boundary key
				 * pointer in startKeys.
				 */
				if (chosen == NULL)
					break;
				startKeys[keysCount++] = chosen;

				/*
				 * Adjust strat_total, and quit if we have stored a > or <
				 * key.
				 */
				strat = chosen->sk_strategy;
				if (strat != BTEqualStrategyNumber)
				{
					strat_total = strat;
					if (strat == BTGreaterStrategyNumber ||
						strat == BTLessStrategyNumber)
						break;
				}

				/*
				 * Done if that was the last attribute, or if next key is not
				 * in sequence (implying no boundary key is available for the
				 * next attribute).
				 */
				if (i >= so->numberOfKeys ||
					cur->sk_attno != curattr + 1)
					break;

				/*
				 * Reset for next attr.
				 */
				curattr = cur->sk_attno;
				chosen = NULL;
			}

			/* Can we use this key as a starting boundary for this attr? */
			switch (cur->sk_strategy)
			{
				case BTLessStrategyNumber:
				case BTLessEqualStrategyNumber:
					/* usable only when scanning backward */
					if (chosen == NULL && ScanDirectionIsBackward(dir))
						chosen = cur;
					break;
				case BTEqualStrategyNumber:
					/* override any non-equality choice */
					chosen = cur;
					break;
				case BTGreaterEqualStrategyNumber:
				case BTGreaterStrategyNumber:
					/* usable only when scanning forward */
					if (chosen == NULL && ScanDirectionIsForward(dir))
						chosen = cur;
					break;
			}
		}
	}

	/*
	 * If we found no usable boundary keys, we have to start from one end of
	 * the tree.  Walk down that edge to the first or last key, and scan from
	 * there.
	 */
	if (keysCount == 0)
		return _bt_endpoint(scan, dir);

	/*
	 * We want to start the scan somewhere within the index.  Set up an
	 * insertion scankey we can use to search for the boundary point we
	 * identified above.  The insertion scankey is built in the local
	 * scankeys[] array, using the keys identified by startKeys[].
	 */
	Assert(keysCount <= INDEX_MAX_KEYS);
	for (i = 0; i < keysCount; i++)
	{
		ScanKey		cur = startKeys[i];

		/* startKeys[] holds exactly one key per leading index column */
		Assert(cur->sk_attno == i + 1);

		if (cur->sk_flags & SK_ROW_HEADER)
		{
			/*
			 * Row comparison header: look to the first row member instead.
			 *
			 * The member scankeys are already in insertion format (ie, they
			 * have sk_func = 3-way-comparison function), but we have to watch
			 * out for nulls, which _bt_preprocess_keys didn't check.  A null
			 * in the first row member makes the condition unmatchable, just
			 * like qual_ok = false.
			 */
			ScanKey		subkey = (ScanKey) DatumGetPointer(cur->sk_argument);

			Assert(subkey->sk_flags & SK_ROW_MEMBER);
			if (subkey->sk_flags & SK_ISNULL)
				return false;
			memcpy(scankeys + i, subkey, sizeof(ScanKeyData));

			/*
			 * If the row comparison is the last positioning key we accepted,
			 * try to add additional keys from the lower-order row members.
			 * (If we accepted independent conditions on additional index
			 * columns, we use those instead --- doesn't seem worth trying to
			 * determine which is more restrictive.)  Note that this is OK
			 * even if the row comparison is of ">" or "<" type, because the
			 * condition applied to all but the last row member is effectively
			 * ">=" or "<=", and so the extra keys don't break the positioning
			 * scheme.  But, by the same token, if we aren't able to use all
			 * the row members, then the part of the row comparison that we
			 * did use has to be treated as just a ">=" or "<=" condition, and
			 * so we'd better adjust strat_total accordingly.
			 */
			if (i == keysCount - 1)
			{
				bool		used_all_subkeys = false;

				Assert(!(subkey->sk_flags & SK_ROW_END));
				for(;;)
				{
					subkey++;
					Assert(subkey->sk_flags & SK_ROW_MEMBER);
					if (subkey->sk_attno != keysCount + 1)
						break;	/* out-of-sequence, can't use it */
					if (subkey->sk_strategy != cur->sk_strategy)
						break;	/* wrong direction, can't use it */
					if (subkey->sk_flags & SK_ISNULL)
						break;	/* can't use null keys */
					Assert(keysCount < INDEX_MAX_KEYS);
					memcpy(scankeys + keysCount, subkey, sizeof(ScanKeyData));
					keysCount++;
					if (subkey->sk_flags & SK_ROW_END)
					{
						used_all_subkeys = true;
						break;
					}
				}
				if (!used_all_subkeys)
				{
					/* partial row use: demote strict inequality to >= / <= */
					switch (strat_total)
					{
						case BTLessStrategyNumber:
							strat_total = BTLessEqualStrategyNumber;
							break;
						case BTGreaterStrategyNumber:
							strat_total = BTGreaterEqualStrategyNumber;
							break;
					}
				}
				break;			/* done with outer loop */
			}
		}
		else
		{
			/*
			 * Ordinary comparison key.  Transform the search-style scan key
			 * to an insertion scan key by replacing the sk_func with the
			 * appropriate btree comparison function.
			 *
			 * If scankey operator is of default subtype, we can use the
			 * cached comparison function; otherwise gotta look it up in the
			 * catalogs.
			 */
			if (cur->sk_subtype == InvalidOid)
			{
				FmgrInfo   *procinfo;

				procinfo = index_getprocinfo(rel, cur->sk_attno, BTORDER_PROC);
				ScanKeyEntryInitializeWithInfo(scankeys + i,
											   cur->sk_flags,
											   cur->sk_attno,
											   InvalidStrategy,
											   InvalidOid,
											   procinfo,
											   cur->sk_argument);
			}
			else
			{
				RegProcedure cmp_proc;

				cmp_proc = get_opclass_proc(rel->rd_indclass->values[i],
											cur->sk_subtype,
											BTORDER_PROC);
				ScanKeyEntryInitialize(scankeys + i,
									   cur->sk_flags,
									   cur->sk_attno,
									   InvalidStrategy,
									   cur->sk_subtype,
									   cmp_proc,
									   cur->sk_argument);
			}
		}
	}

	/*----------
	 * Examine the selected initial-positioning strategy to determine exactly
	 * where we need to start the scan, and set flag variables to control the
	 * code below.
	 *
	 * If nextkey = false, _bt_search and _bt_binsrch will locate the first
	 * item >= scan key.  If nextkey = true, they will locate the first
	 * item > scan key.
	 *
	 * If goback = true, we will then step back one item, while if
	 * goback = false, we will start the scan on the located item.
	 *
	 * it's yet other place to add some code later for is(not)null ...
	 *----------
	 */
	switch (strat_total)
	{
		case BTLessStrategyNumber:

			/*
			 * Find first item >= scankey, then back up one to arrive at last
			 * item < scankey.  (Note: this positioning strategy is only used
			 * for a backward scan, so that is always the correct starting
			 * position.)
			 */
			nextkey = false;
			goback = true;
			break;

		case BTLessEqualStrategyNumber:

			/*
			 * Find first item > scankey, then back up one to arrive at last
			 * item <= scankey.  (Note: this positioning strategy is only used
			 * for a backward scan, so that is always the correct starting
			 * position.)
			 */
			nextkey = true;
			goback = true;
			break;

		case BTEqualStrategyNumber:

			/*
			 * If a backward scan was specified, need to start with last equal
			 * item not first one.
			 */
			if (ScanDirectionIsBackward(dir))
			{
				/*
				 * This is the same as the <= strategy.  We will check at the
				 * end whether the found item is actually =.
				 */
				nextkey = true;
				goback = true;
			}
			else
			{
				/*
				 * This is the same as the >= strategy.  We will check at the
				 * end whether the found item is actually =.
				 */
				nextkey = false;
				goback = false;
			}
			break;

		case BTGreaterEqualStrategyNumber:

			/*
			 * Find first item >= scankey.  (This is only used for forward
			 * scans.)
			 */
			nextkey = false;
			goback = false;
			break;

		case BTGreaterStrategyNumber:

			/*
			 * Find first item > scankey.  (This is only used for forward
			 * scans.)
			 */
			nextkey = true;
			goback = false;
			break;

		default:
			/* can't get here, but keep compiler quiet */
			elog(ERROR, "unrecognized strat_total: %d", (int) strat_total);
			return false;
	}

	/*
	 * Use the manufactured insertion scan key to descend the tree and
	 * position ourselves on the target leaf page.
	 */

	// -------- MirroredLock ----------
	MIRROREDLOCK_BUFMGR_LOCK;

	stack = _bt_search(rel, keysCount, scankeys, nextkey, &buf, BT_READ);

	/* don't need to keep the stack around... */
	_bt_freestack(stack);

	/* remember which buffer we have pinned, if any */
	so->currPos.buf = buf;

	if (!BufferIsValid(buf))
	{
		/* Only get here if index is completely empty */

		MIRROREDLOCK_BUFMGR_UNLOCK;
		// -------- MirroredLock ----------

		return false;
	}

	/* initialize moreLeft/moreRight appropriately for scan direction */
	if (ScanDirectionIsForward(dir))
	{
		so->currPos.moreLeft = false;
		so->currPos.moreRight = true;
	}
	else
	{
		so->currPos.moreLeft = true;
		so->currPos.moreRight = false;
	}
	so->numKilled = 0;			/* just paranoia */
	so->markItemIndex = -1;		/* ditto */

	/* position to the precise item on the page */
	offnum = _bt_binsrch(rel, buf, keysCount, scankeys, nextkey);

	/*
	 * If nextkey = false, we are positioned at the first item >= scan key, or
	 * possibly at the end of a page on which all the existing items are less
	 * than the scan key and we know that everything on later pages is greater
	 * than or equal to scan key.
	 *
	 * If nextkey = true, we are positioned at the first item > scan key, or
	 * possibly at the end of a page on which all the existing items are less
	 * than or equal to the scan key and we know that everything on later
	 * pages is greater than scan key.
	 *
	 * The actually desired starting point is either this item or the prior
	 * one, or in the end-of-page case it's the first item on the next page or
	 * the last item on this page.  Adjust the starting offset if needed.  (If
	 * this results in an offset before the first item or after the last one,
	 * _bt_readpage will report no items found, and then we'll step to the
	 * next page as needed.)
	 */
	if (goback)
		offnum = OffsetNumberPrev(offnum);

	/*
	 * Now load data from the first page of the scan.
	 */
	if (!_bt_readpage(scan, dir, offnum))
	{
		/*
		 * There's no actually-matching data on this page.  Try to advance to
		 * the next page.  Return false if there's no matching data at all.
		 */
		if (!_bt_steppage(scan, dir))
		{

			MIRROREDLOCK_BUFMGR_UNLOCK;
			// -------- MirroredLock ----------

			return false;
		}
	}

	/* Drop the lock, but not pin, on the current page */
	LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);

	MIRROREDLOCK_BUFMGR_UNLOCK;
	// -------- MirroredLock ----------

	/* OK, itemIndex says what to return */
	scan->xs_ctup.t_self = so->currPos.items[so->currPos.itemIndex].heapTid;

	return true;
}
| |
| /* |
| * _bt_next() -- Get the next item in a scan. |
| * |
| * On entry, so->currPos describes the current page, which is pinned |
| * but not locked, and so->currPos.itemIndex identifies which item was |
| * previously returned. |
| * |
| * On successful exit, scan->xs_ctup.t_self is set to the TID of the |
| * next heap tuple, and so->currPos is updated as needed. |
| * |
| * On failure exit (no more tuples), we release pin and set |
| * so->currPos.buf to InvalidBuffer. |
| */ |
bool
_bt_next(IndexScanDesc scan, ScanDirection dir)
{
	MIRROREDLOCK_BUFMGR_DECLARE;

	BTScanOpaque so = (BTScanOpaque) scan->opaque;

	/*
	 * Advance to next tuple on current page; or if there's no more, try to
	 * step to the next page with data.
	 */

	/*
	 * NOTE(review): the MirroredLock bracketing appears to guard all
	 * buffer-manager access in this port; every exit path below has a
	 * matching UNLOCK -- confirm against the MIRROREDLOCK_BUFMGR macro
	 * definitions.
	 */
	// -------- MirroredLock ----------
	MIRROREDLOCK_BUFMGR_LOCK;

	if (ScanDirectionIsForward(dir))
	{
		/* items[] cached from the current page run out past lastItem */
		if (++so->currPos.itemIndex > so->currPos.lastItem)
		{
			/* We must acquire lock before applying _bt_steppage */
			Assert(BufferIsValid(so->currPos.buf));
			LockBuffer(so->currPos.buf, BT_READ);
			if (!_bt_steppage(scan, dir))
			{
				/* _bt_steppage released all pins/locks and cleared buf */
				MIRROREDLOCK_BUFMGR_UNLOCK;
				// -------- MirroredLock ----------

				return false;
			}
			/* Drop the lock, but not pin, on the new page */
			LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
		}
	}
	else
	{
		/* backward scan: itemIndex walks down toward firstItem */
		if (--so->currPos.itemIndex < so->currPos.firstItem)
		{
			/* We must acquire lock before applying _bt_steppage */
			Assert(BufferIsValid(so->currPos.buf));
			LockBuffer(so->currPos.buf, BT_READ);
			if (!_bt_steppage(scan, dir))
			{
				/* _bt_steppage released all pins/locks and cleared buf */

				MIRROREDLOCK_BUFMGR_UNLOCK;
				// -------- MirroredLock ----------

				return false;
			}
			/* Drop the lock, but not pin, on the new page */
			LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
		}
	}

	MIRROREDLOCK_BUFMGR_UNLOCK;
	// -------- MirroredLock ----------

	/* OK, itemIndex says what to return */
	scan->xs_ctup.t_self = so->currPos.items[so->currPos.itemIndex].heapTid;

	return true;
}
| |
| /* |
| * _bt_readpage() -- Load data from current index page into so->currPos |
| * |
| * Caller must have pinned and read-locked so->currPos.buf; the buffer's state |
| * is not changed here. Also, currPos.moreLeft and moreRight must be valid; |
| * they are updated as appropriate. All other fields of so->currPos are |
| * initialized from scratch here. |
| * |
| * We scan the current page starting at offnum and moving in the indicated |
| * direction. All items matching the scan keys are loaded into currPos.items. |
| * moreLeft or moreRight (as appropriate) is cleared if _bt_checkkeys reports |
| * that there can be no more matching tuples in the current scan direction. |
| * |
| * Returns true if any matching items found on the page, false if none. |
| */ |
| static bool |
| _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum) |
| { |
| BTScanOpaque so = (BTScanOpaque) scan->opaque; |
| Page page; |
| BTPageOpaque opaque; |
| OffsetNumber minoff; |
| OffsetNumber maxoff; |
| int itemIndex; |
| bool continuescan; |
| |
| MIRROREDLOCK_BUFMGR_MUST_ALREADY_BE_HELD; |
| |
| /* we must have the buffer pinned and locked */ |
| Assert(BufferIsValid(so->currPos.buf)); |
| |
| page = BufferGetPage(so->currPos.buf); |
| opaque = (BTPageOpaque) PageGetSpecialPointer(page); |
| minoff = P_FIRSTDATAKEY(opaque); |
| maxoff = PageGetMaxOffsetNumber(page); |
| |
| /* |
| * we must save the page's right-link while scanning it; this tells us |
| * where to step right to after we're done with these items. There is no |
| * corresponding need for the left-link, since splits always go right. |
| */ |
| so->currPos.nextPage = opaque->btpo_next; |
| |
| if (ScanDirectionIsForward(dir)) |
| { |
| /* load items[] in ascending order */ |
| itemIndex = 0; |
| |
| offnum = Max(offnum, minoff); |
| |
| while (offnum <= maxoff) |
| { |
| if (_bt_checkkeys(scan, page, offnum, dir, &continuescan)) |
| { |
| /* tuple passes all scan key conditions, so remember it */ |
| /* _bt_checkkeys put the heap ptr into scan->xs_ctup.t_self */ |
| so->currPos.items[itemIndex].heapTid = scan->xs_ctup.t_self; |
| so->currPos.items[itemIndex].indexOffset = offnum; |
| itemIndex++; |
| } |
| if (!continuescan) |
| { |
| /* there can't be any more matches, so stop */ |
| so->currPos.moreRight = false; |
| break; |
| } |
| |
| offnum = OffsetNumberNext(offnum); |
| } |
| |
| Assert(itemIndex <= MaxIndexTuplesPerPage); |
| so->currPos.firstItem = 0; |
| so->currPos.lastItem = itemIndex - 1; |
| so->currPos.itemIndex = 0; |
| } |
| else |
| { |
| /* load items[] in descending order */ |
| itemIndex = MaxIndexTuplesPerPage; |
| |
| offnum = Min(offnum, maxoff); |
| |
| while (offnum >= minoff) |
| { |
| if (_bt_checkkeys(scan, page, offnum, dir, &continuescan)) |
| { |
| /* tuple passes all scan key conditions, so remember it */ |
| /* _bt_checkkeys put the heap ptr into scan->xs_ctup.t_self */ |
| itemIndex--; |
| so->currPos.items[itemIndex].heapTid = scan->xs_ctup.t_self; |
| so->currPos.items[itemIndex].indexOffset = offnum; |
| } |
| if (!continuescan) |
| { |
| /* there can't be any more matches, so stop */ |
| so->currPos.moreLeft = false; |
| break; |
| } |
| |
| offnum = OffsetNumberPrev(offnum); |
| } |
| |
| Assert(itemIndex >= 0); |
| so->currPos.firstItem = itemIndex; |
| so->currPos.lastItem = MaxIndexTuplesPerPage - 1; |
| so->currPos.itemIndex = MaxIndexTuplesPerPage - 1; |
| } |
| |
| return (so->currPos.firstItem <= so->currPos.lastItem); |
| } |
| |
| /* |
| * _bt_steppage() -- Step to next page containing valid data for scan |
| * |
| * On entry, so->currPos.buf must be pinned and read-locked. We'll drop |
| * the lock and pin before moving to next page. |
| * |
| * On success exit, we hold pin and read-lock on the next interesting page, |
| * and so->currPos is updated to contain data from that page. |
| * |
| * If there are no more matching records in the given direction, we drop all |
| * locks and pins, set so->currPos.buf to InvalidBuffer, and return FALSE. |
| */ |
static bool
_bt_steppage(IndexScanDesc scan, ScanDirection dir)
{
	BTScanOpaque so = (BTScanOpaque) scan->opaque;
	Relation	rel;
	Page		page;
	BTPageOpaque opaque;

	MIRROREDLOCK_BUFMGR_MUST_ALREADY_BE_HELD;

	/* we must have the buffer pinned and locked */
	Assert(BufferIsValid(so->currPos.buf));

	/* Before leaving current page, deal with any killed items */
	if (so->numKilled > 0)
		_bt_killitems(scan, true);

	/*
	 * Before we modify currPos, make a copy of the page data if there was a
	 * mark position that needs it.
	 */
	if (so->markItemIndex >= 0)
	{
		/* bump pin on current buffer for assignment to mark buffer */
		IncrBufferRefCount(so->currPos.buf);
		/*
		 * Copy just the live prefix of currPos: offsetof(..., items[1])
		 * covers the fixed header plus one items[] entry, and lastItem more
		 * entries follow (lastItem + 1 entries are valid in total).
		 */
		memcpy(&so->markPos, &so->currPos,
			   offsetof(BTScanPosData, items[1]) +
			   so->currPos.lastItem * sizeof(BTScanPosItem));
		so->markPos.itemIndex = so->markItemIndex;
		/* mark is now fully captured in markPos */
		so->markItemIndex = -1;
	}

	rel = scan->indexRelation;

	if (ScanDirectionIsForward(dir))
	{
		/* Walk right to the next page with data */
		/* We must rely on the previously saved nextPage link! */
		BlockNumber blkno = so->currPos.nextPage;

		/* Remember we left a page with data */
		so->currPos.moreLeft = true;

		for (;;)
		{
			/* if we're at end of scan, release the buffer and return */
			if (blkno == P_NONE || !so->currPos.moreRight)
			{
				_bt_relbuf(rel, so->currPos.buf);
				so->currPos.buf = InvalidBuffer;
				return false;
			}
			/* step right one page */
			/* _bt_relandgetbuf drops lock+pin on the old page for us */
			so->currPos.buf = _bt_relandgetbuf(rel, so->currPos.buf,
											   blkno, BT_READ);
			/* check for deleted page */
			page = BufferGetPage(so->currPos.buf);
			opaque = (BTPageOpaque) PageGetSpecialPointer(page);
			if (!P_IGNORE(opaque))
			{
				/* see if there are any matches on this page */
				/* note that this will clear moreRight if we can stop */
				if (_bt_readpage(scan, dir, P_FIRSTDATAKEY(opaque)))
					break;
			}
			/* nope, keep going */
			blkno = opaque->btpo_next;
		}
	}
	else
	{
		/* Remember we left a page with data */
		so->currPos.moreRight = true;

		/*
		 * Walk left to the next page with data. This is much more complex
		 * than the walk-right case because of the possibility that the page
		 * to our left splits while we are in flight to it, plus the
		 * possibility that the page we were on gets deleted after we leave
		 * it. See nbtree/README for details.
		 */
		for (;;)
		{
			/* Done if we know there are no matching keys to the left */
			if (!so->currPos.moreLeft)
			{
				_bt_relbuf(rel, so->currPos.buf);
				so->currPos.buf = InvalidBuffer;
				return false;
			}

			/* Step to next physical page */
			so->currPos.buf = _bt_walk_left(rel, so->currPos.buf);

			/* if we're physically at end of index, return failure */
			/* (_bt_walk_left already released everything in that case) */
			if (so->currPos.buf == InvalidBuffer)
				return false;

			/*
			 * Okay, we managed to move left to a non-deleted page. Done if
			 * it's not half-dead and contains matching tuples. Else loop back
			 * and do it all again.
			 */
			page = BufferGetPage(so->currPos.buf);
			opaque = (BTPageOpaque) PageGetSpecialPointer(page);
			if (!P_IGNORE(opaque))
			{
				/* see if there are any matches on this page */
				/* note that this will clear moreLeft if we can stop */
				if (_bt_readpage(scan, dir, PageGetMaxOffsetNumber(page)))
					break;
			}
		}
	}

	/* success: currPos.buf is pinned and read-locked, currPos loaded */
	return true;
}
| |
| /* |
| * _bt_walk_left() -- step left one page, if possible |
| * |
| * The given buffer must be pinned and read-locked. This will be dropped |
| * before stepping left. On return, we have pin and read lock on the |
| * returned page, instead. |
| * |
| * Returns InvalidBuffer if there is no page to the left (no lock is held |
| * in that case). |
| * |
| * When working on a non-leaf level, it is possible for the returned page |
| * to be half-dead; the caller should check that condition and step left |
| * again if it's important. |
| */ |
static Buffer
_bt_walk_left(Relation rel, Buffer buf)
{
	Page		page;
	BTPageOpaque opaque;

	MIRROREDLOCK_BUFMGR_MUST_ALREADY_BE_HELD;

	page = BufferGetPage(buf);
	opaque = (BTPageOpaque) PageGetSpecialPointer(page);

	/*
	 * Outer loop: each iteration attempts one left-step from the page
	 * currently held in buf.  We loop back here when the original page
	 * turns out to have been deleted while we were stepping away from it.
	 */
	for (;;)
	{
		BlockNumber obknum;
		BlockNumber lblkno;
		BlockNumber blkno;
		int			tries;

		/* if we're at end of tree, release buf and return failure */
		if (P_LEFTMOST(opaque))
		{
			_bt_relbuf(rel, buf);
			break;
		}
		/* remember original page we are stepping left from */
		obknum = BufferGetBlockNumber(buf);
		/* step left */
		blkno = lblkno = opaque->btpo_prev;
		/* must release before re-acquiring: left-lock-then-right deadlocks */
		_bt_relbuf(rel, buf);
		/* check for interrupts while we're not holding any buffer lock */
		CHECK_FOR_INTERRUPTS();
		buf = _bt_getbuf(rel, blkno, BT_READ);
		page = BufferGetPage(buf);
		opaque = (BTPageOpaque) PageGetSpecialPointer(page);

		/*
		 * If this isn't the page we want, walk right till we find what we
		 * want --- but go no more than four hops (an arbitrary limit). If we
		 * don't find the correct page by then, the most likely bet is that
		 * the original page got deleted and isn't in the sibling chain at all
		 * anymore, not that its left sibling got split more than four times.
		 *
		 * Note that it is correct to test P_ISDELETED not P_IGNORE here,
		 * because half-dead pages are still in the sibling chain. Caller
		 * must reject half-dead pages if wanted.
		 */
		tries = 0;
		for (;;)
		{
			if (!P_ISDELETED(opaque) && opaque->btpo_next == obknum)
			{
				/* Found desired page, return it */
				return buf;
			}
			if (P_RIGHTMOST(opaque) || ++tries > 4)
				break;
			blkno = opaque->btpo_next;
			buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
			page = BufferGetPage(buf);
			opaque = (BTPageOpaque) PageGetSpecialPointer(page);
		}

		/* Return to the original page to see what's up */
		buf = _bt_relandgetbuf(rel, buf, obknum, BT_READ);
		page = BufferGetPage(buf);
		opaque = (BTPageOpaque) PageGetSpecialPointer(page);
		if (P_ISDELETED(opaque))
		{
			/*
			 * It was deleted. Move right to first nondeleted page (there
			 * must be one); that is the page that has acquired the deleted
			 * one's keyspace, so stepping left from it will take us where we
			 * want to be.
			 */
			for (;;)
			{
				if (P_RIGHTMOST(opaque))
					elog(ERROR, "fell off the end of index \"%s\"",
						 RelationGetRelationName(rel));
				blkno = opaque->btpo_next;
				buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
				page = BufferGetPage(buf);
				opaque = (BTPageOpaque) PageGetSpecialPointer(page);
				if (!P_ISDELETED(opaque))
					break;
			}

			/*
			 * Now return to top of loop, resetting obknum to point to this
			 * nondeleted page, and try again.
			 */
		}
		else
		{
			/*
			 * It wasn't deleted; the explanation had better be that the page
			 * to the left got split or deleted. Without this check, we'd go
			 * into an infinite loop if there's anything wrong.
			 */
			if (opaque->btpo_prev == lblkno)
				elog(ERROR, "could not find left sibling of block %u in index \"%s\"",
					 obknum, RelationGetRelationName(rel));
			/* Okay to try again with new lblkno value */
		}
	}

	/* reached only via the P_LEFTMOST break; buf already released */
	return InvalidBuffer;
}
| |
| /* |
| * _bt_get_endpoint() -- Find the first or last page on a given tree level |
| * |
| * If the index is empty, we will return InvalidBuffer; any other failure |
| * condition causes ereport(). We will not return a dead page. |
| * |
| * The returned buffer is pinned and read-locked. |
| */ |
Buffer
_bt_get_endpoint(Relation rel, uint32 level, bool rightmost)
{
	Buffer		buf;
	Page		page;
	BTPageOpaque opaque;
	OffsetNumber offnum;
	BlockNumber blkno;
	IndexTuple	itup;

	MIRROREDLOCK_BUFMGR_MUST_ALREADY_BE_HELD;

	/*
	 * If we are looking for a leaf page, okay to descend from fast root;
	 * otherwise better descend from true root. (There is no point in being
	 * smarter about intermediate levels.)
	 */
	if (level == 0)
		buf = _bt_getroot(rel, BT_READ);
	else
		buf = _bt_gettrueroot(rel);

	if (!BufferIsValid(buf))
	{
		/* empty index... */
		return InvalidBuffer;
	}

	page = BufferGetPage(buf);
	opaque = (BTPageOpaque) PageGetSpecialPointer(page);

	/* descend one level per outer iteration until we hit the target level */
	for (;;)
	{
		/*
		 * If we landed on a deleted page, step right to find a live page
		 * (there must be one). Also, if we want the rightmost page, step
		 * right if needed to get to it (this could happen if the page split
		 * since we obtained a pointer to it).
		 */
		while (P_IGNORE(opaque) ||
			   (rightmost && !P_RIGHTMOST(opaque)))
		{
			blkno = opaque->btpo_next;
			if (blkno == P_NONE)
				elog(ERROR, "fell off the end of index \"%s\"",
					 RelationGetRelationName(rel));
			buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
			page = BufferGetPage(buf);
			opaque = (BTPageOpaque) PageGetSpecialPointer(page);
		}

		/* Done? */
		if (opaque->btpo.level == level)
			break;
		/* can't recover if we've descended below the requested level */
		if (opaque->btpo.level < level)
			elog(ERROR, "btree level %u not found in index \"%s\"",
				 level, RelationGetRelationName(rel));

		/* Descend to leftmost or rightmost child page */
		if (rightmost)
			offnum = PageGetMaxOffsetNumber(page);
		else
			offnum = P_FIRSTDATAKEY(opaque);

		/* internal-page tuples carry the child block number in t_tid */
		itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
		blkno = ItemPointerGetBlockNumber(&(itup->t_tid));

		buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
		page = BufferGetPage(buf);
		opaque = (BTPageOpaque) PageGetSpecialPointer(page);
	}

	/* buf is pinned and read-locked on the requested level's endpoint */
	return buf;
}
| |
| /* |
| * _bt_endpoint() -- Find the first or last page in the index, and scan |
| * from there to the first key satisfying all the quals. |
| * |
| * This is used by _bt_first() to set up a scan when we've determined |
| * that the scan must start at the beginning or end of the index (for |
| * a forward or backward scan respectively). Exit conditions are the |
| * same as for _bt_first(). |
| */ |
static bool
_bt_endpoint(IndexScanDesc scan, ScanDirection dir)
{
	MIRROREDLOCK_BUFMGR_DECLARE;

	Relation	rel = scan->indexRelation;
	BTScanOpaque so = (BTScanOpaque) scan->opaque;
	Buffer		buf;
	Page		page;
	BTPageOpaque opaque;
	OffsetNumber start;

	/*
	 * Scan down to the leftmost or rightmost leaf page. This is a simplified
	 * version of _bt_search(). We don't maintain a stack since we know we
	 * won't need it.
	 */

	// -------- MirroredLock ----------
	MIRROREDLOCK_BUFMGR_LOCK;

	/* level 0 = leaf; rightmost leaf for a backward scan */
	buf = _bt_get_endpoint(rel, 0, ScanDirectionIsBackward(dir));

	if (!BufferIsValid(buf))
	{
		/* empty index... */

		MIRROREDLOCK_BUFMGR_UNLOCK;
		// -------- MirroredLock ----------

		so->currPos.buf = InvalidBuffer;
		return false;
	}

	page = BufferGetPage(buf);
	opaque = (BTPageOpaque) PageGetSpecialPointer(page);
	Assert(P_ISLEAF(opaque));

	if (ScanDirectionIsForward(dir))
	{
		/* There could be dead pages to the left, so not this: */
		/* Assert(P_LEFTMOST(opaque)); */

		start = P_FIRSTDATAKEY(opaque);
	}
	else if (ScanDirectionIsBackward(dir))
	{
		/* rightmost is exact: _bt_get_endpoint followed splits for us */
		Assert(P_RIGHTMOST(opaque));

		start = PageGetMaxOffsetNumber(page);
	}
	else
	{
		elog(ERROR, "invalid scan direction: %d", (int) dir);
		start = 0;				/* keep compiler quiet */
	}

	/* remember which buffer we have pinned */
	so->currPos.buf = buf;

	/* initialize moreLeft/moreRight appropriately for scan direction */
	if (ScanDirectionIsForward(dir))
	{
		/* started at leftmost edge, so nothing can be to the left */
		so->currPos.moreLeft = false;
		so->currPos.moreRight = true;
	}
	else
	{
		so->currPos.moreLeft = true;
		so->currPos.moreRight = false;
	}
	so->numKilled = 0;			/* just paranoia */
	so->markItemIndex = -1;		/* ditto */

	/*
	 * Now load data from the first page of the scan.
	 */
	if (!_bt_readpage(scan, dir, start))
	{
		/*
		 * There's no actually-matching data on this page. Try to advance to
		 * the next page. Return false if there's no matching data at all.
		 */
		if (!_bt_steppage(scan, dir))
		{
			/* _bt_steppage released all pins/locks on the failure path */

			MIRROREDLOCK_BUFMGR_UNLOCK;
			// -------- MirroredLock ----------

			return false;
		}
	}

	/* Drop the lock, but not pin, on the current page */
	LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);

	MIRROREDLOCK_BUFMGR_UNLOCK;
	// -------- MirroredLock ----------

	/* OK, itemIndex says what to return */
	scan->xs_ctup.t_self = so->currPos.items[so->currPos.itemIndex].heapTid;

	return true;
}