| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /*------------------------------------------------------------------------- |
| * |
| * index.c |
| * code to create and destroy POSTGRES index relations |
| * |
| * Portions Copyright (c) 2006-2009, Greenplum inc |
| * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * |
| * IDENTIFICATION |
| * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.274 2006/10/04 00:29:50 momjian Exp $ |
| * |
| * |
| * INTERFACE ROUTINES |
| * index_create() - Create a cataloged index relation |
| * index_drop() - Removes index relation from catalogs |
| * BuildIndexInfo() - Prepare to insert index tuples |
| * FormIndexDatum() - Construct datum vector for one index tuple |
| * |
| *------------------------------------------------------------------------- |
| */ |
| #include "postgres.h" |
| |
| #include <unistd.h> |
| |
| #include "access/genam.h" |
| #include "access/heapam.h" |
| #include "access/relscan.h" |
| #include "access/sysattr.h" |
| #include "access/transam.h" |
| #include "access/xact.h" |
| #include "bootstrap/bootstrap.h" |
| #include "catalog/catalog.h" |
| #include "catalog/catquery.h" |
| #include "catalog/dependency.h" |
| #include "catalog/heap.h" |
| #include "catalog/index.h" |
| #include "catalog/indexing.h" |
| #include "catalog/namespace.h" |
| #include "catalog/pg_constraint.h" |
| #include "catalog/pg_namespace.h" |
| #include "catalog/pg_operator.h" |
| #include "catalog/pg_opclass.h" |
| #include "catalog/pg_tablespace.h" |
| #include "catalog/pg_type.h" |
| #include "commands/tablecmds.h" |
| #include "executor/executor.h" |
| #include "miscadmin.h" |
| #include "optimizer/clauses.h" |
| #include "optimizer/var.h" |
| #include "parser/parse_expr.h" |
| #include "storage/procarray.h" |
| #include "storage/smgr.h" |
| #include "utils/builtins.h" |
| #include "utils/fmgroids.h" |
| #include "utils/inval.h" |
| #include "utils/lsyscache.h" |
| #include "utils/memutils.h" |
| #include "utils/relcache.h" |
| #include "utils/syscache.h" |
| #include "utils/tuplesort.h" |
| |
| #include "cdb/cdbvars.h" |
| #include "cdb/cdbanalyze.h" |
| #include "cdb/cdboidsync.h" |
| #include "cdb/cdbappendonlyam.h" |
| |
| #include "cdb/cdbmirroredfilesysobj.h" |
| |
/*
 * State info for the validate_index bulkdelete callback.
 *
 * Carried through validate_index_heapscan() (see prototype below); the
 * tuplesort holds the index TIDs collected from the index so they can be
 * compared against the heap.
 */
typedef struct
{
	void	   *tuplesort;		/* for sorting the index TIDs */
	/* statistics (for debug purposes only): */
	double		htups,			/* heap tuples seen */
				itups,			/* index tuples seen */
				tups_inserted;	/* tuples inserted during validation */
} v_i_state;
| |
/* non-export function prototypes */

/* tuple-descriptor and catalog-row construction for a new index */
static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
						 IndexInfo *indexInfo,
						 Oid *classObjectId);
static void InitializeAttributeOids(Relation indexRelation,
						int numatts, Oid indexoid);
static void AppendAttributeTuples(Relation indexRelation, int numatts);
static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
					IndexInfo *indexInfo,
					Oid *classOids,
					bool primary,
					bool isvalid);

/* stats maintenance and concurrent-build validation support */
static void index_update_stats(Relation rel, bool hasindex, bool isprimary,
				   Oid reltoastidxid, double reltuples);
static bool validate_index_callback(ItemPointer itemptr, void *opaque);
static void validate_index_heapscan(Relation heapRelation,
						Relation indexRelation,
						IndexInfo *indexInfo,
						Snapshot snapshot,
						v_i_state *state);

/* index build scans over heap and append-only storage */
static double IndexBuildHeapScan(Relation heapRelation,
				   Relation indexRelation,
				   struct IndexInfo *indexInfo,
				   EState *estate,
				   Snapshot snapshot,
				   TransactionId OldestXmin,
				   IndexBuildCallback callback,
				   void *callback_state);
static double IndexBuildAppendOnlyRowScan(Relation parentRelation,
							Relation indexRelation,
							struct IndexInfo *indexInfo,
							EState *estate,
							Snapshot snapshot,
							IndexBuildCallback callback,
							void *callback_state);
| |
| |
| /* |
| * ConstructTupleDescriptor |
| * |
| * Build an index tuple descriptor for a new index |
| */ |
| static TupleDesc |
| ConstructTupleDescriptor(Relation heapRelation, |
| IndexInfo *indexInfo, |
| Oid *classObjectId) |
| { |
| int numatts = indexInfo->ii_NumIndexAttrs; |
| ListCell *indexpr_item = list_head(indexInfo->ii_Expressions); |
| TupleDesc heapTupDesc; |
| TupleDesc indexTupDesc; |
| int natts; /* #atts in heap rel --- for error checks */ |
| int i; |
| int fetchCount; |
| cqContext *pcqCtx; |
| |
| heapTupDesc = RelationGetDescr(heapRelation); |
| natts = RelationGetForm(heapRelation)->relnatts; |
| |
| /* |
| * allocate the new tuple descriptor |
| */ |
| indexTupDesc = CreateTemplateTupleDesc(numatts, false); |
| |
| /* |
| * For simple index columns, we copy the pg_attribute row from the parent |
| * relation and modify it as necessary. For expressions we have to cons |
| * up a pg_attribute row the hard way. |
| */ |
| for (i = 0; i < numatts; i++) |
| { |
| AttrNumber atnum = indexInfo->ii_KeyAttrNumbers[i]; |
| Form_pg_attribute to = indexTupDesc->attrs[i]; |
| HeapTuple tuple; |
| Form_pg_type typeTup; |
| Oid keyType; |
| |
| if (atnum != 0) |
| { |
| /* Simple index column */ |
| Form_pg_attribute from; |
| |
| if (atnum < 0) |
| { |
| /* |
| * here we are indexing on a system attribute (-1...-n) |
| */ |
| from = SystemAttributeDefinition(atnum, |
| heapRelation->rd_rel->relhasoids); |
| } |
| else |
| { |
| /* |
| * here we are indexing on a normal attribute (1...n) |
| */ |
| if (atnum > natts) /* safety check */ |
| elog(ERROR, "invalid column number %d", atnum); |
| from = heapTupDesc->attrs[AttrNumberGetAttrOffset(atnum)]; |
| } |
| |
| /* |
| * now that we've determined the "from", let's copy the tuple desc |
| * data... |
| */ |
| memcpy(to, from, ATTRIBUTE_TUPLE_SIZE); |
| |
| /* |
| * Fix the stuff that should not be the same as the underlying |
| * attr |
| */ |
| to->attnum = i + 1; |
| |
| to->attstattarget = -1; |
| to->attcacheoff = -1; |
| to->attnotnull = false; |
| to->atthasdef = false; |
| to->attislocal = true; |
| to->attinhcount = 0; |
| } |
| else |
| { |
| /* Expressional index */ |
| Node *indexkey; |
| |
| MemSet(to, 0, ATTRIBUTE_TUPLE_SIZE); |
| |
| if (indexpr_item == NULL) /* shouldn't happen */ |
| elog(ERROR, "too few entries in indexprs list"); |
| indexkey = (Node *) lfirst(indexpr_item); |
| indexpr_item = lnext(indexpr_item); |
| |
| /* |
| * Make the attribute's name "pg_expresssion_nnn" (maybe think of |
| * something better later) |
| */ |
| sprintf(NameStr(to->attname), "pg_expression_%d", i + 1); |
| |
| /* |
| * Lookup the expression type in pg_type for the type length etc. |
| */ |
| keyType = exprType(indexkey); |
| |
| pcqCtx = caql_beginscan( |
| NULL, |
| cql("SELECT * FROM pg_type " |
| " WHERE oid = :1 ", |
| ObjectIdGetDatum(keyType))); |
| |
| tuple = caql_getnext(pcqCtx); |
| |
| if (!HeapTupleIsValid(tuple)) |
| elog(ERROR, "cache lookup failed for type %u", keyType); |
| typeTup = (Form_pg_type) GETSTRUCT(tuple); |
| |
| /* |
| * Assign some of the attributes values. Leave the rest as 0. |
| */ |
| to->attnum = i + 1; |
| to->atttypid = keyType; |
| to->attlen = typeTup->typlen; |
| to->attbyval = typeTup->typbyval; |
| to->attstorage = typeTup->typstorage; |
| to->attalign = typeTup->typalign; |
| to->attstattarget = -1; |
| to->attcacheoff = -1; |
| to->atttypmod = -1; |
| to->attislocal = true; |
| |
| caql_endscan(pcqCtx); |
| } |
| |
| /* |
| * We do not yet have the correct relation OID for the index, so just |
| * set it invalid for now. InitializeAttributeOids() will fix it |
| * later. |
| */ |
| to->attrelid = InvalidOid; |
| |
| /* |
| * Check the opclass to see if it provides a keytype (overriding the |
| * attribute type). |
| */ |
| keyType = caql_getoid_plus( |
| NULL, |
| &fetchCount, |
| NULL, |
| cql("SELECT opckeytype FROM pg_opclass " |
| " WHERE oid = :1 ", |
| ObjectIdGetDatum(classObjectId[i]))); |
| |
| if (!fetchCount) |
| elog(ERROR, "cache lookup failed for opclass %u", |
| classObjectId[i]); |
| |
| if (OidIsValid(keyType) && keyType != to->atttypid) |
| { |
| /* index value and heap value have different types */ |
| pcqCtx = caql_beginscan( |
| NULL, |
| cql("SELECT * FROM pg_type " |
| " WHERE oid = :1 ", |
| ObjectIdGetDatum(keyType))); |
| |
| tuple = caql_getnext(pcqCtx); |
| |
| if (!HeapTupleIsValid(tuple)) |
| elog(ERROR, "cache lookup failed for type %u", keyType); |
| typeTup = (Form_pg_type) GETSTRUCT(tuple); |
| |
| to->atttypid = keyType; |
| to->atttypmod = -1; |
| to->attlen = typeTup->typlen; |
| to->attbyval = typeTup->typbyval; |
| to->attalign = typeTup->typalign; |
| to->attstorage = typeTup->typstorage; |
| |
| caql_endscan(pcqCtx); |
| } |
| } |
| |
| return indexTupDesc; |
| } |
| |
| /* ---------------------------------------------------------------- |
| * InitializeAttributeOids |
| * ---------------------------------------------------------------- |
| */ |
| static void |
| InitializeAttributeOids(Relation indexRelation, |
| int numatts, |
| Oid indexoid) |
| { |
| TupleDesc tupleDescriptor; |
| int i; |
| |
| tupleDescriptor = RelationGetDescr(indexRelation); |
| |
| for (i = 0; i < numatts; i += 1) |
| tupleDescriptor->attrs[i]->attrelid = indexoid; |
| } |
| |
| /* ---------------------------------------------------------------- |
| * AppendAttributeTuples |
| * ---------------------------------------------------------------- |
| */ |
| static void |
| AppendAttributeTuples(Relation indexRelation, int numatts) |
| { |
| TupleDesc indexTupDesc; |
| HeapTuple new_tuple; |
| int i; |
| cqContext *pcqCtx; |
| |
| /* |
| * open the attribute relation and its indexes |
| */ |
| pcqCtx = caql_beginscan( |
| NULL, |
| cql("INSERT INTO pg_attribute ", |
| NULL)); |
| |
| /* |
| * insert data from new index's tupdesc into pg_attribute |
| */ |
| indexTupDesc = RelationGetDescr(indexRelation); |
| |
| for (i = 0; i < numatts; i++) |
| { |
| /* |
| * There used to be very grotty code here to set these fields, but I |
| * think it's unnecessary. They should be set already. |
| */ |
| Assert(indexTupDesc->attrs[i]->attnum == i + 1); |
| Assert(indexTupDesc->attrs[i]->attcacheoff == -1); |
| |
| new_tuple = heap_addheader(Natts_pg_attribute, |
| false, |
| ATTRIBUTE_TUPLE_SIZE, |
| (void *) indexTupDesc->attrs[i]); |
| |
| caql_insert(pcqCtx, new_tuple); |
| /* and Update indexes (implicit) */ |
| |
| heap_freetuple(new_tuple); |
| } |
| |
| caql_endscan(pcqCtx); /* close rel, indexes */ |
| } |
| |
| /* ---------------------------------------------------------------- |
| * UpdateIndexRelation |
| * |
| * Construct and insert a new entry in the pg_index catalog |
| * ---------------------------------------------------------------- |
| */ |
| static void |
| UpdateIndexRelation(Oid indexoid, |
| Oid heapoid, |
| IndexInfo *indexInfo, |
| Oid *classOids, |
| bool primary, |
| bool isvalid) |
| { |
| int2vector *indkey; |
| oidvector *indclass; |
| Datum exprsDatum; |
| Datum predDatum; |
| Datum values[Natts_pg_index]; |
| bool nulls[Natts_pg_index]; |
| HeapTuple tuple; |
| int i; |
| cqContext *pcqCtx; |
| /* |
| * Copy the index key and opclass info into arrays (should we make the |
| * caller pass them like this to start with?) |
| */ |
| indkey = buildint2vector(NULL, indexInfo->ii_NumIndexAttrs); |
| indclass = buildoidvector(classOids, indexInfo->ii_NumIndexAttrs); |
| for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++) |
| indkey->values[i] = indexInfo->ii_KeyAttrNumbers[i]; |
| |
| /* |
| * Convert the index expressions (if any) to a text datum |
| */ |
| if (indexInfo->ii_Expressions != NIL) |
| { |
| char *exprsString; |
| |
| exprsString = nodeToString(indexInfo->ii_Expressions); |
| exprsDatum = CStringGetTextDatum(exprsString); |
| pfree(exprsString); |
| } |
| else |
| exprsDatum = (Datum) 0; |
| |
| /* |
| * Convert the index predicate (if any) to a text datum. Note we convert |
| * implicit-AND format to normal explicit-AND for storage. |
| */ |
| if (indexInfo->ii_Predicate != NIL) |
| { |
| char *predString; |
| |
| predString = nodeToString(make_ands_explicit(indexInfo->ii_Predicate)); |
| predDatum = CStringGetTextDatum(predString); |
| pfree(predString); |
| } |
| else |
| predDatum = (Datum) 0; |
| |
| /* |
| * open the system catalog index relation |
| */ |
| pcqCtx = caql_beginscan( |
| NULL, |
| cql("INSERT INTO pg_index ", |
| NULL)); |
| |
| /* |
| * Build a pg_index tuple |
| */ |
| MemSet(nulls, false, sizeof(nulls)); |
| |
| values[Anum_pg_index_indexrelid - 1] = ObjectIdGetDatum(indexoid); |
| values[Anum_pg_index_indrelid - 1] = ObjectIdGetDatum(heapoid); |
| values[Anum_pg_index_indnatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexAttrs); |
| values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique); |
| values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary); |
| values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false); |
| values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid); |
| values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey); |
| values[Anum_pg_index_indclass - 1] = PointerGetDatum(indclass); |
| values[Anum_pg_index_indexprs - 1] = exprsDatum; |
| if (exprsDatum == (Datum) 0) |
| nulls[Anum_pg_index_indexprs - 1] = true; |
| values[Anum_pg_index_indpred - 1] = predDatum; |
| if (predDatum == (Datum) 0) |
| nulls[Anum_pg_index_indpred - 1] = true; |
| |
| tuple = caql_form_tuple(pcqCtx, values, nulls); |
| |
| /* |
| * insert the tuple into the pg_index catalog |
| */ |
| caql_insert(pcqCtx, tuple); |
| /* and Update indexes (implicit) */ |
| |
| /* |
| * close the relation and free the tuple |
| */ |
| caql_endscan(pcqCtx); |
| heap_freetuple(tuple); |
| } |
| |
| |
| /* |
| * index_create |
| * |
| * heapRelationId: OID of table to build index on |
| * indexRelationName: what it say |
| * indexRelationId: normally, pass InvalidOid to let this routine |
| * generate an OID for the index. During bootstrap this may be |
| * nonzero to specify a preselected OID. |
| * indexInfo: same info executor uses to insert into the index |
| * accessMethodObjectId: OID of index AM to use |
| * tableSpaceId: OID of tablespace to use |
| * classObjectId: array of index opclass OIDs, one per index column |
| * reloptions: AM-specific options |
| * isprimary: index is a PRIMARY KEY |
| * isconstraint: index is owned by a PRIMARY KEY or UNIQUE constraint |
| * constrOid: constraint OID to use if isconstraint is true |
| * allow_system_table_mods: allow table to be a system catalog |
| * skip_build: true to skip the index_build() step for the moment; caller |
| * must do it later (typically via reindex_index()) |
| * concurrent: if true, do not lock the table against writers. The index |
| * will be marked "invalid" and the caller must take additional steps |
| * to fix it up. |
| * |
| * Returns OID of the created index. |
| */ |
| Oid |
| index_create(Oid heapRelationId, |
| const char *indexRelationName, |
| Oid indexRelationId, |
| struct IndexInfo *indexInfo, |
| Oid accessMethodObjectId, |
| Oid tableSpaceId, |
| Oid *classObjectId, |
| Datum reloptions, |
| bool isprimary, |
| bool isconstraint, |
| Oid *constrOid, |
| bool allow_system_table_mods, |
| bool skip_build, |
| bool concurrent, |
| const char *altConName) |
| { |
| Relation pg_class; |
| Relation gp_relfile_node; |
| Relation heapRelation; |
| Relation indexRelation; |
| TupleDesc indexTupDesc; |
| bool shared_relation; |
| Oid namespaceId; |
| int i; |
| LOCKMODE heap_lockmode; |
| |
| pg_class = heap_open(RelationRelationId, RowExclusiveLock); |
| |
| if (!IsBootstrapProcessingMode()) |
| gp_relfile_node = heap_open(GpRelfileNodeRelationId, RowExclusiveLock); |
| else |
| gp_relfile_node = NULL; |
| |
| /* |
| * Only SELECT ... FOR UPDATE/SHARE are allowed while doing a standard |
| * index build; but for concurrent builds we allow INSERT/UPDATE/DELETE |
| * (but not VACUUM). |
| */ |
| heap_lockmode = (concurrent ? ShareUpdateExclusiveLock : ShareLock); |
| heapRelation = heap_open(heapRelationId, heap_lockmode); |
| |
| |
| /* |
| * The index will be in the same namespace as its parent table, and is |
| * shared across databases if and only if the parent is. |
| */ |
| namespaceId = RelationGetNamespace(heapRelation); |
| shared_relation = heapRelation->rd_rel->relisshared; |
| |
| /* |
| * check parameters |
| */ |
| if (indexInfo->ii_NumIndexAttrs < 1) |
| elog(ERROR, "must index at least one column"); |
| |
| if (!allow_system_table_mods && |
| IsSystemRelation(heapRelation) && |
| IsNormalProcessingMode()) |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("user-defined indexes on system catalog tables are not supported"))); |
| |
| /* |
| * concurrent index build on a system catalog is unsafe because we tend to |
| * release locks before committing in catalogs |
| */ |
| if (concurrent && |
| IsSystemRelation(heapRelation)) |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("concurrent index creation on system catalog tables is not supported"))); |
| |
| /* |
| * We cannot allow indexing a shared relation after initdb (because |
| * there's no way to make the entry in other databases' pg_class), |
| * except during upgrade. |
| */ |
| if (shared_relation && !(IsBootstrapProcessingMode() || gp_upgrade_mode)) |
| ereport(ERROR, |
| (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), |
| errmsg("shared indexes cannot be created after initdb"))); |
| |
| if (get_relname_relid(indexRelationName, namespaceId)) |
| ereport(ERROR, |
| (errcode(ERRCODE_DUPLICATE_TABLE), |
| errmsg("relation \"%s\" already exists", |
| indexRelationName))); |
| |
| /* |
| * construct tuple descriptor for index tuples |
| */ |
| indexTupDesc = ConstructTupleDescriptor(heapRelation, |
| indexInfo, |
| classObjectId); |
| |
| /* |
| * Allocate an OID for the index, unless we were told what to use. |
| * |
| * The OID will be the relfilenode as well, so make sure it doesn't |
| * collide with either pg_class OIDs or existing physical files. |
| */ |
| if (!OidIsValid(indexRelationId)) |
| indexRelationId = GetNewRelFileNode(tableSpaceId, shared_relation, |
| pg_class, false); |
| else |
| if (IsUnderPostmaster) |
| { |
| CheckNewRelFileNodeIsOk(indexRelationId, tableSpaceId, shared_relation, pg_class, false); |
| } |
| |
| /* |
| * create the index relation's relcache entry and physical disk file. (If |
| * we fail further down, it's the smgr's responsibility to remove the disk |
| * file again.) |
| */ |
| indexRelation = heap_create(indexRelationName, |
| namespaceId, |
| tableSpaceId, |
| indexRelationId, |
| indexTupDesc, |
| accessMethodObjectId, |
| RELKIND_INDEX, |
| RELSTORAGE_HEAP, |
| shared_relation, |
| allow_system_table_mods, |
| /* bufferPoolBulkLoad */ false); |
| |
| Assert(indexRelationId == RelationGetRelid(indexRelation)); |
| |
| /* |
| * Obtain exclusive lock on it. Although no other backends can see it |
| * until we commit, this prevents deadlock-risk complaints from lock |
| * manager in cases such as CLUSTER. |
| */ |
| LockRelation(indexRelation, AccessExclusiveLock); |
| |
| /* |
| * Fill in fields of the index's pg_class entry that are not set correctly |
| * by heap_create. |
| * |
| * XXX should have a cleaner way to create cataloged indexes |
| */ |
| indexRelation->rd_rel->relowner = heapRelation->rd_rel->relowner; |
| indexRelation->rd_rel->relam = accessMethodObjectId; |
| indexRelation->rd_rel->relkind = RELKIND_INDEX; |
| indexRelation->rd_rel->relhasoids = false; |
| |
| /* |
| * store index's pg_class entry |
| */ |
| InsertPgClassTuple(pg_class, indexRelation, |
| RelationGetRelid(indexRelation), |
| reloptions); |
| |
| /* done with pg_class */ |
| heap_close(pg_class, RowExclusiveLock); |
| |
| { /* MPP-7575: track index creation */ |
| bool doIt = true; |
| char *subtyp = "INDEX"; |
| |
| /* MPP-7576: don't track internal namespace tables */ |
| switch (namespaceId) |
| { |
| case PG_CATALOG_NAMESPACE: |
| /* MPP-7773: don't track objects in system namespace |
| * if modifying system tables (eg during upgrade) |
| */ |
| if (allowSystemTableModsDDL) |
| doIt = false; |
| break; |
| |
| case PG_TOAST_NAMESPACE: |
| case PG_BITMAPINDEX_NAMESPACE: |
| case PG_AOSEGMENT_NAMESPACE: |
| doIt = false; |
| break; |
| default: |
| break; |
| } |
| |
| if (doIt) |
| doIt = (!(isAnyTempNamespace(namespaceId))); |
| |
| /* MPP-6929: metadata tracking */ |
| if (doIt) |
| MetaTrackAddObject(RelationRelationId, |
| RelationGetRelid(indexRelation), |
| GetUserId(), /* not ownerid */ |
| "CREATE", subtyp |
| ); |
| } |
| |
| if (gp_relfile_node != NULL) |
| { |
| InsertGpRelfileNodeTuple( |
| gp_relfile_node, |
| indexRelation->rd_id, |
| indexRelation->rd_rel->relname.data, |
| indexRelation->rd_rel->relfilenode, |
| /* segmentFileNum */ 0, |
| /* updateIndex */ true, |
| &indexRelation->rd_relationnodeinfo.persistentTid, |
| indexRelation->rd_relationnodeinfo.persistentSerialNum); |
| |
| heap_close(gp_relfile_node, RowExclusiveLock); |
| } |
| |
| /* |
| * now update the object id's of all the attribute tuple forms in the |
| * index relation's tuple descriptor |
| */ |
| InitializeAttributeOids(indexRelation, |
| indexInfo->ii_NumIndexAttrs, |
| indexRelationId); |
| |
| /* |
| * append ATTRIBUTE tuples for the index |
| */ |
| AppendAttributeTuples(indexRelation, indexInfo->ii_NumIndexAttrs); |
| |
| /* ---------------- |
| * update pg_index |
| * (append INDEX tuple) |
| * |
| * Note that this stows away a representation of "predicate". |
| * (Or, could define a rule to maintain the predicate) --Nels, Feb '92 |
| * ---------------- |
| */ |
| UpdateIndexRelation(indexRelationId, heapRelationId, indexInfo, |
| classObjectId, isprimary, !concurrent); |
| |
| /* |
| * Register constraint and dependencies for the index. |
| * |
| * If the index is from a CONSTRAINT clause, construct a pg_constraint |
| * entry. The index is then linked to the constraint, which in turn is |
| * linked to the table. If it's not a CONSTRAINT, make the dependency |
| * directly on the table. |
| * |
| * We don't need a dependency on the namespace, because there'll be an |
| * indirect dependency via our parent table. |
| * |
| * During bootstrap we can't register any dependencies, and we don't try |
| * to make a constraint either. |
| */ |
| if (!IsBootstrapProcessingMode()) |
| { |
| ObjectAddress myself, |
| referenced; |
| |
| myself.classId = RelationRelationId; |
| myself.objectId = indexRelationId; |
| myself.objectSubId = 0; |
| |
| if (isconstraint) |
| { |
| char constraintType; |
| const char *constraintName = indexRelationName; |
| |
| if ( altConName ) |
| { |
| constraintName = altConName; |
| } |
| |
| if (isprimary) |
| constraintType = CONSTRAINT_PRIMARY; |
| else if (indexInfo->ii_Unique) |
| constraintType = CONSTRAINT_UNIQUE; |
| else |
| { |
| elog(ERROR, "constraint must be PRIMARY or UNIQUE"); |
| constraintType = 0; /* keep compiler quiet */ |
| } |
| |
| /* Shouldn't have any expressions */ |
| if (indexInfo->ii_Expressions) |
| elog(ERROR, "constraints can't have index expressions"); |
| |
| Insist(constrOid != NULL); |
| *constrOid = CreateConstraintEntry(constraintName, |
| *constrOid, |
| namespaceId, |
| constraintType, |
| false, /* isDeferrable */ |
| false, /* isDeferred */ |
| heapRelationId, |
| indexInfo->ii_KeyAttrNumbers, |
| indexInfo->ii_NumIndexAttrs, |
| InvalidOid, /* no domain */ |
| InvalidOid, /* no foreign key */ |
| NULL, |
| 0, |
| ' ', |
| ' ', |
| ' ', |
| InvalidOid, /* no associated index */ |
| NULL, /* no check constraint */ |
| NULL, |
| NULL); |
| |
| referenced.classId = ConstraintRelationId; |
| referenced.objectId = *constrOid; |
| referenced.objectSubId = 0; |
| |
| recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL); |
| } |
| else |
| { |
| bool have_simple_col = false; |
| |
| /* Create auto dependencies on simply-referenced columns */ |
| for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++) |
| { |
| if (indexInfo->ii_KeyAttrNumbers[i] != 0) |
| { |
| referenced.classId = RelationRelationId; |
| referenced.objectId = heapRelationId; |
| referenced.objectSubId = indexInfo->ii_KeyAttrNumbers[i]; |
| |
| recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO); |
| |
| have_simple_col = true; |
| } |
| } |
| |
| /* |
| * It's possible for an index to not depend on any columns of the |
| * table at all, in which case we need to give it a dependency on |
| * the table as a whole; else it won't get dropped when the table |
| * is dropped. This edge case is not totally useless; for |
| * example, a unique index on a constant expression can serve to |
| * prevent a table from containing more than one row. |
| */ |
| if (!have_simple_col && |
| !contain_vars_of_level((Node *) indexInfo->ii_Expressions, 0) && |
| !contain_vars_of_level((Node *) indexInfo->ii_Predicate, 0)) |
| { |
| referenced.classId = RelationRelationId; |
| referenced.objectId = heapRelationId; |
| referenced.objectSubId = 0; |
| |
| recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO); |
| } |
| } |
| |
| /* Store dependency on operator classes */ |
| for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++) |
| { |
| referenced.classId = OperatorClassRelationId; |
| referenced.objectId = classObjectId[i]; |
| referenced.objectSubId = 0; |
| |
| recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); |
| } |
| |
| /* Store dependencies on anything mentioned in index expressions */ |
| if (indexInfo->ii_Expressions) |
| { |
| recordDependencyOnSingleRelExpr(&myself, |
| (Node *) indexInfo->ii_Expressions, |
| heapRelationId, |
| DEPENDENCY_NORMAL, |
| DEPENDENCY_AUTO); |
| } |
| |
| /* Store dependencies on anything mentioned in predicate */ |
| if (indexInfo->ii_Predicate) |
| { |
| recordDependencyOnSingleRelExpr(&myself, |
| (Node *) indexInfo->ii_Predicate, |
| heapRelationId, |
| DEPENDENCY_NORMAL, |
| DEPENDENCY_AUTO); |
| } |
| } |
| |
| /* |
| * Advance the command counter so that we can see the newly-entered |
| * catalog tuples for the index. |
| */ |
| CommandCounterIncrement(); |
| |
| /* |
| * In bootstrap mode, we have to fill in the index strategy structure with |
| * information from the catalogs. If we aren't bootstrapping, then the |
| * relcache entry has already been rebuilt thanks to sinval update during |
| * CommandCounterIncrement. |
| */ |
| if (IsBootstrapProcessingMode()) |
| RelationInitIndexAccessInfo(indexRelation); |
| else |
| Assert(indexRelation->rd_indexcxt != NULL); |
| |
| /* |
| * For upgrade, if we've already created the index in another database, |
| * we don't need or want to recreate it. |
| */ |
| if (gp_upgrade_mode && (RelationGetNumberOfBlocks(indexRelation) > 0)) |
| skip_build = true; |
| |
| /* |
| * If this is bootstrap (initdb) time, then we don't actually fill in the |
| * index yet. We'll be creating more indexes and classes later, so we |
| * delay filling them in until just before we're done with bootstrapping. |
| * Similarly, if the caller specified skip_build then filling the index is |
| * delayed till later (ALTER TABLE can save work in some cases with this). |
| * Otherwise, we call the AM routine that constructs the index. |
| */ |
| if (IsBootstrapProcessingMode()) |
| { |
| index_register(heapRelationId, indexRelationId, indexInfo); |
| } |
| else if (skip_build) |
| { |
| /* |
| * Caller is responsible for filling the index later on. However, |
| * we'd better make sure that the heap relation is correctly marked as |
| * having an index. |
| */ |
| index_update_stats(heapRelation, |
| true, |
| isprimary, |
| InvalidOid, |
| heapRelation->rd_rel->reltuples); |
| /* Make the above update visible */ |
| CommandCounterIncrement(); |
| } |
| else |
| { |
| index_build(heapRelation, indexRelation, indexInfo, isprimary); |
| } |
| |
| /* |
| * Close the heap and index; but we keep the locks that we acquired above |
| * until end of transaction unless we're dealing with a child of a partition |
| * table, in which case the lock on the master is sufficient. |
| */ |
| if (rel_needs_long_lock(RelationGetRelid(heapRelation))) |
| { |
| index_close(indexRelation, NoLock); |
| heap_close(heapRelation, NoLock); |
| } |
| else |
| { |
| index_close(indexRelation, AccessExclusiveLock); |
| heap_close(heapRelation, heap_lockmode); |
| } |
| |
| return indexRelationId; |
| } |
| |
| /* |
| * index_drop |
| * |
| * NOTE: this routine should now only be called through performDeletion(), |
| * else associated dependencies won't be cleaned up. |
| */ |
| void |
| index_drop(Oid indexId) |
| { |
| Oid heapId; |
| Relation userHeapRelation; |
| Relation userIndexRelation; |
| HeapTuple tuple; |
| bool hasexprs; |
| bool need_long_lock; |
| cqContext *pcqCtx; |
| |
| /* |
| * To drop an index safely, we must grab exclusive lock on its parent |
| * table; otherwise there could be other backends using the index! |
| * Exclusive lock on the index alone is insufficient because another |
| * backend might be in the midst of devising a query plan that will use |
| * the index. The parser and planner take care to hold an appropriate |
| * lock on the parent table while working, but having them hold locks on |
| * all the indexes too seems overly expensive. We do grab exclusive lock |
| * on the index too, just to be safe. Both locks must be held till end of |
| * transaction, else other backends will still see this index in pg_index. |
| */ |
| heapId = IndexGetRelation(indexId); |
| userHeapRelation = heap_open(heapId, AccessExclusiveLock); |
| |
| userIndexRelation = index_open(indexId, AccessExclusiveLock); |
| |
| |
| /* |
| * TODO, in hawq, only MASTER_CONTENT_ID is used here, |
| * will changed later depends on the design of index. |
| */ |
| if (!userIndexRelation->rd_relationnodeinfo.isPresent) |
| RelationFetchGpRelationNode(userIndexRelation); |
| |
| /* |
| * Schedule physical removal of the files |
| */ |
| MirroredFileSysObj_ScheduleDropBufferPoolRel(userIndexRelation); |
| |
| DeleteGpRelfileNodeTuple( |
| userIndexRelation, |
| /* segmentFileNum */ 0); |
| |
| |
| /* |
| * Close and flush the index's relcache entry, to ensure relcache doesn't |
| * try to rebuild it while we're deleting catalog entries. We keep the |
| * lock though. |
| */ |
| need_long_lock = rel_needs_long_lock(RelationGetRelid(userIndexRelation)); |
| if (need_long_lock) |
| index_close(userIndexRelation, NoLock); |
| else |
| index_close(userIndexRelation, AccessExclusiveLock); |
| |
| RelationForgetRelation(indexId); |
| |
| /* |
| * fix INDEX relation, and check for expressional index |
| */ |
| pcqCtx = caql_beginscan( |
| NULL, |
| cql("SELECT * FROM pg_index " |
| " WHERE indexrelid = :1 " |
| " FOR UPDATE ", |
| ObjectIdGetDatum(indexId))); |
| |
| tuple = caql_getnext(pcqCtx); |
| |
| if (!HeapTupleIsValid(tuple)) |
| elog(ERROR, "cache lookup failed for index %u", indexId); |
| |
| hasexprs = !heap_attisnull(tuple, Anum_pg_index_indexprs); |
| |
| caql_delete_current(pcqCtx); |
| |
| caql_endscan(pcqCtx); |
| |
| /* |
| * if it has any expression columns, we might have stored statistics about |
| * them. |
| */ |
| if (hasexprs) |
| RemoveStatistics(indexId, 0); |
| |
| /* |
| * fix ATTRIBUTE relation |
| */ |
| DeleteAttributeTuples(indexId); |
| |
| /* |
| * fix RELATION relation |
| */ |
| DeleteRelationTuple(indexId); |
| |
| /* MPP-6929: metadata tracking */ |
| MetaTrackDropObject(RelationRelationId, |
| indexId); |
| |
| /* |
| * We are presently too lazy to attempt to compute the new correct value |
| * of relhasindex (the next VACUUM will fix it if necessary). So there is |
| * no need to update the pg_class tuple for the owning relation. But we |
| * must send out a shared-cache-inval notice on the owning relation to |
| * ensure other backends update their relcache lists of indexes. |
| */ |
| CacheInvalidateRelcache(userHeapRelation); |
| |
| /* |
| * Close owning rel, but keep lock |
| */ |
| heap_close(userHeapRelation, need_long_lock ? NoLock : AccessExclusiveLock); |
| } |
| |
| /* ---------------------------------------------------------------- |
| * index_build support |
| * ---------------------------------------------------------------- |
| */ |
| |
| /* ---------------- |
| * BuildIndexInfo |
| * Construct an IndexInfo record for an open index |
| * |
| * IndexInfo stores the information about the index that's needed by |
| * FormIndexDatum, which is used for both index_build() and later insertion |
| * of individual index tuples. Normally we build an IndexInfo for an index |
| * just once per command, and then use it for (potentially) many tuples. |
| * ---------------- |
| */ |
| struct IndexInfo * |
| BuildIndexInfo(Relation index) |
| { |
| IndexInfo *ii = makeNode(IndexInfo); |
| Form_pg_index indexStruct = index->rd_index; |
| int i; |
| int numKeys; |
| |
| /* check the number of keys, and copy attr numbers into the IndexInfo */ |
| numKeys = indexStruct->indnatts; |
| if (numKeys < 1 || numKeys > INDEX_MAX_KEYS) |
| elog(ERROR, "invalid indnatts %d for index %u", |
| numKeys, RelationGetRelid(index)); |
| ii->ii_NumIndexAttrs = numKeys; |
| for (i = 0; i < numKeys; i++) |
| ii->ii_KeyAttrNumbers[i] = indexStruct->indkey.values[i]; |
| |
| /* fetch any expressions needed for expressional indexes */ |
| ii->ii_Expressions = RelationGetIndexExpressions(index); |
| ii->ii_ExpressionsState = NIL; |
| |
| /* fetch index predicate if any */ |
| ii->ii_Predicate = RelationGetIndexPredicate(index); |
| ii->ii_PredicateState = NIL; |
| |
| /* other info */ |
| ii->ii_Unique = indexStruct->indisunique; |
| ii->ii_Concurrent = false; /* assume normal case */ |
| |
| ii->opaque = NULL; |
| |
| return ii; |
| } |
| |
| /* ---------------- |
| * FormIndexDatum |
| * Construct values[] and isnull[] arrays for a new index tuple. |
| * |
| * indexInfo Info about the index |
| * slot Heap tuple for which we must prepare an index entry |
| * estate executor state for evaluating any index expressions |
| * values Array of index Datums (output area) |
| * isnull Array of is-null indicators (output area) |
| * |
| * When there are no index expressions, estate may be NULL. Otherwise it |
| * must be supplied, *and* the ecxt_scantuple slot of its per-tuple expr |
| * context must point to the heap tuple passed in. |
| * |
| * Notice we don't actually call index_form_tuple() here; we just prepare |
| * its input arrays values[] and isnull[]. This is because the index AM |
| * may wish to alter the data before storage. |
| * ---------------- |
| */ |
| void |
| FormIndexDatum(struct IndexInfo *indexInfo, |
| TupleTableSlot *slot, |
| struct EState *estate, |
| Datum *values, |
| bool *isnull) |
| { |
| ListCell *indexpr_item; |
| int i; |
| |
| if (indexInfo->ii_Expressions != NIL && |
| indexInfo->ii_ExpressionsState == NIL) |
| { |
| /* First time through, set up expression evaluation state */ |
| indexInfo->ii_ExpressionsState = (List *) |
| ExecPrepareExpr((Expr *) indexInfo->ii_Expressions, |
| estate); |
| /* Check caller has set up context correctly */ |
| Assert(GetPerTupleExprContext(estate)->ecxt_scantuple == slot); |
| } |
| indexpr_item = list_head(indexInfo->ii_ExpressionsState); |
| |
| for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++) |
| { |
| int keycol = indexInfo->ii_KeyAttrNumbers[i]; |
| Datum iDatum; |
| bool isNull; |
| |
| if (keycol != 0) |
| { |
| /* |
| * Plain index column; get the value we need directly from the |
| * heap tuple. |
| */ |
| iDatum = slot_getattr(slot, keycol, &isNull); |
| } |
| else |
| { |
| /* |
| * Index expression --- need to evaluate it. |
| */ |
| if (indexpr_item == NULL) |
| elog(ERROR, "wrong number of index expressions"); |
| iDatum = ExecEvalExprSwitchContext((ExprState *) lfirst(indexpr_item), |
| GetPerTupleExprContext(estate), |
| &isNull, |
| NULL); |
| indexpr_item = lnext(indexpr_item); |
| } |
| values[i] = iDatum; |
| isnull[i] = isNull; |
| } |
| |
| if (indexpr_item != NULL) |
| elog(ERROR, "wrong number of index expressions"); |
| } |
| |
| |
| /* |
| * index_update_stats --- update pg_class entry after CREATE INDEX or REINDEX |
| * |
| * This routine updates the pg_class row of either an index or its parent |
| * relation after CREATE INDEX or REINDEX. Its rather bizarre API is designed |
| * to ensure we can do all the necessary work in just one update. |
| * |
| * hasindex: set relhasindex to this value |
| * isprimary: if true, set relhaspkey true; else no change |
| * reltoastidxid: if not InvalidOid, set reltoastidxid to this value; |
| * else no change |
| * reltuples: set reltuples to this value |
| * |
| * relpages is also updated (using RelationGetNumberOfBlocks()). |
| * |
| * NOTE: an important side-effect of this operation is that an SI invalidation |
| * message is sent out to all backends --- including me --- causing relcache |
| * entries to be flushed or updated with the new data. This must happen even |
| * if we find that no change is needed in the pg_class row. When updating |
| * a heap entry, this ensures that other backends find out about the new |
| * index. When updating an index, it's important because some index AMs |
| * expect a relcache flush to occur after REINDEX. |
| */ |
| static void |
| index_update_stats(Relation rel, bool hasindex, bool isprimary, |
| Oid reltoastidxid, double reltuples) |
| { |
| BlockNumber relpages = RelationGetNumberOfBlocks(rel); |
| Oid relid = RelationGetRelid(rel); |
| Relation pg_class; |
| HeapTuple tuple; |
| Form_pg_class rd_rel; |
| bool dirty; |
| |
| /* |
| * We always update the pg_class row using a non-transactional, |
| * overwrite-in-place update. There are several reasons for this: |
| * |
| * 1. In bootstrap mode, we have no choice --- UPDATE wouldn't work. |
| * |
| * 2. We could be reindexing pg_class itself, in which case we can't move |
| * its pg_class row because CatalogUpdateIndexes might not know about all |
| * the indexes yet (see reindex_relation). |
| * |
| * 3. Because we execute CREATE INDEX with just share lock on the parent |
| * rel (to allow concurrent index creations), an ordinary update could |
| * suffer a tuple-concurrently-updated failure against another CREATE |
| * INDEX committing at about the same time. We can avoid that by having |
| * them both do nontransactional updates (we assume they will both be |
| * trying to change the pg_class row to the same thing, so it doesn't |
| * matter which goes first). |
| * |
| * 4. Even with just a single CREATE INDEX, there's a risk factor because |
| * someone else might be trying to open the rel while we commit, and this |
| * creates a race condition as to whether he will see both or neither of |
| * the pg_class row versions as valid. Again, a non-transactional update |
| * avoids the risk. It is indeterminate which state of the row the other |
| * process will see, but it doesn't matter (if he's only taking |
| * AccessShareLock, then it's not critical that he see relhasindex true). |
| * |
| * It is safe to use a non-transactional update even though our |
| * transaction could still fail before committing. Setting relhasindex |
| * true is safe even if there are no indexes (VACUUM will eventually fix |
| * it), and of course the relpages and reltuples counts are correct (or at |
| * least more so than the old values) regardless. |
| */ |
| |
| pg_class = heap_open(RelationRelationId, RowExclusiveLock); |
| |
| /* |
| * Make a copy of the tuple to update. Normally we use the syscache, but |
| * we can't rely on that during bootstrap or while reindexing pg_class |
| * itself. |
| */ |
| if (IsBootstrapProcessingMode() || |
| ReindexIsProcessingHeap(RelationRelationId)) |
| { |
| /* don't assume syscache will work */ |
| cqContext cqc; |
| |
| /* heapscan, noindex */ |
| tuple = caql_getfirst( |
| caql_syscache( |
| caql_indexOK(caql_addrel(cqclr(&cqc), pg_class), |
| false), |
| false), |
| cql("SELECT * FROM pg_class " |
| " WHERE oid = :1 " |
| " FOR UPDATE ", |
| ObjectIdGetDatum(relid))); |
| } |
| else |
| { |
| cqContext cqc; |
| |
| /* normal case, use syscache */ |
| tuple = caql_getfirst( |
| caql_addrel(cqclr(&cqc), pg_class), |
| cql("SELECT * FROM pg_class " |
| " WHERE oid = :1 " |
| " FOR UPDATE ", |
| ObjectIdGetDatum(relid))); |
| } |
| |
| if (!HeapTupleIsValid(tuple)) |
| elog(ERROR, "could not find tuple for relation %u", relid); |
| rd_rel = (Form_pg_class) GETSTRUCT(tuple); |
| |
| /* Apply required updates, if any, to copied tuple */ |
| |
| dirty = false; |
| if (rd_rel->relhasindex != hasindex) |
| { |
| rd_rel->relhasindex = hasindex; |
| dirty = true; |
| } |
| if (isprimary) |
| { |
| if (!rd_rel->relhaspkey) |
| { |
| rd_rel->relhaspkey = true; |
| dirty = true; |
| } |
| } |
| if (OidIsValid(reltoastidxid)) |
| { |
| Assert(rd_rel->relkind == RELKIND_TOASTVALUE); |
| if (rd_rel->reltoastidxid != reltoastidxid) |
| { |
| rd_rel->reltoastidxid = reltoastidxid; |
| dirty = true; |
| } |
| } |
| |
| if (Gp_role != GP_ROLE_DISPATCH) |
| { |
| /** |
| * Do not overwrite relpages, reltuples in QD. |
| */ |
| if (rd_rel->reltuples != (float4) reltuples) |
| { |
| rd_rel->reltuples = (float4) reltuples; |
| dirty = true; |
| } |
| if (rd_rel->relpages != (int32) relpages) |
| { |
| rd_rel->relpages = (int32) relpages; |
| dirty = true; |
| } |
| } |
| /* |
| * If anything changed, write out the tuple |
| */ |
| if (dirty) |
| { |
| heap_inplace_update(pg_class, tuple); |
| /* the above sends a cache inval message */ |
| } |
| else |
| { |
| /* no need to change tuple, but force relcache inval anyway */ |
| CacheInvalidateRelcacheByTuple(tuple); |
| } |
| |
| heap_freetuple(tuple); |
| |
| heap_close(pg_class, RowExclusiveLock); |
| } |
| |
| /* |
| * index_build - invoke access-method-specific index build procedure |
| * |
| * On entry, the index's catalog entries are valid, and its physical disk |
| * file has been created but is empty. We call the AM-specific build |
| * procedure to fill in the index contents. We then update the pg_class |
| * entries of the index and heap relation as needed, using statistics |
| * returned by ambuild as well as data passed by the caller. |
| * |
| * Note: when reindexing an existing index, isprimary can be false; |
| * the index is already properly marked and need not be re-marked. |
| * |
| * Note: before Postgres 8.2, the passed-in heap and index Relations |
| * were automatically closed by this routine. This is no longer the case. |
| * The caller opened 'em, and the caller should close 'em. |
| */ |
| void |
| index_build(Relation heapRelation, |
| Relation indexRelation, |
| IndexInfo *indexInfo, |
| bool isprimary) |
| { |
| RegProcedure procedure; |
| IndexBuildResult *stats; |
| Oid save_userid; |
| bool save_secdefcxt; |
| |
| /* |
| * sanity checks |
| */ |
| Assert(RelationIsValid(indexRelation)); |
| Assert(PointerIsValid(indexRelation->rd_am)); |
| |
| procedure = indexRelation->rd_am->ambuild; |
| Assert(RegProcedureIsValid(procedure)); |
| |
| /* |
| * Switch to the table owner's userid, so that any index functions are |
| * run as that user. |
| */ |
| GetUserIdAndContext(&save_userid, &save_secdefcxt); |
| SetUserIdAndContext(heapRelation->rd_rel->relowner, true); |
| |
| /* |
| * Call the access method's build procedure |
| */ |
| stats = (IndexBuildResult *) |
| DatumGetPointer(OidFunctionCall3(procedure, |
| PointerGetDatum(heapRelation), |
| PointerGetDatum(indexRelation), |
| PointerGetDatum(indexInfo))); |
| Assert(PointerIsValid(stats)); |
| |
| /* Restore userid */ |
| SetUserIdAndContext(save_userid, save_secdefcxt); |
| |
| /* |
| * Update heap and index pg_class rows |
| */ |
| index_update_stats(heapRelation, |
| true, |
| isprimary, |
| (heapRelation->rd_rel->relkind == RELKIND_TOASTVALUE) ? |
| RelationGetRelid(indexRelation) : InvalidOid, |
| stats->heap_tuples); |
| |
| #if 0 |
| /* |
| * Update an AO segment or block directory index oid |
| */ |
| switch(heapRelation->rd_rel->relkind) |
| { |
| case RELKIND_AOSEGMENTS: |
| UpdateAppendOnlyEntryIdxid(RelationGetRelid(heapRelation), |
| Anum_pg_appendonly_segidxid, |
| RelationGetRelid(indexRelation)); |
| break; |
| case RELKIND_AOBLOCKDIR: |
| UpdateAppendOnlyEntryIdxid(RelationGetRelid(heapRelation), |
| Anum_pg_appendonly_blkdiridxid, |
| RelationGetRelid(indexRelation)); |
| break; |
| default: |
| /* do nothing */ |
| } |
| #endif |
| |
| index_update_stats(indexRelation, |
| false, |
| false, |
| InvalidOid, |
| stats->index_tuples); |
| |
| /* Make the updated versions visible */ |
| CommandCounterIncrement(); |
| } |
| |
| /* |
| * IndexBuildScan - scan the heap, or the append-only row, or the append-only |
| * column relation to find tuples to be indexed. |
| * |
| * This is called back from an access-method-specific index build procedure |
| * after the AM has done whatever setup it needs. The parent relation |
| * is scanned to find tuples that should be entered into the index. Each |
| * such tuple is passed to the AM's callback routine, which does the right |
| * things to add it to the new index. After we return, the AM's index |
| * build procedure does whatever cleanup is needed; in particular, it should |
| * close the heap and index relations. |
| * |
| * The total count of heap tuples is returned. This is for updating pg_class |
| * statistics. (It's annoying not to be able to do that here, but we can't |
| * do it until after the relation is closed.) Note that the index AM itself |
| * must keep track of the number of index tuples; we don't do so here because |
| * the AM might reject some of the tuples for its own reasons, such as being |
| * unable to store NULLs. |
| */ |
| double |
| IndexBuildScan(Relation parentRelation, |
| Relation indexRelation, |
| struct IndexInfo *indexInfo, |
| IndexBuildCallback callback, |
| void *callback_state) |
| { |
| TupleTableSlot *slot; |
| EState *estate; |
| ExprContext *econtext; |
| Snapshot snapshot; |
| TransactionId OldestXmin; |
| double reltuples = 0; |
| |
| /* |
| * sanity checks |
| */ |
| Assert(OidIsValid(indexRelation->rd_rel->relam)); |
| |
| /* |
| * Need an EState for evaluation of index expressions and partial-index |
| * predicates. Also a slot to hold the current tuple. |
| */ |
| estate = CreateExecutorState(); |
| econtext = GetPerTupleExprContext(estate); |
| slot = MakeSingleTupleTableSlot(RelationGetDescr(parentRelation)); |
| |
| /* Arrange for econtext's scan tuple to be the tuple under test */ |
| econtext->ecxt_scantuple = slot; |
| |
| /* |
| * Prepare for scan of the base relation. In a normal index build, we use |
| * SnapshotAny because we must retrieve all tuples and do our own time |
| * qual checks (because we have to index RECENTLY_DEAD tuples). In a |
| * concurrent build, we take a regular MVCC snapshot and index whatever's |
| * live according to that. During bootstrap we just use SnapshotNow. |
| * |
| * If the relation is an append-only table, we use a regular MVCC snapshot |
| * and index what is actually in the table. |
| */ |
| if (IsBootstrapProcessingMode()) |
| { |
| snapshot = SnapshotNow; |
| OldestXmin = InvalidTransactionId; /* not used */ |
| } |
| else if (indexInfo->ii_Concurrent || |
| RelationIsAoRows(parentRelation)) |
| { |
| snapshot = CopySnapshot(GetTransactionSnapshot()); |
| OldestXmin = InvalidTransactionId; /* not used */ |
| } |
| else |
| { |
| snapshot = SnapshotAny; |
| /* okay to ignore lazy VACUUMs here */ |
| OldestXmin = GetOldestXmin(parentRelation->rd_rel->relisshared); |
| } |
| |
| if (RelationIsHeap(parentRelation)) |
| reltuples = IndexBuildHeapScan(parentRelation, |
| indexRelation, |
| indexInfo, |
| estate, |
| snapshot, |
| OldestXmin, |
| callback, |
| callback_state); |
| |
| else if (RelationIsAoRows(parentRelation)) |
| reltuples = IndexBuildAppendOnlyRowScan(parentRelation, |
| indexRelation, |
| indexInfo, |
| estate, |
| snapshot, |
| callback, |
| callback_state); |
| |
| ExecDropSingleTupleTableSlot(slot); |
| FreeExecutorState(estate); |
| |
| /* These may have been pointing to the now-gone estate */ |
| indexInfo->ii_ExpressionsState = NIL; |
| indexInfo->ii_PredicateState = NIL; |
| |
| return reltuples; |
| } |
| |
| |
| /* |
| * IndexBuildHeapScan - scan the heap relation to find tuples to be indexed |
| * |
| * This is called back from an access-method-specific index build procedure |
| * after the AM has done whatever setup it needs. The parent heap relation |
| * is scanned to find tuples that should be entered into the index. Each |
| * such tuple is passed to the AM's callback routine, which does the right |
| * things to add it to the new index. After we return, the AM's index |
| * build procedure does whatever cleanup is needed; in particular, it should |
| * close the heap and index relations. |
| * |
| * The total count of heap tuples is returned. This is for updating pg_class |
| * statistics. (It's annoying not to be able to do that here, but we can't |
| * do it until after the relation is closed.) Note that the index AM itself |
| * must keep track of the number of index tuples; we don't do so here because |
| * the AM might reject some of the tuples for its own reasons, such as being |
| * unable to store NULLs. |
| */ |
| static double |
| IndexBuildHeapScan(Relation heapRelation, |
| Relation indexRelation, |
| struct IndexInfo *indexInfo, |
| EState *estate, |
| Snapshot snapshot, |
| TransactionId OldestXmin, |
| IndexBuildCallback callback, |
| void *callback_state) |
| { |
| MIRROREDLOCK_BUFMGR_DECLARE; |
| |
| HeapScanDesc scan; |
| HeapTuple heapTuple; |
| Datum values[INDEX_MAX_KEYS]; |
| bool isnull[INDEX_MAX_KEYS]; |
| double reltuples; |
| List *predicate = NIL; |
| ExprContext *econtext; |
| TupleTableSlot *slot; |
| |
| Assert(estate->es_per_tuple_exprcontext != NULL); |
| econtext = estate->es_per_tuple_exprcontext; |
| slot = econtext->ecxt_scantuple; |
| |
| /* Set up execution state for predicate, if any. */ |
| predicate = (List *) |
| ExecPrepareExpr((Expr *) indexInfo->ii_Predicate, estate); |
| |
| scan = heap_beginscan(heapRelation, /* relation */ |
| snapshot, /* seeself */ |
| 0, /* number of keys */ |
| NULL); /* scan key */ |
| |
| reltuples = 0; |
| |
| /* |
| * Scan all tuples in the base relation. |
| */ |
| while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL) |
| { |
| bool tupleIsAlive; |
| |
| CHECK_FOR_INTERRUPTS(); |
| |
| if (snapshot == SnapshotAny) |
| { |
| /* do our own time qual check */ |
| bool indexIt; |
| |
| /* |
| * We could possibly get away with not locking the buffer here, |
| * since caller should hold ShareLock on the relation, but let's |
| * be conservative about it. |
| */ |
| |
| // -------- MirroredLock ---------- |
| MIRROREDLOCK_BUFMGR_LOCK; |
| |
| LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); |
| |
| switch (HeapTupleSatisfiesVacuum(heapTuple->t_data, OldestXmin, |
| scan->rs_cbuf, true)) |
| { |
| case HEAPTUPLE_DEAD: |
| /* Definitely dead, we can ignore it */ |
| indexIt = false; |
| tupleIsAlive = false; |
| break; |
| case HEAPTUPLE_LIVE: |
| /* Normal case, index and unique-check it */ |
| indexIt = true; |
| tupleIsAlive = true; |
| break; |
| case HEAPTUPLE_RECENTLY_DEAD: |
| |
| /* |
| * If tuple is recently deleted then we must index it |
| * anyway to preserve MVCC semantics. (Pre-existing |
| * transactions could try to use the index after we finish |
| * building it, and may need to see such tuples.) Exclude |
| * it from unique-checking, however. |
| */ |
| indexIt = true; |
| tupleIsAlive = false; |
| break; |
| case HEAPTUPLE_INSERT_IN_PROGRESS: |
| |
| /* |
| * Since caller should hold ShareLock or better, we should |
| * not see any tuples inserted by open transactions --- |
| * unless it's our own transaction. (Consider INSERT |
| * followed by CREATE INDEX within a transaction.) An |
| * exception occurs when reindexing a system catalog, |
| * because we often release lock on system catalogs before |
| * committing. |
| */ |
| if (!TransactionIdIsCurrentTransactionId( |
| HeapTupleHeaderGetXmin(heapTuple->t_data)) |
| && !IsSystemRelation(heapRelation)) |
| elog(ERROR, "concurrent insert in progress"); |
| indexIt = true; |
| tupleIsAlive = true; |
| break; |
| case HEAPTUPLE_DELETE_IN_PROGRESS: |
| |
| /* |
| * Since caller should hold ShareLock or better, we should |
| * not see any tuples deleted by open transactions --- |
| * unless it's our own transaction. (Consider DELETE |
| * followed by CREATE INDEX within a transaction.) An |
| * exception occurs when reindexing a system catalog, |
| * because we often release lock on system catalogs before |
| * committing. |
| * |
| * XXX we also skip the check for any bitmap indexes. |
| */ |
| Assert(!(heapTuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI)); |
| if (!TransactionIdIsCurrentTransactionId( |
| HeapTupleHeaderGetXmax(heapTuple->t_data)) |
| && !IsSystemRelation(heapRelation) |
| && (!RelationIsBitmapIndex(indexRelation))) |
| elog(ERROR, "concurrent delete in progress"); |
| indexIt = true; |
| tupleIsAlive = false; |
| break; |
| default: |
| elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result"); |
| indexIt = tupleIsAlive = false; /* keep compiler quiet */ |
| break; |
| } |
| |
| LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); |
| |
| MIRROREDLOCK_BUFMGR_UNLOCK; |
| // -------- MirroredLock ---------- |
| |
| if (!indexIt) |
| continue; |
| } |
| else |
| { |
| /* heap_getnext did the time qual check */ |
| tupleIsAlive = true; |
| } |
| |
| reltuples += 1; |
| |
| MemoryContextReset(econtext->ecxt_per_tuple_memory); |
| |
| /* Set up for predicate or expression evaluation */ |
| ExecStoreGenericTuple(heapTuple, slot, false); |
| |
| /* |
| * In a partial index, discard tuples that don't satisfy the |
| * predicate. |
| */ |
| if (predicate != NIL) |
| { |
| if (!ExecQual(predicate, econtext, false)) |
| continue; |
| } |
| |
| /* |
| * For the current heap tuple, extract all the attributes we use in |
| * this index, and note which are null. This also performs evaluation |
| * of any expressions needed. |
| */ |
| FormIndexDatum(indexInfo, |
| slot, |
| estate, |
| values, |
| isnull); |
| |
| /* |
| * You'd think we should go ahead and build the index tuple here, but |
| * some index AMs want to do further processing on the data first. So |
| * pass the values[] and isnull[] arrays, instead. |
| */ |
| |
| /* Call the AM's callback routine to process the tuple */ |
| callback(indexRelation, slot_get_ctid(slot), |
| values, isnull, tupleIsAlive, callback_state); |
| } |
| |
| heap_endscan(scan); |
| |
| return reltuples; |
| } |
| |
| /* |
| * IndexBuildAppendOnlyRowScan - scan the Append-Only Row relation to find |
| * tuples to be indexed. |
| * |
| * If the block directory of the append-only relation does not exist, it is |
| * created here. This occurs when the append-only relation is upgraded from |
| * pre-3.4 release. |
| */ |
| static double |
| IndexBuildAppendOnlyRowScan(Relation parentRelation, |
| Relation indexRelation, |
| struct IndexInfo *indexInfo, |
| EState *estate, |
| Snapshot snapshot, |
| IndexBuildCallback callback, |
| void *callback_state) |
| { |
| List *predicate = NIL; |
| ExprContext *econtext; |
| struct AppendOnlyScanDescData *aoscan; |
| TupleTableSlot *slot; |
| double reltuples = 0; |
| Datum values[INDEX_MAX_KEYS]; |
| bool isnull[INDEX_MAX_KEYS]; |
| |
| Assert(estate->es_per_tuple_exprcontext != NULL); |
| econtext = estate->es_per_tuple_exprcontext; |
| slot = econtext->ecxt_scantuple; |
| |
| /* Set up execution state for predicate, if any */ |
| predicate = (List *) |
| ExecPrepareExpr((Expr *)indexInfo->ii_Predicate, estate); |
| |
| aoscan = appendonly_beginscan(parentRelation, |
| snapshot, |
| 0, |
| NULL); |
| |
| while (appendonly_getnext(aoscan, ForwardScanDirection, slot) != NULL) |
| { |
| CHECK_FOR_INTERRUPTS(); |
| |
| reltuples++; |
| |
| MemoryContextReset(econtext->ecxt_per_tuple_memory); |
| |
| if (predicate != NIL) |
| { |
| if (!ExecQual(predicate, econtext, false)) |
| continue; |
| } |
| |
| /* |
| * For the current heap tuple, extract all the attributes we use in |
| * this index, and note which are null. This also performs evaluation |
| * of any expressions needed. |
| */ |
| FormIndexDatum(indexInfo, |
| slot, |
| estate, |
| values, |
| isnull); |
| |
| /* |
| * You'd think we should go ahead and build the index tuple here, but |
| * some index AMs want to do further processing on the data first. So |
| * pass the values[] and isnull[] arrays, instead. |
| */ |
| Assert(ItemPointerIsValid(slot_get_ctid(slot))); |
| |
| /* Call the AM's callback routine to process the tuple */ |
| callback(indexRelation, slot_get_ctid(slot), |
| values, isnull, true, callback_state); |
| } |
| |
| appendonly_endscan(aoscan); |
| |
| return reltuples; |
| } |
| |
| |
| /* |
| * validate_index - support code for concurrent index builds |
| * |
| * We do a concurrent index build by first building the index normally via |
| * index_create(), while holding a weak lock that allows concurrent |
| * insert/update/delete. Also, we index only tuples that are valid |
| * as of the start of the scan (see IndexBuildHeapScan), whereas a normal |
| * build takes care to include recently-dead tuples. This is OK because |
| * we won't mark the index valid until all transactions that might be able |
| * to see those tuples are gone. The reason for doing that is to avoid |
| * bogus unique-index failures due to concurrent UPDATEs (we might see |
| * different versions of the same row as being valid when we pass over them, |
| * if we used HeapTupleSatisfiesVacuum). This leaves us with an index that |
| * does not contain any tuples added to the table while we built the index. |
| * |
| * Next, we commit the transaction so that the index becomes visible to other |
| * backends, but it is marked not "indisvalid" to prevent the planner from |
| * relying on it for indexscans. Then we wait for all transactions that |
| * could have been modifying the table to terminate. At this point we |
| * know that any subsequently-started transactions will see the index and |
| * insert their new tuples into it. We then take a new reference snapshot |
| * which is passed to validate_index(). Any tuples that are valid according |
| * to this snap, but are not in the index, must be added to the index. |
| * (Any tuples committed live after the snap will be inserted into the |
| * index by their originating transaction. Any tuples committed dead before |
| * the snap need not be indexed, because we will wait out all transactions |
| * that might care about them before we mark the index valid.) |
| * |
| * validate_index() works by first gathering all the TIDs currently in the |
| * index, using a bulkdelete callback that just stores the TIDs and doesn't |
| * ever say "delete it". (This should be faster than a plain indexscan; |
| * also, not all index AMs support full-index indexscan.) Then we sort the |
| * TIDs, and finally scan the table doing a "merge join" against the TID list |
| * to see which tuples are missing from the index. Thus we will ensure that |
| * all tuples valid according to the reference snapshot are in the index. |
| * |
| * Building a unique index this way is tricky: we might try to insert a |
| * tuple that is already dead or is in process of being deleted, and we |
| * mustn't have a uniqueness failure against an updated version of the same |
| * row. We can check the tuple to see if it's already dead and tell |
| * index_insert() not to do the uniqueness check, but that still leaves us |
| * with a race condition against an in-progress update. To handle that, |
| * we expect the index AM to recheck liveness of the to-be-inserted tuple |
| * before it declares a uniqueness error. |
| * |
| * After completing validate_index(), we wait until all transactions that |
| * were alive at the time of the reference snapshot are gone; this is |
| * necessary to be sure there are none left with a serializable snapshot |
| * older than the reference (and hence possibly able to see tuples we did |
| * not index). Then we mark the index valid and commit. |
| * |
| * Doing two full table scans is a brute-force strategy. We could try to be |
| * cleverer, eg storing new tuples in a special area of the table (perhaps |
| * making the table append-only by setting use_fsm). However that would |
| * add yet more locking issues. |
| */ |
| void |
| validate_index(Oid heapId, Oid indexId, Snapshot snapshot) |
| { |
| Relation heapRelation, |
| indexRelation; |
| IndexInfo *indexInfo; |
| IndexVacuumInfo ivinfo; |
| v_i_state state; |
| Oid save_userid; |
| bool save_secdefcxt; |
| |
| /* Open and lock the parent heap relation */ |
| heapRelation = heap_open(heapId, ShareUpdateExclusiveLock); |
| /* And the target index relation */ |
| indexRelation = index_open(indexId, RowExclusiveLock); |
| |
| /* |
| * Fetch info needed for index_insert. (You might think this should be |
| * passed in from DefineIndex, but its copy is long gone due to having |
| * been built in a previous transaction.) |
| */ |
| indexInfo = BuildIndexInfo(indexRelation); |
| |
| /* mark build is concurrent just for consistency */ |
| indexInfo->ii_Concurrent = true; |
| |
| /* |
| * Switch to the table owner's userid, so that any index functions are |
| * run as that user. |
| */ |
| GetUserIdAndContext(&save_userid, &save_secdefcxt); |
| SetUserIdAndContext(heapRelation->rd_rel->relowner, true); |
| |
| /* |
| * Scan the index and gather up all the TIDs into a tuplesort object. |
| */ |
| ivinfo.index = indexRelation; |
| ivinfo.vacuum_full = false; |
| ivinfo.message_level = DEBUG2; |
| ivinfo.num_heap_tuples = -1; |
| ivinfo.extra_oids = NIL; |
| state.tuplesort = NULL; |
| |
| PG_TRY(); |
| { |
| if(gp_enable_mk_sort) |
| state.tuplesort = tuplesort_begin_datum_mk(NULL, |
| TIDOID, |
| TIDLessOperator, |
| maintenance_work_mem, |
| false); |
| else |
| state.tuplesort = tuplesort_begin_datum(TIDOID, |
| TIDLessOperator, |
| maintenance_work_mem, |
| false); |
| |
| state.htups = state.itups = state.tups_inserted = 0; |
| |
| (void) index_bulk_delete(&ivinfo, NULL, |
| validate_index_callback, (void *) &state); |
| |
| /* Execute the sort */ |
| if(gp_enable_mk_sort) |
| { |
| tuplesort_performsort_mk((Tuplesortstate_mk *)state.tuplesort); |
| } |
| else |
| { |
| tuplesort_performsort((Tuplesortstate *) state.tuplesort); |
| } |
| |
| /* |
| * Now scan the heap and "merge" it with the index |
| */ |
| validate_index_heapscan(heapRelation, |
| indexRelation, |
| indexInfo, |
| snapshot, |
| &state); |
| |
| /* Done with tuplesort object */ |
| if(gp_enable_mk_sort) |
| { |
| tuplesort_end_mk((Tuplesortstate_mk *)state.tuplesort); |
| } |
| else |
| { |
| tuplesort_end((Tuplesortstate *) state.tuplesort); |
| } |
| |
| state.tuplesort = NULL; |
| |
| } |
| PG_CATCH(); |
| { |
| /* Clean up the sort state on error */ |
| if (state.tuplesort) |
| { |
| if(gp_enable_mk_sort) |
| { |
| tuplesort_end_mk((Tuplesortstate_mk *)state.tuplesort); |
| } |
| else |
| { |
| tuplesort_end((Tuplesortstate *) state.tuplesort); |
| } |
| state.tuplesort = NULL; |
| } |
| PG_RE_THROW(); |
| } |
| PG_END_TRY(); |
| |
| elog(DEBUG2, |
| "validate_index found %.0f heap tuples, %.0f index tuples; inserted %.0f missing tuples", |
| state.htups, state.itups, state.tups_inserted); |
| |
| /* Restore userid */ |
| SetUserIdAndContext(save_userid, save_secdefcxt); |
| |
| /* Close rels, but keep locks */ |
| index_close(indexRelation, NoLock); |
| heap_close(heapRelation, NoLock); |
| } |
| |
| /* |
| * validate_index_callback - bulkdelete callback to collect the index TIDs |
| */ |
| static bool |
| validate_index_callback(ItemPointer itemptr, void *opaque) |
| { |
| v_i_state *state = (v_i_state *) opaque; |
| |
| if(gp_enable_mk_sort) |
| tuplesort_putdatum_mk((Tuplesortstate_mk *) state->tuplesort, PointerGetDatum(itemptr), false); |
| else |
| tuplesort_putdatum((Tuplesortstate *) state->tuplesort, PointerGetDatum(itemptr), false); |
| |
| state->itups += 1; |
| return false; /* never actually delete anything */ |
| } |
| |
| /* |
| * validate_index_heapscan - second table scan for concurrent index build |
| * |
| * This has much code in common with IndexBuildHeapScan, but it's enough |
| * different that it seems cleaner to have two routines not one. |
| */ |
static void
validate_index_heapscan(Relation heapRelation,
						Relation indexRelation,
						IndexInfo *indexInfo,
						Snapshot snapshot,
						v_i_state *state)
{
	MIRROREDLOCK_BUFMGR_DECLARE;

	HeapScanDesc scan;
	HeapTuple	heapTuple;
	Datum		values[INDEX_MAX_KEYS];
	bool		isnull[INDEX_MAX_KEYS];
	List	   *predicate;
	TupleTableSlot *slot;
	EState	   *estate;
	ExprContext *econtext;

	/*
	 * State variables for the merge: indexcursor is the TID most recently
	 * pulled from the tuplesort (NULL before the first fetch);
	 * tuplesort_empty becomes true once the sorted TID stream is exhausted.
	 */
	ItemPointer indexcursor = NULL;
	bool		tuplesort_empty = false;

	/*
	 * sanity checks
	 */
	Assert(OidIsValid(indexRelation->rd_rel->relam));

	/*
	 * Need an EState for evaluation of index expressions and partial-index
	 * predicates.	Also a slot to hold the current tuple.
	 */
	estate = CreateExecutorState();
	econtext = GetPerTupleExprContext(estate);
	slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));

	/* Arrange for econtext's scan tuple to be the tuple under test */
	econtext->ecxt_scantuple = slot;

	/* Set up execution state for predicate, if any. */
	predicate = (List *)
		ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
						estate);

	/*
	 * Prepare for scan of the base relation.  We need just those tuples
	 * satisfying the passed-in reference snapshot.
	 */
	scan = heap_beginscan(heapRelation, /* relation */
						  snapshot,		/* seeself */
						  0,			/* number of keys */
						  NULL);		/* scan key */

	/*
	 * Scan all tuples matching the snapshot.  Because both the heap scan
	 * and the sorted TID stream are in physical (TID) order, one forward
	 * pass over each suffices to find heap tuples missing from the index.
	 */
	while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		ItemPointer heapcursor = &heapTuple->t_self;

		CHECK_FOR_INTERRUPTS();

		state->htups += 1;

		/*
		 * "merge" by skipping through the index tuples until we find or pass
		 * the current heap tuple.
		 */
		while (!tuplesort_empty &&
			   (!indexcursor ||
				ItemPointerCompare(indexcursor, heapcursor) < 0))
		{
			Datum		ts_val;
			bool		ts_isnull;

			/* tuplesort_getdatum palloc'd the previous TID; release it */
			if (indexcursor)
				pfree(indexcursor);

			if(gp_enable_mk_sort)
				tuplesort_empty = !tuplesort_getdatum_mk((Tuplesortstate_mk *) state->tuplesort,
														 true, &ts_val, &ts_isnull);
			else
				tuplesort_empty = !tuplesort_getdatum((Tuplesortstate *) state->tuplesort,
													  true, &ts_val, &ts_isnull);

			Assert(tuplesort_empty || !ts_isnull);
			indexcursor = (ItemPointer) DatumGetPointer(ts_val);
		}

		if (tuplesort_empty ||
			ItemPointerCompare(indexcursor, heapcursor) > 0)
		{
			/*
			 * We've overshot which means this heap tuple is missing from the
			 * index, so insert it.
			 */
			bool		check_unique;

			MemoryContextReset(econtext->ecxt_per_tuple_memory);

			/* Set up for predicate or expression evaluation */
			ExecStoreGenericTuple(heapTuple, slot, false);

			/*
			 * In a partial index, discard tuples that don't satisfy the
			 * predicate.
			 */
			if (predicate != NIL)
			{
				if (!ExecQual(predicate, econtext, false))
					continue;
			}

			/*
			 * For the current heap tuple, extract all the attributes we use
			 * in this index, and note which are null.	This also performs
			 * evaluation of any expressions needed.
			 */
			FormIndexDatum(indexInfo,
						   slot,
						   estate,
						   values,
						   isnull);

			/*
			 * If the tuple is already committed dead, we still have to put it
			 * in the index (because some xacts might be able to see it), but
			 * we might as well suppress uniqueness checking. This is just an
			 * optimization because the index AM is not supposed to raise a
			 * uniqueness failure anyway.
			 */
			if (indexInfo->ii_Unique)
			{

				// -------- MirroredLock ----------
				MIRROREDLOCK_BUFMGR_LOCK;

				/* must hold a buffer lock to call HeapTupleSatisfiesNow */
				LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);

				if (HeapTupleSatisfiesNow(scan->rs_rd, heapTuple->t_data, scan->rs_cbuf))
					check_unique = true;
				else
					check_unique = false;

				LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);

				MIRROREDLOCK_BUFMGR_UNLOCK;
				// -------- MirroredLock ----------

			}
			else
				check_unique = false;

			/*
			 * You'd think we should go ahead and build the index tuple here,
			 * but some index AMs want to do further processing on the data
			 * first.  So pass the values[] and isnull[] arrays, instead.
			 */
			index_insert(indexRelation,
						 values,
						 isnull,
						 heapcursor,
						 heapRelation,
						 check_unique);

			state->tups_inserted += 1;
		}
	}

	heap_endscan(scan);

	ExecDropSingleTupleTableSlot(slot);

	FreeExecutorState(estate);

	/* These may have been pointing to the now-gone estate */
	indexInfo->ii_ExpressionsState = NIL;
	indexInfo->ii_PredicateState = NIL;
}
| |
| |
| /* |
| * IndexGetRelation: given an index's relation OID, get the OID of the |
| * relation it is an index on. Uses the system cache. |
| */ |
| Oid |
| IndexGetRelation(Oid indexId) |
| { |
| HeapTuple tuple; |
| Form_pg_index index; |
| Oid result; |
| cqContext *pcqCtx; |
| |
| pcqCtx = caql_beginscan( |
| NULL, |
| cql("SELECT * FROM pg_index " |
| " WHERE indexrelid = :1 ", |
| ObjectIdGetDatum(indexId))); |
| |
| tuple = caql_getnext(pcqCtx); |
| |
| if (!HeapTupleIsValid(tuple)) |
| elog(ERROR, "cache lookup failed for index %u", indexId); |
| index = (Form_pg_index) GETSTRUCT(tuple); |
| Assert(index->indexrelid == indexId); |
| |
| result = index->indrelid; |
| |
| caql_endscan(pcqCtx); |
| return result; |
| } |
| |
| /* |
| * createIndexInfoOpaque: create the opaque value in indexInfo |
| * based on the given list of OIDs passed from reindex_index(). |
| * |
 * The extra_oids list contains 2 OID values. They are used by
 * the bitmap indexes to create their internal heap and btree.
 * See reindex_index() for more info.
| */ |
| static void |
| createIndexInfoOpaque(List *extra_oids, |
| bool isBitmapIndex, |
| IndexInfo *indexInfo) |
| { |
| Assert(extra_oids != NULL && |
| list_length(extra_oids) == 2); |
| Assert(indexInfo != NULL); |
| Assert(indexInfo->opaque == NULL); |
| |
| indexInfo->opaque = (void*)palloc0(sizeof(IndexInfoOpaque)); |
| |
| ListCell *lc = list_head(extra_oids); |
| |
| ((IndexInfoOpaque *)indexInfo->opaque)->heapRelfilenode = |
| lfirst_oid(lc); |
| lc = lnext(lc); |
| ((IndexInfoOpaque *)indexInfo->opaque)->indexRelfilenode = |
| lfirst_oid(lc); |
| lc = lnext(lc); |
| |
| #ifdef USE_ASSERT_CHECKING |
| if (isBitmapIndex) |
| { |
| Assert(OidIsValid(((IndexInfoOpaque *)indexInfo->opaque)->heapRelfilenode)); |
| Assert(OidIsValid(((IndexInfoOpaque *)indexInfo->opaque)->indexRelfilenode)); |
| } |
| |
| else |
| { |
| Assert(!OidIsValid(((IndexInfoOpaque *)indexInfo->opaque)->heapRelfilenode)); |
| Assert(!OidIsValid(((IndexInfoOpaque *)indexInfo->opaque)->indexRelfilenode)); |
| } |
| #endif |
| } |
| |
| /* |
| * generateExtraOids: generate the given number of extra Oids. |
| * |
| * If genNewOid is true, all generated OIDs will be valid. Otherwise, |
| * all OIDs will be InvalidOid. |
| */ |
| static List * |
| generateExtraOids(int num_extra_oids, |
| Oid reltablespace, |
| bool relisshared, |
| bool genNewOid) |
| { |
| List *extra_oids = NIL; |
| |
| Assert(num_extra_oids > 0); |
| |
| for (int no = 0; no < num_extra_oids; no++) |
| { |
| Oid newOid = InvalidOid; |
| if (genNewOid) |
| newOid = GetNewRelFileNode(reltablespace, |
| relisshared, |
| NULL, |
| false); |
| |
| extra_oids = lappend_oid(extra_oids, newOid); |
| } |
| |
| return extra_oids; |
| } |
| |
| /* |
| * reindex_index - This routine is used to recreate a single index. |
| * |
| * GPDB: we return the new relfilenode for transmission to QEs. If |
| * newrelfilenode is valid, we use that Oid instead. |
| * |
| * XXX The bitmap index requires two additional oids for its internal |
| * heap and index. We pass those in as extra_oids. If there are no |
| * such oids, this function generates them and pass them out to |
| * the caller. |
| * |
 * The extra_oids list always contains 2 values. If the index is
 * a bitmap index, those two values are valid OIDs. Otherwise,
 * they are InvalidOid.
| */ |
Oid
reindex_index(Oid indexId, Oid newrelfilenode, List **extra_oids)
{
	Relation	iRel,
				heapRelation,
				pg_index;
	Oid			heapId;
	bool		inplace;
	HeapTuple	indexTuple;
	Form_pg_index indexForm;
	Oid			retrelfilenode;
	Oid			namespaceId;
	cqContext	cqc;
	cqContext  *pcqCtx;

	Assert(OidIsValid(indexId));
	Assert(extra_oids != NULL);

	/*
	 * Open and lock the parent heap relation.	ShareLock is sufficient since
	 * we only need to be sure no schema or data changes are going on.
	 */
	heapId = IndexGetRelation(indexId);
	heapRelation = heap_open(heapId, ShareLock);

	/* remembered for the metadata-tracking decision at the bottom */
	namespaceId = RelationGetNamespace(heapRelation);

	/*
	 * Open the target index relation and get an exclusive lock on it, to
	 * ensure that no one else is touching this particular index.
	 */
	iRel = index_open(indexId, AccessExclusiveLock);

	/*
	 * Don't allow reindex on temp tables of other backends ... their local
	 * buffer manager is not going to cope.
	 */
	if (isOtherTempNamespace(RelationGetNamespace(iRel)))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot reindex temporary tables of other sessions")));

	/*
	 * Also check for active uses of the index in the current transaction;
	 * we don't want to reindex underneath an open indexscan.
	 */
	CheckTableNotInUse(iRel, "REINDEX INDEX");

	/*
	 * If it's a shared index, we must do inplace processing (because we have
	 * no way to update relfilenode in other databases).  Otherwise we can do
	 * it the normal transaction-safe way.
	 *
	 * Since inplace processing isn't crash-safe, we only allow it in a
	 * standalone backend.	(In the REINDEX TABLE and REINDEX DATABASE cases,
	 * the caller should have detected this.)
	 *
	 * MPP: If we are in a standalone backend always perform reindex operations
	 * in place.  In postgres this only applies to shared relations, for
	 * Greenplum we apply it to all tables as a means of enabling upgrade to
	 * filerep: it is required to reindex gp_relation_node in place before it
	 * is possible to populate the gp_persistent tables.
	 */
	inplace = iRel->rd_rel->relisshared || !IsUnderPostmaster;

	if (inplace && IsUnderPostmaster)
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("shared index \"%s\" can only be reindexed in stand-alone mode",
						RelationGetRelationName(iRel))));

	/*
	 * The PG_TRY block guarantees ResetReindexProcessing() runs even if
	 * index_build (or anything else in here) errors out.
	 */
	PG_TRY();
	{
		IndexInfo  *indexInfo;

		/* Suppress use of the target index while rebuilding it */
		SetReindexProcessing(heapId, indexId);

		/* Fetch info needed for index_build */
		indexInfo = BuildIndexInfo(iRel);

		if (inplace)
		{
			/* Truncate the actual file (and discard buffers) */

			RelationTruncate(
						iRel,
						0,
						/* markPersistentAsPhysicallyTruncated */ true);

			/* In-place rebuild keeps the existing relfilenode. */
			retrelfilenode = iRel->rd_rel->relfilenode;
			Assert(retrelfilenode == newrelfilenode ||
				   !OidIsValid(newrelfilenode));
		}
		else
		{
			/*
			 * We'll build a new physical relation for the index.
			 */
			if (OidIsValid(newrelfilenode))
			{
				/* QE path: reuse the relfilenode the QD assigned. */
				setNewRelfilenodeToOid(iRel, newrelfilenode);
				retrelfilenode = newrelfilenode;
			}
			else
			{
				retrelfilenode = setNewRelfilenode(iRel);

				Assert(*extra_oids == NULL);

				/*
				 * If this is a bitmap index, we generate two more relfilenodes
				 * for its internal heap and index.
				 */
				*extra_oids = generateExtraOids(2,
												iRel->rd_rel->reltablespace,
												iRel->rd_rel->relisshared,
												RelationIsBitmapIndex(iRel));

			}


			/* Store extra_oids into indexInfo->opaque */
			createIndexInfoOpaque(*extra_oids,
								  RelationIsBitmapIndex(iRel),
								  indexInfo);
		}

		/* Initialize the index and rebuild */
		/* Note: we do not need to re-establish pkey setting */
		index_build(heapRelation, iRel, indexInfo, false);
	}
	PG_CATCH();
	{
		/* Make sure flag gets cleared on error exit */
		ResetReindexProcessing();
		PG_RE_THROW();
	}
	PG_END_TRY();
	ResetReindexProcessing();

	/*
	 * If the index is marked invalid (ie, it's from a failed CREATE INDEX
	 * CONCURRENTLY), we can now mark it valid.  This allows REINDEX to be
	 * used to clean up in such cases.
	 */
	pg_index = heap_open(IndexRelationId, RowExclusiveLock);

	pcqCtx = caql_addrel(cqclr(&cqc), pg_index);

	indexTuple = caql_getfirst(pcqCtx,
							   cql("SELECT * FROM pg_index "
								   " WHERE indexrelid = :1 "
								   " FOR UPDATE ",
								   ObjectIdGetDatum(indexId)));

	if (!HeapTupleIsValid(indexTuple))
		elog(ERROR, "cache lookup failed for index %u", indexId);
	indexForm = (Form_pg_index) GETSTRUCT(indexTuple);

	if (!indexForm->indisvalid)
	{
		indexForm->indisvalid = true;
		caql_update_current(pcqCtx, indexTuple);
		/* and Update indexes (implicit) */
	}
	heap_close(pg_index, RowExclusiveLock);

	/*
	 * Decide whether this REINDEX should be recorded by GPDB metadata
	 * tracking; internal/system/temp namespaces are excluded.
	 */
	{
		bool		doIt = true;
		char	   *subtyp = "REINDEX";

		/* MPP-7576: don't track internal namespace tables */
		switch (namespaceId)
		{
			case PG_CATALOG_NAMESPACE:
				/* MPP-7773: don't track objects in system namespace
				 * if modifying system tables (eg during upgrade)
				 */
				if (allowSystemTableModsDDL)
					doIt = false;
				break;

			case PG_TOAST_NAMESPACE:
			case PG_BITMAPINDEX_NAMESPACE:
			case PG_AOSEGMENT_NAMESPACE:
				doIt = false;
				break;
			default:
				break;
		}

		if (doIt)
			doIt = (!(isAnyTempNamespace(namespaceId)));

		/* MPP-6929: metadata tracking */
		/* MPP-7587: treat as a VACUUM operation, since the index is
		 * rebuilt */
		if (doIt)
			MetaTrackUpdObject(RelationRelationId,
							   indexId,
							   GetUserId(), /* not ownerid */
							   "VACUUM", subtyp
					);
	}


	/* Close rels, but keep locks */
	index_close(iRel, NoLock);
	heap_close(heapRelation, NoLock);

	return retrelfilenode;
}
| |
| /* |
| * reindex_relation - This routine is used to recreate all indexes |
| * of a relation (and optionally its toast relation too, if any). |
| * |
| * Returns true if any indexes were rebuilt. Note that a |
| * CommandCounterIncrement will occur after each index rebuild. |
| * |
| * If build_map is true, build a map of index relation OID -> new relfilenode. |
| * If it is false but *oidmap is valid and we're on a QE, use the |
| * new relfilenode specified in the map. |
| */ |
bool
reindex_relation(Oid relid, bool toast_too, bool aoseg_too, bool aoblkdir_too,
				 List **oidmap, bool build_map)
{
	Relation	rel;
	Oid			toast_relid;
	Oid			aoseg_relid = InvalidOid;
	Oid			aoblkdir_relid = InvalidOid;
	bool		is_pg_class;
	bool		result;
	List	   *indexIds,
			   *doneIndexes;
	ListCell   *indexId;
	bool		relIsAO = false;

	/*
	 * Open and lock the relation.	ShareLock is sufficient since we only need
	 * to prevent schema and data changes in it.
	 */
	rel = heap_open(relid, ShareLock);

	relIsAO = (RelationIsAoRows(rel) || RelationIsParquet(rel));

	toast_relid = rel->rd_rel->reltoastrelid;

	/*
	 * Get the list of index OIDs for this relation.  (We trust to the
	 * relcache to get this with a sequential scan if ignoring system
	 * indexes.)
	 */
	indexIds = RelationGetIndexList(rel);

	/*
	 * reindex_index will attempt to update the pg_class rows for the relation
	 * and index.  If we are processing pg_class itself, we want to make sure
	 * that the updates do not try to insert index entries into indexes we
	 * have not processed yet.	(When we are trying to recover from corrupted
	 * indexes, that could easily cause a crash.) We can accomplish this
	 * because CatalogUpdateIndexes will use the relcache's index list to know
	 * which indexes to update.  We just force the index list to be only the
	 * stuff we've processed.
	 *
	 * It is okay to not insert entries into the indexes we have not processed
	 * yet because all of this is transaction-safe.  If we fail partway
	 * through, the updated rows are dead and it doesn't matter whether they
	 * have index entries.	Also, a new pg_class index will be created with an
	 * entry for its own pg_class row because we do setNewRelfilenode() before
	 * we do index_build().
	 *
	 * Note that we also clear pg_class's rd_oidindex until the loop is done,
	 * so that that index can't be accessed either.  This means we cannot
	 * safely generate new relation OIDs while in the loop; shouldn't be a
	 * problem.
	 */
	is_pg_class = (RelationGetRelid(rel) == RelationRelationId);
	doneIndexes = NIL;

	/* Reindex all the indexes. */
	foreach(indexId, indexIds)
	{
		Oid			indexOid = lfirst_oid(indexId);
		Oid			newrelfilenode;
		Oid			mapoid = InvalidOid;
		List	   *extra_oids = NIL;

		/* Restrict pg_class's usable index list to indexes already rebuilt */
		if (is_pg_class)
			RelationSetIndexList(rel, doneIndexes, InvalidOid);

		/*
		 * On a QE that was handed a prebuilt map, look up the relfilenode
		 * (and any extra OIDs) the QD chose for this index.
		 */
		if (Gp_role == GP_ROLE_EXECUTE && !build_map && oidmap &&
			*oidmap)
		{
			ListCell *c;

			/* Yes, this is O(N^2) but N is small */
			foreach(c, *oidmap)
			{
				List *map = lfirst(c);
				Oid ind = linitial_oid(map);

				if (ind == indexOid)
				{
					mapoid = lsecond_oid(map);

					/*
					 * The map should contain more than 2 OIDs (the OID of the
					 * index and its new relfilenode), to support the bitmap
					 * index, see reindex_index() for more info.  Construct
					 * the extra_oids list by skipping the first two OIDs.
					 */
					Assert(list_length(map) > 2);
					extra_oids = list_copy_tail(map, 2);

					break;
				}
			}
			Assert(OidIsValid(mapoid));
		}

		elog(DEBUG5, "reindexing index with OID %u (supplied %u as new OID)",
			 indexOid, mapoid);

		newrelfilenode = reindex_index(indexOid, mapoid, &extra_oids);

		Assert(!OidIsValid(mapoid) || newrelfilenode == mapoid);

		/* make the rebuilt index visible before the next iteration */
		CommandCounterIncrement();

		/* On the QD, record index OID -> new relfilenode (+ extra OIDs) */
		if (oidmap && build_map)
		{
			List *map = list_make2_oid(indexOid, newrelfilenode);

			Assert(extra_oids != NULL);
			map = list_concat(map, extra_oids);

			*oidmap = lappend(*oidmap, map);
		}

		if (is_pg_class)
			doneIndexes = lappend_oid(doneIndexes, indexOid);
	}

	/* Restore pg_class's full index list now that every index is rebuilt */
	if (is_pg_class)
		RelationSetIndexList(rel, indexIds, ClassOidIndexId);

	/*
	 * Close rel, but continue to hold the lock.
	 */
	heap_close(rel, NoLock);

	result = (indexIds != NIL);

	/*
	 * If the relation has a secondary toast rel, reindex that too while we
	 * still hold the lock on the master table.
	 */
	if (toast_too && OidIsValid(toast_relid))
		result |= reindex_relation(toast_relid, false, false, false, oidmap, build_map);

	/* Obtain the aoseg_relid and aoblkdir_relid if the relation is an AO table. */
	if ((aoseg_too || aoblkdir_too) && relIsAO)
		GetAppendOnlyEntryAuxOids(relid, SnapshotNow,
								  &aoseg_relid, NULL, &aoblkdir_relid, NULL);

	/*
	 * If an AO rel has a secondary segment list rel, reindex that too while we
	 * still hold the lock on the master table.
	 */
	if (aoseg_too && OidIsValid(aoseg_relid))
		result |= reindex_relation(aoseg_relid, false, false, false, oidmap, build_map);

	/*
	 * If an AO rel has a secondary block directory rel, reindex that too while we
	 * still hold the lock on the master table.
	 */
	if (aoblkdir_too && OidIsValid(aoblkdir_relid))
		result |= reindex_relation(aoblkdir_relid, false, false, false, oidmap, build_map);

	return result;
}