| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /*------------------------------------------------------------------------- |
| * |
| * index.c |
| * code to create and destroy POSTGRES index relations |
| * |
| * Portions Copyright (c) 2006-2009, Greenplum inc |
| * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * |
| * IDENTIFICATION |
| * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.274 2006/10/04 00:29:50 momjian Exp $ |
| * |
| * |
| * INTERFACE ROUTINES |
| * index_create() - Create a cataloged index relation |
| * index_drop() - Removes index relation from catalogs |
| * BuildIndexInfo() - Prepare to insert index tuples |
| * FormIndexDatum() - Construct datum vector for one index tuple |
| * |
| *------------------------------------------------------------------------- |
| */ |
| #include "postgres.h" |
| |
| #include <unistd.h> |
| |
| #include "access/genam.h" |
| #include "access/heapam.h" |
| #include "access/relscan.h" |
| #include "access/sysattr.h" |
| #include "access/transam.h" |
| #include "access/xact.h" |
| #include "bootstrap/bootstrap.h" |
| #include "catalog/catalog.h" |
| #include "catalog/catquery.h" |
| #include "catalog/dependency.h" |
| #include "catalog/heap.h" |
| #include "catalog/index.h" |
| #include "catalog/indexing.h" |
| #include "catalog/namespace.h" |
| #include "catalog/pg_constraint.h" |
| #include "catalog/pg_namespace.h" |
| #include "catalog/pg_operator.h" |
| #include "catalog/pg_opclass.h" |
| #include "catalog/pg_tablespace.h" |
| #include "catalog/pg_type.h" |
| #include "commands/tablecmds.h" |
| #include "executor/executor.h" |
| #include "miscadmin.h" |
| #include "optimizer/clauses.h" |
| #include "optimizer/var.h" |
| #include "parser/parse_expr.h" |
| #include "storage/procarray.h" |
| #include "storage/smgr.h" |
| #include "utils/builtins.h" |
| #include "utils/fmgroids.h" |
| #include "utils/inval.h" |
| #include "utils/lsyscache.h" |
| #include "utils/memutils.h" |
| #include "utils/relcache.h" |
| #include "utils/syscache.h" |
| #include "utils/tuplesort.h" |
| |
| #include "cdb/cdbvars.h" |
| #include "cdb/cdbanalyze.h" |
| #include "cdb/cdboidsync.h" |
| #include "cdb/cdbappendonlyam.h" |
| |
| #include "cdb/cdbmirroredfilesysobj.h" |
| |
/*
 * State info for the validate_index bulkdelete callback.
 *
 * Carried through validate_index_heapscan() (see prototype below); the
 * tuplesort holds the index TIDs collected from the index so they can be
 * compared against the heap.
 */
typedef struct
{
	void	   *tuplesort;		/* for sorting the index TIDs */
	/* statistics (for debug purposes only): */
	double		htups,			/* heap tuples seen */
				itups,			/* index tuples seen */
				tups_inserted;	/* tuples inserted during validation */
} v_i_state;
| |
/* non-export function prototypes */

/* tuple-descriptor and catalog-row construction for a new index */
static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
						 IndexInfo *indexInfo,
						 Oid *classObjectId);
static void InitializeAttributeOids(Relation indexRelation,
						int numatts, Oid indexoid);
static void AppendAttributeTuples(Relation indexRelation, int numatts);
static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
					IndexInfo *indexInfo,
					Oid *classOids,
					bool primary,
					bool isvalid);

/* stats maintenance and concurrent-build validation support */
static void index_update_stats(Relation rel, bool hasindex, bool isprimary,
				   Oid reltoastidxid, double reltuples);
static bool validate_index_callback(ItemPointer itemptr, void *opaque);
static void validate_index_heapscan(Relation heapRelation,
						Relation indexRelation,
						IndexInfo *indexInfo,
						Snapshot snapshot,
						v_i_state *state);

/* index build scans over heap and append-only storage */
static double IndexBuildHeapScan(Relation heapRelation,
				   Relation indexRelation,
				   struct IndexInfo *indexInfo,
				   EState *estate,
				   Snapshot snapshot,
				   TransactionId OldestXmin,
				   IndexBuildCallback callback,
				   void *callback_state);
static double IndexBuildAppendOnlyRowScan(Relation parentRelation,
							Relation indexRelation,
							struct IndexInfo *indexInfo,
							EState *estate,
							Snapshot snapshot,
							IndexBuildCallback callback,
							void *callback_state);
| |
| |
| /* |
| * ConstructTupleDescriptor |
| * |
| * Build an index tuple descriptor for a new index |
| */ |
| static TupleDesc |
| ConstructTupleDescriptor(Relation heapRelation, |
| IndexInfo *indexInfo, |
| Oid *classObjectId) |
| { |
| int numatts = indexInfo->ii_NumIndexAttrs; |
| ListCell *indexpr_item = list_head(indexInfo->ii_Expressions); |
| TupleDesc heapTupDesc; |
| TupleDesc indexTupDesc; |
| int natts; /* #atts in heap rel --- for error checks */ |
| int i; |
| int fetchCount; |
| cqContext *pcqCtx; |
| |
| heapTupDesc = RelationGetDescr(heapRelation); |
| natts = RelationGetForm(heapRelation)->relnatts; |
| |
| /* |
| * allocate the new tuple descriptor |
| */ |
| indexTupDesc = CreateTemplateTupleDesc(numatts, false); |
| |
| /* |
| * For simple index columns, we copy the pg_attribute row from the parent |
| * relation and modify it as necessary. For expressions we have to cons |
| * up a pg_attribute row the hard way. |
| */ |
| for (i = 0; i < numatts; i++) |
| { |
| AttrNumber atnum = indexInfo->ii_KeyAttrNumbers[i]; |
| Form_pg_attribute to = indexTupDesc->attrs[i]; |
| HeapTuple tuple; |
| Form_pg_type typeTup; |
| Oid keyType; |
| |
| if (atnum != 0) |
| { |
| /* Simple index column */ |
| Form_pg_attribute from; |
| |
| if (atnum < 0) |
| { |
| /* |
| * here we are indexing on a system attribute (-1...-n) |
| */ |
| from = SystemAttributeDefinition(atnum, |
| heapRelation->rd_rel->relhasoids); |
| } |
| else |
| { |
| /* |
| * here we are indexing on a normal attribute (1...n) |
| */ |
| if (atnum > natts) /* safety check */ |
| elog(ERROR, "invalid column number %d", atnum); |
| from = heapTupDesc->attrs[AttrNumberGetAttrOffset(atnum)]; |
| } |
| |
| /* |
| * now that we've determined the "from", let's copy the tuple desc |
| * data... |
| */ |
| memcpy(to, from, ATTRIBUTE_TUPLE_SIZE); |
| |
| /* |
| * Fix the stuff that should not be the same as the underlying |
| * attr |
| */ |
| to->attnum = i + 1; |
| |
| to->attstattarget = -1; |
| to->attcacheoff = -1; |
| to->attnotnull = false; |
| to->atthasdef = false; |
| to->attislocal = true; |
| to->attinhcount = 0; |
| } |
| else |
| { |
| /* Expressional index */ |
| Node *indexkey; |
| |
| MemSet(to, 0, ATTRIBUTE_TUPLE_SIZE); |
| |
| if (indexpr_item == NULL) /* shouldn't happen */ |
| elog(ERROR, "too few entries in indexprs list"); |
| indexkey = (Node *) lfirst(indexpr_item); |
| indexpr_item = lnext(indexpr_item); |
| |
| /* |
| * Make the attribute's name "pg_expresssion_nnn" (maybe think of |
| * something better later) |
| */ |
| sprintf(NameStr(to->attname), "pg_expression_%d", i + 1); |
| |
| /* |
| * Lookup the expression type in pg_type for the type length etc. |
| */ |
| keyType = exprType(indexkey); |
| |
| pcqCtx = caql_beginscan( |
| NULL, |
| cql("SELECT * FROM pg_type " |
| " WHERE oid = :1 ", |
| ObjectIdGetDatum(keyType))); |
| |
| tuple = caql_getnext(pcqCtx); |
| |
| if (!HeapTupleIsValid(tuple)) |
| elog(ERROR, "cache lookup failed for type %u", keyType); |
| typeTup = (Form_pg_type) GETSTRUCT(tuple); |
| |
| /* |
| * Assign some of the attributes values. Leave the rest as 0. |
| */ |
| to->attnum = i + 1; |
| to->atttypid = keyType; |
| to->attlen = typeTup->typlen; |
| to->attbyval = typeTup->typbyval; |
| to->attstorage = typeTup->typstorage; |
| to->attalign = typeTup->typalign; |
| to->attstattarget = -1; |
| to->attcacheoff = -1; |
| to->atttypmod = -1; |
| to->attislocal = true; |
| |
| caql_endscan(pcqCtx); |
| } |
| |
| /* |
| * We do not yet have the correct relation OID for the index, so just |
| * set it invalid for now. InitializeAttributeOids() will fix it |
| * later. |
| */ |
| to->attrelid = InvalidOid; |
| |
| /* |
| * Check the opclass to see if it provides a keytype (overriding the |
| * attribute type). |
| */ |
| keyType = caql_getoid_plus( |
| NULL, |
| &fetchCount, |
| NULL, |
| cql("SELECT opckeytype FROM pg_opclass " |
| " WHERE oid = :1 ", |
| ObjectIdGetDatum(classObjectId[i]))); |
| |
| if (!fetchCount) |
| elog(ERROR, "cache lookup failed for opclass %u", |
| classObjectId[i]); |
| |
| if (OidIsValid(keyType) && keyType != to->atttypid) |
| { |
| /* index value and heap value have different types */ |
| pcqCtx = caql_beginscan( |
| NULL, |
| cql("SELECT * FROM pg_type " |
| " WHERE oid = :1 ", |
| ObjectIdGetDatum(keyType))); |
| |
| tuple = caql_getnext(pcqCtx); |
| |
| if (!HeapTupleIsValid(tuple)) |
| elog(ERROR, "cache lookup failed for type %u", keyType); |
| typeTup = (Form_pg_type) GETSTRUCT(tuple); |
| |
| to->atttypid = keyType; |
| to->atttypmod = -1; |
| to->attlen = typeTup->typlen; |
| to->attbyval = typeTup->typbyval; |
| to->attalign = typeTup->typalign; |
| to->attstorage = typeTup->typstorage; |
| |
| caql_endscan(pcqCtx); |
| } |
| } |
| |
| return indexTupDesc; |
| } |
| |
| /* ---------------------------------------------------------------- |
| * InitializeAttributeOids |
| * ---------------------------------------------------------------- |
| */ |
| static void |
| InitializeAttributeOids(Relation indexRelation, |
| int numatts, |
| Oid indexoid) |
| { |
| TupleDesc tupleDescriptor; |
| int i; |
| |
| tupleDescriptor = RelationGetDescr(indexRelation); |
| |
| for (i = 0; i < numatts; i += 1) |
| tupleDescriptor->attrs[i]->attrelid = indexoid; |
| } |
| |
| /* ---------------------------------------------------------------- |
| * AppendAttributeTuples |
| * ---------------------------------------------------------------- |
| */ |
| static void |
| AppendAttributeTuples(Relation indexRelation, int numatts) |
| { |
| TupleDesc indexTupDesc; |
| HeapTuple new_tuple; |
| int i; |
| cqContext *pcqCtx; |
| |
| /* |
| * open the attribute relation and its indexes |
| */ |
| pcqCtx = caql_beginscan( |
| NULL, |
| cql("INSERT INTO pg_attribute ", |
| NULL)); |
| |
| /* |
| * insert data from new index's tupdesc into pg_attribute |
| */ |
| indexTupDesc = RelationGetDescr(indexRelation); |
| |
| for (i = 0; i < numatts; i++) |
| { |
| /* |
| * There used to be very grotty code here to set these fields, but I |
| * think it's unnecessary. They should be set already. |
| */ |
| Assert(indexTupDesc->attrs[i]->attnum == i + 1); |
| Assert(indexTupDesc->attrs[i]->attcacheoff == -1); |
| |
| new_tuple = heap_addheader(Natts_pg_attribute, |
| false, |
| ATTRIBUTE_TUPLE_SIZE, |
| (void *) indexTupDesc->attrs[i]); |
| |
| caql_insert(pcqCtx, new_tuple); |
| /* and Update indexes (implicit) */ |
| |
| heap_freetuple(new_tuple); |
| } |
| |
| caql_endscan(pcqCtx); /* close rel, indexes */ |
| } |
| |
| /* ---------------------------------------------------------------- |
| * UpdateIndexRelation |
| * |
| * Construct and insert a new entry in the pg_index catalog |
| * ---------------------------------------------------------------- |
| */ |
| static void |
| UpdateIndexRelation(Oid indexoid, |
| Oid heapoid, |
| IndexInfo *indexInfo, |
| Oid *classOids, |
| bool primary, |
| bool isvalid) |
| { |
| int2vector *indkey; |
| oidvector *indclass; |
| Datum exprsDatum; |
| Datum predDatum; |
| Datum values[Natts_pg_index]; |
| bool nulls[Natts_pg_index]; |
| HeapTuple tuple; |
| int i; |
| cqContext *pcqCtx; |
| /* |
| * Copy the index key and opclass info into arrays (should we make the |
| * caller pass them like this to start with?) |
| */ |
| indkey = buildint2vector(NULL, indexInfo->ii_NumIndexAttrs); |
| indclass = buildoidvector(classOids, indexInfo->ii_NumIndexAttrs); |
| for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++) |
| indkey->values[i] = indexInfo->ii_KeyAttrNumbers[i]; |
| |
| /* |
| * Convert the index expressions (if any) to a text datum |
| */ |
| if (indexInfo->ii_Expressions != NIL) |
| { |
| char *exprsString; |
| |
| exprsString = nodeToString(indexInfo->ii_Expressions); |
| exprsDatum = CStringGetTextDatum(exprsString); |
| pfree(exprsString); |
| } |
| else |
| exprsDatum = (Datum) 0; |
| |
| /* |
| * Convert the index predicate (if any) to a text datum. Note we convert |
| * implicit-AND format to normal explicit-AND for storage. |
| */ |
| if (indexInfo->ii_Predicate != NIL) |
| { |
| char *predString; |
| |
| predString = nodeToString(make_ands_explicit(indexInfo->ii_Predicate)); |
| predDatum = CStringGetTextDatum(predString); |
| pfree(predString); |
| } |
| else |
| predDatum = (Datum) 0; |
| |
| /* |
| * open the system catalog index relation |
| */ |
| pcqCtx = caql_beginscan( |
| NULL, |
| cql("INSERT INTO pg_index ", |
| NULL)); |
| |
| /* |
| * Build a pg_index tuple |
| */ |
| MemSet(nulls, false, sizeof(nulls)); |
| |
| values[Anum_pg_index_indexrelid - 1] = ObjectIdGetDatum(indexoid); |
| values[Anum_pg_index_indrelid - 1] = ObjectIdGetDatum(heapoid); |
| values[Anum_pg_index_indnatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexAttrs); |
| values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique); |
| values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary); |
| values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false); |
| values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid); |
| values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey); |
| values[Anum_pg_index_indclass - 1] = PointerGetDatum(indclass); |
| values[Anum_pg_index_indexprs - 1] = exprsDatum; |
| if (exprsDatum == (Datum) 0) |
| nulls[Anum_pg_index_indexprs - 1] = true; |
| values[Anum_pg_index_indpred - 1] = predDatum; |
| if (predDatum == (Datum) 0) |
| nulls[Anum_pg_index_indpred - 1] = true; |
| |
| tuple = caql_form_tuple(pcqCtx, values, nulls); |
| |
| /* |
| * insert the tuple into the pg_index catalog |
| */ |
| caql_insert(pcqCtx, tuple); |
| /* and Update indexes (implicit) */ |
| |
| /* |
| * close the relation and free the tuple |
| */ |
| caql_endscan(pcqCtx); |
| heap_freetuple(tuple); |
| } |
| |
| |
| /* |
| * index_create |
| * |
| * heapRelationId: OID of table to build index on |
| * indexRelationName: what it say |
| * indexRelationId: normally, pass InvalidOid to let this routine |
| * generate an OID for the index. During bootstrap this may be |
| * nonzero to specify a preselected OID. |
| * indexInfo: same info executor uses to insert into the index |
| * accessMethodObjectId: OID of index AM to use |
| * tableSpaceId: OID of tablespace to use |
| * classObjectId: array of index opclass OIDs, one per index column |
| * reloptions: AM-specific options |
| * isprimary: index is a PRIMARY KEY |
| * isconstraint: index is owned by a PRIMARY KEY or UNIQUE constraint |
| * constrOid: constraint OID to use if isconstraint is true |
| * allow_system_table_mods: allow table to be a system catalog |
| * skip_build: true to skip the index_build() step for the moment; caller |
| * must do it later (typically via reindex_index()) |
| * concurrent: if true, do not lock the table against writers. The index |
| * will be marked "invalid" and the caller must take additional steps |
| * to fix it up. |
| * |
| * Returns OID of the created index. |
| */ |
| Oid |
| index_create(Oid heapRelationId, |
| const char *indexRelationName, |
| Oid indexRelationId, |
| struct IndexInfo *indexInfo, |
| Oid accessMethodObjectId, |
| Oid tableSpaceId, |
| Oid *classObjectId, |
| Datum reloptions, |
| bool isprimary, |
| bool isconstraint, |
| Oid *constrOid, |
| bool allow_system_table_mods, |
| bool skip_build, |
| bool concurrent, |
| const char *altConName) |
| { |
| Relation pg_class; |
| Relation gp_relfile_node; |
| Relation heapRelation; |
| Relation indexRelation; |
| TupleDesc indexTupDesc; |
| bool shared_relation; |
| Oid namespaceId; |
| int i; |
| LOCKMODE heap_lockmode; |
| |
| pg_class = heap_open(RelationRelationId, RowExclusiveLock); |
| |
| if (!IsBootstrapProcessingMode()) |
| gp_relfile_node = heap_open(GpRelfileNodeRelationId, RowExclusiveLock); |
| else |
| gp_relfile_node = NULL; |
| |
| /* |
| * Only SELECT ... FOR UPDATE/SHARE are allowed while doing a standard |
| * index build; but for concurrent builds we allow INSERT/UPDATE/DELETE |
| * (but not VACUUM). |
| */ |
| heap_lockmode = (concurrent ? ShareUpdateExclusiveLock : ShareLock); |
| heapRelation = heap_open(heapRelationId, heap_lockmode); |
| |
| |
| /* |
| * The index will be in the same namespace as its parent table, and is |
| * shared across databases if and only if the parent is. |
| */ |
| namespaceId = RelationGetNamespace(heapRelation); |
| shared_relation = heapRelation->rd_rel->relisshared; |
| |
| /* |
| * check parameters |
| */ |
| if (indexInfo->ii_NumIndexAttrs < 1) |
| elog(ERROR, "must index at least one column"); |
| |
| if (!allow_system_table_mods && |
| IsSystemRelation(heapRelation) && |
| IsNormalProcessingMode()) |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("user-defined indexes on system catalog tables are not supported"))); |
| |
| /* |
| * concurrent index build on a system catalog is unsafe because we tend to |
| * release locks before committing in catalogs |
| */ |
| if (concurrent && |
| IsSystemRelation(heapRelation)) |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("concurrent index creation on system catalog tables is not supported"))); |
| |
| /* |
| * We cannot allow indexing a shared relation after initdb (because |
| * there's no way to make the entry in other databases' pg_class), |
| * except during upgrade. |
| */ |
| if (shared_relation && !(IsBootstrapProcessingMode() || gp_upgrade_mode)) |
| ereport(ERROR, |
| (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), |
| errmsg("shared indexes cannot be created after initdb"))); |
| |
| if (get_relname_relid(indexRelationName, namespaceId)) |
| ereport(ERROR, |
| (errcode(ERRCODE_DUPLICATE_TABLE), |
| errmsg("relation \"%s\" already exists", |
| indexRelationName))); |
| |
| /* |
| * construct tuple descriptor for index tuples |
| */ |
| indexTupDesc = ConstructTupleDescriptor(heapRelation, |
| indexInfo, |
| classObjectId); |
| |
| /* |
| * Allocate an OID for the index, unless we were told what to use. |
| * |
| * The OID will be the relfilenode as well, so make sure it doesn't |
| * collide with either pg_class OIDs or existing physical files. |
| */ |
| if (!OidIsValid(indexRelationId)) |
| indexRelationId = GetNewRelFileNode(tableSpaceId, shared_relation, |
| pg_class, false); |
| else |
| if (IsUnderPostmaster) |
| { |
| CheckNewRelFileNodeIsOk(indexRelationId, tableSpaceId, shared_relation, pg_class, false); |
| } |
| |
| /* |
| * create the index relation's relcache entry and physical disk file. (If |
| * we fail further down, it's the smgr's responsibility to remove the disk |
| * file again.) |
| */ |
| indexRelation = heap_create(indexRelationName, |
| namespaceId, |
| tableSpaceId, |
| indexRelationId, |
| indexTupDesc, |
| accessMethodObjectId, |
| RELKIND_INDEX, |
| RELSTORAGE_HEAP, |
| shared_relation, |
| allow_system_table_mods, |
| /* bufferPoolBulkLoad */ false); |
| |
| Assert(indexRelationId == RelationGetRelid(indexRelation)); |
| |
| /* |
| * Obtain exclusive lock on it. Although no other backends can see it |
| * until we commit, this prevents deadlock-risk complaints from lock |
| * manager in cases such as CLUSTER. |
| */ |
| LockRelation(indexRelation, AccessExclusiveLock); |
| |
| /* |
| * Fill in fields of the index's pg_class entry that are not set correctly |
| * by heap_create. |
| * |
| * XXX should have a cleaner way to create cataloged indexes |
| */ |
| indexRelation->rd_rel->relowner = heapRelation->rd_rel->relowner; |
| indexRelation->rd_rel->relam = accessMethodObjectId; |
| indexRelation->rd_rel->relkind = RELKIND_INDEX; |
| indexRelation->rd_rel->relhasoids = false; |
| |
| /* |
| * store index's pg_class entry |
| */ |
| InsertPgClassTuple(pg_class, indexRelation, |
| RelationGetRelid(indexRelation), |
| reloptions); |
| |
| /* done with pg_class */ |
| heap_close(pg_class, RowExclusiveLock); |
| |
| { /* MPP-7575: track index creation */ |
| bool doIt = true; |
| char *subtyp = "INDEX"; |
| |
| /* MPP-7576: don't track internal namespace tables */ |
| switch (namespaceId) |
| { |
| case PG_CATALOG_NAMESPACE: |
| /* MPP-7773: don't track objects in system namespace |
| * if modifying system tables (eg during upgrade) |
| */ |
| if (allowSystemTableModsDDL) |
| doIt = false; |
| break; |
| |
| case PG_TOAST_NAMESPACE: |
| case PG_BITMAPINDEX_NAMESPACE: |
| case PG_AOSEGMENT_NAMESPACE: |
| doIt = false; |
| break; |
| default: |
| break; |
| } |
| |
| if (doIt) |
| doIt = (!(isAnyTempNamespace(namespaceId))); |
| |
| /* MPP-6929: metadata tracking */ |
| if (doIt) |
| MetaTrackAddObject(RelationRelationId, |
| RelationGetRelid(indexRelation), |
| GetUserId(), /* not ownerid */ |
| "CREATE", subtyp |
| ); |
| } |
| |
| if (gp_relfile_node != NULL) |
| { |
| InsertGpRelfileNodeTuple( |
| gp_relfile_node, |
| indexRelation->rd_id, |
| indexRelation->rd_rel->relname.data, |
| indexRelation->rd_rel->relfilenode, |
| /* segmentFileNum */ 0, |
| /* updateIndex */ true, |
| &indexRelation->rd_relationnodeinfo.persistentTid, |
| indexRelation->rd_relationnodeinfo.persistentSerialNum); |
| |
| heap_close(gp_relfile_node, RowExclusiveLock); |
| } |
| |
| /* |
| * now update the object id's of all the attribute tuple forms in the |
| * index relation's tuple descriptor |
| */ |
| InitializeAttributeOids(indexRelation, |
| indexInfo->ii_NumIndexAttrs, |
| indexRelationId); |
| |
| /* |
| * append ATTRIBUTE tuples for the index |
| */ |
| AppendAttributeTuples(indexRelation, indexInfo->ii_NumIndexAttrs); |
| |
| /* ---------------- |
| * update pg_index |
| * (append INDEX tuple) |
| * |
| * Note that this stows away a representation of "predicate". |
| * (Or, could define a rule to maintain the predicate) --Nels, Feb '92 |
| * ---------------- |
| */ |
| UpdateIndexRelation(indexRelationId, heapRelationId, indexInfo, |
| classObjectId, isprimary, !concurrent); |
| |
| /* |
| * Register constraint and dependencies for the index. |
| * |
| * If the index is from a CONSTRAINT clause, construct a pg_constraint |
| * entry. The index is then linked to the constraint, which in turn is |
| * linked to the table. If it's not a CONSTRAINT, make the dependency |
| * directly on the table. |
| * |
| * We don't need a dependency on the namespace, because there'll be an |
| * indirect dependency via our parent table. |
| * |
| * During bootstrap we can't register any dependencies, and we don't try |
| * to make a constraint either. |
| */ |
| if (!IsBootstrapProcessingMode()) |
| { |
| ObjectAddress myself, |
| referenced; |
| |
| myself.classId = RelationRelationId; |
| myself.objectId = indexRelationId; |
| myself.objectSubId = 0; |
| |
| if (isconstraint) |
| { |
| char constraintType; |
| const char *constraintName = indexRelationName; |
| |
| if ( altConName ) |
| { |
| constraintName = altConName; |
| } |
| |
| if (isprimary) |
| constraintType = CONSTRAINT_PRIMARY; |
| else if (indexInfo->ii_Unique) |
| constraintType = CONSTRAINT_UNIQUE; |
| else |
| { |
| elog(ERROR, "constraint must be PRIMARY or UNIQUE"); |
| constraintType = 0; /* keep compiler quiet */ |
| } |
| |
| /* Shouldn't have any expressions */ |
| if (indexInfo->ii_Expressions) |
| elog(ERROR, "constraints can't have index expressions"); |
| |
| Insist(constrOid != NULL); |
| *constrOid = CreateConstraintEntry(constraintName, |
| *constrOid, |
| namespaceId, |
| constraintType, |
| false, /* isDeferrable */ |
| false, /* isDeferred */ |
| heapRelationId, |
| indexInfo->ii_KeyAttrNumbers, |
| indexInfo->ii_NumIndexAttrs, |
| InvalidOid, /* no domain */ |
| InvalidOid, /* no foreign key */ |
| NULL, |
| 0, |
| ' ', |
| ' ', |
| ' ', |
| InvalidOid, /* no associated index */ |
| NULL, /* no check constraint */ |
| NULL, |
| NULL); |
| |
| referenced.classId = ConstraintRelationId; |
| referenced.objectId = *constrOid; |
| referenced.objectSubId = 0; |
| |
| recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL); |
| } |
| else |
| { |
| bool have_simple_col = false; |
| |
| /* Create auto dependencies on simply-referenced columns */ |
| for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++) |
| { |
| if (indexInfo->ii_KeyAttrNumbers[i] != 0) |
| { |
| referenced.classId = RelationRelationId; |
| referenced.objectId = heapRelationId; |
| referenced.objectSubId = indexInfo->ii_KeyAttrNumbers[i]; |
| |
| recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO); |
| |
| have_simple_col = true; |
| } |
| } |
| |
| /* |
| * It's possible for an index to not depend on any columns of the |
| * table at all, in which case we need to give it a dependency on |
| * the table as a whole; else it won't get dropped when the table |
| * is dropped. This edge case is not totally useless; for |
| * example, a unique index on a constant expression can serve to |
| * prevent a table from containing more than one row. |
| */ |
| if (!have_simple_col && |
| !contain_vars_of_level((Node *) indexInfo->ii_Expressions, 0) && |
| !contain_vars_of_level((Node *) indexInfo->ii_Predicate, 0)) |
| { |
| referenced.classId = RelationRelationId; |
| referenced.objectId = heapRelationId; |
| referenced.objectSubId = 0; |
| |
| recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO); |
| } |
| } |
| |
| /* Store dependency on operator classes */ |
| for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++) |
| { |
| referenced.classId = OperatorClassRelationId; |
| referenced.objectId = classObjectId[i]; |
| referenced.objectSubId = 0; |
| |
| recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); |
| } |
| |
| /* Store dependencies on anything mentioned in index expressions */ |
| if (indexInfo->ii_Expressions) |
| { |
| recordDependencyOnSingleRelExpr(&myself, |
| (Node *) indexInfo->ii_Expressions, |
| heapRelationId, |
| DEPENDENCY_NORMAL, |
| DEPENDENCY_AUTO); |
| } |
| |
| /* Store dependencies on anything mentioned in predicate */ |
| if (indexInfo->ii_Predicate) |
| { |
| recordDependencyOnSingleRelExpr(&myself, |
| (Node *) indexInfo->ii_Predicate, |
| heapRelationId, |
| DEPENDENCY_NORMAL, |
| DEPENDENCY_AUTO); |
| } |
| } |
| |
| /* |
| * Advance the command counter so that we can see the newly-entered |
| * catalog tuples for the index. |
| */ |
| CommandCounterIncrement(); |
| |
| /* |
| * In bootstrap mode, we have to fill in the index strategy structure with |
| * information from the catalogs. If we aren't bootstrapping, then the |
| * relcache entry has already been rebuilt thanks to sinval update during |
| * CommandCounterIncrement. |
| */ |
| if (IsBootstrapProcessingMode()) |
| RelationInitIndexAccessInfo(indexRelation); |
| else |
| Assert(indexRelation->rd_indexcxt != NULL); |
| |
| /* |
| * For upgrade, if we've already created the index in another database, |
| * we don't need or want to recreate it. |
| */ |
| if (gp_upgrade_mode && (RelationGetNumberOfBlocks(indexRelation) > 0)) |
| skip_build = true; |
| |
| /* |
| * If this is bootstrap (initdb) time, then we don't actually fill in the |
| * index yet. We'll be creating more indexes and classes later, so we |
| * delay filling them in until just before we're done with bootstrapping. |
| * Similarly, if the caller specified skip_build then filling the index is |
| * delayed till later (ALTER TABLE can save work in some cases with this). |
| * Otherwise, we call the AM routine that constructs the index. |
| */ |
| if (IsBootstrapProcessingMode()) |
| { |
| index_register(heapRelationId, indexRelationId, indexInfo); |
| } |
| else if (skip_build) |
| { |
| /* |
| * Caller is responsible for filling the index later on. However, |
| * we'd better make sure that the heap relation is correctly marked as |
| * having an index. |
| */ |
| index_update_stats(heapRelation, |
| true, |
| isprimary, |
| InvalidOid, |
| heapRelation->rd_rel->reltuples); |
| /* Make the above update visible */ |
| CommandCounterIncrement(); |
| } |
| else |
| { |
| index_build(heapRelation, indexRelation, indexInfo, isprimary); |
| } |
| |
| /* |
| * Close the heap and index; but we keep the locks that we acquired above |
| * until end of transaction unless we're dealing with a child of a partition |
| * table, in which case the lock on the master is sufficient. |
| */ |
| if (rel_needs_long_lock(RelationGetRelid(heapRelation))) |
| { |
| index_close(indexRelation, NoLock); |
| heap_close(heapRelation, NoLock); |
| } |
| else |
| { |
| index_close(indexRelation, AccessExclusiveLock); |
| heap_close(heapRelation, heap_lockmode); |
| } |
| |
| return indexRelationId; |
| } |
| |
| /* |
| * index_drop |
| * |
| * NOTE: this routine should now only be called through performDeletion(), |
| * else associated dependencies won't be cleaned up. |
| */ |
| void |
| index_drop(Oid indexId) |
| { |
| Oid heapId; |
| Relation userHeapRelation; |
| Relation userIndexRelation; |
| HeapTuple tuple; |
| bool hasexprs; |
| bool need_long_lock; |
| cqContext *pcqCtx; |
| |
| /* |
| * To drop an index safely, we must grab exclusive lock on its parent |
| * table; otherwise there could be other backends using the index! |
| * Exclusive lock on the index alone is insufficient because another |
| * backend might be in the midst of devising a query plan that will use |
| * the index. The parser and planner take care to hold an appropriate |
| * lock on the parent table while working, but having them hold locks on |
| * all the indexes too seems overly expensive. We do grab exclusive lock |
| * on the index too, just to be safe. Both locks must be held till end of |
| * transaction, else other backends will still see this index in pg_index. |
| */ |
| heapId = IndexGetRelation(indexId); |
| userHeapRelation = heap_open(heapId, AccessExclusiveLock); |
| |
| userIndexRelation = index_open(indexId, AccessExclusiveLock); |
| |
| |
| /* |
| * TODO, in hawq, only MASTER_CONTENT_ID is used here, |
| * will changed later depends on the design of index. |
| */ |
| if (!userIndexRelation->rd_relationnodeinfo.isPresent) |
| RelationFetchGpRelationNode(userIndexRelation); |
| |
| /* |
| * Schedule physical removal of the files |
| */ |
| MirroredFileSysObj_ScheduleDropBufferPoolRel(userIndexRelation); |
| |
| DeleteGpRelfileNodeTuple( |
| userIndexRelation, |
| /* segmentFileNum */ 0); |
| |
| |
| /* |
| * Close and flush the index's relcache entry, to ensure relcache doesn't |
| * try to rebuild it while we're deleting catalog entries. We keep the |
| * lock though. |
| */ |
| need_long_lock = rel_needs_long_lock(RelationGetRelid(userIndexRelation)); |
| if (need_long_lock) |
| index_close(userIndexRelation, NoLock); |
| else |
| index_close(userIndexRelation, AccessExclusiveLock); |
| |
| RelationForgetRelation(indexId); |
| |
| /* |
| * fix INDEX relation, and check for expressional index |
| */ |
| pcqCtx = caql_beginscan( |
| NULL, |
| cql("SELECT * FROM pg_index " |
| " WHERE indexrelid = :1 " |
| " FOR UPDATE ", |
| ObjectIdGetDatum(indexId))); |
| |
| tuple = caql_getnext(pcqCtx); |
| |
| if (!HeapTupleIsValid(tuple)) |
| elog(ERROR, "cache lookup failed for index %u", indexId); |
| |
| hasexprs = !heap_attisnull(tuple, Anum_pg_index_indexprs); |
| |
| caql_delete_current(pcqCtx); |
| |
| caql_endscan(pcqCtx); |
| |
| /* |
| * if it has any expression columns, we might have stored statistics about |
| * them. |
| */ |
| if (hasexprs) |
| RemoveStatistics(indexId, 0); |
| |
| /* |
| * fix ATTRIBUTE relation |
| */ |
| DeleteAttributeTuples(indexId); |
| |
| /* |
| * fix RELATION relation |
| */ |
| DeleteRelationTuple(indexId); |
| |
| /* MPP-6929: metadata tracking */ |
| MetaTrackDropObject(RelationRelationId, |
| indexId); |
| |
| /* |
| * We are presently too lazy to attempt to compute the new correct value |
| * of relhasindex (the next VACUUM will fix it if necessary). So there is |
| * no need to update the pg_class tuple for the owning relation. But we |
| * must send out a shared-cache-inval notice on the owning relation to |
| * ensure other backends update their relcache lists of indexes. |
| */ |
| CacheInvalidateRelcache(userHeapRelation); |
| |
| /* |
| * Close owning rel, but keep lock |
| */ |
| heap_close(userHeapRelation, need_long_lock ? NoLock : AccessExclusiveLock); |
| } |
| |
| /* ---------------------------------------------------------------- |
| * index_build support |
| * ---------------------------------------------------------------- |
| */ |
| |
| /* ---------------- |
| * BuildIndexInfo |
| * Construct an IndexInfo record for an open index |
| * |
| * IndexInfo stores the information about the index that's needed by |
| * FormIndexDatum, which is used for both index_build() and later insertion |
| * of individual index tuples. Normally we build an IndexInfo for an index |
| * just once per command, and then use it for (potentially) many tuples. |
| * ---------------- |
| */ |
| struct IndexInfo * |
| BuildIndexInfo(Relation index) |
| { |
| IndexInfo *ii = makeNode(IndexInfo); |
| Form_pg_index indexStruct = index->rd_index; |
| int i; |
| int numKeys; |
| |
| /* check the number of keys, and copy attr numbers into the IndexInfo */ |
| numKeys = indexStruct->indnatts; |
| if (numKeys < 1 || numKeys > INDEX_MAX_KEYS) |
| elog(ERROR, "invalid indnatts %d for index %u", |
| numKeys, RelationGetRelid(index)); |
| ii->ii_NumIndexAttrs = numKeys; |
| for (i = 0; i < numKeys; i++) |
| ii->ii_KeyAttrNumbers[i] = indexStruct->indkey.values[i]; |
| |
| /* fetch any expressions needed for expressional indexes */ |
| ii->ii_Expressions = RelationGetIndexExpressions(index); |
| ii->ii_ExpressionsState = NIL; |
| |
| /* fetch index predicate if any */ |
| ii->ii_Predicate = RelationGetIndexPredicate(index); |
| ii->ii_PredicateState = NIL; |
| |
| /* other info */ |
| ii->ii_Unique = indexStruct->indisunique; |
| ii->ii_Concurrent = false; /* assume normal case */ |
| |
| ii->opaque = NULL; |
| |
| return ii; |
| } |
| |
| /* ---------------- |
| * FormIndexDatum |
| * Construct values[] and isnull[] arrays for a new index tuple. |
| * |
| * indexInfo Info about the index |
| * slot Heap tuple for which we must prepare an index entry |
| * estate executor state for evaluating any index expressions |
| * values Array of index Datums (output area) |
| * isnull Array of is-null indicators (output area) |
| * |
| * When there are no index expressions, estate may be NULL. Otherwise it |
| * must be supplied, *and* the ecxt_scantuple slot of its per-tuple expr |
| * context must point to the heap tuple passed in. |
| * |
| * Notice we don't actually call index_form_tuple() here; we just prepare |
| * its input arrays values[] and isnull[]. This is because the index AM |
| * may wish to alter the data before storage. |
| * ---------------- |
| */ |
| void |
| FormIndexDatum(struct IndexInfo *indexInfo, |
| TupleTableSlot *slot, |
| struct EState *estate, |
| Datum *values, |
| bool *isnull) |
| { |
| ListCell *indexpr_item; |
| int i; |
| |
| if (indexInfo->ii_Expressions != NIL && |
| indexInfo->ii_ExpressionsState == NIL) |
| { |
| /* First time through, set up expression evaluation state */ |
| indexInfo->ii_ExpressionsState = (List *) |
| ExecPrepareExpr((Expr *) indexInfo->ii_Expressions, |
| estate); |
| /* Check caller has set up context correctly */ |
| Assert(GetPerTupleExprContext(estate)->ecxt_scantuple == slot); |
| } |
| indexpr_item = list_head(indexInfo->ii_ExpressionsState); |
| |
| for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++) |
| { |
| int keycol = indexInfo->ii_KeyAttrNumbers[i]; |
| Datum iDatum; |
| bool isNull; |
| |
| if (keycol != 0) |
| { |
| /* |
| * Plain index column; get the value we need directly from the |
| * heap tuple. |
| */ |
| iDatum = slot_getattr(slot, keycol, &isNull); |
| } |
| else |
| { |
| /* |
| * Index expression --- need to evaluate it. |
| */ |
| if (indexpr_item == NULL) |
| elog(ERROR, "wrong number of index expressions"); |
| iDatum = ExecEvalExprSwitchContext((ExprState *) lfirst(indexpr_item), |
| GetPerTupleExprContext(estate), |
| &isNull, |
| NULL); |
| indexpr_item = lnext(indexpr_item); |
| } |
| values[i] = iDatum; |
| isnull[i] = isNull; |
| } |
| |
| if (indexpr_item != NULL) |
| elog(ERROR, "wrong number of index expressions"); |
| } |
| |
| |
| /* |
| * index_update_stats --- update pg_class entry after CREATE INDEX or REINDEX |
| * |
| * This routine updates the pg_class row of either an index or its parent |
| * relation after CREATE INDEX or REINDEX. Its rather bizarre API is designed |
| * to ensure we can do all the necessary work in just one update. |
| * |
| * hasindex: set relhasindex to this value |
| * isprimary: if true, set relhaspkey true; else no change |
| * reltoastidxid: if not InvalidOid, set reltoastidxid to this value; |
| * else no change |
| * reltuples: set reltuples to this value |
| * |
| * relpages is also updated (using RelationGetNumberOfBlocks()). |
| * |
| * NOTE: an important side-effect of this operation is that an SI invalidation |
| * message is sent out to all backends --- including me --- causing relcache |
| * entries to be flushed or updated with the new data. This must happen even |
| * if we find that no change is needed in the pg_class row. When updating |
| * a heap entry, this ensures that other backends find out about the new |
| * index. When updating an index, it's important because some index AMs |
| * expect a relcache flush to occur after REINDEX. |
| */ |
| static void |
| index_update_stats(Relation rel, bool hasindex, bool isprimary, |
| Oid reltoastidxid, double reltuples) |
| { |
| BlockNumber relpages = RelationGetNumberOfBlocks(rel); |
| Oid relid = RelationGetRelid(rel); |
| Relation pg_class; |
| HeapTuple tuple; |
| Form_pg_class rd_rel; |
| bool dirty; |
| |
| /* |
| * We always update the pg_class row using a non-transactional, |
| * overwrite-in-place update. There are several reasons for this: |
| * |
| * 1. In bootstrap mode, we have no choice --- UPDATE wouldn't work. |
| * |
| * 2. We could be reindexing pg_class itself, in which case we can't move |
| * its pg_class row because CatalogUpdateIndexes might not know about all |
| * the indexes yet (see reindex_relation). |
| * |
| * 3. Because we execute CREATE INDEX with just share lock on the parent |
| * rel (to allow concurrent index creations), an ordinary update could |
| * suffer a tuple-concurrently-updated failure against another CREATE |
| * INDEX committing at about the same time. We can avoid that by having |
| * them both do nontransactional updates (we assume they will both be |
| * trying to change the pg_class row to the same thing, so it doesn't |
| * matter which goes first). |
| * |
| * 4. Even with just a single CREATE INDEX, there's a risk factor because |
| * someone else might be trying to open the rel while we commit, and this |
| * creates a race condition as to whether he will see both or neither of |
| * the pg_class row versions as valid. Again, a non-transactional update |
| * avoids the risk. It is indeterminate which state of the row the other |
| * process will see, but it doesn't matter (if he's only taking |
| * AccessShareLock, then it's not critical that he see relhasindex true). |
| * |
| * It is safe to use a non-transactional update even though our |
| * transaction could still fail before committing. Setting relhasindex |
| * true is safe even if there are no indexes (VACUUM will eventually fix |
| * it), and of course the relpages and reltuples counts are correct (or at |
| * least more so than the old values) regardless. |
| */ |
| |
| pg_class = heap_open(RelationRelationId, RowExclusiveLock); |
| |
| /* |
| * Make a copy of the tuple to update. Normally we use the syscache, but |
| * we can't rely on that during bootstrap or while reindexing pg_class |
| * itself. |
| */ |
| if (IsBootstrapProcessingMode() || |
| ReindexIsProcessingHeap(RelationRelationId)) |
| { |
| /* don't assume syscache will work */ |
| cqContext cqc; |
| |
| /* heapscan, noindex */ |
| tuple = caql_getfirst( |
| caql_syscache( |
| caql_indexOK(caql_addrel(cqclr(&cqc), pg_class), |
| false), |
| false), |
| cql("SELECT * FROM pg_class " |
| " WHERE oid = :1 " |
| " FOR UPDATE ", |
| ObjectIdGetDatum(relid))); |
| } |
| else |
| { |
| cqContext cqc; |
| |
| /* normal case, use syscache */ |
| tuple = caql_getfirst( |
| caql_addrel(cqclr(&cqc), pg_class), |
| cql("SELECT * FROM pg_class " |
| " WHERE oid = :1 " |
| " FOR UPDATE ", |
| ObjectIdGetDatum(relid))); |
| } |
| |
| if (!HeapTupleIsValid(tuple)) |
| elog(ERROR, "could not find tuple for relation %u", relid); |
| rd_rel = (Form_pg_class) GETSTRUCT(tuple); |
| |
| /* Apply required updates, if any, to copied tuple */ |
| |
| dirty = false; |
| if (rd_rel->relhasindex != hasindex) |
| { |
| rd_rel->relhasindex = hasindex; |
| dirty = true; |
| } |
| if (isprimary) |
| { |
| if (!rd_rel->relhaspkey) |
| { |
| rd_rel->relhaspkey = true; |
| dirty = true; |
| } |
| } |
| if (OidIsValid(reltoastidxid)) |
| { |
| Assert(rd_rel->relkind == RELKIND_TOASTVALUE); |
| if (rd_rel->reltoastidxid != reltoastidxid) |
| { |
| rd_rel->reltoastidxid = reltoastidxid; |
| dirty = true; |
| } |
| } |
| |
| if (Gp_role != GP_ROLE_DISPATCH) |
| { |
| /** |
| * Do not overwrite relpages, reltuples in QD. |
| */ |
| if (rd_rel->reltuples != (float4) reltuples) |
| { |
| rd_rel->reltuples = (float4) reltuples; |
| dirty = true; |
| } |
| if (rd_rel->relpages != (int32) relpages) |
| { |
| rd_rel->relpages = (int32) relpages; |
| dirty = true; |
| } |
| } |
| /* |
| * If anything changed, write out the tuple |
| */ |
| if (dirty) |
| { |
| heap_inplace_update(pg_class, tuple); |
| /* the above sends a cache inval message */ |
| } |
| else |
| { |
| /* no need to change tuple, but force relcache inval anyway */ |
| CacheInvalidateRelcacheByTuple(tuple); |
| } |
| |
| heap_freetuple(tuple); |
| |
| heap_close(pg_class, RowExclusiveLock); |
| } |
| |
| /* |
| * index_build - invoke access-method-specific index build procedure |
| * |
| * On entry, the index's catalog entries are valid, and its physical disk |
| * file has been created but is empty. We call the AM-specific build |
| * procedure to fill in the index contents. We then update the pg_class |
| * entries of the index and heap relation as needed, using statistics |
| * returned by ambuild as well as data passed by the caller. |
| * |
| * Note: when reindexing an existing index, isprimary can be false; |
| * the index is already properly marked and need not be re-marked. |
| * |
| * Note: before Postgres 8.2, the passed-in heap and index Relations |
| * were automatically closed by this routine. This is no longer the case. |
| * The caller opened 'em, and the caller should close 'em. |
| */ |
| void |
| index_build(Relation heapRelation, |
| Relation indexRelation, |
| IndexInfo *indexInfo, |
| bool isprimary) |
| { |
| RegProcedure procedure; |
| IndexBuildResult *stats; |
| Oid save_userid; |
| bool save_secdefcxt; |
| |
| /* |
| * sanity checks |
| */ |
| Assert(RelationIsValid(indexRelation)); |
| Assert(PointerIsValid(indexRelation->rd_am)); |
| |
| procedure = indexRelation->rd_am->ambuild; |
| Assert(RegProcedureIsValid(procedure)); |
| |
| /* |
| * Switch to the table owner's userid, so that any index functions are |
| * run as that user. |
| */ |
| GetUserIdAndContext(&save_userid, &save_secdefcxt); |
| SetUserIdAndContext(heapRelation->rd_rel->relowner, true); |
| |
| /* |
| * Call the access method's build procedure |
| */ |
| stats = (IndexBuildResult *) |
| DatumGetPointer(OidFunctionCall3(procedure, |
| PointerGetDatum(heapRelation), |
| PointerGetDatum(indexRelation), |
| PointerGetDatum(indexInfo))); |
| Assert(PointerIsValid(stats)); |
| |
| /* Restore userid */ |
| SetUserIdAndContext(save_userid, save_secdefcxt); |
| |
| /* |
| * Update heap and index pg_class rows |
| */ |
| index_update_stats(heapRelation, |
| true, |
| isprimary, |
| (heapRelation->rd_rel->relkind == RELKIND_TOASTVALUE) ? |
| RelationGetRelid(indexRelation) : InvalidOid, |
| stats->heap_tuples); |
| |
| #if 0 |
| /* |
| * Update an AO segment or block directory index oid |
| */ |
| switch(heapRelation->rd_rel->relkind) |
| { |
| case RELKIND_AOSEGMENTS: |
| UpdateAppendOnlyEntryIdxid(RelationGetRelid(heapRelation), |
| Anum_pg_appendonly_segidxid, |
| RelationGetRelid(indexRelation)); |
| break; |
| case RELKIND_AOBLOCKDIR: |
| UpdateAppendOnlyEntryIdxid(RelationGetRelid(heapRelation), |
| Anum_pg_appendonly_blkdiridxid, |
| RelationGetRelid(indexRelation)); |
| break; |
| default: |
| /* do nothing */ |
| } |
| #endif |
| |
| index_update_stats(indexRelation, |
| false, |
| false, |
| InvalidOid, |
| stats->index_tuples); |
| |
| /* Make the updated versions visible */ |
| CommandCounterIncrement(); |
| } |
| |
| /* |
| * IndexBuildScan - scan the heap, or the append-only row, or the append-only |
| * column relation to find tuples to be indexed. |
| * |
| * This is called back from an access-method-specific index build procedure |
| * after the AM has done whatever setup it needs. The parent relation |
| * is scanned to find tuples that should be entered into the index. Each |
| * such tuple is passed to the AM's callback routine, which does the right |
| * things to add it to the new index. After we return, the AM's index |
| * build procedure does whatever cleanup is needed; in particular, it should |
| * close the heap and index relations. |
| * |
| * The total count of heap tuples is returned. This is for updating pg_class |
| * statistics. (It's annoying not to be able to do that here, but we can't |
| * do it until after the relation is closed.) Note that the index AM itself |
| * must keep track of the number of index tuples; we don't do so here because |
| * the AM might reject some of the tuples for its own reasons, such as being |
| * unable to store NULLs. |
| */ |
| double |
| IndexBuildScan(Relation parentRelation, |
| Relation indexRelation, |
| struct IndexInfo *indexInfo, |
| IndexBuildCallback callback, |
| void *callback_state) |
| { |
| TupleTableSlot *slot; |
| EState *estate; |
| ExprContext *econtext; |
| Snapshot snapshot; |
| TransactionId OldestXmin; |
| double reltuples = 0; |
| |
| /* |
| * sanity checks |
| */ |
| Assert(OidIsValid(indexRelation->rd_rel->relam)); |
| |
| /* |
| * Need an EState for evaluation of index expressions and partial-index |
| * predicates. Also a slot to hold the current tuple. |
| */ |
| estate = CreateExecutorState(); |
| econtext = GetPerTupleExprContext(estate); |
| slot = MakeSingleTupleTableSlot(RelationGetDescr(parentRelation)); |
| |
| /* Arrange for econtext's scan tuple to be the tuple under test */ |
| econtext->ecxt_scantuple = slot; |
| |
| /* |
| * Prepare for scan of the base relation. In a normal index build, we use |
| * SnapshotAny because we must retrieve all tuples and do our own time |
| * qual checks (because we have to index RECENTLY_DEAD tuples). In a |
| * concurrent build, we take a regular MVCC snapshot and index whatever's |
| * live according to that. During bootstrap we just use SnapshotNow. |
| * |
| * If the relation is an append-only table, we use a regular MVCC snapshot |
| * and index what is actually in the table. |
| */ |
| if (IsBootstrapProcessingMode()) |
| { |
| snapshot = SnapshotNow; |
| OldestXmin = InvalidTransactionId; /* not used */ |
| } |
| else if (indexInfo->ii_Concurrent || |
| RelationIsAoRows(parentRelation)) |
| { |
| snapshot = CopySnapshot(GetTransactionSnapshot()); |
| OldestXmin = InvalidTransactionId; /* not used */ |
| } |
| else |
| { |
| snapshot = SnapshotAny; |
| /* okay to ignore lazy VACUUMs here */ |
| OldestXmin = GetOldestXmin(parentRelation->rd_rel->relisshared); |
| } |
| |
| if (RelationIsHeap(parentRelation)) |
| reltuples = IndexBuildHeapScan(parentRelation, |
| indexRelation, |
| indexInfo, |
| estate, |
| snapshot, |
| OldestXmin, |
| callback, |
| callback_state); |
| |
| else if (RelationIsAoRows(parentRelation)) |
| reltuples = IndexBuildAppendOnlyRowScan(parentRelation, |
| indexRelation, |
| indexInfo, |
| estate, |
| snapshot, |
| callback, |
| callback_state); |
| |
| ExecDropSingleTupleTableSlot(slot); |
| FreeExecutorState(estate); |
| |
| /* These may have been pointing to the now-gone estate */ |
| indexInfo->ii_ExpressionsState = NIL; |
| indexInfo->ii_PredicateState = NIL; |
| |
| return reltuples; |
| } |
| |
| |
| /* |
| * IndexBuildHeapScan - scan the heap relation to find tuples to be indexed |
| * |
| * This is called back from an access-method-specific index build procedure |
| * after the AM has done whatever setup it needs. The parent heap relation |
| * is scanned to find tuples that should be entered into the index. Each |
| * such tuple is passed to the AM's callback routine, which does the right |
| * things to add it to the new index. After we return, the AM's index |
| * build procedure does whatever cleanup is needed; in particular, it should |
| * close the heap and index relations. |
| * |
| * The total count of heap tuples is returned. This is for updating pg_class |
| * statistics. (It's annoying not to be able to do that here, but we can't |
| * do it until after the relation is closed.) Note that the index AM itself |
| * must keep track of the number of index tuples; we don't do so here because |
| * the AM might reject some of the tuples for its own reasons, such as being |
| * unable to store NULLs. |
| */ |
| static double |
| IndexBuildHeapScan(Relation heapRelation, |
| Relation indexRelation, |
| struct IndexInfo *indexInfo, |
| EState *estate, |
| Snapshot snapshot, |
| TransactionId OldestXmin, |
| IndexBuildCallback callback, |
| void *callback_state) |
| { |
| MIRROREDLOCK_BUFMGR_DECLARE; |
| |
| HeapScanDesc scan; |
| HeapTuple heapTuple; |
| Datum values[INDEX_MAX_KEYS]; |
| bool isnull[INDEX_MAX_KEYS]; |
| double reltuples; |
| List *predicate = NIL; |
| ExprContext *econtext; |
| TupleTableSlot *slot; |
| |
| Assert(estate->es_per_tuple_exprcontext != NULL); |
| econtext = estate->es_per_tuple_exprcontext; |
| slot = econtext->ecxt_scantuple; |
| |
| /* Set up execution state for predicate, if any. */ |
| predicate = (List *) |
| ExecPrepareExpr((Expr *) indexInfo->ii_Predicate, estate); |
| |
| scan = heap_beginscan(heapRelation, /* relation */ |
| snapshot, /* seeself */ |
| 0, /* number of keys */ |
| NULL); /* scan key */ |
| |
| reltuples = 0; |
| |
| /* |
| * Scan all tuples in the base relation. |
| */ |
| while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL) |
| { |
| bool tupleIsAlive; |
| |
| CHECK_FOR_INTERRUPTS(); |
| |
| if (snapshot == SnapshotAny) |
| { |
| /* do our own time qual check */ |
| bool indexIt; |
| |
| /* |
| * We could possibly get away with not locking the buffer here, |
| * since caller should hold ShareLock on the relation, but let's |
| * be conservative about it. |
| */ |
| |
| // -------- MirroredLock ---------- |
| MIRROREDLOCK_BUFMGR_LOCK; |
| |
| LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); |
| |
| switch (HeapTupleSatisfiesVacuum(heapTuple->t_data, OldestXmin, |
| scan->rs_cbuf, true)) |
| { |
| case HEAPTUPLE_DEAD: |
| /* Definitely dead, we can ignore it */ |
| indexIt = false; |
| tupleIsAlive = false; |
| break; |
| case HEAPTUPLE_LIVE: |
| /* Normal case, index and unique-check it */ |
| indexIt = true; |
| tupleIsAlive = true; |
| break; |
| case HEAPTUPLE_RECENTLY_DEAD: |
| |
| /* |
| * If tuple is recently deleted then we must index it |
| * anyway to preserve MVCC semantics. (Pre-existing |
| * transactions could try to use the index after we finish |
| * building it, and may need to see such tuples.) Exclude |
| * it from unique-checking, however. |
| */ |
| indexIt = true; |
| tupleIsAlive = false; |
| break; |
| case HEAPTUPLE_INSERT_IN_PROGRESS: |
| |
| /* |
| * Since caller should hold ShareLock or better, we should |
| * not see any tuples inserted by open transactions --- |
| * unless it's our own transaction. (Consider INSERT |
| * followed by CREATE INDEX within a transaction.) An |
| * exception occurs when reindexing a system catalog, |
| * because we often release lock on system catalogs before |
| * committing. |
| */ |
| if (!TransactionIdIsCurrentTransactionId( |
| HeapTupleHeaderGetXmin(heapTuple->t_data)) |
| && !IsSystemRelation(heapRelation)) |
| elog(ERROR, "concurrent insert in progress"); |
| indexIt = true; |
| tupleIsAlive = true; |
| break; |
| case HEAPTUPLE_DELETE_IN_PROGRESS: |
| |
| /* |
| * Since caller should hold ShareLock or better, we should |
| * not see any tuples deleted by open transactions --- |
| * unless it's our own transaction. (Consider DELETE |
| * followed by CREATE INDEX within a transaction.) An |
| * exception occurs when reindexing a system catalog, |
| * because we often release lock on system catalogs before |
| * committing. |
| * |
| * XXX we also skip the check for any bitmap indexes. |
| */ |
| Assert(!(heapTuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI)); |
| if (!TransactionIdIsCurrentTransactionId( |
| HeapTupleHeaderGetXmax(heapTuple->t_data)) |
| && !IsSystemRelation(heapRelation) |
| && (!RelationIsBitmapIndex(indexRelation))) |
| elog(ERROR, "concurrent delete in progress"); |
| indexIt = true; |
| tupleIsAlive = false; |
| break; |
| default: |
| elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result"); |
| indexIt = tupleIsAlive = false; /* keep compiler quiet */ |
| break; |
| } |
| |
| LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); |
| |
| MIRROREDLOCK_BUFMGR_UNLOCK; |
| // -------- MirroredLock ---------- |
| |
| if (!indexIt) |
| continue; |
| } |
| else |
| { |
| /* heap_getnext did the time qual check */ |
| tupleIsAlive = true; |
| } |
| |
| reltuples += 1; |
| |
| MemoryContextReset(econtext->ecxt_per_tuple_memory); |
| |
| /* Set up for predicate or expression evaluation */ |
| ExecStoreGenericTuple(heapTuple, slot, false); |
| |
| /* |
| * In a partial index, discard tuples that don't satisfy the |
| * predicate. |
| */ |
| if (predicate != NIL) |
| { |
| if (!ExecQual(predicate, econtext, false)) |
| continue; |
| } |
| |
| /* |
| * For the current heap tuple, extract all the attributes we use in |
| * this index, and note which are null. This also performs evaluation |
| * of any expressions needed. |
| */ |
| FormIndexDatum(indexInfo, |
| slot, |
| estate, |
| values, |
| isnull); |
| |
| /* |
| * You'd think we should go ahead and build the index tuple here, but |
| * some index AMs want to do further processing on the data first. So |
| * pass the values[] and isnull[] arrays, instead. |
| */ |
| |
| /* Call the AM's callback routine to process the tuple */ |
| callback(indexRelation, slot_get_ctid(slot), |
| values, isnull, tupleIsAlive, callback_state); |
| } |
| |
| heap_endscan(scan); |
| |
| return reltuples; |
| } |
| |
| /* |
| * IndexBuildAppendOnlyRowScan - scan the Append-Only Row relation to find |
| * tuples to be indexed. |
| * |
| * If the block directory of the append-only relation does not exist, it is |
| * created here. This occurs when the append-only relation is upgraded from |
| * pre-3.4 release. |
| */ |
| static double |
| IndexBuildAppendOnlyRowScan(Relation parentRelation, |
| Relation indexRelation, |
| struct IndexInfo *indexInfo, |
| EState *estate, |
| Snapshot snapshot, |
| IndexBuildCallback callback, |
| void *callback_state) |
| { |
| List *predicate = NIL; |
| ExprContext *econtext; |
| struct AppendOnlyScanDescData *aoscan; |
| TupleTableSlot *slot; |
| double reltuples = 0; |
| Datum values[INDEX_MAX_KEYS]; |
| bool isnull[INDEX_MAX_KEYS]; |
| |
| Assert(estate->es_per_tuple_exprcontext != NULL); |
| econtext = estate->es_per_tuple_exprcontext; |
| slot = econtext->ecxt_scantuple; |
| |
| /* Set up execution state for predicate, if any */ |
| predicate = (List *) |
| ExecPrepareExpr((Expr *)indexInfo->ii_Predicate, estate); |
| |
| aoscan = appendonly_beginscan(parentRelation, |
| snapshot, |
| 0, |
| NULL); |
| |
| while (appendonly_getnext(aoscan, ForwardScanDirection, slot) != NULL) |
| { |
| CHECK_FOR_INTERRUPTS(); |
| |
| reltuples++; |
| |
| MemoryContextReset(econtext->ecxt_per_tuple_memory); |
| |
| if (predicate != NIL) |
| { |
| if (!ExecQual(predicate, econtext, false)) |
| continue; |
| } |
| |
| /* |
| * For the current heap tuple, extract all the attributes we use in |
| * this index, and note which are null. This also performs evaluation |
| * of any expressions needed. |
| */ |
| FormIndexDatum(indexInfo, |
| slot, |
| estate, |
| values, |
| isnull); |
| |
| /* |
| * You'd think we should go ahead and build the index tuple here, but |
| * some index AMs want to do further processing on the data first. So |
| * pass the values[] and isnull[] arrays, instead. |
| */ |
| Assert(ItemPointerIsValid(slot_get_ctid(slot))); |
| |
| /* Call the AM's callback routine to process the tuple */ |
| callback(indexRelation, slot_get_ctid(slot), |
| values, isnull, true, callback_state); |
| } |
| |
| appendonly_endscan(aoscan); |
| |
| return reltuples; |
| } |
| |
| |
| /* |
| * validate_index - support code for concurrent index builds |
| * |
| * We do a concurrent index build by first building the index normally via |
| * index_create(), while holding a weak lock that allows concurrent |
| * insert/update/delete. Also, we index only tuples that are valid |
| * as of the start of the scan (see IndexBuildHeapScan), whereas a normal |
| * build takes care to include recently-dead tuples. This is OK because |
| * we won't mark the index valid until all transactions that might be able |
| * to see those tuples are gone. The reason for doing that is to avoid |
| * bogus unique-index failures due to concurrent UPDATEs (we might see |
| * different versions of the same row as being valid when we pass over them, |
| * if we used HeapTupleSatisfiesVacuum). This leaves us with an index that |
| * does not contain any tuples added to the table while we built the index. |
| * |
| * Next, we commit the transaction so that the index becomes visible to other |
| * backends, but it is marked not "indisvalid" to prevent the planner from |
| * relying on it for indexscans. Then we wait for all transactions that |
| * could have been modifying the table to terminate. At this point we |
| * know that any subsequently-started transactions will see the index and |
| * insert their new tuples into it. We then take a new reference snapshot |
| * which is passed to validate_index(). Any tuples that are valid according |
| * to this snap, but are not in the index, must be added to the index. |
| * (Any tuples committed live after the snap will be inserted into the |
| * index by their originating transaction. Any tuples committed dead before |
| * the snap need not be indexed, because we will wait out all transactions |
| * that might care about them before we mark the index valid.) |
| * |
| * validate_index() works by first gathering all the TIDs currently in the |
| * index, using a bulkdelete callback that just stores the TIDs and doesn't |
| * ever say "delete it". (This should be faster than a plain indexscan; |
| * also, not all index AMs support full-index indexscan.) Then we sort the |
| * TIDs, and finally scan the table doing a "merge join" against the TID list |
| * to see which tuples are missing from the index. Thus we will ensure that |
| * all tuples valid according to the reference snapshot are in the index. |
| * |
| * Building a unique index this way is tricky: we might try to insert a |
| * tuple that is already dead or is in process of being deleted, and we |
| * mustn't have a uniqueness failure against an updated version of the same |
| * row. We can check the tuple to see if it's already dead and tell |
| * index_insert() not to do the uniqueness check, but that still leaves us |
| * with a race condition against an in-progress update. To handle that, |
| * we expect the index AM to recheck liveness of the to-be-inserted tuple |
| * before it declares a uniqueness error. |
| * |
| * After completing validate_index(), we wait until all transactions that |
| * were alive at the time of the reference snapshot are gone; this is |
| * necessary to be sure there are none left with a serializable snapshot |
| * older than the reference (and hence possibly able to see tuples we did |
| * not index). Then we mark the index valid and commit. |
| * |
| * Doing two full table scans is a brute-force strategy. We could try to be |
| * cleverer, eg storing new tuples in a special area of the table (perhaps |
| * making the table append-only by setting use_fsm). However that would |
| * add yet more locking issues. |
| */ |
| void |
| validate_index(Oid heapId, Oid indexId, Snapshot snapshot) |
| { |
| Relation heapRelation, |
| indexRelation; |
| IndexInfo *indexInfo; |
| IndexVacuumInfo ivinfo; |
| v_i_state state; |
| Oid save_userid; |
| bool save_secdefcxt; |
| |
| /* Open and lock the parent heap relation */ |
| heapRelation = heap_open(heapId, ShareUpdateExclusiveLock); |
| /* And the target index relation */ |
| indexRelation = index_open(indexId, RowExclusiveLock); |
| |
| /* |
| * Fetch info needed for index_insert. (You might think this should be |
| * passed in from DefineIndex, but its copy is long gone due to having |
| * been built in a previous transaction.) |
| */ |
| indexInfo = BuildIndexInfo(indexRelation); |
| |
| /* mark build is concurrent just for consistency */ |
| indexInfo->ii_Concurrent = true; |
| |
| /* |
| * Switch to the table owner's userid, so that any index functions are |
| * run as that user. |
| */ |
| GetUserIdAndContext(&save_userid, &save_secdefcxt); |
| SetUserIdAndContext(heapRelation->rd_rel->relowner, true); |
| |
| /* |
| * Scan the index and gather up all the TIDs into a tuplesort object. |
| */ |
| ivinfo.index = indexRelation; |
| ivinfo.vacuum_full = false; |
| ivinfo.message_level = DEBUG2; |
| ivinfo.num_heap_tuples = -1; |
| ivinfo.extra_oids = NIL; |
| state.tuplesort = NULL; |
| |
| PG_TRY(); |
| { |
| if(gp_enable_mk_sort) |
| state.tuplesort = tuplesort_begin_datum_mk(NULL, |
| TIDOID, |
| TIDLessOperator, |
| maintenance_work_mem, |
| false); |
| else |
| state.tuplesort = tuplesort_begin_datum(TIDOID, |
| TIDLessOperator, |
| maintenance_work_mem, |
| false); |
| |
| state.htups = state.itups = state.tups_inserted = 0; |
| |
| (void) index_bulk_delete(&ivinfo, NULL, |
| validate_index_callback, (void *) &state); |
| |
| /* Execute the sort */ |
| if(gp_enable_mk_sort) |
| { |
| tuplesort_performsort_mk((Tuplesortstate_mk *)state.tuplesort); |
| } |
| else |
| { |
| tuplesort_performsort((Tuplesortstate *) state.tuplesort); |
| } |
| |
| /* |
| * Now scan the heap and "merge" it with the index |
| */ |
| validate_index_heapscan(heapRelation, |
| indexRelation, |
| indexInfo, |
| snapshot, |
| &state); |
| |
| /* Done with tuplesort object */ |
| if(gp_enable_mk_sort) |
| { |
| tuplesort_end_mk((Tuplesortstate_mk *)state.tuplesort); |
| } |
| else |
| { |
| tuplesort_end((Tuplesortstate *) state.tuplesort); |
| } |
| |
| state.tuplesort = NULL; |
| |
| } |
| PG_CATCH(); |
| { |
| /* Clean up the sort state on error */ |
| if (state.tuplesort) |
| { |
| if(gp_enable_mk_sort) |
| { |
| tuplesort_end_mk((Tuplesortstate_mk *)state.tuplesort); |
| } |
| else |
| { |
| tuplesort_end((Tuplesortstate *) state.tuplesort); |
| } |
| state.tuplesort = NULL; |
| } |
| PG_RE_THROW(); |
| } |
| PG_END_TRY(); |
| |
| elog(DEBUG2, |
| "validate_index found %.0f heap tuples, %.0f index tuples; inserted %.0f missing tuples", |
| state.htups, state.itups, state.tups_inserted); |
| |
| /* Restore userid */ |
| SetUserIdAndContext(save_userid, save_secdefcxt); |
| |
| /* Close rels, but keep locks */ |
| index_close(indexRelation, NoLock); |
| heap_close(heapRelation, NoLock); |
| } |
| |
| /* |
| * validate_index_callback - bulkdelete callback to collect the index TIDs |
| */ |
| static bool |
| validate_index_callback(ItemPointer itemptr, void *opaque) |
| { |
| v_i_state *state = (v_i_state *) opaque; |
| |
| if(gp_enable_mk_sort) |
| tuplesort_putdatum_mk((Tuplesortstate_mk *) state->tuplesort, PointerGetDatum(itemptr), false); |
| else |
| tuplesort_putdatum((Tuplesortstate *) state->tuplesort, PointerGetDatum(itemptr), false); |
| |
| state->itups += 1; |
| return false; /* never actually delete anything */ |
| } |
| |
| /* |
| * validate_index_heapscan - second table scan for concurrent index build |
| * |
| * This has much code in common with IndexBuildHeapScan, but it's enough |
| * different that it seems cleaner to have two routines not one. |
| */ |
static void
validate_index_heapscan(Relation heapRelation,
						Relation indexRelation,
						IndexInfo *indexInfo,
						Snapshot snapshot,
						v_i_state *state)
{
	MIRROREDLOCK_BUFMGR_DECLARE;

	HeapScanDesc scan;
	HeapTuple	heapTuple;
	Datum		values[INDEX_MAX_KEYS];
	bool		isnull[INDEX_MAX_KEYS];
	List	   *predicate;
	TupleTableSlot *slot;
	EState	   *estate;
	ExprContext *econtext;

	/*
	 * State variables for the merge: indexcursor is the TID most recently
	 * pulled from the tuplesort (NULL before the first fetch);
	 * tuplesort_empty becomes true once the sorted TID stream is exhausted.
	 */
	ItemPointer indexcursor = NULL;
	bool		tuplesort_empty = false;

	/*
	 * sanity checks
	 */
	Assert(OidIsValid(indexRelation->rd_rel->relam));

	/*
	 * Need an EState for evaluation of index expressions and partial-index
	 * predicates.	Also a slot to hold the current tuple.
	 */
	estate = CreateExecutorState();
	econtext = GetPerTupleExprContext(estate);
	slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));

	/* Arrange for econtext's scan tuple to be the tuple under test */
	econtext->ecxt_scantuple = slot;

	/* Set up execution state for predicate, if any. */
	predicate = (List *)
		ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
						estate);

	/*
	 * Prepare for scan of the base relation.  We need just those tuples
	 * satisfying the passed-in reference snapshot.
	 */
	scan = heap_beginscan(heapRelation, /* relation */
						  snapshot,		/* seeself */
						  0,			/* number of keys */
						  NULL);		/* scan key */

	/*
	 * Scan all tuples matching the snapshot.  Because both the heap scan
	 * and the sorted TID stream are in physical (TID) order, one forward
	 * pass over each suffices to find heap tuples missing from the index.
	 */
	while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		ItemPointer heapcursor = &heapTuple->t_self;

		CHECK_FOR_INTERRUPTS();

		state->htups += 1;

		/*
		 * "merge" by skipping through the index tuples until we find or pass
		 * the current heap tuple.
		 */
		while (!tuplesort_empty &&
			   (!indexcursor ||
				ItemPointerCompare(indexcursor, heapcursor) < 0))
		{
			Datum		ts_val;
			bool		ts_isnull;

			/* tuplesort_getdatum palloc'd the previous TID; release it */
			if (indexcursor)
				pfree(indexcursor);

			if(gp_enable_mk_sort)
				tuplesort_empty = !tuplesort_getdatum_mk((Tuplesortstate_mk *) state->tuplesort,
														 true, &ts_val, &ts_isnull);
			else
				tuplesort_empty = !tuplesort_getdatum((Tuplesortstate *) state->tuplesort,
													  true, &ts_val, &ts_isnull);

			Assert(tuplesort_empty || !ts_isnull);
			indexcursor = (ItemPointer) DatumGetPointer(ts_val);
		}

		if (tuplesort_empty ||
			ItemPointerCompare(indexcursor, heapcursor) > 0)
		{
			/*
			 * We've overshot which means this heap tuple is missing from the
			 * index, so insert it.
			 */
			bool		check_unique;

			MemoryContextReset(econtext->ecxt_per_tuple_memory);

			/* Set up for predicate or expression evaluation */
			ExecStoreGenericTuple(heapTuple, slot, false);

			/*
			 * In a partial index, discard tuples that don't satisfy the
			 * predicate.
			 */
			if (predicate != NIL)
			{
				if (!ExecQual(predicate, econtext, false))
					continue;
			}

			/*
			 * For the current heap tuple, extract all the attributes we use
			 * in this index, and note which are null.	This also performs
			 * evaluation of any expressions needed.
			 */
			FormIndexDatum(indexInfo,
						   slot,
						   estate,
						   values,
						   isnull);

			/*
			 * If the tuple is already committed dead, we still have to put it
			 * in the index (because some xacts might be able to see it), but
			 * we might as well suppress uniqueness checking. This is just an
			 * optimization because the index AM is not supposed to raise a
			 * uniqueness failure anyway.
			 */
			if (indexInfo->ii_Unique)
			{

				// -------- MirroredLock ----------
				MIRROREDLOCK_BUFMGR_LOCK;

				/* must hold a buffer lock to call HeapTupleSatisfiesNow */
				LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);

				if (HeapTupleSatisfiesNow(scan->rs_rd, heapTuple->t_data, scan->rs_cbuf))
					check_unique = true;
				else
					check_unique = false;

				LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);

				MIRROREDLOCK_BUFMGR_UNLOCK;
				// -------- MirroredLock ----------

			}
			else
				check_unique = false;

			/*
			 * You'd think we should go ahead and build the index tuple here,
			 * but some index AMs want to do further processing on the data
			 * first.  So pass the values[] and isnull[] arrays, instead.
			 */
			index_insert(indexRelation,
						 values,
						 isnull,
						 heapcursor,
						 heapRelation,
						 check_unique);

			state->tups_inserted += 1;
		}
	}

	heap_endscan(scan);

	ExecDropSingleTupleTableSlot(slot);

	FreeExecutorState(estate);

	/* These may have been pointing to the now-gone estate */
	indexInfo->ii_ExpressionsState = NIL;
	indexInfo->ii_PredicateState = NIL;
}
| |
| |
| /* |
| * IndexGetRelation: given an index's relation OID, get the OID of the |
| * relation it is an index on. Uses the system cache. |
| */ |
| Oid |
| IndexGetRelation(Oid indexId) |
| { |
| HeapTuple tuple; |
| Form_pg_index index; |
| Oid result; |
| cqContext *pcqCtx; |
| |
| pcqCtx = caql_beginscan( |
| NULL, |
| cql("SELECT * FROM pg_index " |
| " WHERE indexrelid = :1 ", |
| ObjectIdGetDatum(indexId))); |
| |
| tuple = caql_getnext(pcqCtx); |
| |
| if (!HeapTupleIsValid(tuple)) |
| elog(ERROR, "cache lookup failed for index %u", indexId); |
| index = (Form_pg_index) GETSTRUCT(tuple); |
| Assert(index->indexrelid == indexId); |
| |
| result = index->indrelid; |
| |
| caql_endscan(pcqCtx); |
| return result; |
| } |
| |
| /* |
| * createIndexInfoOpaque: create the opaque value in indexInfo |
| * based on the given list of OIDs passed from reindex_index(). |
| * |
 * The extra_oids list contains 2 OID values. They are used by
 * the bitmap indexes to create their internal heap and btree.
 * See reindex_index() for more info.
| */ |
| static void |
| createIndexInfoOpaque(List *extra_oids, |
| bool isBitmapIndex, |
| IndexInfo *indexInfo) |
| { |
| Assert(extra_oids != NULL && |
| list_length(extra_oids) == 2); |
| Assert(indexInfo != NULL); |
| Assert(indexInfo->opaque == NULL); |
| |
| indexInfo->opaque = (void*)palloc0(sizeof(IndexInfoOpaque)); |
| |
| ListCell *lc = list_head(extra_oids); |
| |
| ((IndexInfoOpaque *)indexInfo->opaque)->heapRelfilenode = |
| lfirst_oid(lc); |
| lc = lnext(lc); |
| ((IndexInfoOpaque *)indexInfo->opaque)->indexRelfilenode = |
| lfirst_oid(lc); |
| lc = lnext(lc); |
| |
| #ifdef USE_ASSERT_CHECKING |
| if (isBitmapIndex) |
| { |
| Assert(OidIsValid(((IndexInfoOpaque *)indexInfo->opaque)->heapRelfilenode)); |
| Assert(OidIsValid(((IndexInfoOpaque *)indexInfo->opaque)->indexRelfilenode)); |
| } |
| |
| else |
| { |
| Assert(!OidIsValid(((IndexInfoOpaque *)indexInfo->opaque)->heapRelfilenode)); |
| Assert(!OidIsValid(((IndexInfoOpaque *)indexInfo->opaque)->indexRelfilenode)); |
| } |
| #endif |
| } |
| |
| /* |
| * generateExtraOids: generate the given number of extra Oids. |
| * |
| * If genNewOid is true, all generated OIDs will be valid. Otherwise, |
| * all OIDs will be InvalidOid. |
| */ |
| static List * |
| generateExtraOids(int num_extra_oids, |
| Oid reltablespace, |
| bool relisshared, |
| bool genNewOid) |
| { |
| List *extra_oids = NIL; |
| |
| Assert(num_extra_oids > 0); |
| |
| for (int no = 0; no < num_extra_oids; no++) |
| { |
| Oid newOid = InvalidOid; |
| if (genNewOid) |
| newOid = GetNewRelFileNode(reltablespace, |
| relisshared, |
| NULL, |
| false); |
| |
| extra_oids = lappend_oid(extra_oids, newOid); |
| } |
| |
| return extra_oids; |
| } |
| |
| /* |
| * reindex_index - This routine is used to recreate a single index. |
| * |
| * GPDB: we return the new relfilenode for transmission to QEs. If |
| * newrelfilenode is valid, we use that Oid instead. |
| * |
| * XXX The bitmap index requires two additional oids for its internal |
| * heap and index. We pass those in as extra_oids. If there are no |
| * such oids, this function generates them and pass them out to |
| * the caller. |
| * |
 * The extra_oids list always contains 2 values. If the index is
 * a bitmap index, those two values are valid OIDs. Otherwise,
 * they are InvalidOid.
| */ |
Oid
reindex_index(Oid indexId, Oid newrelfilenode, List **extra_oids)
{
	Relation	iRel,
				heapRelation,
				pg_index;
	Oid			heapId;
	bool		inplace;
	HeapTuple	indexTuple;
	Form_pg_index indexForm;
	Oid			retrelfilenode;
	Oid			namespaceId;
	cqContext	cqc;
	cqContext  *pcqCtx;

	Assert(OidIsValid(indexId));
	Assert(extra_oids != NULL);

	/*
	 * Open and lock the parent heap relation.	ShareLock is sufficient since
	 * we only need to be sure no schema or data changes are going on.
	 */
	heapId = IndexGetRelation(indexId);
	heapRelation = heap_open(heapId, ShareLock);

	/* remembered for the metadata-tracking decision at the bottom */
	namespaceId = RelationGetNamespace(heapRelation);

	/*
	 * Open the target index relation and get an exclusive lock on it, to
	 * ensure that no one else is touching this particular index.
	 */
	iRel = index_open(indexId, AccessExclusiveLock);

	/*
	 * Don't allow reindex on temp tables of other backends ... their local
	 * buffer manager is not going to cope.
	 */
	if (isOtherTempNamespace(RelationGetNamespace(iRel)))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot reindex temporary tables of other sessions")));

	/*
	 * Also check for active uses of the index in the current transaction;
	 * we don't want to reindex underneath an open indexscan.
	 */
	CheckTableNotInUse(iRel, "REINDEX INDEX");

	/*
	 * If it's a shared index, we must do inplace processing (because we have
	 * no way to update relfilenode in other databases).  Otherwise we can do
	 * it the normal transaction-safe way.
	 *
	 * Since inplace processing isn't crash-safe, we only allow it in a
	 * standalone backend.	(In the REINDEX TABLE and REINDEX DATABASE cases,
	 * the caller should have detected this.)
	 *
	 * MPP: If we are in a standalone backend always perform reindex operations
	 * in place.  In postgres this only applies to shared relations, for
	 * Greenplum we apply it to all tables as a means of enabling upgrade to
	 * filerep: it is required to reindex gp_relation_node in place before it
	 * is possible to populate the gp_persistent tables.
	 */
	inplace = iRel->rd_rel->relisshared || !IsUnderPostmaster;

	if (inplace && IsUnderPostmaster)
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("shared index \"%s\" can only be reindexed in stand-alone mode",
						RelationGetRelationName(iRel))));

	/*
	 * The PG_TRY block guarantees ResetReindexProcessing() runs even if
	 * index_build (or anything else in here) errors out.
	 */
	PG_TRY();
	{
		IndexInfo  *indexInfo;

		/* Suppress use of the target index while rebuilding it */
		SetReindexProcessing(heapId, indexId);

		/* Fetch info needed for index_build */
		indexInfo = BuildIndexInfo(iRel);

		if (inplace)
		{
			/* Truncate the actual file (and discard buffers) */

			RelationTruncate(
						iRel,
						0,
						/* markPersistentAsPhysicallyTruncated */ true);

			/* In-place rebuild keeps the existing relfilenode. */
			retrelfilenode = iRel->rd_rel->relfilenode;
			Assert(retrelfilenode == newrelfilenode ||
				   !OidIsValid(newrelfilenode));
		}
		else
		{
			/*
			 * We'll build a new physical relation for the index.
			 */
			if (OidIsValid(newrelfilenode))
			{
				/* QE path: reuse the relfilenode the QD assigned. */
				setNewRelfilenodeToOid(iRel, newrelfilenode);
				retrelfilenode = newrelfilenode;
			}
			else
			{
				retrelfilenode = setNewRelfilenode(iRel);

				Assert(*extra_oids == NULL);

				/*
				 * If this is a bitmap index, we generate two more relfilenodes
				 * for its internal heap and index.
				 */
				*extra_oids = generateExtraOids(2,
												iRel->rd_rel->reltablespace,
												iRel->rd_rel->relisshared,
												RelationIsBitmapIndex(iRel));

			}


			/* Store extra_oids into indexInfo->opaque */
			createIndexInfoOpaque(*extra_oids,
								  RelationIsBitmapIndex(iRel),
								  indexInfo);
		}

		/* Initialize the index and rebuild */
		/* Note: we do not need to re-establish pkey setting */
		index_build(heapRelation, iRel, indexInfo, false);
	}
	PG_CATCH();
	{
		/* Make sure flag gets cleared on error exit */
		ResetReindexProcessing();
		PG_RE_THROW();
	}
	PG_END_TRY();
	ResetReindexProcessing();

	/*
	 * If the index is marked invalid (ie, it's from a failed CREATE INDEX
	 * CONCURRENTLY), we can now mark it valid.  This allows REINDEX to be
	 * used to clean up in such cases.
	 */
	pg_index = heap_open(IndexRelationId, RowExclusiveLock);

	pcqCtx = caql_addrel(cqclr(&cqc), pg_index);

	indexTuple = caql_getfirst(pcqCtx,
							   cql("SELECT * FROM pg_index "
								   " WHERE indexrelid = :1 "
								   " FOR UPDATE ",
								   ObjectIdGetDatum(indexId)));

	if (!HeapTupleIsValid(indexTuple))
		elog(ERROR, "cache lookup failed for index %u", indexId);
	indexForm = (Form_pg_index) GETSTRUCT(indexTuple);

	if (!indexForm->indisvalid)
	{
		indexForm->indisvalid = true;
		caql_update_current(pcqCtx, indexTuple);
		/* and Update indexes (implicit) */
	}
	heap_close(pg_index, RowExclusiveLock);

	/*
	 * Decide whether this REINDEX should be recorded by GPDB metadata
	 * tracking; internal/system/temp namespaces are excluded.
	 */
	{
		bool		doIt = true;
		char	   *subtyp = "REINDEX";

		/* MPP-7576: don't track internal namespace tables */
		switch (namespaceId)
		{
			case PG_CATALOG_NAMESPACE:
				/* MPP-7773: don't track objects in system namespace
				 * if modifying system tables (eg during upgrade)
				 */
				if (allowSystemTableModsDDL)
					doIt = false;
				break;

			case PG_TOAST_NAMESPACE:
			case PG_BITMAPINDEX_NAMESPACE:
			case PG_AOSEGMENT_NAMESPACE:
				doIt = false;
				break;
			default:
				break;
		}

		if (doIt)
			doIt = (!(isAnyTempNamespace(namespaceId)));

		/* MPP-6929: metadata tracking */
		/* MPP-7587: treat as a VACUUM operation, since the index is
		 * rebuilt */
		if (doIt)
			MetaTrackUpdObject(RelationRelationId,
							   indexId,
							   GetUserId(), /* not ownerid */
							   "VACUUM", subtyp
					);
	}


	/* Close rels, but keep locks */
	index_close(iRel, NoLock);
	heap_close(heapRelation, NoLock);

	return retrelfilenode;
}
| |
| /* |
| * reindex_relation - This routine is used to recreate all indexes |
| * of a relation (and optionally its toast relation too, if any). |
| * |
| * Returns true if any indexes were rebuilt. Note that a |
| * CommandCounterIncrement will occur after each index rebuild. |
| * |
| * If build_map is true, build a map of index relation OID -> new relfilenode. |
| * If it is false but *oidmap is valid and we're on a QE, use the |
| * new relfilenode specified in the map. |
| */ |
bool
reindex_relation(Oid relid, bool toast_too, bool aoseg_too, bool aoblkdir_too,
				 List **oidmap, bool build_map)
{
	Relation	rel;
	Oid			toast_relid;
	Oid			aoseg_relid = InvalidOid;
	Oid			aoblkdir_relid = InvalidOid;
	bool		is_pg_class;
	bool		result;
	List	   *indexIds,
			   *doneIndexes;
	ListCell   *indexId;
	bool		relIsAO = false;

	/*
	 * Open and lock the relation.	ShareLock is sufficient since we only need
	 * to prevent schema and data changes in it.
	 */
	rel = heap_open(relid, ShareLock);

	relIsAO = (RelationIsAoRows(rel) || RelationIsParquet(rel));

	toast_relid = rel->rd_rel->reltoastrelid;

	/*
	 * Get the list of index OIDs for this relation.  (We trust to the
	 * relcache to get this with a sequential scan if ignoring system
	 * indexes.)
	 */
	indexIds = RelationGetIndexList(rel);

	/*
	 * reindex_index will attempt to update the pg_class rows for the relation
	 * and index.  If we are processing pg_class itself, we want to make sure
	 * that the updates do not try to insert index entries into indexes we
	 * have not processed yet.	(When we are trying to recover from corrupted
	 * indexes, that could easily cause a crash.) We can accomplish this
	 * because CatalogUpdateIndexes will use the relcache's index list to know
	 * which indexes to update.  We just force the index list to be only the
	 * stuff we've processed.
	 *
	 * It is okay to not insert entries into the indexes we have not processed
	 * yet because all of this is transaction-safe.  If we fail partway
	 * through, the updated rows are dead and it doesn't matter whether they
	 * have index entries.	Also, a new pg_class index will be created with an
	 * entry for its own pg_class row because we do setNewRelfilenode() before
	 * we do index_build().
	 *
	 * Note that we also clear pg_class's rd_oidindex until the loop is done,
	 * so that that index can't be accessed either.  This means we cannot
	 * safely generate new relation OIDs while in the loop; shouldn't be a
	 * problem.
	 */
	is_pg_class = (RelationGetRelid(rel) == RelationRelationId);
	doneIndexes = NIL;

	/* Reindex all the indexes. */
	foreach(indexId, indexIds)
	{
		Oid			indexOid = lfirst_oid(indexId);
		Oid			newrelfilenode;
		Oid			mapoid = InvalidOid;
		List	   *extra_oids = NIL;

		/* Restrict pg_class's usable index list to indexes already rebuilt */
		if (is_pg_class)
			RelationSetIndexList(rel, doneIndexes, InvalidOid);

		/*
		 * On a QE that was handed a prebuilt map, look up the relfilenode
		 * (and any extra OIDs) the QD chose for this index.
		 */
		if (Gp_role == GP_ROLE_EXECUTE && !build_map && oidmap &&
			*oidmap)
		{
			ListCell *c;

			/* Yes, this is O(N^2) but N is small */
			foreach(c, *oidmap)
			{
				List *map = lfirst(c);
				Oid ind = linitial_oid(map);

				if (ind == indexOid)
				{
					mapoid = lsecond_oid(map);

					/*
					 * The map should contain more than 2 OIDs (the OID of the
					 * index and its new relfilenode), to support the bitmap
					 * index, see reindex_index() for more info.  Construct
					 * the extra_oids list by skipping the first two OIDs.
					 */
					Assert(list_length(map) > 2);
					extra_oids = list_copy_tail(map, 2);

					break;
				}
			}
			Assert(OidIsValid(mapoid));
		}

		elog(DEBUG5, "reindexing index with OID %u (supplied %u as new OID)",
			 indexOid, mapoid);

		newrelfilenode = reindex_index(indexOid, mapoid, &extra_oids);

		Assert(!OidIsValid(mapoid) || newrelfilenode == mapoid);

		/* make the rebuilt index visible before the next iteration */
		CommandCounterIncrement();

		/* On the QD, record index OID -> new relfilenode (+ extra OIDs) */
		if (oidmap && build_map)
		{
			List *map = list_make2_oid(indexOid, newrelfilenode);

			Assert(extra_oids != NULL);
			map = list_concat(map, extra_oids);

			*oidmap = lappend(*oidmap, map);
		}

		if (is_pg_class)
			doneIndexes = lappend_oid(doneIndexes, indexOid);
	}

	/* Restore pg_class's full index list now that every index is rebuilt */
	if (is_pg_class)
		RelationSetIndexList(rel, indexIds, ClassOidIndexId);

	/*
	 * Close rel, but continue to hold the lock.
	 */
	heap_close(rel, NoLock);

	result = (indexIds != NIL);

	/*
	 * If the relation has a secondary toast rel, reindex that too while we
	 * still hold the lock on the master table.
	 */
	if (toast_too && OidIsValid(toast_relid))
		result |= reindex_relation(toast_relid, false, false, false, oidmap, build_map);

	/* Obtain the aoseg_relid and aoblkdir_relid if the relation is an AO table. */
	if ((aoseg_too || aoblkdir_too) && relIsAO)
		GetAppendOnlyEntryAuxOids(relid, SnapshotNow,
								  &aoseg_relid, NULL, &aoblkdir_relid, NULL);

	/*
	 * If an AO rel has a secondary segment list rel, reindex that too while we
	 * still hold the lock on the master table.
	 */
	if (aoseg_too && OidIsValid(aoseg_relid))
		result |= reindex_relation(aoseg_relid, false, false, false, oidmap, build_map);

	/*
	 * If an AO rel has a secondary block directory rel, reindex that too while we
	 * still hold the lock on the master table.
	 */
	if (aoblkdir_too && OidIsValid(aoblkdir_relid))
		result |= reindex_relation(aoblkdir_relid, false, false, false, oidmap, build_map);

	return result;
}