| /*------------------------------------------------------------------------- |
| * |
| * cluster.c |
| * CLUSTER a table on an index. This is now also used for VACUUM FULL. |
| * |
| * There is hardly anything left of Paul Brown's original implementation... |
| * |
| * |
| * Portions Copyright (c) 2006-2008, Greenplum inc |
| * Portions Copyright (c) 2012-Present VMware, Inc. or its affiliates. |
| * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994-5, Regents of the University of California |
| * |
| * |
| * IDENTIFICATION |
| * src/backend/commands/cluster.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| #include "postgres.h" |
| |
| #include "access/amapi.h" |
| #include "access/heapam.h" |
| #include "access/multixact.h" |
| #include "access/reloptions.h" |
| #include "access/relscan.h" |
| #include "access/tableam.h" |
| #include "access/toast_internals.h" |
| #include "access/transam.h" |
| #include "access/xact.h" |
| #include "access/xlog.h" |
| #include "catalog/catalog.h" |
| #include "catalog/dependency.h" |
| #include "catalog/gp_matview_aux.h" |
| #include "catalog/heap.h" |
| #include "catalog/index.h" |
| #include "catalog/namespace.h" |
| #include "catalog/objectaccess.h" |
| #include "catalog/pg_appendonly.h" |
| #include "catalog/pg_attribute_encoding.h" |
| #include "catalog/pg_type.h" |
| #include "catalog/pg_namespace.h" |
| #include "catalog/pg_tablespace.h" |
| #include "catalog/pg_am.h" |
| #include "catalog/toasting.h" |
| #include "commands/cluster.h" |
| #include "commands/defrem.h" |
| #include "commands/progress.h" |
| #include "commands/tablecmds.h" |
| #include "commands/vacuum.h" |
| #include "miscadmin.h" |
| #include "optimizer/optimizer.h" |
| #include "pgstat.h" |
| #include "storage/bufmgr.h" |
| #include "storage/lmgr.h" |
| #include "storage/predicate.h" |
| #include "utils/acl.h" |
| #include "utils/builtins.h" |
| #include "utils/faultinjector.h" |
| #include "utils/fmgroids.h" |
| #include "utils/inval.h" |
| #include "utils/lsyscache.h" |
| #include "utils/memutils.h" |
| #include "utils/pg_rusage.h" |
| #include "utils/relmapper.h" |
| #include "utils/snapmgr.h" |
| #include "utils/syscache.h" |
| #include "utils/tuplesort.h" |
| |
| #include "catalog/aocatalog.h" |
| #include "catalog/oid_dispatch.h" |
| #include "cdb/cdbvars.h" |
| #include "cdb/cdbdisp_query.h" |
| #include "cdb/cdboidsync.h" |
| #include "libpq/pqformat.h" |
| |
| /* |
| * This struct is used to pass around the information on tables to be |
| * clustered. We need this so we can make a list of them when invoked without |
| * a specific table/index pair. |
| */ |
| typedef struct |
| { |
| Oid tableOid; |
| Oid indexOid; |
| } RelToCluster; |
| |
| |
| static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose); |
| static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, |
| bool verbose, bool *pSwapToastByContent, |
| TransactionId *pFreezeXid, MultiXactId *pCutoffMulti); |
| static List *get_tables_to_cluster(MemoryContext cluster_context); |
| |
| |
| /*--------------------------------------------------------------------------- |
| * This cluster code allows for clustering multiple tables at once. Because |
| * of this, we cannot just run everything on a single transaction, or we |
| * would be forced to acquire exclusive locks on all the tables being |
| * clustered, simultaneously --- very likely leading to deadlock. |
| * |
| * To solve this we follow a similar strategy to VACUUM code, |
| * clustering each relation in a separate transaction. For this to work, |
| * we need to: |
| * - provide a separate memory context so that we can pass information in |
| * a way that survives across transactions |
| * - start a new transaction every time a new relation is clustered |
| * - check for validity of the information on to-be-clustered relations, |
| * as someone might have deleted a relation behind our back, or |
| * clustered one on a different index |
| * - end the transaction |
| * |
| * The single-relation case does not have any such overhead. |
| * |
| * We also allow a relation to be specified without index. In that case, |
| * the indisclustered bit will be looked up, and an ERROR will be thrown |
| * if there is no index with the bit set. |
| *--------------------------------------------------------------------------- |
| */ |
| void |
| cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel) |
| { |
| ListCell *lc; |
| ClusterParams params = {0}; |
| bool verbose = false; |
| |
| /* Parse option list */ |
| foreach(lc, stmt->params) |
| { |
| DefElem *opt = (DefElem *) lfirst(lc); |
| |
| if (strcmp(opt->defname, "verbose") == 0) |
| verbose = defGetBoolean(opt); |
| else |
| ereport(ERROR, |
| (errcode(ERRCODE_SYNTAX_ERROR), |
| errmsg("unrecognized CLUSTER option \"%s\"", |
| opt->defname), |
| parser_errposition(pstate, opt->location))); |
| } |
| |
| params.options = (verbose ? CLUOPT_VERBOSE : 0); |
| |
| if (stmt->relation != NULL) |
| { |
| /* This is the single-relation case. */ |
| Oid tableOid, |
| indexOid = InvalidOid; |
| Relation rel; |
| |
| /* Find, lock, and check permissions on the table */ |
| tableOid = RangeVarGetRelidExtended(stmt->relation, |
| AccessExclusiveLock, |
| 0, |
| RangeVarCallbackOwnsTable, NULL); |
| rel = table_open(tableOid, NoLock); |
| |
| /* |
| * Reject clustering a remote temp table ... their local buffer |
| * manager is not going to cope. |
| */ |
| if (RELATION_IS_OTHER_TEMP(rel)) |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("cannot cluster temporary tables of other sessions"))); |
| |
| /* |
| * Reject clustering a partitioned table. |
| */ |
| if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("cannot cluster a partitioned table"))); |
| |
| if (stmt->indexname == NULL) |
| { |
| ListCell *index; |
| |
| /* We need to find the index that has indisclustered set. */ |
| foreach(index, RelationGetIndexList(rel)) |
| { |
| indexOid = lfirst_oid(index); |
| if (get_index_isclustered(indexOid)) |
| break; |
| indexOid = InvalidOid; |
| } |
| |
| if (!OidIsValid(indexOid)) |
| ereport(ERROR, |
| (errcode(ERRCODE_UNDEFINED_OBJECT), |
| errmsg("there is no previously clustered index for table \"%s\"", |
| stmt->relation->relname))); |
| } |
| else |
| { |
| /* |
| * The index is expected to be in the same namespace as the |
| * relation. |
| */ |
| indexOid = get_relname_relid(stmt->indexname, |
| rel->rd_rel->relnamespace); |
| if (!OidIsValid(indexOid)) |
| ereport(ERROR, |
| (errcode(ERRCODE_UNDEFINED_OBJECT), |
| errmsg("index \"%s\" for table \"%s\" does not exist", |
| stmt->indexname, stmt->relation->relname))); |
| } |
| |
| /* close relation, keep lock till commit */ |
| table_close(rel, NoLock); |
| |
| /* Do the job. */ |
| /* GPDB_14_MERGE_FIXME: do we need the return value of cluster_rel to dispatch ? */ |
| cluster_rel(tableOid, indexOid, ¶ms); |
| |
| if (Gp_role == GP_ROLE_DISPATCH) |
| { |
| CdbDispatchUtilityStatement((Node *) stmt, |
| DF_CANCEL_ON_ERROR| |
| DF_WITH_SNAPSHOT| |
| DF_NEED_TWO_PHASE, |
| GetAssignedOidsForDispatch(), |
| NULL); |
| } |
| |
| if (IS_QD_OR_SINGLENODE()) |
| { |
| /* |
| * Update view status. |
| * In principle, CLUSTER command won't change the ligical data of |
| * a table, it may change the physical pages by index. |
| * But for Append Agg Plan in SERVERLESS mode, we need to fetch |
| * delta tuples from base table which requires the ability of storage |
| * to distint the pages instead, since latest relative materialized |
| * view REFRESH. |
| */ |
| SetRelativeMatviewAuxStatus(tableOid, |
| MV_DATA_STATUS_UP_REORGANIZED, |
| MV_DATA_STATUS_TRANSFER_DIRECTION_ALL); |
| |
| } |
| } |
| else |
| { |
| /* |
| * This is the "multi relation" case. We need to cluster all tables |
| * that have some index with indisclustered set. |
| */ |
| MemoryContext cluster_context; |
| List *rvs; |
| ListCell *rv; |
| |
| /* |
| * We cannot run this form of CLUSTER inside a user transaction block; |
| * we'd be holding locks way too long. |
| */ |
| PreventInTransactionBlock(isTopLevel, "CLUSTER"); |
| |
| /* |
| * Create special memory context for cross-transaction storage. |
| * |
| * Since it is a child of PortalContext, it will go away even in case |
| * of error. |
| */ |
| cluster_context = AllocSetContextCreate(PortalContext, |
| "Cluster", |
| ALLOCSET_DEFAULT_SIZES); |
| |
| /* |
| * Build the list of relations to cluster. Note that this lives in |
| * cluster_context. |
| */ |
| rvs = get_tables_to_cluster(cluster_context); |
| |
| /* Commit to get out of starting transaction */ |
| PopActiveSnapshot(); |
| CommitTransactionCommand(); |
| |
| /* Ok, now that we've got them all, cluster them one by one */ |
| foreach(rv, rvs) |
| { |
| RelToCluster *rvtc = (RelToCluster *) lfirst(rv); |
| bool dispatch; |
| ClusterParams cluster_params = params; |
| |
| /* Start a new transaction for each relation. */ |
| StartTransactionCommand(); |
| /* functions in indexes may want a snapshot set */ |
| PushActiveSnapshot(GetTransactionSnapshot()); |
| /* Do the job. */ |
| cluster_params.options |= CLUOPT_RECHECK; |
| dispatch = cluster_rel(rvtc->tableOid, rvtc->indexOid, |
| &cluster_params); |
| |
| if (Gp_role == GP_ROLE_DISPATCH && dispatch) |
| { |
| stmt->relation = makeNode(RangeVar); |
| stmt->relation->schemaname = get_namespace_name(get_rel_namespace(rvtc->tableOid)); |
| stmt->relation->relname = get_rel_name(rvtc->tableOid); |
| /* other fields in stmt are same */ |
| CdbDispatchUtilityStatement((Node *) stmt, |
| DF_CANCEL_ON_ERROR| |
| DF_WITH_SNAPSHOT, |
| GetAssignedOidsForDispatch(), |
| NULL); |
| } |
| /* See comments above. */ |
| if (IS_QD_OR_SINGLENODE()) |
| SetRelativeMatviewAuxStatus(rvtc->tableOid, |
| MV_DATA_STATUS_UP_REORGANIZED, |
| MV_DATA_STATUS_TRANSFER_DIRECTION_ALL); |
| |
| PopActiveSnapshot(); |
| CommitTransactionCommand(); |
| } |
| |
| /* Start a new transaction for the cleanup work. */ |
| StartTransactionCommand(); |
| |
| /* Clean up working storage */ |
| MemoryContextDelete(cluster_context); |
| } |
| } |
| |
| /* |
| * cluster_rel |
| * |
| * This clusters the table by creating a new, clustered table and |
| * swapping the relfilenodes of the new table and the old table, so |
| * the OID of the original table is preserved. Thus we do not lose |
| * GRANT, inheritance nor references to this table (this was a bug |
| * in releases through 7.3). |
| * |
| * Indexes are rebuilt too, via REINDEX. Since we are effectively bulk-loading |
| * the new table, it's better to create the indexes afterwards than to fill |
| * them incrementally while we load the table. |
| * |
| * If indexOid is InvalidOid, the table will be rewritten in physical order |
| * instead of index order. This is the new implementation of VACUUM FULL, |
| * and error messages should refer to the operation as VACUUM not CLUSTER. |
| * |
| */ |
| bool |
| cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params) |
| { |
| Relation OldHeap; |
| Oid save_userid; |
| int save_sec_context; |
| int save_nestlevel; |
| bool verbose = ((params->options & CLUOPT_VERBOSE) != 0); |
| bool recheck = ((params->options & CLUOPT_RECHECK) != 0); |
| bool result = false; |
| |
| /* Check for user-requested abort. */ |
| CHECK_FOR_INTERRUPTS(); |
| |
| pgstat_progress_start_command(PROGRESS_COMMAND_CLUSTER, tableOid); |
| if (OidIsValid(indexOid)) |
| pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND, |
| PROGRESS_CLUSTER_COMMAND_CLUSTER); |
| else |
| pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND, |
| PROGRESS_CLUSTER_COMMAND_VACUUM_FULL); |
| |
| /* |
| * We grab exclusive access to the target rel and index for the duration |
| * of the transaction. (This is redundant for the single-transaction |
| * case, since cluster() already did it.) The index lock is taken inside |
| * check_index_is_clusterable. |
| */ |
| OldHeap = try_relation_open(tableOid, AccessExclusiveLock, false); |
| |
| /* If the table has gone away, we can skip processing it */ |
| if (!OldHeap) |
| { |
| pgstat_progress_end_command(); |
| return false; |
| } |
| |
| /* |
| * Switch to the table owner's userid, so that any index functions are run |
| * as that user. Also lock down security-restricted operations and |
| * arrange to make GUC variable changes local to this command. |
| */ |
| GetUserIdAndSecContext(&save_userid, &save_sec_context); |
| SetUserIdAndSecContext(OldHeap->rd_rel->relowner, |
| save_sec_context | SECURITY_RESTRICTED_OPERATION); |
| save_nestlevel = NewGUCNestLevel(); |
| |
| /* |
| * Since we may open a new transaction for each relation, we have to check |
| * that the relation still is what we think it is. |
| * |
| * If this is a single-transaction CLUSTER, we can skip these tests. We |
| * *must* skip the one on indisclustered since it would reject an attempt |
| * to cluster a not-previously-clustered index. |
| */ |
| if (recheck) |
| { |
| /* Check that the user still owns the relation */ |
| if (!pg_class_ownercheck(tableOid, save_userid)) |
| { |
| relation_close(OldHeap, AccessExclusiveLock); |
| goto out; |
| } |
| |
| /* |
| * Silently skip a temp table for a remote session. Only doing this |
| * check in the "recheck" case is appropriate (which currently means |
| * somebody is executing a database-wide CLUSTER), because there is |
| * another check in cluster() which will stop any attempt to cluster |
| * remote temp tables by name. There is another check in cluster_rel |
| * which is redundant, but we leave it for extra safety. |
| */ |
| if (RELATION_IS_OTHER_TEMP(OldHeap)) |
| { |
| relation_close(OldHeap, AccessExclusiveLock); |
| goto out; |
| } |
| |
| if (OidIsValid(indexOid)) |
| { |
| /* |
| * Check that the index still exists |
| */ |
| if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(indexOid))) |
| { |
| relation_close(OldHeap, AccessExclusiveLock); |
| goto out; |
| } |
| |
| /* |
| * Check that the index is still the one with indisclustered set. |
| */ |
| if (!get_index_isclustered(indexOid)) |
| { |
| relation_close(OldHeap, AccessExclusiveLock); |
| goto out; |
| } |
| } |
| } |
| |
| /* |
| * We allow VACUUM FULL, but not CLUSTER, on shared catalogs. CLUSTER |
| * would work in most respects, but the index would only get marked as |
| * indisclustered in the current database, leading to unexpected behavior |
| * if CLUSTER were later invoked in another database. |
| */ |
| if (OidIsValid(indexOid) && OldHeap->rd_rel->relisshared) |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("cannot cluster a shared catalog"))); |
| |
| /* |
| * Don't process temp tables of other backends ... their local buffer |
| * manager is not going to cope. |
| */ |
| if (RELATION_IS_OTHER_TEMP(OldHeap)) |
| { |
| if (OidIsValid(indexOid)) |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("cannot cluster temporary tables of other sessions"))); |
| else |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("cannot vacuum temporary tables of other sessions"))); |
| } |
| |
| /* |
| * Also check for active uses of the relation in the current transaction, |
| * including open scans and pending AFTER trigger events. |
| */ |
| CheckTableNotInUse(OldHeap, OidIsValid(indexOid) ? "CLUSTER" : "VACUUM"); |
| |
| /* Check heap and index are valid to cluster on */ |
| if (OidIsValid(indexOid)) |
| check_index_is_clusterable(OldHeap, indexOid, recheck, AccessExclusiveLock); |
| |
| /* |
| * Quietly ignore the request if this is a materialized view which has not |
| * been populated from its query. No harm is done because there is no data |
| * to deal with, and we don't want to throw an error if this is part of a |
| * multi-relation request -- for example, CLUSTER was run on the entire |
| * database. |
| */ |
| if (OldHeap->rd_rel->relkind == RELKIND_MATVIEW && |
| !RelationIsPopulated(OldHeap)) |
| { |
| relation_close(OldHeap, AccessExclusiveLock); |
| goto out; |
| } |
| |
| /* |
| * All predicate locks on the tuples or pages are about to be made |
| * invalid, because we move tuples around. Promote them to relation |
| * locks. Predicate locks on indexes will be promoted when they are |
| * reindexed. |
| */ |
| TransferPredicateLocksToHeapRelation(OldHeap); |
| |
| /* rebuild_relation does all the dirty work */ |
| rebuild_relation(OldHeap, indexOid, verbose); |
| |
| /* NB: rebuild_relation does table_close() on OldHeap */ |
| |
| result = true; |
| |
| out: |
| /* Roll back any GUC changes executed by index functions */ |
| AtEOXact_GUC(false, save_nestlevel); |
| |
| /* Restore userid and security context */ |
| SetUserIdAndSecContext(save_userid, save_sec_context); |
| |
| pgstat_progress_end_command(); |
| |
| return result; |
| } |
| |
| /* |
| * Verify that the specified heap and index are valid to cluster on |
| * |
| * Side effect: obtains lock on the index. The caller may |
| * in some cases already have AccessExclusiveLock on the table, but |
| * not in all cases so we can't rely on the table-level lock for |
| * protection here. |
| */ |
| void |
| check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck, LOCKMODE lockmode) |
| { |
| Relation OldIndex; |
| |
| OldIndex = index_open(indexOid, lockmode); |
| |
| /* |
| * Check that index is in fact an index on the given relation |
| */ |
| if (OldIndex->rd_index == NULL || |
| OldIndex->rd_index->indrelid != RelationGetRelid(OldHeap)) |
| ereport(ERROR, |
| (errcode(ERRCODE_WRONG_OBJECT_TYPE), |
| errmsg("\"%s\" is not an index for table \"%s\"", |
| RelationGetRelationName(OldIndex), |
| RelationGetRelationName(OldHeap)))); |
| |
| /* Index AM must allow clustering */ |
| if (!OldIndex->rd_indam->amclusterable) |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("cannot cluster on index \"%s\" because access method does not support clustering", |
| RelationGetRelationName(OldIndex)))); |
| |
| /* |
| * Disallow clustering on incomplete indexes (those that might not index |
| * every row of the relation). We could relax this by making a separate |
| * seqscan pass over the table to copy the missing rows, but that seems |
| * expensive and tedious. |
| */ |
| if (!heap_attisnull(OldIndex->rd_indextuple, Anum_pg_index_indpred, NULL)) |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("cannot cluster on partial index \"%s\"", |
| RelationGetRelationName(OldIndex)))); |
| |
| /* |
| * Disallow if index is left over from a failed CREATE INDEX CONCURRENTLY; |
| * it might well not contain entries for every heap row, or might not even |
| * be internally consistent. (But note that we don't check indcheckxmin; |
| * the worst consequence of following broken HOT chains would be that we |
| * might put recently-dead tuples out-of-order in the new table, and there |
| * is little harm in that.) |
| */ |
| if (!OldIndex->rd_index->indisvalid) |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("cannot cluster on invalid index \"%s\"", |
| RelationGetRelationName(OldIndex)))); |
| |
| /* Drop relcache refcnt on OldIndex, but keep lock */ |
| index_close(OldIndex, NoLock); |
| } |
| |
| /* |
| * mark_index_clustered: mark the specified index as the one clustered on |
| * |
| * With indexOid == InvalidOid, will mark all indexes of rel not-clustered. |
| */ |
| void |
| mark_index_clustered(Relation rel, Oid indexOid, bool is_internal) |
| { |
| HeapTuple indexTuple; |
| Form_pg_index indexForm; |
| Relation pg_index; |
| ListCell *index; |
| |
| /* Disallow applying to a partitioned table */ |
| if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("cannot mark index clustered in partitioned table"))); |
| |
| /* |
| * If the index is already marked clustered, no need to do anything. |
| */ |
| if (OidIsValid(indexOid)) |
| { |
| if (get_index_isclustered(indexOid)) |
| return; |
| } |
| |
| /* |
| * Check each index of the relation and set/clear the bit as needed. |
| */ |
| pg_index = table_open(IndexRelationId, RowExclusiveLock); |
| |
| foreach(index, RelationGetIndexList(rel)) |
| { |
| Oid thisIndexOid = lfirst_oid(index); |
| |
| indexTuple = SearchSysCacheCopy1(INDEXRELID, |
| ObjectIdGetDatum(thisIndexOid)); |
| if (!HeapTupleIsValid(indexTuple)) |
| elog(ERROR, "cache lookup failed for index %u", thisIndexOid); |
| indexForm = (Form_pg_index) GETSTRUCT(indexTuple); |
| |
| /* |
| * Unset the bit if set. We know it's wrong because we checked this |
| * earlier. |
| */ |
| if (indexForm->indisclustered) |
| { |
| indexForm->indisclustered = false; |
| CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple); |
| } |
| else if (thisIndexOid == indexOid) |
| { |
| /* this was checked earlier, but let's be real sure */ |
| if (!indexForm->indisvalid) |
| elog(ERROR, "cannot cluster on invalid index %u", indexOid); |
| indexForm->indisclustered = true; |
| CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple); |
| } |
| |
| InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0, |
| InvalidOid, is_internal); |
| |
| heap_freetuple(indexTuple); |
| } |
| |
| table_close(pg_index, RowExclusiveLock); |
| } |
| |
| /* |
| * rebuild_relation: rebuild an existing relation in index or physical order |
| * |
| * OldHeap: table to rebuild --- must be opened and exclusive-locked! |
| * indexOid: index to cluster by, or InvalidOid to rewrite in physical order. |
| * |
| * NB: this routine closes OldHeap at the right time; caller should not. |
| */ |
| static void |
| rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose) |
| { |
| Oid tableOid = RelationGetRelid(OldHeap); |
| Oid accessMethod = OldHeap->rd_rel->relam; |
| Oid tableSpace = OldHeap->rd_rel->reltablespace; |
| Oid OIDNewHeap; |
| char relpersistence; |
| bool is_system_catalog; |
| bool swap_toast_by_content; |
| TransactionId frozenXid; |
| MultiXactId cutoffMulti; |
| /* |
| * GPDB_12_MERGE_FIXME: We use specific bool in abstract code. This should |
| * be somehow hidden by table am api or necessity of this switch should be |
| * revisited. |
| */ |
| bool is_ao = RelationIsAppendOptimized(OldHeap); |
| |
| /* Mark the correct index as clustered */ |
| if (OidIsValid(indexOid)) |
| mark_index_clustered(OldHeap, indexOid, true); |
| |
| /* Remember info about rel before closing OldHeap */ |
| relpersistence = OldHeap->rd_rel->relpersistence; |
| is_system_catalog = IsSystemRelation(OldHeap); |
| |
| /* Close relcache entry, but keep lock until transaction commit */ |
| table_close(OldHeap, NoLock); |
| |
| /* Create the transient table that will receive the re-ordered data */ |
| OIDNewHeap = make_new_heap(tableOid, tableSpace, |
| accessMethod, |
| relpersistence, |
| AccessExclusiveLock, |
| true /* createAoBlockDirectory */, |
| false); |
| |
| /* Copy the heap data into the new table in the desired order */ |
| copy_table_data(OIDNewHeap, tableOid, indexOid, verbose, |
| &swap_toast_by_content, &frozenXid, &cutoffMulti); |
| |
| /* |
| * Swap the physical files of the target and transient tables, then |
| * rebuild the target's indexes and throw away the transient table. |
| */ |
| finish_heap_swap(tableOid, OIDNewHeap, is_system_catalog, |
| swap_toast_by_content, |
| !is_ao /* swap_stats */, |
| false, true, |
| frozenXid, cutoffMulti, |
| relpersistence); |
| } |
| |
/*
 * make_column_name
 *		Build a column name by concatenating prefix and colname.
 *
 * The result is palloc'd in the current memory context; callers (see
 * make_new_heap_with_colname) are expected to pfree it when done.
 */
static char *
make_column_name(char *prefix, char *colname)
{
	/*
	 * psprintf is the idiomatic one-liner for "format into a fresh palloc'd
	 * string"; it replaces the previous manual StringInfo dance.
	 */
	return psprintf("%s%s", prefix, colname);
}
| |
| /* |
| * Create the transient table that will be filled with new data during |
| * CLUSTER, ALTER TABLE, and similar operations. The transient table |
| * duplicates the logical structure of the OldHeap; but will have the |
| * specified physical storage properties NewTableSpace, NewAccessMethod, and |
| * relpersistence. |
| * |
| * Specify a colprefix can create a table with different colname, incase |
| * column conflict issue happens in REFRESH MATERIALIZED VIEW operation. |
| * |
| * After this, the caller should load the new heap with transferred/modified |
| * data, then call finish_heap_swap to complete the operation. |
| */ |
| Oid |
| make_new_heap_with_colname(Oid OIDOldHeap, Oid NewTableSpace, Oid NewAccessMethod, |
| char relpersistence, |
| LOCKMODE lockmode, |
| bool createAoBlockDirectory, |
| bool makeCdbPolicy, |
| char *colprefix) |
| { |
| TupleDesc OldHeapDesc; |
| char NewHeapName[NAMEDATALEN]; |
| Oid OIDNewHeap; |
| Oid toastid; |
| Relation OldHeap; |
| HeapTuple tuple; |
| Datum reloptions; |
| bool isNull; |
| Oid namespaceid; |
| |
| OldHeap = table_open(OIDOldHeap, lockmode); |
| OldHeapDesc = RelationGetDescr(OldHeap); |
| |
| if (colprefix != NULL) |
| { |
| for (int i = 0; i < OldHeapDesc->natts; i++) |
| { |
| Form_pg_attribute attr = TupleDescAttr(OldHeapDesc, i); |
| char *attname = make_column_name(colprefix, NameStr(attr->attname)); |
| namestrcpy(&(attr->attname), attname); |
| pfree(attname); |
| } |
| } |
| /* |
| * Note that the NewHeap will not receive any of the defaults or |
| * constraints associated with the OldHeap; we don't need 'em, and there's |
| * no reason to spend cycles inserting them into the catalogs only to |
| * delete them. |
| */ |
| |
| /* |
| * But we do want to use reloptions of the old heap for new heap. |
| */ |
| tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(OIDOldHeap)); |
| if (!HeapTupleIsValid(tuple)) |
| elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap); |
| reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions, |
| &isNull); |
| if (isNull) |
| reloptions = (Datum) 0; |
| |
| /* |
| * Unless we are changing access method between heap and AO/CO, look further. |
| */ |
| /* |
| * GPDB: some considerations when AM is going to change between heap and AO/CO: |
| * |
| * If user has also requested setting new reloptions, the new reloptions should have |
| * replaced the old ones at this point. We just need to reuse those on the new table. |
| * |
| * If user does NOT request new reloptions, we should discard the existing reloptions. |
| * And one more consideration if we are changing the table from heap to AO: we should |
| * also pick up options from gp_default_storage_options, just like CREATE TABLE does. |
| */ |
| if (RelationIsHeap(OldHeap) && IsAccessMethodAO(NewAccessMethod)) |
| { |
| /* |
| * Heap to AO/CO: filter out any reloptions that belong to heap, |
| * and pick up from gp_default_storage_options. |
| */ |
| int numoptions; |
| relopt_value *options; |
| |
| /* |
| * Process the reloptions as for AO tables. And validate=false will silently |
| * filter out any reloptions that belong to heap. |
| */ |
| StdRdOptions *stdRdOptions = (StdRdOptions *)default_reloptions(reloptions, |
| false, /* validate */ |
| RELOPT_KIND_APPENDOPTIMIZED); |
| |
| /* Pick up from gp_default_storage_options. */ |
| options = parseRelOptions(reloptions, false, RELOPT_KIND_APPENDOPTIMIZED, &numoptions); |
| validate_and_refill_options(stdRdOptions, options, numoptions, RELOPT_KIND_APPENDOPTIMIZED, true); |
| |
| /* Update the reloptions string. */ |
| reloptions = transformAOStdRdOptions(stdRdOptions, reloptions); |
| } |
| else if (RelationIsAppendOptimized(OldHeap) && NewAccessMethod == HEAP_TABLE_AM_OID) |
| { |
| /* |
| * AO/CO to Heap: unfortunately we don't have a convenient routine to transform |
| * heap StdRdOptions back to reloption string. So we take a slightly different |
| * approach than the case of heap to AO/CO: we check if there is any AO reloptions: |
| * |
| * (1) If there is, just discard them (AO options do not apply to heap). |
| * (2) If there is none, that means we either have replaced it with heap reloptions |
| * or the reloptions field is just empty, and either way we will pass the existing |
| * reloptions on to the new table. |
| * |
| * This is possible because at this point we only have either AO/AOCO reloptions or |
| * heap reloptions, but we cannot have both (see ATExecSetRelOptions). |
| */ |
| Datum aoreloptions = (Datum) 0; |
| StdRdOptions *stdRdOptions = (StdRdOptions *)default_reloptions(reloptions, |
| false, /* validate */ |
| RELOPT_KIND_APPENDOPTIMIZED); |
| |
| /* |
| * Transform the stdRdOptions to get a reloptions string, from which we will |
| * know if there is any AO reloptions. |
| */ |
| aoreloptions = transformAOStdRdOptions(stdRdOptions, aoreloptions); |
| if (aoreloptions != (Datum) 0) |
| reloptions = (Datum) 0; |
| } |
| |
| if (relpersistence == RELPERSISTENCE_TEMP) |
| namespaceid = LookupCreationNamespace("pg_temp"); |
| else |
| namespaceid = RelationGetNamespace(OldHeap); |
| |
| /* |
| * Create the new heap, using a temporary name in the same namespace as |
| * the existing table. NOTE: there is some risk of collision with user |
| * relnames. Working around this seems more trouble than it's worth; in |
| * particular, we can't create the new heap in a different namespace from |
| * the old, or we will have problems with the TEMP status of temp tables. |
| * |
| * Note: the new heap is not a shared relation, even if we are rebuilding |
| * a shared rel. However, we do make the new heap mapped if the source is |
| * mapped. This simplifies swap_relation_files, and is absolutely |
| * necessary for rebuilding pg_class, for reasons explained there. |
| */ |
| snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", OIDOldHeap); |
| |
| OIDNewHeap = heap_create_with_catalog(NewHeapName, |
| namespaceid, |
| NewTableSpace, |
| InvalidOid, |
| InvalidOid, |
| InvalidOid, |
| OldHeap->rd_rel->relowner, |
| NewAccessMethod, |
| OldHeapDesc, |
| NIL, |
| RELKIND_RELATION, |
| relpersistence, |
| false, |
| RelationIsMapped(OldHeap), |
| ONCOMMIT_NOOP, |
| makeCdbPolicy? OldHeap->rd_cdbpolicy: NULL,/*CDB*/ |
| reloptions, |
| false, |
| true, |
| true, |
| OIDOldHeap, |
| NULL, |
| true); |
| Assert(OIDNewHeap != InvalidOid); |
| |
| ReleaseSysCache(tuple); |
| |
| /* |
| * Advance command counter so that the newly-created relation's catalog |
| * tuples will be visible to table_open. |
| */ |
| CommandCounterIncrement(); |
| |
| /* |
| * If necessary, create a TOAST table for the new relation, or an Append |
| * Only segment table. |
| * |
| * If the relation doesn't have a TOAST table already, we can't need one |
| * for the new relation. The other way around is possible though: if some |
| * wide columns have been dropped, NewHeapCreateToastTable can decide that |
| * no TOAST table is needed for the new table. |
| * |
| * Note that NewHeapCreateToastTable ends with CommandCounterIncrement, so |
| * that the TOAST table will be visible for insertion. |
| */ |
| toastid = OldHeap->rd_rel->reltoastrelid; |
| if (OidIsValid(toastid)) |
| { |
| /* keep the existing toast table's reloptions, if any */ |
| tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(toastid)); |
| if (!HeapTupleIsValid(tuple)) |
| elog(ERROR, "cache lookup failed for relation %u", toastid); |
| reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions, |
| &isNull); |
| if (isNull) |
| reloptions = (Datum) 0; |
| |
| NewHeapCreateToastTable(OIDNewHeap, reloptions, lockmode, toastid); |
| |
| ReleaseSysCache(tuple); |
| } |
| |
| if (IsAccessMethodAO(NewAccessMethod)) |
| NewRelationCreateAOAuxTables(OIDNewHeap, createAoBlockDirectory); |
| |
| CacheInvalidateRelcacheByRelid(OIDNewHeap); |
| |
| /* |
| * Copy the pg_attribute_encoding entries over if new table needs them. |
| * Note that in the case of AM change from heap/ao to aoco, we still need |
| * to do this since we created those entries for the heap/ao table at the |
| * phase 2 of ATSETAM (see ATExecCmd). |
| */ |
| if (NewAccessMethod == AO_COLUMN_TABLE_AM_OID) |
| cloneAttributeEncoding(OIDOldHeap, |
| OIDNewHeap, |
| RelationGetNumberOfAttributes(OldHeap)); |
| |
| table_close(OldHeap, NoLock); |
| |
| return OIDNewHeap; |
| } |
| |
| Oid |
| make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, Oid NewAccessMethod, |
| char relpersistence, |
| LOCKMODE lockmode, |
| bool createAoBlockDirectory, |
| bool makeCdbPolicy) |
| { |
| return make_new_heap_with_colname(OIDOldHeap, NewTableSpace, NewAccessMethod, |
| relpersistence, lockmode, createAoBlockDirectory, makeCdbPolicy, |
| NULL); |
| |
| } |
| |
| /* |
| * Do the physical copying of table data. |
| * |
| * There are three output parameters: |
| * *pSwapToastByContent is set true if toast tables must be swapped by content. |
| * *pFreezeXid receives the TransactionId used as freeze cutoff point. |
| * *pCutoffMulti receives the MultiXactId used as a cutoff point. |
| */ |
static void
copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
				bool *pSwapToastByContent, TransactionId *pFreezeXid,
				MultiXactId *pCutoffMulti)
{
	Relation	NewHeap,
				OldHeap,
				OldIndex;
	Relation	relRelation;
	HeapTuple	reltup;
	Form_pg_class relform;
	TupleDesc	oldTupDesc PG_USED_FOR_ASSERTS_ONLY;
	TupleDesc	newTupDesc PG_USED_FOR_ASSERTS_ONLY;
	TransactionId OldestXmin;
	TransactionId FreezeXid;
	MultiXactId MultiXactCutoff;
	bool		use_sort;
	double		num_tuples = 0,
				tups_vacuumed = 0,
				tups_recently_dead = 0;
	BlockNumber num_pages;
	int			elevel = verbose ? INFO : DEBUG2;
	PGRUsage	ru0;

	/* Start resource-usage tracking for the final progress/log report */
	pg_rusage_init(&ru0);

	/*
	 * Open the relations we need.
	 */
	NewHeap = table_open(OIDNewHeap, AccessExclusiveLock);
	OldHeap = table_open(OIDOldHeap, AccessExclusiveLock);
	if (OidIsValid(OIDOldIndex))
		OldIndex = index_open(OIDOldIndex, AccessExclusiveLock);
	else
		OldIndex = NULL;

	/*
	 * Their tuple descriptors should be exactly alike, but here we only need
	 * assume that they have the same number of columns.
	 */
	oldTupDesc = RelationGetDescr(OldHeap);
	newTupDesc = RelationGetDescr(NewHeap);
	Assert(newTupDesc->natts == oldTupDesc->natts);

	/*
	 * If the OldHeap has a toast table, get lock on the toast table to keep
	 * it from being vacuumed.  This is needed because autovacuum processes
	 * toast tables independently of their main tables, with no lock on the
	 * latter.  If an autovacuum were to start on the toast table after we
	 * compute our OldestXmin below, it would use a later OldestXmin, and then
	 * possibly remove as DEAD toast tuples belonging to main tuples we think
	 * are only RECENTLY_DEAD.  Then we'd fail while trying to copy those
	 * tuples.
	 *
	 * We don't need to open the toast relation here, just lock it.  The lock
	 * will be held till end of transaction.
	 */
	if (OldHeap->rd_rel->reltoastrelid)
		LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock);

	/*
	 * If both tables have TOAST tables, perform toast swap by content.  It is
	 * possible that the old table has a toast table but the new one doesn't,
	 * if toastable columns have been dropped.  In that case we have to do
	 * swap by links.  This is okay because swap by content is only essential
	 * for system catalogs, and we don't support schema changes for them.
	 */
	if (OldHeap->rd_rel->reltoastrelid && NewHeap->rd_rel->reltoastrelid)
	{
		*pSwapToastByContent = true;

		/*
		 * When doing swap by content, any toast pointers written into NewHeap
		 * must use the old toast table's OID, because that's where the toast
		 * data will eventually be found.  Set this up by setting rd_toastoid.
		 * This also tells toast_save_datum() to preserve the toast value
		 * OIDs, which we want so as not to invalidate toast pointers in
		 * system catalog caches, and to avoid making multiple copies of a
		 * single toast value.
		 *
		 * Note that we must hold NewHeap open until we are done writing data,
		 * since the relcache will not guarantee to remember this setting once
		 * the relation is closed.  Also, this technique depends on the fact
		 * that no one will try to read from the NewHeap until after we've
		 * finished writing it and swapping the rels --- otherwise they could
		 * follow the toast pointers to the wrong place.  (It would actually
		 * work for values copied over from the old toast table, but not for
		 * any values that we toast which were previously not toasted.)
		 */
		NewHeap->rd_toastoid = OldHeap->rd_rel->reltoastrelid;
	}
	else
		*pSwapToastByContent = false;

	/*
	 * Compute xids used to freeze and weed out dead tuples and multixacts.
	 * Since we're going to rewrite the whole table anyway, there's no reason
	 * not to be aggressive about this.  (Zero "age" arguments request the
	 * most aggressive limits.)
	 */
	vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0,
						  &OldestXmin, &FreezeXid, NULL, &MultiXactCutoff,
						  NULL);

	/*
	 * FreezeXid will become the table's new relfrozenxid, and that mustn't go
	 * backwards, so take the max.
	 */
	if (TransactionIdIsValid(OldHeap->rd_rel->relfrozenxid) &&
		TransactionIdPrecedes(FreezeXid, OldHeap->rd_rel->relfrozenxid))
		FreezeXid = OldHeap->rd_rel->relfrozenxid;

	/*
	 * MultiXactCutoff, similarly, shouldn't go backwards either.
	 */
	if (MultiXactIdIsValid(OldHeap->rd_rel->relminmxid) &&
		MultiXactIdPrecedes(MultiXactCutoff, OldHeap->rd_rel->relminmxid))
		MultiXactCutoff = OldHeap->rd_rel->relminmxid;

	/*
	 * Decide whether to use an indexscan or seqscan-and-optional-sort to scan
	 * the OldHeap.  We know how to use a sort to duplicate the ordering of a
	 * btree index, and will use seqscan-and-sort for that case if the planner
	 * tells us it's cheaper.  Otherwise, always indexscan if an index is
	 * provided, else plain seqscan.
	 */
	if (OldIndex != NULL && IsIndexAccessMethod(OldIndex->rd_rel->relam, BTREE_AM_OID))
		use_sort = plan_cluster_use_sort(OIDOldHeap, OIDOldIndex);
	else
		use_sort = false;

	/* Log what we're doing */
	if (OldIndex != NULL && !use_sort)
		ereport(elevel,
				(errmsg("clustering \"%s.%s\" using index scan on \"%s\"",
						get_namespace_name(RelationGetNamespace(OldHeap)),
						RelationGetRelationName(OldHeap),
						RelationGetRelationName(OldIndex))));
	else if (use_sort)
		ereport(elevel,
				(errmsg("clustering \"%s.%s\" using sequential scan and sort",
						get_namespace_name(RelationGetNamespace(OldHeap)),
						RelationGetRelationName(OldHeap))));
	else
		ereport(elevel,
				(errmsg("vacuuming \"%s.%s\"",
						get_namespace_name(RelationGetNamespace(OldHeap)),
						RelationGetRelationName(OldHeap))));

	/*
	 * Hand off the actual copying to AM specific function, the generic code
	 * cannot know how to deal with visibility across AMs. Note that this
	 * routine is allowed to set FreezeXid / MultiXactCutoff to different
	 * values (e.g. because the AM doesn't use freezing).
	 */
	table_relation_copy_for_cluster(OldHeap, NewHeap, OldIndex, use_sort,
									OldestXmin, &FreezeXid, &MultiXactCutoff,
									&num_tuples, &tups_vacuumed,
									&tups_recently_dead);

	/* return selected values to caller, get set as relfrozenxid/minmxid */
	*pFreezeXid = FreezeXid;
	*pCutoffMulti = MultiXactCutoff;

	/* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */
	NewHeap->rd_toastoid = InvalidOid;

	num_pages = RelationGetNumberOfBlocks(NewHeap);

	/* Log what we did */
	ereport(elevel,
			(errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
					RelationGetRelationName(OldHeap),
					tups_vacuumed, num_tuples,
					RelationGetNumberOfBlocks(OldHeap)),
			 errdetail("%.0f dead row versions cannot be removed yet.\n"
					   "%s.",
					   tups_recently_dead,
					   pg_rusage_show(&ru0))));

	if (OldIndex != NULL)
		index_close(OldIndex, NoLock);
	table_close(OldHeap, NoLock);
	table_close(NewHeap, NoLock);

	/* Update pg_class to reflect the correct values of pages and tuples. */
	relRelation = table_open(RelationRelationId, RowExclusiveLock);

	reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDNewHeap));
	if (!HeapTupleIsValid(reltup))
		elog(ERROR, "cache lookup failed for relation %u", OIDNewHeap);
	relform = (Form_pg_class) GETSTRUCT(reltup);

	relform->relpages = num_pages;
	relform->reltuples = num_tuples;

	/* Don't update the stats for pg_class.  See swap_relation_files. */
	if (OIDOldHeap != RelationRelationId)
		CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
	else
		CacheInvalidateRelcacheByTuple(reltup);

	/* Clean up. */
	heap_freetuple(reltup);
	table_close(relRelation, RowExclusiveLock);

	/* Make the update visible */
	CommandCounterIncrement();
}
| |
| /* |
| * Change dependency links for objects that are being swapped. |
| * |
| * 'tabletype' can be "TOAST table", "aoseg", "aoblkdir". |
| * It is used for printing error messages. |
| */ |
| static void |
| changeDependencyLinks(Oid baseOid1, Oid baseOid2, Oid oid1, Oid oid2, |
| const char *tabletype) |
| { |
| ObjectAddress baseobject, newobject; |
| long count; |
| |
| /* Delete old dependencies */ |
| if (oid1) |
| { |
| count = deleteDependencyRecordsFor(RelationRelationId, oid1, false); |
| if (count != 1) |
| elog(ERROR, "expected one dependency record for %s table, found %ld", |
| tabletype, count); |
| } |
| |
| if (oid2) |
| { |
| count = deleteDependencyRecordsFor(RelationRelationId, oid2, false); |
| if (count != 1) |
| elog(ERROR, "expected one dependency record for %s table, found %ld", |
| tabletype, count); |
| } |
| |
| /* Register new dependencies */ |
| baseobject.classId = RelationRelationId; |
| baseobject.objectSubId = 0; |
| newobject.classId = RelationRelationId; |
| newobject.objectSubId = 0; |
| |
| if (oid1) |
| { |
| baseobject.objectId = baseOid1; |
| newobject.objectId = oid1; |
| recordDependencyOn(&newobject, &baseobject, DEPENDENCY_INTERNAL); |
| } |
| |
| if (oid2) |
| { |
| baseobject.objectId = baseOid2; |
| newobject.objectId = oid2; |
| recordDependencyOn(&newobject, &baseobject, DEPENDENCY_INTERNAL); |
| } |
| } |
| |
| /* |
| * Swap the physical files of two given relations. |
| * |
| * We swap the physical identity (reltablespace, relfilenode) while keeping the |
| * same logical identities of the two relations. relpersistence is also |
| * swapped, which is critical since it determines where buffers live for each |
| * relation. |
| * |
| * We can swap associated TOAST data in either of two ways: recursively swap |
| * the physical content of the toast tables (and their indexes), or swap the |
| * TOAST links in the given relations' pg_class entries. The former is needed |
| * to manage rewrites of shared catalogs (where we cannot change the pg_class |
| * links) while the latter is the only way to handle cases in which a toast |
| * table is added or removed altogether. |
| * |
| * Additionally, the first relation is marked with relfrozenxid set to |
| * frozenXid. It seems a bit ugly to have this here, but the caller would |
| * have to do it anyway, so having it here saves a heap_update. Note: in |
| * the swap-toast-links case, we assume we don't need to change the toast |
| * table's relfrozenxid: the new version of the toast table should already |
| * have relfrozenxid set to RecentXmin, which is good enough. |
| * |
| * Lastly, if r2 and its toast table and toast index (if any) are mapped, |
| * their OIDs are emitted into mapped_tables[]. This is hacky but beats |
| * having to look the information up again later in finish_heap_swap. |
| * |
| * GPDB: also swap aoseg, aoblkdir links. |
| */ |
void
swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
					bool swap_toast_by_content,
					bool swap_stats,
					bool is_internal,
					TransactionId frozenXid,
					MultiXactId cutoffMulti,
					Oid *mapped_tables)
{
	Relation	relRelation,
				rel;
	HeapTuple	reltup1,
				reltup2;
	Form_pg_class relform1,
				relform2;
	Oid			relfilenode1,
				relfilenode2;
	Oid			swaptemp;
	char		swptmpchr;

	/* We need writable copies of both pg_class tuples. */
	relRelation = table_open(RelationRelationId, RowExclusiveLock);

	reltup1 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r1));
	if (!HeapTupleIsValid(reltup1))
		elog(ERROR, "cache lookup failed for relation %u", r1);
	relform1 = (Form_pg_class) GETSTRUCT(reltup1);

	reltup2 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r2));
	if (!HeapTupleIsValid(reltup2))
		elog(ERROR, "cache lookup failed for relation %u", r2);
	relform2 = (Form_pg_class) GETSTRUCT(reltup2);

	/*
	 * GPDB: if either relation is append-optimized, adjust/swap the AO
	 * catalog entries as well (see ATAOEntries for details).
	 */
	if (IsAccessMethodAO(relform1->relam) || IsAccessMethodAO(relform2->relam))
		ATAOEntries(relform1, relform2, frozenXid, cutoffMulti);

	/* Also swap reloptions if we are swapping between heap and AO/AOCO tables. */
	if ((relform1->relam == HEAP_TABLE_AM_OID && IsAccessMethodAO(relform2->relam)) ||
		(relform2->relam == HEAP_TABLE_AM_OID && IsAccessMethodAO(relform1->relam)))
	{
		Datum		val[Natts_pg_class] = {0};
		bool		null[Natts_pg_class] = {0};
		bool		repl[Natts_pg_class] = {0};
		bool		isNull;

		/* Copy rel2's reloptions (possibly NULL) into rel1's tuple */
		val[Anum_pg_class_reloptions - 1] = SysCacheGetAttr(RELOID, reltup2, Anum_pg_class_reloptions, &isNull);
		null[Anum_pg_class_reloptions - 1] = isNull;
		repl[Anum_pg_class_reloptions - 1] = true;

		/* heap_modify_tuple returns a fresh copy; re-fetch relform1 from it */
		reltup1 = heap_modify_tuple(reltup1, RelationGetDescr(relRelation),
									val, null, repl);
		relform1 = (Form_pg_class) GETSTRUCT(reltup1);
	}

	/* InvalidOid relfilenode means the relation is mapped */
	relfilenode1 = relform1->relfilenode;
	relfilenode2 = relform2->relfilenode;

	if (OidIsValid(relfilenode1) && OidIsValid(relfilenode2))
	{
		/*
		 * Normal non-mapped relations: swap relfilenodes, reltablespaces,
		 * relpersistence
		 */
		Assert(!target_is_pg_class);

		swaptemp = relform1->relfilenode;
		relform1->relfilenode = relform2->relfilenode;
		relform2->relfilenode = swaptemp;

		swaptemp = relform1->reltablespace;
		relform1->reltablespace = relform2->reltablespace;
		relform2->reltablespace = swaptemp;

		/* GPDB: also swap the access method, to support ALTER TABLE SET AM */
		swaptemp = relform1->relam;
		relform1->relam = relform2->relam;
		relform2->relam = swaptemp;

		swptmpchr = relform1->relpersistence;
		relform1->relpersistence = relform2->relpersistence;
		relform2->relpersistence = swptmpchr;

		/* Also swap toast links, if we're swapping by links */
		if (!swap_toast_by_content)
		{
			swaptemp = relform1->reltoastrelid;
			relform1->reltoastrelid = relform2->reltoastrelid;
			relform2->reltoastrelid = swaptemp;
		}
	}
	else
	{
		/*
		 * Mapped-relation case.  Here we have to swap the relation mappings
		 * instead of modifying the pg_class columns.  Both must be mapped.
		 */
		if (OidIsValid(relfilenode1) || OidIsValid(relfilenode2))
			elog(ERROR, "cannot swap mapped relation \"%s\" with non-mapped relation",
				 NameStr(relform1->relname));

		/*
		 * We can't change the tablespace nor persistence of a mapped rel, and
		 * we can't handle toast link swapping for one either, because we must
		 * not apply any critical changes to its pg_class row.  These cases
		 * should be prevented by upstream permissions tests, so these checks
		 * are non-user-facing emergency backstop.
		 */
		if (relform1->reltablespace != relform2->reltablespace)
			elog(ERROR, "cannot change tablespace of mapped relation \"%s\"",
				 NameStr(relform1->relname));
		if (relform1->relpersistence != relform2->relpersistence)
			elog(ERROR, "cannot change persistence of mapped relation \"%s\"",
				 NameStr(relform1->relname));
		if (relform1->relam != relform2->relam)
			elog(ERROR, "cannot change access method of mapped relation \"%s\"",
				 NameStr(relform1->relname));
		if (!swap_toast_by_content &&
			(relform1->reltoastrelid || relform2->reltoastrelid))
			elog(ERROR, "cannot swap toast by links for mapped relation \"%s\"",
				 NameStr(relform1->relname));

		/*
		 * Fetch the mappings --- shouldn't fail, but be paranoid
		 */
		relfilenode1 = RelationMapOidToFilenode(r1, relform1->relisshared);
		if (!OidIsValid(relfilenode1))
			elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
				 NameStr(relform1->relname), r1);
		relfilenode2 = RelationMapOidToFilenode(r2, relform2->relisshared);
		if (!OidIsValid(relfilenode2))
			elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
				 NameStr(relform2->relname), r2);

		/*
		 * Send replacement mappings to relmapper.  Note these won't actually
		 * take effect until CommandCounterIncrement.
		 */
		RelationMapUpdateMap(r1, relfilenode2, relform1->relisshared, false);
		RelationMapUpdateMap(r2, relfilenode1, relform2->relisshared, false);

		/* Pass OIDs of mapped r2 tables back to caller */
		*mapped_tables++ = r2;
	}

	/*
	 * Recognize that rel1's relfilenode (swapped from rel2) is new in this
	 * subtransaction. The rel2 storage (swapped from rel1) may or may not be
	 * new.
	 */
	{
		Relation	rel1,
					rel2;

		rel1 = relation_open(r1, NoLock);
		rel2 = relation_open(r2, NoLock);
		rel2->rd_createSubid = rel1->rd_createSubid;
		rel2->rd_newRelfilenodeSubid = rel1->rd_newRelfilenodeSubid;
		rel2->rd_firstRelfilenodeSubid = rel1->rd_firstRelfilenodeSubid;
		RelationAssumeNewRelfilenode(rel1);
		relation_close(rel1, NoLock);
		relation_close(rel2, NoLock);
	}

	/*
	 * In the case of a shared catalog, these next few steps will only affect
	 * our own database's pg_class row; but that's okay, because they are all
	 * noncritical updates.  That's also an important fact for the case of a
	 * mapped catalog, because it's possible that we'll commit the map change
	 * and then fail to commit the pg_class update.
	 */

	/* set rel1's frozen Xid and minimum MultiXid */
	if (relform1->relkind != RELKIND_INDEX)
	{
		Assert(!TransactionIdIsValid(frozenXid) ||
			   TransactionIdIsNormal(frozenXid));
		relform1->relfrozenxid = frozenXid;
		Assert(MultiXactIdIsValid(cutoffMulti));
		relform1->relminmxid = cutoffMulti;
	}
	/*
	 * Cloudberry: append-optimized tables do not have a valid relfrozenxid.
	 * Overwrite the entry for both relations.
	 */
	if (relform1->relkind != RELKIND_INDEX && IsAccessMethodAO(relform1->relam))
		relform1->relfrozenxid = InvalidTransactionId;
	if (relform2->relkind != RELKIND_INDEX && IsAccessMethodAO(relform2->relam))
		relform2->relfrozenxid = InvalidTransactionId;

	/* swap size statistics too, since new rel has freshly-updated stats */
	if (swap_stats)
	{
		int32		swap_pages;
		float4		swap_tuples;
		int32		swap_allvisible;

		swap_pages = relform1->relpages;
		relform1->relpages = relform2->relpages;
		relform2->relpages = swap_pages;

		swap_tuples = relform1->reltuples;
		relform1->reltuples = relform2->reltuples;
		relform2->reltuples = swap_tuples;

		swap_allvisible = relform1->relallvisible;
		relform1->relallvisible = relform2->relallvisible;
		relform2->relallvisible = swap_allvisible;
	}

	/*
	 * Swap auxiliary tables if the table AM has non-standard structure.
	 * See the details of the callback swap_relation_files.
	 */
	if ((relform1->relkind == RELKIND_RELATION ||
		 relform1->relkind == RELKIND_MATVIEW)
		&& (relform1->relam == PAX_AM_OID ||
			relform2->relam == PAX_AM_OID))
	{
		const TableAmRoutine *tam;
		Oid			relam;

		relam = relform1->relam;
		if (relam != relform2->relam)
			elog(ERROR, "PAX not allow swap relation files for different AM");

		tam = GetTableAmRoutineByAmId(relam);
		if (tam->swap_relation_files)
			tam->swap_relation_files(r1, r2, frozenXid, cutoffMulti);
	}

	/*
	 * Update the tuples in pg_class --- unless the target relation of the
	 * swap is pg_class itself.  In that case, there is zero point in making
	 * changes because we'd be updating the old data that we're about to throw
	 * away.  Because the real work being done here for a mapped relation is
	 * just to change the relation map settings, it's all right to not update
	 * the pg_class rows in this case. The most important changes will instead
	 * performed later, in finish_heap_swap() itself.
	 */
	if (!target_is_pg_class)
	{
		CatalogIndexState indstate;

		indstate = CatalogOpenIndexes(relRelation);
		CatalogTupleUpdateWithInfo(relRelation, &reltup1->t_self, reltup1,
								   indstate);
		CatalogTupleUpdateWithInfo(relRelation, &reltup2->t_self, reltup2,
								   indstate);
		CatalogCloseIndexes(indstate);

		/*
		 * Increment counter to reflect the AM change as the caller might soon
		 * build the new relation descriptor which expects consistent AM and aux
		 * tables. This shouldn't be needed for other cases as of now, especially
		 * not for critical catalogs such as pg_attribute.
		 */
		if (relform1->relam != relform2->relam)
			CommandCounterIncrement();
	}
	else
	{
		/* no update ... but we do still need relcache inval */
		CacheInvalidateRelcacheByTuple(reltup1);
		CacheInvalidateRelcacheByTuple(reltup2);
	}

	/*
	 * Post alter hook for modified relations. The change to r2 is always
	 * internal, but r1 depends on the invocation context.
	 */
	InvokeObjectPostAlterHookArg(RelationRelationId, r1, 0,
								 InvalidOid, is_internal);
	InvokeObjectPostAlterHookArg(RelationRelationId, r2, 0,
								 InvalidOid, true);

	/*
	 * If we have toast tables associated with the relations being swapped,
	 * deal with them too.
	 */
	if (relform1->reltoastrelid || relform2->reltoastrelid)
	{
		if (swap_toast_by_content)
		{
			if (relform1->reltoastrelid && relform2->reltoastrelid)
			{
				/* Recursively swap the contents of the toast tables */
				swap_relation_files(relform1->reltoastrelid,
									relform2->reltoastrelid,
									target_is_pg_class,
									swap_toast_by_content,
									swap_stats,
									is_internal,
									frozenXid,
									cutoffMulti,
									mapped_tables);
			}
			else
			{
				/* caller messed up */
				elog(ERROR, "cannot swap toast files by content when there's only one");
			}
		}
		else
		{
			/*
			 * We swapped the ownership links, so we need to change dependency
			 * data to match.
			 *
			 * NOTE: it is possible that only one table has a toast table.
			 *
			 * NOTE: at present, a TOAST table's only dependency is the one on
			 * its owning table.  If more are ever created, we'd need to use
			 * something more selective than deleteDependencyRecordsFor() to
			 * get rid of just the link we want.
			 */

			/*
			 * We disallow this case for system catalogs, to avoid the
			 * possibility that the catalog we're rebuilding is one of the
			 * ones the dependency changes would change.  It's too late to be
			 * making any data changes to the target catalog.
			 */
			if (IsSystemClass(r1, relform1))
				elog(ERROR, "cannot swap toast files by links for system catalogs");

			/* Delete old dependencies */
			changeDependencyLinks(r1, r2,
								  relform1->reltoastrelid, relform2->reltoastrelid,
								  "TOAST");
		}
	}

#ifdef USE_ASSERT_CHECKING
	/*
	 * Check with assert if AO table's toast table kept existing relfrozenxid unchanged.
	 *
	 * CLUSTER operation on append-optimized tables does not
	 * compute freeze limit (frozenXid) because AO tables do not
	 * have relfrozenxid. The toast tables need to keep existing
	 * relfrozenxid value unchanged in this case.
	 */
	if (swap_toast_by_content
		&& frozenXid == InvalidTransactionId
		&& relform1->relkind == RELKIND_TOASTVALUE
		&& relform2->relkind == RELKIND_TOASTVALUE)
	{
		Assert(relform1->relfrozenxid == relform2->relfrozenxid);
	}
#endif

	/*
	 * If we're swapping two toast tables by content, do the same for their
	 * valid index. The swap can actually be safely done only if the relations
	 * have indexes.
	 */
	if (swap_toast_by_content &&
		relform1->relkind == RELKIND_TOASTVALUE &&
		relform2->relkind == RELKIND_TOASTVALUE)
	{
		Oid			toastIndex1,
					toastIndex2;

		/* Get valid index for each relation */
		toastIndex1 = toast_get_valid_index(r1,
											AccessExclusiveLock);
		toastIndex2 = toast_get_valid_index(r2,
											AccessExclusiveLock);

		swap_relation_files(toastIndex1,
							toastIndex2,
							target_is_pg_class,
							swap_toast_by_content,
							swap_stats,
							is_internal,
							InvalidTransactionId,
							InvalidMultiXactId,
							mapped_tables);
	}

	/* Send statistics from QE to QD */
	if (Gp_role == GP_ROLE_EXECUTE && swap_stats && !IsSystemClass(r1, relform1))
	{
		rel = relation_open(r1, AccessShareLock);

		vac_send_relstats_to_qd(rel,
								relform1->relpages,
								relform1->reltuples,
								relform1->relallvisible);

		relation_close(rel, AccessShareLock);
	}
	/* Clean up. */
	heap_freetuple(reltup1);
	heap_freetuple(reltup2);

	table_close(relRelation, RowExclusiveLock);

	/*
	 * Close both relcache entries' smgr links.  We need this kluge because
	 * both links will be invalidated during upcoming CommandCounterIncrement.
	 * Whichever of the rels is the second to be cleared will have a dangling
	 * reference to the other's smgr entry.  Rather than trying to avoid this
	 * by ordering operations just so, it's easiest to close the links first.
	 * (Fortunately, since one of the entries is local in our transaction,
	 * it's sufficient to clear out our own relcache this way; the problem
	 * cannot arise for other backends when they see our update on the
	 * non-transient relation.)
	 *
	 * Caution: the placement of this step interacts with the decision to
	 * handle toast rels by recursion.  When we are trying to rebuild pg_class
	 * itself, the smgr close on pg_class must happen after all accesses in
	 * this function.
	 */
	RelationCloseSmgrByOid(r1);
	RelationCloseSmgrByOid(r2);
}
| |
| /* |
| * Remove the transient table that was built by make_new_heap, and finish |
| * cleaning up (including rebuilding all indexes on the old heap). |
| */ |
void
finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
				 bool is_system_catalog,
				 bool swap_toast_by_content,
				 bool swap_stats,
				 bool check_constraints,
				 bool is_internal,
				 TransactionId frozenXid,
				 MultiXactId cutoffMulti,
				 char newrelpersistence)
{
	ObjectAddress object;
	Oid			mapped_tables[4];
	int			reindex_flags;
	ReindexParams reindex_params = {0};
	int			i;

	/* Report that we are now swapping relation files */
	pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
								 PROGRESS_CLUSTER_PHASE_SWAP_REL_FILES);

	/* Zero out possible results from swap_relation_files */
	memset(mapped_tables, 0, sizeof(mapped_tables));

	/*
	 * Swap the contents of the heap relations (including any toast tables).
	 * Also set old heap's relfrozenxid to frozenXid.
	 */
	swap_relation_files(OIDOldHeap, OIDNewHeap,
						(OIDOldHeap == RelationRelationId),
						swap_toast_by_content,
						swap_stats,
						is_internal,
						frozenXid, cutoffMulti, mapped_tables);

	/* GPDB: fault-injection point for regression tests */
	SIMPLE_FAULT_INJECTOR("after_swap_relation_files");

	/*
	 * If it's a system catalog, queue a sinval message to flush all catcaches
	 * on the catalog when we reach CommandCounterIncrement.
	 */
	if (is_system_catalog)
		CacheInvalidateCatalog(OIDOldHeap);

	/*
	 * Rebuild each index on the relation (but not the toast table, which is
	 * all-new at this point).  It is important to do this before the DROP
	 * step because if we are processing a system catalog that will be used
	 * during DROP, we want to have its indexes available.  There is no
	 * advantage to the other order anyway because this is all transactional,
	 * so no chance to reclaim disk space before commit.  We do not need a
	 * final CommandCounterIncrement() because reindex_relation does it.
	 *
	 * Note: because index_build is called via reindex_relation, it will never
	 * set indcheckxmin true for the indexes.  This is OK even though in some
	 * sense we are building new indexes rather than rebuilding existing ones,
	 * because the new heap won't contain any HOT chains at all, let alone
	 * broken ones, so it can't be necessary to set indcheckxmin.
	 */
	reindex_flags = REINDEX_REL_SUPPRESS_INDEX_USE;
	if (check_constraints)
		reindex_flags |= REINDEX_REL_CHECK_CONSTRAINTS;

	/*
	 * Ensure that the indexes have the same persistence as the parent
	 * relation.
	 */
	if (newrelpersistence == RELPERSISTENCE_UNLOGGED)
		reindex_flags |= REINDEX_REL_FORCE_INDEXES_UNLOGGED;
	else if (newrelpersistence == RELPERSISTENCE_PERMANENT)
		reindex_flags |= REINDEX_REL_FORCE_INDEXES_PERMANENT;

	/* Report that we are now reindexing relations */
	pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
								 PROGRESS_CLUSTER_PHASE_REBUILD_INDEX);

	reindex_relation(OIDOldHeap, reindex_flags, &reindex_params);

	/* Report that we are now doing clean up */
	pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
								 PROGRESS_CLUSTER_PHASE_FINAL_CLEANUP);

	/*
	 * If the relation being rebuilt is pg_class, swap_relation_files()
	 * couldn't update pg_class's own pg_class entry (check comments in
	 * swap_relation_files()), thus relfrozenxid was not updated. That's
	 * annoying because a potential reason for doing a VACUUM FULL is a
	 * imminent or actual anti-wraparound shutdown.  So, now that we can
	 * access the new relation using its indices, update relfrozenxid.
	 * pg_class doesn't have a toast relation, so we don't need to update the
	 * corresponding toast relation. Note that there's little point moving all
	 * relfrozenxid updates here since swap_relation_files() needs to write to
	 * pg_class for non-mapped relations anyway.
	 */
	if (OIDOldHeap == RelationRelationId)
	{
		Relation	relRelation;
		HeapTuple	reltup;
		Form_pg_class relform;

		relRelation = table_open(RelationRelationId, RowExclusiveLock);

		reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDOldHeap));
		if (!HeapTupleIsValid(reltup))
			elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
		relform = (Form_pg_class) GETSTRUCT(reltup);

		relform->relfrozenxid = frozenXid;
		relform->relminmxid = cutoffMulti;

		CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);

		table_close(relRelation, RowExclusiveLock);
	}

	/* Destroy new heap with old filenode */
	object.classId = RelationRelationId;
	object.objectId = OIDNewHeap;
	object.objectSubId = 0;

	/*
	 * The new relation is local to our transaction and we know nothing
	 * depends on it, so DROP_RESTRICT should be OK.
	 */
	performDeletion(&object, DROP_RESTRICT, PERFORM_DELETION_INTERNAL);

	/* performDeletion does CommandCounterIncrement at end */

	/*
	 * Now we must remove any relation mapping entries that we set up for the
	 * transient table, as well as its toast table and toast index if any. If
	 * we fail to do this before commit, the relmapper will complain about new
	 * permanent map entries being added post-bootstrap.
	 */
	for (i = 0; OidIsValid(mapped_tables[i]); i++)
		RelationMapRemoveMapping(mapped_tables[i]);

	/*
	 * At this point, everything is kosher except that, if we did toast swap
	 * by links, the toast table's name corresponds to the transient table.
	 * The name is irrelevant to the backend because it's referenced by OID,
	 * but users looking at the catalogs could be confused.  Rename it to
	 * prevent this problem.
	 *
	 * Note no lock required on the relation, because we already hold an
	 * exclusive lock on it.
	 */
	if (!swap_toast_by_content)
	{
		Relation	newrel;

		newrel = table_open(OIDOldHeap, NoLock);
		if (OidIsValid(newrel->rd_rel->reltoastrelid))
		{
			Oid			toastidx;
			char		NewToastName[NAMEDATALEN];

			/* Get the associated valid index to be renamed */
			toastidx = toast_get_valid_index(newrel->rd_rel->reltoastrelid,
											 NoLock);

			/* rename the toast table ... */
			snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u",
					 OIDOldHeap);
			RenameRelationInternal(newrel->rd_rel->reltoastrelid,
								   NewToastName, true, false);

			/* ... and its valid index too. */
			snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u_index",
					 OIDOldHeap);

			RenameRelationInternal(toastidx,
								   NewToastName, true, true);

			/*
			 * Reset the relrewrite for the toast. The command-counter
			 * increment is required here as we are about to update
			 * the tuple that is updated as part of RenameRelationInternal.
			 */
			CommandCounterIncrement();
			ResetRelRewrite(newrel->rd_rel->reltoastrelid);
		}
		relation_close(newrel, NoLock);
	}

	/* if it's not a catalog table, clear any missing attribute settings */
	if (!is_system_catalog)
	{
		Relation	newrel;

		newrel = table_open(OIDOldHeap, NoLock);
		RelationClearMissing(newrel);
		relation_close(newrel, NoLock);
	}
}
| |
| |
| /* |
| * Get a list of tables that the current user owns and |
| * have indisclustered set. Return the list in a List * of RelToCluster |
| * (stored in the specified memory context), each one giving the tableOid |
| * and the indexOid on which the table is already clustered. |
| */ |
| static List * |
| get_tables_to_cluster(MemoryContext cluster_context) |
| { |
| Relation indRelation; |
| TableScanDesc scan; |
| ScanKeyData entry; |
| HeapTuple indexTuple; |
| Form_pg_index index; |
| MemoryContext old_context; |
| RelToCluster *rvtc; |
| List *rvs = NIL; |
| |
| /* |
| * Get all indexes that have indisclustered set and are owned by |
| * appropriate user. |
| */ |
| indRelation = table_open(IndexRelationId, AccessShareLock); |
| ScanKeyInit(&entry, |
| Anum_pg_index_indisclustered, |
| BTEqualStrategyNumber, F_BOOLEQ, |
| BoolGetDatum(true)); |
| scan = table_beginscan_catalog(indRelation, 1, &entry); |
| while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL) |
| { |
| index = (Form_pg_index) GETSTRUCT(indexTuple); |
| |
| if (!pg_class_ownercheck(index->indrelid, GetUserId())) |
| continue; |
| |
| /* |
| * We have to build the list in a different memory context so it will |
| * survive the cross-transaction processing |
| */ |
| old_context = MemoryContextSwitchTo(cluster_context); |
| |
| rvtc = (RelToCluster *) palloc(sizeof(RelToCluster)); |
| rvtc->tableOid = index->indrelid; |
| rvtc->indexOid = index->indexrelid; |
| rvs = lappend(rvs, rvtc); |
| |
| MemoryContextSwitchTo(old_context); |
| } |
| table_endscan(scan); |
| |
| relation_close(indRelation, AccessShareLock); |
| |
| return rvs; |
| } |