| /*------------------------------------------------------------------------- |
| * |
| * appendonlywriter.c |
| * routines for selecting AO segment for inserts. |
| * |
| * |
| * Note: This is also used by AOCS tables. |
| * |
| * Portions Copyright (c) 2008, Greenplum Inc |
| * Portions Copyright (c) 2012-Present VMware, Inc. or its affiliates. |
| * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * |
| * IDENTIFICATION |
| * src/backend/access/appendonly/appendonlywriter.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| |
| #include "postgres.h" |
| |
| #include "access/appendonly_compaction.h" |
| #include "access/appendonlytid.h" /* AOTupleId_MaxRowNum */ |
| #include "access/appendonlywriter.h" |
| #include "access/aocssegfiles.h" /* AOCS */ |
| #include "access/heapam.h" /* heap_open */ |
| #include "access/transam.h" |
| #include "access/xact.h" |
| #include "catalog/indexing.h" |
| #include "catalog/pg_am.h" |
| #include "catalog/pg_appendonly.h" |
| #include "catalog/pg_authid.h" |
| #include "cdb/cdbvars.h" |
| #include "libpq-fe.h" |
| #include "miscadmin.h" |
| #include "nodes/pathnodes.h" |
| #include "storage/bufmgr.h" |
| #include "storage/lmgr.h" |
| #include "utils/faultinjector.h" |
| #include "utils/fmgroids.h" |
| #include "utils/guc.h" |
| #include "utils/snapmgr.h" |
| #include "utils/syscache.h" |
| |
| #define SEGFILE_CAPACITY_THRESHOLD 0.9 |
| |
| /* |
| * local functions |
| */ |
| static int choose_segno_internal(Relation rel, List *avoid_segnos, choose_segno_mode mode); |
| static int num_non_existing_segfiles(Relation rel, bool *existing_segnos, List *avoid_segnos); |
| static int choose_new_segfile(Relation rel, bool *used, List *avoid_segnos); |
| static void get_aoseg_fields(Relation rel, Relation pg_aoseg_rel, HeapTuple tuple, |
| int32 *segno, int64 *tupcount, int16 *state, int16 *formatversion); |
| |
| /* |
| * segfileMaxRowThreshold |
| * |
| * Returns the row count threshold - when a segfile more than this number of |
| * rows we don't allow inserting more data into it anymore. |
| */ |
| static int64 |
| segfileMaxRowThreshold(void) |
| { |
| int64 maxallowed = (int64) AOTupleId_MaxRowNum; |
| |
| if (maxallowed < 0) |
| ereport(ERROR, |
| (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), |
| errmsg("int64 out of range"))); |
| |
| return (SEGFILE_CAPACITY_THRESHOLD * maxallowed); |
| } |
| |
| typedef struct |
| { |
| int32 segno; |
| ItemPointerData ctid; |
| float8 tupcount; |
| } candidate_segment; |
| |
| /* |
| * Compare candidate segments on tuple count. |
| */ |
| static int |
| compare_candidates(const void *a, const void *b) |
| { |
| candidate_segment *ca = (candidate_segment *) a; |
| candidate_segment *cb = (candidate_segment *) b; |
| |
| if (ca->tupcount < cb->tupcount) |
| return -1; |
| else if (ca->tupcount > cb->tupcount) |
| return 1; |
| else |
| { |
| /* On tie, prefer lower-numbered segment */ |
| if (ca->segno < cb->segno) |
| return -1; |
| else |
| { |
| Assert(ca->segno > cb->segno); |
| return 1; |
| } |
| } |
| } |
| |
| /* |
| * Lock an existing segfile for writing. |
| * |
| * The caller should ensure that the segfile is available for writing, |
| * otherwise this will error out. Typical usage is to pass segno=0 on |
| * a newly-created relation, e.g. in CREATE TABLE AS. |
| * |
| * The locking and logic for whether a segfile can be used is mostly |
| * the same as in choose_segfile_internal(), but we already know which |
| * segfile we want. |
| */ |
| void |
| LockSegnoForWrite(Relation rel, int segno) |
| { |
| Relation pg_aoseg_rel; |
| TupleDesc pg_aoseg_dsc; |
| SysScanDesc aoscan; |
| HeapTuple tuple; |
| Snapshot snapshot; |
| Snapshot appendOnlyMetaDataSnapshot; |
| Oid segrelid; |
| bool found = false; |
| |
| if (Debug_appendonly_print_segfile_choice) |
| ereport(LOG, |
| (errmsg("LockSegNoForWrite: Locking segno %d for append-only relation \"%s\"", |
| segno, RelationGetRelationName(rel)))); |
| |
| /* |
| * The algorithm below for choosing a target segment is not concurrent-safe. |
| * Grab a lock to serialize. |
| */ |
| LockDatabaseObject(rel->rd_locator.dbOid, (Oid)rel->rd_locator.relNumber, 0, ExclusiveLock); |
| |
| appendOnlyMetaDataSnapshot = RegisterSnapshot(GetCatalogSnapshot(InvalidOid)); |
| GetAppendOnlyEntryAuxOids(rel, |
| &segrelid, NULL, NULL, NULL, NULL); |
| /* |
| * Now pick a segment that is not in use, and is not over the allowed |
| * size threshold (90% full). |
| */ |
| pg_aoseg_rel = table_open(segrelid, AccessShareLock); |
| pg_aoseg_dsc = RelationGetDescr(pg_aoseg_rel); |
| |
| /* |
| * Obtain the snapshot that is taken at the beginning of the transaction. |
| * If a tuple is visible to this snapshot, and it hasn't been updated since |
| * (that's checked implicitly by heap_lock_tuple()), it's visible to any |
| * snapshot in this backend, and can be used as insertion target. We can't |
| * simply call GetTransactionSnapshot() here because it will create a new |
| * distributed snapshot for non-serializable transaction isolation level, |
| * and it may be too late. |
| */ |
| snapshot = GetOldestSnapshot(); |
| if (snapshot == NULL) |
| snapshot = GetTransactionSnapshot(); |
| |
| if (Debug_appendonly_print_segfile_choice) |
| { |
| elog(LOG, "usedByConcurrentTransaction: TransactionXmin = %u, xmin = %u, xmax = %u, myxid = %u", |
| TransactionXmin, snapshot->xmin, snapshot->xmax, GetCurrentTransactionIdIfAny()); |
| LogDistributedSnapshotInfo(snapshot, "Used snapshot: "); |
| } |
| |
| aoscan = systable_beginscan(pg_aoseg_rel, InvalidOid, false, snapshot, 0, NULL); |
| while ((tuple = systable_getnext(aoscan)) != NULL) |
| { |
| int32 this_segno; |
| int64 tupcount; |
| int16 state; |
| int16 formatversion; |
| |
| get_aoseg_fields(rel, pg_aoseg_rel, tuple, |
| &this_segno, &tupcount, &state, &formatversion); |
| |
| if (segno != this_segno) |
| continue; |
| |
| if (state != AOSEG_STATE_DEFAULT) |
| elog(ERROR, "segfile %d is in unexpected state %d", segno, state); |
| |
| /* If the ao segment is full, can't use it */ |
| if (tupcount > segfileMaxRowThreshold()) |
| elog(ERROR, "segfile %d is full", segno); |
| |
| /* Skip using the ao segment if not latest version (except as a compaction target) */ |
| if (formatversion != AOSegfileFormatVersion_GetLatest()) |
| elog(ERROR, "segfile %d is not of the latest version", segno); |
| |
| found = true; |
| break; |
| } |
| |
| if (!found) |
| { |
| /* create it! */ |
| if (RelationIsAoRows(rel)) |
| InsertInitialSegnoEntry(rel, segno); |
| else |
| InsertInitialAOCSFileSegInfo(rel, segno, |
| RelationGetNumberOfAttributes(rel), segrelid); |
| |
| /* the tuple was locked by InsertInitial already */ |
| } |
| /* |
| * If we have already used this segment in this transaction, no need |
| * to look further. We can continue to use it. We should already hold |
| * a tuple lock on the pg_aoseg row, too. |
| */ |
| else if (HeapTupleHeaderGetXmin(tuple->t_data) == GetCurrentTransactionId()) |
| { |
| } |
| else |
| { |
| /* this segno is available and not full. Try to lock it. */ |
| HeapTupleData locktup; |
| Buffer buf = InvalidBuffer; |
| TM_FailureData hufd; |
| TM_Result result; |
| |
| locktup.t_self = tuple->t_self; |
| result = heap_lock_tuple(pg_aoseg_rel, &locktup, |
| GetCurrentCommandId(true), |
| LockTupleExclusive, |
| LockWaitSkip, |
| false, /* follow_updates */ |
| &buf, |
| &hufd); |
| if (BufferIsValid(buf)) |
| ReleaseBuffer(buf); |
| if (result != TM_Ok) |
| elog(ERROR, "could not lock segfile %d", segno); |
| } |
| |
| /* OK, we have the aoseg tuple locked for us. */ |
| systable_endscan(aoscan); |
| |
| UnlockDatabaseObject(rel->rd_locator.dbOid, (Oid)rel->rd_locator.relNumber, 0, ExclusiveLock); |
| |
| heap_close(pg_aoseg_rel, AccessShareLock); |
| |
| UnregisterSnapshot(appendOnlyMetaDataSnapshot); |
| |
| /* success! */ |
| } |
| |
| int |
| ChooseSegnoForWrite(Relation rel) |
| { |
| int chosen_segno; |
| |
| if (Debug_appendonly_print_segfile_choice) |
| ereport(LOG, |
| (errmsg("ChooseSegnoForWrite: Choosing a segfile for relation \"%s\"", |
| RelationGetRelationName(rel)))); |
| |
| chosen_segno = choose_segno_internal(rel, NIL, CHOOSE_MODE_WRITE); |
| |
| if (chosen_segno == -1) |
| ereport(ERROR, |
| (errcode(ERRCODE_INSUFFICIENT_RESOURCES), |
| (errmsg("could not find segment file to use for inserting into relation \"%s\"", |
| RelationGetRelationName(rel))))); |
| return chosen_segno; |
| } |
| |
| int |
| ChooseSegnoForWriteMultiFile(Relation rel, List *avoid_segnos) |
| { |
| int chosen_segno; |
| |
| if (Debug_appendonly_print_segfile_choice) |
| ereport(LOG, |
| (errmsg("ChooseSegnoForWrite: Choosing a segfile for relation \"%s\"", |
| RelationGetRelationName(rel)))); |
| |
| chosen_segno = choose_segno_internal(rel, avoid_segnos, CHOOSE_MODE_WRITE); |
| |
| if (chosen_segno == -1) |
| ereport(ERROR, |
| (errcode(ERRCODE_INSUFFICIENT_RESOURCES), |
| (errmsg("could not find segment file to use for inserting into relation \"%s\"", |
| RelationGetRelationName(rel))))); |
| return chosen_segno; |
| } |
| |
| /* |
| * Select a segfile to write surviving tuples to, when doing VACUUM compaction. |
| */ |
| int |
| ChooseSegnoForCompactionWrite(Relation rel, List *avoid_segnos) |
| { |
| if (Debug_appendonly_print_segfile_choice) |
| ereport(LOG, |
| (errmsg("ChooseSegnoForCompactionWrite: Choosing a segfile for relation \"%s\"", |
| RelationGetRelationName(rel)))); |
| |
| return choose_segno_internal(rel, avoid_segnos, CHOOSE_MODE_COMPACTION_WRITE); |
| } |
| |
| /* |
| * Select a segfile to compact, during VACUUM. |
| */ |
| int |
| ChooseSegnoForCompaction(Relation rel, List *avoid_segnos) |
| { |
| if (Debug_appendonly_print_segfile_choice) |
| ereport(LOG, |
| (errmsg("ChooseSegnoForCompaction: Choosing a segfile to compact in relation \"%s\"", |
| RelationGetRelationName(rel)))); |
| |
| return choose_segno_internal(rel, avoid_segnos, CHOOSE_MODE_COMPACTION_TARGET); |
| } |
| |
| /* |
| * Reserved segno is special: it is inserted as a regular tuple (not frozen) |
| * in gp_fastsequence to leverage MVCC for cleanup in case of abort. Reserved |
| * segno should be chosen for insert when the insert command is part of the |
| * same transaction that created the table. See |
| * InsertInitialFastSequenceEntries for more details. |
| */ |
| bool |
| ShouldUseReservedSegno(Relation rel, choose_segno_mode mode) |
| { |
| HeapTuple tuple; |
| TransactionId xmin; |
| |
| /* |
| * Reserved segno can only be chosen for non-vacuum cases because vacuum |
| * cannot be executed from inside a transaction. |
| */ |
| if (mode != CHOOSE_MODE_WRITE) |
| return false; |
| |
| tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(RelationGetRelid(rel))); |
| if (!HeapTupleIsValid(tuple)) |
| elog(ERROR, "unable to find relation entry in pg_class for %s", |
| RelationGetRelationName(rel)); |
| |
| xmin = HeapTupleHeaderGetXmin(tuple->t_data); |
| ReleaseSysCache(tuple); |
| |
| return TransactionIdIsCurrentTransactionId(xmin); |
| } |
| |
| |
| /* |
| * Decide which segment number should be used to write into during the COPY, |
| * INSERT, or VACUUM operation we're executing. This contains the common |
| * logic for all three ChooseSegno* variants. |
| * |
| * The rules for which segfiles can be selected and which ones are preferred |
| * depend on the mode: |
| * |
| * - In WRITE mode, pick any existing segment, preferring tuples with lower |
| * tupcount. If they're all in use, create a new one. |
| * |
| * - In COMPACTION_WRITE mode, prefer existing segments with tupcount=0. If |
| * none are available, create a new segfile. If a new segfile cannot be |
| * created either, then reuse an existing segfile with non-zero tupcount. |
| * |
| * - In COMPACTION_TARGET mode, only existings segments with non-zero tupcount |
| * are chosen. |
| * |
| * If 'avoid_segnos' is non-empty, we will not choose any of those segments as |
| * the target. |
| * |
| * The return value is a segment file number to use for inserting by each |
| * segdb into its local AO table. It can be -1 no suitable existing segfile |
| * was found and a new one could not be created either. The returned segfile |
| * is locked for this transaction. |
| */ |
| static int |
| choose_segno_internal(Relation rel, List *avoid_segnos, choose_segno_mode mode) |
| { |
| Relation pg_aoseg_rel; |
| TupleDesc pg_aoseg_dsc; |
| int i; |
| int32 chosen_segno = -1; |
| candidate_segment candidates[MAX_AOREL_CONCURRENCY]; |
| bool existing_segnos[MAX_AOREL_CONCURRENCY]; /* already have aoseg row */ |
| int ncandidates = 0; |
| int nemptysegs = 0; |
| SysScanDesc aoscan; |
| HeapTuple tuple; |
| Snapshot snapshot; |
| Oid segrelid; |
| bool tried_creating_new_segfile = false; |
| |
| memset(existing_segnos, 0, sizeof(existing_segnos)); |
| |
| if (ShouldUseReservedSegno(rel, mode)) |
| { |
| Assert(avoid_segnos == NIL); |
| if (Debug_appendonly_print_segfile_choice) |
| elog(LOG, "choose_segno_internal: chose RESERVED_SEGNO for write"); |
| |
| LockSegnoForWrite(rel, RESERVED_SEGNO); |
| return RESERVED_SEGNO; |
| } |
| |
| /* |
| * The algorithm below for choosing a target segment is not concurrent-safe. |
| * Grab a lock to serialize. |
| */ |
| LockDatabaseObject(rel->rd_locator.dbOid, (Oid)rel->rd_locator.relNumber, 0, ExclusiveLock); |
| |
| /* |
| * Obtain the snapshot that is taken at the beginning of the transaction. |
| * If a tuple is visible to this snapshot, and it hasn't been updated since |
| * (that's checked implicitly by heap_lock_tuple()), it's visible to any |
| * snapshot in this backend, and can be used as insertion target. We can't |
| * simply call GetTransactionSnapshot() here because it will create a new |
| * distributed snapshot for non-serializable transaction isolation level, |
| * and it may be too late. |
| */ |
| snapshot = GetOldestSnapshot(); |
| if (snapshot == NULL) |
| snapshot = GetTransactionSnapshot(); |
| |
| if (Debug_appendonly_print_segfile_choice) |
| { |
| elog(LOG, "choose_segno_internal: TransactionXmin = %u, xmin = %u, xmax = %u, myxid = %u", |
| TransactionXmin, snapshot->xmin, snapshot->xmax, GetCurrentTransactionIdIfAny()); |
| LogDistributedSnapshotInfo(snapshot, "Used snapshot: "); |
| } |
| |
| GetAppendOnlyEntryAuxOids(rel, |
| &segrelid, NULL, NULL, NULL, NULL); |
| |
| /* |
| * Now pick a segment that is not in use, and is not over the allowed |
| * size threshold (90% full). |
| */ |
| pg_aoseg_rel = heap_open(segrelid, AccessShareLock); |
| pg_aoseg_dsc = RelationGetDescr(pg_aoseg_rel); |
| |
| /* |
| * Scan through all the pg_aoseg (or pg_aocs) entries, and make note of |
| * all "candidates". |
| */ |
| aoscan = systable_beginscan(pg_aoseg_rel, InvalidOid, false, snapshot, 0, NULL); |
| while ((tuple = systable_getnext(aoscan)) != NULL) |
| { |
| int32 segno; |
| int64 tupcount; |
| int16 state; |
| int16 formatversion; |
| |
| get_aoseg_fields(rel, pg_aoseg_rel, tuple, &segno, |
| &tupcount, &state, &formatversion); |
| |
| existing_segnos[segno] = true; |
| |
| /* never write to AWAITING_DROP segments */ |
| if (state != AOSEG_STATE_DEFAULT) |
| continue; |
| |
| /* skip over segfiles that the caller asked to avoid */ |
| if (list_member_int(avoid_segnos, segno)) |
| continue; |
| |
| if (mode != CHOOSE_MODE_COMPACTION_TARGET) |
| { |
| /* If the ao segment is full, skip it */ |
| if (tupcount > segfileMaxRowThreshold()) |
| continue; |
| |
| /* Skip using the ao segment if not latest version (except as a compaction target) */ |
| if (formatversion != AOSegfileFormatVersion_GetLatest()) |
| continue; |
| |
| /* |
| * Historically, segment 0 was only used in utility mode. |
| * Nowadays, segment 0 is also used for CTAS and alter table |
| * rewrite commands. |
| */ |
| if (Gp_role != GP_ROLE_UTILITY && segno == RESERVED_SEGNO) |
| continue; |
| |
| /* |
| * If we have already used this segment in this transaction, no need |
| * to look further. We can continue to use it. We should already hold |
| * a tuple lock on the pg_aoseg row, too. |
| */ |
| if (HeapTupleHeaderGetXmin(tuple->t_data) == GetCurrentTransactionId()) |
| { |
| chosen_segno = segno; |
| |
| if (Debug_appendonly_print_segfile_choice) |
| elog(LOG, "choose_segno_interna: chose segfile %d because it was updated earlier in the transaction already", |
| chosen_segno); |
| break; |
| } |
| } |
| else if (tupcount == 0) |
| { |
| /* If the ao segment is empty, do not choose it for compaction */ |
| nemptysegs++; |
| continue; |
| } |
| |
| candidates[ncandidates].segno = segno; |
| candidates[ncandidates].ctid = tuple->t_self; |
| candidates[ncandidates].tupcount = tupcount; |
| ncandidates++; |
| } |
| systable_endscan(aoscan); |
| |
| /* |
| * Try to find a segment we can use among the candidates, and lock it. |
| */ |
| if (chosen_segno == -1) |
| { |
| |
| /* |
| * If we are choosing the next segfile to compact, check to see if we |
| * still have enough segfiles that can be inserted into. |
| * |
| * The grand total of non-existing segfiles, empty segfiles and segfiles |
| * that are worthy of compaction (ncandidates) represent the total |
| * number of available segfiles that can serve inserts. This total must |
| * at least be gp_appendonly_compaction_segfile_limit. |
| * |
| * Otherwise, we might put too many segments into AOSEG_STATE_AWAITING_DROP. |
| * Segfiles also remain in that state if VACUUM runs while there is an |
| * older snapshot in the system. |
| */ |
| if (mode == CHOOSE_MODE_COMPACTION_TARGET) |
| { |
| int non_existing_segfile_count; |
| |
| non_existing_segfile_count = |
| num_non_existing_segfiles(rel, existing_segnos, avoid_segnos); |
| if (non_existing_segfile_count + ncandidates + nemptysegs < gp_appendonly_compaction_segfile_limit) |
| { |
| ereportif(Debug_appendonly_print_segfile_choice, LOG, |
| (errmsg("number of available segfiles for inserts is below gp_appendonly_compaction_segfile_limit"), |
| errdetail("compaction candidate count = %d, non-existing segfile count = %d, empty segfile count = %d, limit = %d", |
| ncandidates, non_existing_segfile_count, nemptysegs, gp_appendonly_compaction_segfile_limit))); |
| goto cleanup; |
| } |
| } |
| |
| /* |
| * Sort the candidates by tuple count, to prefer segment with fewest existing |
| * tuples. (In particular, in COMPACTION_WRITE mode, this puts all empty |
| * segfiles to the front). |
| */ |
| qsort((void *) candidates, ncandidates, sizeof(candidate_segment), |
| compare_candidates); |
| |
| for (i = 0; i < ncandidates; i++) |
| { |
| HeapTupleData locktup; |
| Buffer buf = InvalidBuffer; |
| TM_FailureData hufd; |
| TM_Result result; |
| |
| /* |
| * When performing VACUUM compaction, we prefer to create a new segment |
| * over reusing a non-empty segfile, as the target to write the surviving |
| * tuples to. Because if we insert to a non-empty segfile, we won't be |
| * able to compact it later in the VACUUM cycle. (Or if we do, we'll scan |
| * through all the tuples we moved onto it earlier.) So before we proceed |
| * to try locking any non-empty segments, try to create a new one. |
| */ |
| if (mode == CHOOSE_MODE_COMPACTION_WRITE && |
| !tried_creating_new_segfile && |
| candidates[i].tupcount > 0) |
| { |
| chosen_segno = choose_new_segfile(rel, existing_segnos, avoid_segnos); |
| tried_creating_new_segfile = true; |
| if (chosen_segno != -1) |
| break; |
| } |
| |
| locktup.t_self = candidates[i].ctid; |
| result = heap_lock_tuple(pg_aoseg_rel, &locktup, |
| GetCurrentCommandId(true), |
| LockTupleExclusive, |
| LockWaitSkip, |
| false, /* follow_updates */ |
| &buf, |
| &hufd); |
| if (BufferIsValid(buf)) |
| ReleaseBuffer(buf); |
| if (result == TM_Ok) |
| { |
| chosen_segno = candidates[i].segno; |
| if (Debug_appendonly_print_segfile_choice) |
| elog(LOG, "choose_segno_internal: locked existing segfile %d", chosen_segno); |
| break; |
| } |
| else |
| { |
| if (Debug_appendonly_print_segfile_choice) |
| elog(LOG, "choose_segno_internal: skipped segfile %d because could not be locked", |
| candidates[i].segno); |
| } |
| } |
| } |
| |
| /* |
| * If no existing segment could be used, create a new one. |
| */ |
| if (chosen_segno == -1 && |
| mode != CHOOSE_MODE_COMPACTION_TARGET && |
| !tried_creating_new_segfile) |
| { |
| chosen_segno = choose_new_segfile(rel, existing_segnos, avoid_segnos); |
| } |
| |
| cleanup: |
| UnlockDatabaseObject(rel->rd_locator.dbOid, (Oid)rel->rd_locator.relNumber, 0, ExclusiveLock); |
| |
| if (Debug_appendonly_print_segfile_choice && chosen_segno != -1) |
| ereport(LOG, |
| (errmsg("Segno chosen for append-only relation \"%s\" is %d", |
| RelationGetRelationName(rel), chosen_segno))); |
| |
| heap_close(pg_aoseg_rel, AccessShareLock); |
| |
| return chosen_segno; |
| } |
| |
| /* |
| * Discounting the 'existing_segnos' and 'avoid_segnos', count the number of |
| * segnos for this append-optimized relation. These segnos won't have an |
| * aoseg/aocsseg row. |
| */ |
| static int |
| num_non_existing_segfiles(Relation rel, bool *existing_segnos, List *avoid_segnos) |
| { |
| int non_existing = 0; |
| |
| Assert(RelationStorageIsAO(rel)); |
| |
| for (int segno = 0; segno < MAX_AOREL_CONCURRENCY; segno++) |
| { |
| /* Only choose seg 0 in utility mode. See above. */ |
| if (Gp_role != GP_ROLE_UTILITY && segno == 0) |
| continue; |
| |
| if (!existing_segnos[segno] && !list_member_int(avoid_segnos, segno)) |
| non_existing++; |
| } |
| |
| return non_existing; |
| } |
| |
| static int |
| choose_new_segfile(Relation rel, bool *used, List *avoid_segnos) |
| { |
| int chosen_segno = -1; |
| |
| Assert(RelationStorageIsAO(rel)); |
| |
| /* No segment found. Try to create a new one. */ |
| for (int segno = 0; segno < MAX_AOREL_CONCURRENCY; segno++) |
| { |
| /* Only choose seg 0 in utility mode. See above. */ |
| if (Gp_role != GP_ROLE_UTILITY && segno == 0) |
| continue; |
| |
| if (!used[segno] && !list_member_int(avoid_segnos, segno)) |
| { |
| chosen_segno = segno; |
| break; |
| } |
| } |
| |
| /* If can't create a new one because MAX_AOREL_CONCURRENCY was reached */ |
| if (chosen_segno != -1) |
| { |
| if (Debug_appendonly_print_segfile_choice) |
| elog(LOG, "choose_new_segfile: creating new segfile %d", |
| chosen_segno); |
| |
| if (RelationIsAoRows(rel)) |
| InsertInitialSegnoEntry(rel, chosen_segno); |
| else |
| { |
| Oid segrelid; |
| Snapshot appendOnlyMetaDataSnapshot; |
| |
| appendOnlyMetaDataSnapshot = RegisterSnapshot(GetCatalogSnapshot(InvalidOid)); |
| GetAppendOnlyEntryAuxOids(rel, |
| &segrelid, NULL, NULL, NULL, NULL); |
| UnregisterSnapshot(appendOnlyMetaDataSnapshot); |
| |
| InsertInitialAOCSFileSegInfo(rel, chosen_segno, |
| RelationGetNumberOfAttributes(rel), segrelid); |
| } |
| } |
| else |
| { |
| if (Debug_appendonly_print_segfile_choice) |
| elog(LOG, "choose_new_segfile: could not create segfile, all segfiles are in use"); |
| } |
| |
| return chosen_segno; |
| } |
| |
| /* |
| * Helper function to extract 'segno', 'tupcount', 'state', and 'formatversion' |
| * from a pg_aoseg or pg_aocs tuple. |
| */ |
| static void |
| get_aoseg_fields(Relation rel, Relation pg_aoseg_rel, HeapTuple tuple, |
| int32 *segno, int64 *tupcount, int16 *state, int16 *formatversion) |
| { |
| TupleDesc pg_aoseg_dsc = RelationGetDescr(pg_aoseg_rel); |
| bool isNull; |
| |
| Assert(RelationStorageIsAO(rel)); |
| |
| if (RelationIsAoRows(rel)) |
| { |
| *segno = DatumGetInt32(fastgetattr(tuple, |
| Anum_pg_aoseg_segno, |
| pg_aoseg_dsc, &isNull)); |
| Assert(!isNull); |
| |
| *tupcount = DatumGetInt64(fastgetattr(tuple, |
| Anum_pg_aoseg_tupcount, |
| pg_aoseg_dsc, &isNull)); |
| Assert(!isNull); |
| |
| *state = DatumGetInt16(fastgetattr(tuple, |
| Anum_pg_aoseg_state, |
| pg_aoseg_dsc, &isNull)); |
| Assert(!isNull); |
| |
| *formatversion = DatumGetInt16(fastgetattr(tuple, |
| Anum_pg_aoseg_formatversion, |
| pg_aoseg_dsc, &isNull)); |
| Assert(!isNull); |
| } |
| else |
| { |
| *segno = DatumGetInt32(fastgetattr(tuple, |
| Anum_pg_aocs_segno, |
| pg_aoseg_dsc, &isNull)); |
| Assert(!isNull); |
| *tupcount = DatumGetInt64(fastgetattr(tuple, |
| Anum_pg_aocs_tupcount, |
| pg_aoseg_dsc, &isNull)); |
| Assert(!isNull); |
| |
| *state = DatumGetInt16(fastgetattr(tuple, |
| Anum_pg_aocs_state, |
| pg_aoseg_dsc, &isNull)); |
| Assert(!isNull); |
| |
| *formatversion = DatumGetInt16(fastgetattr(tuple, |
| Anum_pg_aocs_formatversion, |
| pg_aoseg_dsc, &isNull)); |
| Assert(!isNull); |
| } |
| } |
| |
| /* |
| * AORelIncrementModCount |
| * |
| * Update the modcount of an aoseg table. The modcount is used by incremental backup |
| * to detect changed relations. |
| */ |
| void |
| AORelIncrementModCount(Relation parentrel) |
| { |
| int segno; |
| |
| Assert(RelationStorageIsAO(parentrel)); |
| |
| if (Debug_appendonly_print_segfile_choice) |
| ereport(LOG, |
| (errmsg("AORelIncrementModCount: Incrementing modcount of aoseg entry for append-only relation %d", |
| RelationGetRelid(parentrel)))); |
| |
| /* |
| * It doesn't matter which segment we use, as long as the segment can be used by us |
| * (same rules as for inserting). |
| */ |
| segno = ChooseSegnoForWrite(parentrel); |
| |
| if (RelationIsAoRows(parentrel)) |
| { |
| /* |
| * Update the master AO segment info table with correct tuple count total |
| */ |
| IncrementFileSegInfoModCount(parentrel, segno); |
| } |
| else |
| { |
| /* AO column store */ |
| AOCSFileSegInfoAddCount(parentrel, segno, 0, 0, 1); |
| } |
| } |