| /*------------------------------------------------------------------------- |
| * |
| * inval.c |
| * POSTGRES cache invalidation dispatcher code. |
| * |
| * This is subtle stuff, so pay attention: |
| * |
| * When a tuple is updated or deleted, our standard time qualification rules |
| * consider that it is *still valid* so long as we are in the same command, |
| * ie, until the next CommandCounterIncrement() or transaction commit. |
| * (See utils/time/tqual.c, and note that system catalogs are generally |
| * scanned under SnapshotNow rules by the system, or plain user snapshots |
| * for user queries.) At the command boundary, the old tuple stops |
| * being valid and the new version, if any, becomes valid. Therefore, |
| * we cannot simply flush a tuple from the system caches during heap_update() |
| * or heap_delete(). The tuple is still good at that point; what's more, |
| * even if we did flush it, it might be reloaded into the caches by a later |
| * request in the same command. So the correct behavior is to keep a list |
| * of outdated (updated/deleted) tuples and then do the required cache |
| * flushes at the next command boundary. We must also keep track of |
| * inserted tuples so that we can flush "negative" cache entries that match |
| * the new tuples; again, that mustn't happen until end of command. |
| * |
| * Once we have finished the command, we still need to remember inserted |
| * tuples (including new versions of updated tuples), so that we can flush |
| * them from the caches if we abort the transaction. Similarly, we'd better |
| * be able to flush "negative" cache entries that may have been loaded in |
| * place of deleted tuples, so we still need the deleted ones too. |
| * |
| * If we successfully complete the transaction, we have to broadcast all |
| * these invalidation events to other backends (via the SI message queue) |
| * so that they can flush obsolete entries from their caches. Note we have |
| * to record the transaction commit before sending SI messages, otherwise |
| * the other backends won't see our updated tuples as good. |
| * |
| * When a subtransaction aborts, we can process and discard any events |
| * it has queued. When a subtransaction commits, we just add its events |
| * to the pending lists of the parent transaction. |
| * |
| * In short, we need to remember until xact end every insert or delete |
| * of a tuple that might be in the system caches. Updates are treated as |
| * two events, delete + insert, for simplicity. (There are cases where |
| * it'd be possible to record just one event, but we don't currently try.) |
| * |
| * We do not need to register EVERY tuple operation in this way, just those |
| * on tuples in relations that have associated catcaches. We do, however, |
| * have to register every operation on every tuple that *could* be in a |
| * catcache, whether or not it currently is in our cache. Also, if the |
| * tuple is in a relation that has multiple catcaches, we need to register |
| * an invalidation message for each such catcache. catcache.c's |
| * PrepareToInvalidateCacheTuple() routine provides the knowledge of which |
| * catcaches may need invalidation for a given tuple. |
| * |
| * Also, whenever we see an operation on a pg_class or pg_attribute tuple, |
| * we register a relcache flush operation for the relation described by that |
| * tuple. pg_class updates trigger an smgr flush operation as well. |
| * |
| * We keep the relcache and smgr flush requests in lists separate from the |
| * catcache tuple flush requests. This allows us to issue all the pending |
| * catcache flushes before we issue relcache flushes, which saves us from |
| * loading a catcache tuple during relcache load only to flush it again |
| * right away. Also, we avoid queuing multiple relcache flush requests for |
| * the same relation, since a relcache flush is relatively expensive to do. |
| * (XXX is it worth testing likewise for duplicate catcache flush entries? |
| * Probably not.) |
| * |
| * If a relcache flush is issued for a system relation that we preload |
| * from the relcache init file, we must also delete the init file so that |
| * it will be rebuilt during the next backend restart. The actual work of |
| * manipulating the init file is in relcache.c, but we keep track of the |
| * need for it here. |
| * |
| * The request lists proper are kept in CurTransactionContext of their |
| * creating (sub)transaction, since they can be forgotten on abort of that |
| * transaction but must be kept till top-level commit otherwise. For |
| * simplicity we keep the controlling list-of-lists in TopTransactionContext. |
| * |
| * |
| * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * IDENTIFICATION |
| * $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.78 2006/10/04 00:30:00 momjian Exp $ |
| * |
| *------------------------------------------------------------------------- |
| */ |
| #include "postgres.h" |
| |
| #include "access/twophase_rmgr.h" |
| #include "access/xact.h" |
| #include "catalog/catalog.h" |
| #include "catalog/gp_policy.h" |
| #include "miscadmin.h" |
| #include "storage/sinval.h" |
| #include "storage/smgr.h" |
| #include "utils/inval.h" |
| #include "utils/memutils.h" |
| #include "utils/relcache.h" |
| #include "utils/simex.h" |
| #include "utils/syscache.h" |
| #include "commands/dbcommands.h" |
| #include "access/heapam.h" |
| #include "utils/guc.h" |
| |
| |
| |
| /* |
| * To minimize palloc traffic, we keep pending requests in successively- |
| * larger chunks (a slightly more sophisticated version of an expansible |
| * array). All request types can be stored as SharedInvalidationMessage |
| * records. The ordering of requests within a list is important for some |
| * components (e.g. Metadata Versioning), so we make sure to maintain it. |
| */ |
| typedef struct InvalidationChunk |
| { |
| struct InvalidationChunk *next; /* list link */ |
| int nitems; /* # items currently stored in chunk */ |
| int maxitems; /* size of allocated array in this chunk */ |
| SharedInvalidationMessage msgs[1]; /* VARIABLE LENGTH ARRAY */ |
| } InvalidationChunk; /* VARIABLE LENGTH STRUCTURE */ |
| |
| typedef struct InvalidationListHeader |
| { |
| InvalidationChunk *cclist; /* list of chunks holding catcache msgs */ |
| InvalidationChunk *rclist; /* list of chunks holding relcache/smgr msgs */ |
| InvalidationChunk *velist; /* list of chunks holding versioning event msgs */ |
| } InvalidationListHeader; |
| |
| /*---------------- |
| * Invalidation info is divided into two lists: |
| * 1) events so far in current command, not yet reflected to caches. |
| * 2) events in previous commands of current transaction; these have |
| * been reflected to local caches, and must be either broadcast to |
| * other backends or rolled back from local cache when we commit |
| * or abort the transaction. |
| * Actually, we need two such lists for each level of nested transaction, |
| * so that we can discard events from an aborted subtransaction. When |
| * a subtransaction commits, we append its lists to the parent's lists. |
| * |
| * The relcache-file-invalidated flag can just be a simple boolean, |
| * since we only act on it at transaction commit; we don't care which |
| * command of the transaction set it. |
| *---------------- |
| */ |
| |
| typedef struct TransInvalidationInfo |
| { |
| /* Back link to parent transaction's info */ |
| struct TransInvalidationInfo *parent; |
| |
| /* Subtransaction nesting depth */ |
| int my_level; |
| |
| /* |
| * head of current-command event list. |
| * In Metadata versioning, this is the Command Versioning Queue (CVQ) |
| */ |
| InvalidationListHeader CurrentCmdInvalidMsgs; |
| |
| /* |
| * head of previous-commands event list. |
| * In Metadata versioning, this is the Transaction Versioning Queue (XVQ) |
| */ |
| InvalidationListHeader PriorCmdInvalidMsgs; |
| |
| /* init file must be invalidated? */ |
| bool RelcacheInitFileInval; |
| |
| } TransInvalidationInfo; |
| |
| /* |
| * This global variable is used for testing Metadata Versioning, and |
| * therefore it needs to be non-static |
| */ |
| TransInvalidationInfo *transInvalInfo = NULL; |
| |
| /* |
| * Dynamically-registered callback functions. Current implementation |
| * assumes there won't be very many of these at once; could improve if needed. |
| */ |
| |
| #define MAX_CACHE_CALLBACKS 20 |
| |
| static struct CACHECALLBACK |
| { |
| int16 id; /* cache number or message type id */ |
| CacheCallbackFunction function; |
| Datum arg; |
| } cache_callback_list[MAX_CACHE_CALLBACKS]; |
| |
| static int cache_callback_count = 0; |
| |
| /* info values for 2PC callback */ |
| #define TWOPHASE_INFO_MSG 0 /* SharedInvalidationMessage */ |
| #define TWOPHASE_INFO_FILE_BEFORE 1 /* relcache file inval */ |
| #define TWOPHASE_INFO_FILE_AFTER 2 /* relcache file inval */ |
| |
| static void PersistInvalidationMessage(SharedInvalidationMessage *msg); |
| static void PrepareForRelcacheInvalidation(Oid relid, HeapTuple tuple); |
| /* ---------------------------------------------------------------- |
| * Invalidation list support functions |
| * |
| * These three routines encapsulate processing of the "chunked" |
| * representation of what is logically just a list of messages. |
| * ---------------------------------------------------------------- |
| */ |
| |
| /* |
| * AddInvalidationMessage |
| * Add an invalidation message to a list (of chunks). |
| * |
| * We make sure that we maintain the original order of messages by |
| * always appending at the end of the list. |
| */ |
| static void |
| AddInvalidationMessage(InvalidationChunk **listHdr, |
| SharedInvalidationMessage *msg) |
| { |
| InvalidationChunk *lastChunk = *listHdr; |
| |
| if (lastChunk == NULL) |
| { |
| /* First time through; create initial chunk */ |
| #define FIRSTCHUNKSIZE 16 |
| lastChunk = (InvalidationChunk *) |
| MemoryContextAlloc(CurTransactionContext, |
| sizeof(InvalidationChunk) + |
| (FIRSTCHUNKSIZE - 1) *sizeof(SharedInvalidationMessage)); |
| lastChunk->nitems = 0; |
| lastChunk->maxitems = FIRSTCHUNKSIZE; |
| lastChunk->next = *listHdr; |
| *listHdr = lastChunk; |
| } |
| else |
| { |
| /* We already have chunks in the list. Go to the last one */ |
| while (NULL != lastChunk->next) |
| { |
| lastChunk = lastChunk->next; |
| } |
| |
| /* Is there room in the last chunk? */ |
| if (lastChunk->nitems >= lastChunk->maxitems) |
| { |
| /* Need another chunk; double size of last chunk */ |
| int chunksize = 2 * lastChunk->maxitems; |
| |
| InvalidationChunk * newChunk = (InvalidationChunk *) |
| MemoryContextAlloc(CurTransactionContext, |
| sizeof(InvalidationChunk) + |
| (chunksize - 1) *sizeof(SharedInvalidationMessage)); |
| newChunk->nitems = 0; |
| newChunk->maxitems = chunksize; |
| newChunk->next = NULL; |
| |
| /* Append new chunk after the last one */ |
| lastChunk->next = newChunk; |
| lastChunk = newChunk; |
| } |
| } |
| |
| /* Okay, add message to last chunk */ |
| lastChunk->msgs[lastChunk->nitems] = *msg; |
| |
| lastChunk->nitems++; |
| } |
| |
| /* |
| * Append one list of invalidation message chunks to another, resetting |
| * the source chunk-list pointer to NULL. |
| * Source is appended at the end of the destination to preserve ordering. |
| */ |
| static void |
| AppendInvalidationMessageList(InvalidationChunk **destHdr, |
| InvalidationChunk **srcHdr) |
| { |
| if (NULL == *srcHdr) |
| { |
| /* nothing to do */ |
| return; |
| } |
| |
| InvalidationChunk *chunk = *destHdr; |
| |
| if (NULL == chunk) |
| { |
| /* Destination is empty */ |
| *destHdr = *srcHdr; |
| } |
| else |
| { |
| /* Find last chunk of destination list */ |
| while (chunk->next != NULL) |
| chunk = chunk->next; |
| |
| /* Append source list at the end of destination */ |
| chunk->next = *srcHdr; |
| } |
| |
| *srcHdr = NULL; |
| } |
| |
| /* |
| * Process a list of invalidation messages. |
| * |
| * This is a macro that executes the given code fragment for each message in |
| * a message chunk list. The fragment should refer to the message as *msg. |
| */ |
| #define ProcessMessageList(listHdr, codeFragment) \ |
| do { \ |
| InvalidationChunk *_chunk; \ |
| for (_chunk = (listHdr); _chunk != NULL; _chunk = _chunk->next) \ |
| { \ |
| int _cindex; \ |
| for (_cindex = 0; _cindex < _chunk->nitems; _cindex++) \ |
| { \ |
| SharedInvalidationMessage *msg = &_chunk->msgs[_cindex]; \ |
| codeFragment; \ |
| } \ |
| } \ |
| } while (0) |
| |
| /* |
| * Process a list of invalidation messages group-wise. |
| * |
| * As above, but the code fragment can handle an array of messages. |
| * The fragment should refer to the messages as msgs[], with n entries. |
| */ |
| #define ProcessMessageListMulti(listHdr, codeFragment) \ |
| do { \ |
| InvalidationChunk *_chunk; \ |
| for (_chunk = (listHdr); _chunk != NULL; _chunk = _chunk->next) \ |
| { \ |
| SharedInvalidationMessage *msgs = _chunk->msgs; \ |
| int n = _chunk->nitems; \ |
| codeFragment; \ |
| } \ |
| } while (0) |
| |
| |
| /* ---------------------------------------------------------------- |
| * Invalidation set support functions |
| * |
| * These routines understand about the division of a logical invalidation |
| * list into separate physical lists for catcache and relcache/smgr entries. |
| * ---------------------------------------------------------------- |
| */ |
| |
| /* |
| * Add a catcache inval entry |
| */ |
| static void |
| AddCatcacheInvalidationMessage(InvalidationListHeader *hdr, |
| int id, uint32 hashValue, |
| ItemPointer tuplePtr, Oid dbId, |
| SysCacheInvalidateAction action) |
| { |
| SharedInvalidationMessage msg; |
| |
| msg.cc.id = (int16) id; |
| msg.cc.tuplePtr = *tuplePtr; |
| msg.cc.dbId = dbId; |
| msg.cc.hashValue = hashValue; |
| AddInvalidationMessage(&hdr->cclist, &msg); |
| } |
| |
| /* |
| * Add a relcache inval entry |
| */ |
| static void |
| AddRelcacheInvalidationMessage(InvalidationListHeader *hdr, |
| Oid dbId, Oid relId) |
| { |
| SharedInvalidationMessage msg; |
| |
| /* Don't add a duplicate item */ |
| /* We assume dbId need not be checked because it will never change */ |
| ProcessMessageList(hdr->rclist, |
| if (msg->rc.id == SHAREDINVALRELCACHE_ID && |
| msg->rc.relId == relId) |
| return); |
| |
| /* OK, add the item */ |
| msg.rc.id = SHAREDINVALRELCACHE_ID; |
| msg.rc.dbId = dbId; |
| msg.rc.relId = relId; |
| AddInvalidationMessage(&hdr->rclist, &msg); |
| } |
| |
| /* |
| * Add an smgr inval entry |
| */ |
| static void |
| AddSmgrInvalidationMessage(InvalidationListHeader *hdr, |
| RelFileNode rnode) |
| { |
| SharedInvalidationMessage msg; |
| |
| /* Don't add a duplicate item */ |
| ProcessMessageList(hdr->rclist, |
| if (msg->sm.id == SHAREDINVALSMGR_ID && |
| RelFileNodeEquals(msg->sm.rnode, rnode)) |
| return); |
| |
| /* OK, add the item */ |
| msg.sm.id = SHAREDINVALSMGR_ID; |
| msg.sm.rnode = rnode; |
| AddInvalidationMessage(&hdr->rclist, &msg); |
| } |
| |
| /* |
| * Append one list of invalidation messages to another, resetting |
| * the source list to empty. |
| */ |
| static void |
| AppendInvalidationMessages(InvalidationListHeader *dest, |
| InvalidationListHeader *src) |
| { |
| AppendInvalidationMessageList(&dest->cclist, &src->cclist); |
| AppendInvalidationMessageList(&dest->rclist, &src->rclist); |
| AppendInvalidationMessageList(&dest->velist, &src->velist); |
| } |
| |
| /* |
| * Execute the given function for all the messages in an invalidation list. |
| * The list is not altered. |
| * |
| * catcache entries are processed first, for reasons mentioned above. |
| */ |
| static void |
| ProcessInvalidationMessages(InvalidationListHeader *hdr, |
| void (*func) (SharedInvalidationMessage *msg)) |
| { |
| ProcessMessageList(hdr->cclist, func(msg)); |
| ProcessMessageList(hdr->rclist, func(msg)); |
| ProcessMessageList(hdr->velist, func(msg)); |
| } |
| |
| /* |
| * As above, but the function is able to process an array of messages |
| * rather than just one at a time. |
| */ |
| static void |
| ProcessInvalidationMessageMulti(InvalidationListHeader *hdr, |
| void (*func) (SharedInvalidationMessage *msgs, int n)) |
| { |
| ProcessMessageListMulti(hdr->cclist, func(msgs, n)); |
| ProcessMessageListMulti(hdr->rclist, func(msgs, n)); |
| ProcessMessageListMulti(hdr->velist, func(msgs, n)); |
| } |
| |
| /* ---------------------------------------------------------------- |
| * private support functions |
| * ---------------------------------------------------------------- |
| */ |
| |
| /* |
| * RegisterCatcacheInvalidation |
| * |
| * Register an invalidation event for a catcache tuple entry. |
| */ |
| static void |
| RegisterCatcacheInvalidation(int cacheId, |
| uint32 hashValue, |
| ItemPointer tuplePtr, |
| Oid dbId, |
| SysCacheInvalidateAction action) |
| { |
| AddCatcacheInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs, |
| cacheId, hashValue, tuplePtr, dbId, action); |
| } |
| |
| /* |
| * RegisterRelcacheInvalidation |
| * |
| * As above, but register a relcache invalidation event. |
| */ |
| static void |
| RegisterRelcacheInvalidation(Oid dbId, Oid relId) |
| { |
| AddRelcacheInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs, |
| dbId, relId); |
| |
| /* |
| * Most of the time, relcache invalidation is associated with system |
| * catalog updates, but there are a few cases where it isn't. Quick |
| * hack to ensure that the next CommandCounterIncrement() will think |
| * that we need to do CommandEndInvalidationMessages(). |
| */ |
| (void) GetCurrentCommandId(/*true*/); |
| |
| /* |
| * If the relation being invalidated is one of those cached in the |
| * relcache init file, mark that we need to zap that file at commit. |
| */ |
| if (RelationIdIsInInitFile(relId)) |
| transInvalInfo->RelcacheInitFileInval = true; |
| } |
| |
| /* |
| * RegisterSmgrInvalidation |
| * |
| * As above, but register an smgr invalidation event. |
| */ |
| static void |
| RegisterSmgrInvalidation(RelFileNode rnode) |
| { |
| AddSmgrInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs, |
| rnode); |
| |
| /* |
| * As above, just in case there is not an associated catalog change. |
| */ |
| (void) GetCurrentCommandId(/*true*/); |
| } |
| |
| #ifdef USE_ASSERT_CHECKING |
| static char * |
| si_to_str(SharedInvalidationMessage *msg) |
| { |
| StringInfoData buf; |
| int i; |
| char *s; |
| |
| initStringInfo(&buf); |
| appendStringInfo(&buf, "message id = %d", msg->id); |
| s = (char *)&(msg->cc); |
| for (i = 0; i < sizeof(SharedInvalCatcacheMsg); i++) |
| { |
| if (i == 0) |
| appendStringInfo(&buf, " CC:"); |
| |
| appendStringInfo(&buf, " %x", *(s + i)); |
| |
| } |
| s = (char *)&(msg->rc); |
| for (i = 0; i < sizeof(SharedInvalRelcacheMsg); i++) |
| { |
| if (i == 0) |
| appendStringInfo(&buf, " RC:"); |
| |
| appendStringInfo(&buf, " %x", *(s + i)); |
| |
| } |
| return buf.data; |
| } |
| #endif |
| |
| /* |
| * LocalExecuteInvalidationMessage |
| * |
| * Process a single invalidation message (which could be of any type). |
| * Only the local caches are flushed; this does not transmit the message |
| * to other backends. |
| */ |
| static void |
| LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg) |
| { |
| int i; |
| |
| if (msg->id >= 0) |
| { |
| /* When msg->id > 0, this is a CatCache invalidation message */ |
| if (msg->cc.dbId == MyDatabaseId || msg->cc.dbId == 0) |
| { |
| CatalogCacheIdInvalidate(msg->cc.id, |
| msg->cc.hashValue, |
| &msg->cc.tuplePtr); |
| |
| for (i = 0; i < cache_callback_count; i++) |
| { |
| struct CACHECALLBACK *ccitem = cache_callback_list + i; |
| |
| if (ccitem->id == msg->cc.id) |
| (*ccitem->function) (ccitem->arg, InvalidOid); |
| } |
| } |
| |
| return; |
| } |
| |
| /* |
| * If we got here, we must have msg->id < 0. This means the invalidation |
| * message is for one of the other caches. Find out which one and |
| * take the specific actions for each. |
| */ |
| Assert(msg->id < 0); |
| switch (msg->id) |
| { |
| case SHAREDINVALRELCACHE_ID: |
| if (msg->rc.dbId == MyDatabaseId || msg->rc.dbId == InvalidOid) |
| { |
| RelationCacheInvalidateEntry(msg->rc.relId); |
| |
| for (i = 0; i < cache_callback_count; i++) |
| { |
| struct CACHECALLBACK *ccitem = cache_callback_list + i; |
| |
| if (ccitem->id == SHAREDINVALRELCACHE_ID) |
| (*ccitem->function) (ccitem->arg, msg->rc.relId); |
| } |
| } |
| break; |
| |
| case SHAREDINVALSMGR_ID: |
| /* |
| * We could have smgr entries for relations of other databases, so no |
| * short-circuit test is possible here. |
| */ |
| smgrclosenode(msg->sm.rnode); |
| break; |
| |
| default: |
| #ifdef USE_ASSERT_CHECKING |
| elog(NOTICE, "invalid SI message: %s", si_to_str(msg)); |
| #endif |
| elog(FATAL, "unrecognized SI message id: %d", msg->id); |
| } |
| } |
| |
| /* |
| * InvalidateSystemCaches |
| * |
| * This blows away all tuples in the system catalog caches and |
| * all the cached relation descriptors and smgr cache entries. |
| * Relation descriptors that have positive refcounts are then rebuilt. |
| * |
| * We call this when we see a shared-inval-queue overflow signal, |
| * since that tells us we've lost some shared-inval messages and hence |
| * don't know what needs to be invalidated. |
| */ |
| static void |
| InvalidateSystemCaches(void) |
| { |
| int i; |
| |
| ResetCatalogCaches(); |
| RelationCacheInvalidate(); /* gets smgr cache too */ |
| |
| for (i = 0; i < cache_callback_count; i++) |
| { |
| struct CACHECALLBACK *ccitem = cache_callback_list + i; |
| |
| (*ccitem->function) (ccitem->arg, InvalidOid); |
| } |
| } |
| |
| /* |
| * This is identical to InvalidateSystemCache, for now, with exception that this is |
| * a public interface, mainly because we want to keep the old interface private. |
| * At some point in the future we may want to consolidate them, though. |
| */ |
| void |
| ResetSystemCaches(void) |
| { |
| InvalidateSystemCaches(); |
| } |
| |
| /* |
| * PrepareForTupleInvalidation |
| * Detect whether invalidation of this tuple implies invalidation |
| * of catalog/relation cache entries; if so, register inval events. |
| */ |
| static void |
| PrepareForTupleInvalidation(Relation relation, HeapTuple tuple, SysCacheInvalidateAction action) |
| { |
| Oid tupleRelId; |
| |
| /* Do nothing during bootstrap */ |
| if (IsBootstrapProcessingMode()) |
| return; |
| |
| /* |
| * We only need to worry about invalidation for tuples that are in system |
| * relations; user-relation tuples are never in catcaches and can't affect |
| * the relcache either. |
| */ |
| if (!IsSystemRelation(relation)) |
| return; |
| |
| /* |
| * TOAST tuples can likewise be ignored here. Note that TOAST tables are |
| * considered system relations so they are not filtered by the above test. |
| */ |
| if (IsToastRelation(relation)) |
| return; |
| |
| /* |
| * First let the catcache do its thing |
| */ |
| PrepareToInvalidateCacheTuple(relation, tuple, action, |
| RegisterCatcacheInvalidation); |
| |
| /* |
| * Now, is this tuple one of the primary definers of a relcache entry? |
| */ |
| tupleRelId = RelationGetRelid(relation); |
| PrepareForRelcacheInvalidation(tupleRelId, tuple); |
| } |
| |
| /* |
| * PrepareForRelcacheInvalidation |
| * Detect whether invalidation of this tuple implies invalidation |
| * of catalog/relation cache entries; if so, register inval events. |
| */ |
| static void |
| PrepareForRelcacheInvalidation(Oid relid, HeapTuple tuple) |
| { |
| Oid relationId = InvalidOid; |
| Oid databaseId = InvalidOid; |
| if (relid == RelationRelationId) |
| { |
| Form_pg_class classtup = (Form_pg_class) GETSTRUCT(tuple); |
| RelFileNode rnode; |
| |
| relationId = HeapTupleGetOid(tuple); |
| if (classtup->relisshared) |
| databaseId = InvalidOid; |
| else |
| databaseId = MyDatabaseId; |
| |
| /* |
| * We need to send out an smgr inval as well as a relcache inval. This |
| * is needed because other backends might possibly possess smgr cache |
| * but not relcache entries for the target relation. |
| * |
| * Note: during a pg_class row update that assigns a new relfilenode |
| * or reltablespace value, we will be called on both the old and new |
| * tuples, and thus will broadcast invalidation messages showing both |
| * the old and new RelFileNode values. This ensures that other |
| * backends will close smgr references to the old file. |
| * |
| * XXX possible future cleanup: it might be better to trigger smgr |
| * flushes explicitly, rather than indirectly from pg_class updates. |
| */ |
| if (classtup->reltablespace) |
| rnode.spcNode = classtup->reltablespace; |
| else if (relstorage_is_ao(classtup->relstorage)) |
| rnode.spcNode = get_database_dts(databaseId); |
| else |
| rnode.spcNode = MyDatabaseTableSpace; |
| |
| rnode.dbNode = databaseId; |
| rnode.relNode = classtup->relfilenode; |
| RegisterSmgrInvalidation(rnode); |
| } |
| else if (relid == AttributeRelationId) |
| { |
| Form_pg_attribute atttup = (Form_pg_attribute) GETSTRUCT(tuple); |
| |
| relationId = atttup->attrelid; |
| |
| /* |
| * KLUGE ALERT: we always send the relcache event with MyDatabaseId, |
| * even if the rel in question is shared (which we can't easily tell). |
| * This essentially means that only backends in this same database |
| * will react to the relcache flush request. This is in fact |
| * appropriate, since only those backends could see our pg_attribute |
| * change anyway. It looks a bit ugly though. (In practice, shared |
| * relations can't have schema changes after bootstrap, so we should |
| * never come here for a shared rel anyway.) |
| */ |
| databaseId = MyDatabaseId; |
| } |
| else if (relid == GpPolicyRelationId) |
| { |
| FormData_gp_policy *gptup = (FormData_gp_policy *) GETSTRUCT(tuple); |
| |
| relationId = gptup->localoid; |
| databaseId = MyDatabaseId; |
| } |
| else if (relid == IndexRelationId) |
| { |
| Form_pg_index indextup = (Form_pg_index) GETSTRUCT(tuple); |
| |
| /* |
| * When a pg_index row is updated, we should send out a relcache inval |
| * for the index relation. As above, we don't know the shared status |
| * of the index, but in practice it doesn't matter since indexes of |
| * shared catalogs can't have such updates. |
| */ |
| relationId = indextup->indexrelid; |
| databaseId = MyDatabaseId; |
| } |
| else |
| return; |
| |
| /* |
| * Yes. We need to register a relcache invalidation event. |
| */ |
| RegisterRelcacheInvalidation(databaseId, relationId); |
| } |
| |
| /* ---------------------------------------------------------------- |
| * public functions |
| * ---------------------------------------------------------------- |
| */ |
| |
| /* |
| * AcceptInvalidationMessages |
| * Read and process invalidation messages from the shared invalidation |
| * message queue. |
| * |
| * Note: |
| * This should be called as the first step in processing a transaction. |
| */ |
| void |
| AcceptInvalidationMessages(void) |
| { |
| ReceiveSharedInvalidMessages(LocalExecuteInvalidationMessage, |
| InvalidateSystemCaches); |
| |
| Assert(SysCacheFlushForce_IsValid(gp_test_system_cache_flush_force)); |
| |
| #ifdef USE_TEST_UTILS |
| /* |
| * Test code to force cache flushes anytime a flush could happen. |
| * |
| * If used with CLOBBER_FREED_MEMORY, gp_test_system_cache_flush_force provides |
| * a fairly thorough test that the system contains no cache-flush hazards. |
| * However, it also makes the system unbelievably slow --- the regression |
| * tests take about 100 times longer than normal. |
| * |
| * gp_test_system_cache_flush_force_recursive slows things by |
| * at least a factor of 10000, so I wouldn't suggest |
| * trying to run the entire regression tests that way. It's useful to try |
| * a few simple tests, to make sure that cache reload isn't subject to |
| * internal cache-flush hazards, but after you've done a few thousand |
| * recursive reloads it's unlikely you'll learn more. |
| */ |
| if (SysCacheFlushForce_Recursive == gp_test_system_cache_flush_force) |
| { |
| /* potentially recursive cache invalidation */ |
| InvalidateSystemCaches(); |
| } |
| else |
| { |
| static bool in_recursion = false; |
| |
| if (!in_recursion) |
| { |
| bool invalidate = (SysCacheFlushForce_NonRecursive == gp_test_system_cache_flush_force); |
| |
| if (!invalidate && |
| gp_simex_init && |
| gp_simex_run && |
| gp_simex_class == SimExESClass_CacheInvalidation) |
| { |
| /* |
| * Same basic idea as above, except using the SimEx facility, the main |
| * advantage of this approach is that it only triggers the invalidation |
| * once per unique call stack, which should make testing significantly |
| * faster. |
| */ |
| invalidate = (SimExESSubClass_CacheInvalidation == SimEx_CheckInject()); |
| } |
| |
| if (invalidate) |
| { |
| /* avoid recursive cache invalidation */ |
| in_recursion = true; |
| InvalidateSystemCaches(); |
| in_recursion = false; |
| } |
| } |
| } |
| #endif |
| } |
| |
| /* |
| * AtStart_Inval |
| * Initialize inval lists at start of a main transaction. |
| */ |
| void |
| AtStart_Inval(void) |
| { |
| Assert(transInvalInfo == NULL); |
| transInvalInfo = (TransInvalidationInfo *) |
| MemoryContextAllocZero(TopTransactionContext, |
| sizeof(TransInvalidationInfo)); |
| transInvalInfo->my_level = GetCurrentTransactionNestLevel(); |
| |
| } |
| |
| /* |
| * AtPrepare_Inval |
| * Save the inval lists state at 2PC transaction prepare. |
| * |
| * In this phase we just generate 2PC records for all the pending invalidation |
| * work. |
| */ |
| void |
| AtPrepare_Inval(void) |
| { |
| /* Must be at top of stack */ |
| Assert(transInvalInfo != NULL && transInvalInfo->parent == NULL); |
| |
| /* |
| * Relcache init file invalidation requires processing both before and |
| * after we send the SI messages. |
| */ |
| if (transInvalInfo->RelcacheInitFileInval) |
| RegisterTwoPhaseRecord(TWOPHASE_RM_INVAL_ID, TWOPHASE_INFO_FILE_BEFORE, |
| NULL, 0); |
| |
| AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs, |
| &transInvalInfo->CurrentCmdInvalidMsgs); |
| |
| ProcessInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs, |
| PersistInvalidationMessage); |
| |
| if (transInvalInfo->RelcacheInitFileInval) |
| RegisterTwoPhaseRecord(TWOPHASE_RM_INVAL_ID, TWOPHASE_INFO_FILE_AFTER, |
| NULL, 0); |
| } |
| |
| /* |
| * PostPrepare_Inval |
| * Clean up after successful PREPARE. |
| * |
| * Here, we want to act as though the transaction aborted, so that we will |
| * undo any syscache changes it made, thereby bringing us into sync with the |
| * outside world, which doesn't believe the transaction committed yet. |
| * |
| * If the prepared transaction is later aborted, there is nothing more to |
| * do; if it commits, we will receive the consequent inval messages just |
| * like everyone else. |
| */ |
| void |
| PostPrepare_Inval(void) |
| { |
| AtEOXact_Inval(false); |
| } |
| |
| /* |
| * AtSubStart_Inval |
| * Initialize inval lists at start of a subtransaction. |
| */ |
| void |
| AtSubStart_Inval(void) |
| { |
| TransInvalidationInfo *myInfo; |
| |
| Assert(transInvalInfo != NULL); |
| myInfo = (TransInvalidationInfo *) |
| MemoryContextAllocZero(TopTransactionContext, |
| sizeof(TransInvalidationInfo)); |
| myInfo->parent = transInvalInfo; |
| myInfo->my_level = GetCurrentTransactionNestLevel(); |
| |
| transInvalInfo = myInfo; |
| |
| } |
| |
| /* |
| * PersistInvalidationMessage |
| * Write an invalidation message to the 2PC state file. |
| */ |
| static void |
| PersistInvalidationMessage(SharedInvalidationMessage *msg) |
| { |
| RegisterTwoPhaseRecord(TWOPHASE_RM_INVAL_ID, TWOPHASE_INFO_MSG, |
| msg, sizeof(SharedInvalidationMessage)); |
| } |
| |
| /* |
| * inval_twophase_postcommit |
| * Process an invalidation message from the 2PC state file. |
| */ |
| void |
| inval_twophase_postcommit(TransactionId xid, uint16 info, |
| void *recdata, uint32 len) |
| { |
| SharedInvalidationMessage *msg; |
| |
| switch (info) |
| { |
| case TWOPHASE_INFO_MSG: |
| msg = (SharedInvalidationMessage *) recdata; |
| Assert(len == sizeof(SharedInvalidationMessage)); |
| SendSharedInvalidMessages(msg, 1); |
| break; |
| case TWOPHASE_INFO_FILE_BEFORE: |
| RelationCacheInitFileInvalidate(true); |
| break; |
| case TWOPHASE_INFO_FILE_AFTER: |
| RelationCacheInitFileInvalidate(false); |
| break; |
| default: |
| Assert(false); |
| break; |
| } |
| } |
| |
| |
| /* |
| * AtEOXact_Inval |
| * Process queued-up invalidation messages at end of main transaction. |
| * |
| * If isCommit, we must send out the messages in our PriorCmdInvalidMsgs list |
| * to the shared invalidation message queue. Note that these will be read |
| * not only by other backends, but also by our own backend at the next |
| * transaction start (via AcceptInvalidationMessages). This means that |
| * we can skip immediate local processing of anything that's still in |
| * CurrentCmdInvalidMsgs, and just send that list out too. |
| * |
| * If not isCommit, we are aborting, and must locally process the messages |
| * in PriorCmdInvalidMsgs. No messages need be sent to other backends, |
| * since they'll not have seen our changed tuples anyway. We can forget |
| * about CurrentCmdInvalidMsgs too, since those changes haven't touched |
| * the caches yet. |
| * |
| * In any case, reset the various lists to empty. We need not physically |
| * free memory here, since TopTransactionContext is about to be emptied |
| * anyway. |
| * |
| * Note: |
| * This should be called as the last step in processing a transaction. |
| */ |
| void |
| AtEOXact_Inval(bool isCommit) |
| { |
| if (isCommit) |
| { |
| /* Must be at top of stack */ |
| Assert(transInvalInfo != NULL && transInvalInfo->parent == NULL); |
| |
| /* |
| * Relcache init file invalidation requires processing both before and |
| * after we send the SI messages. However, we need not do anything |
| * unless we committed. |
| */ |
| if (transInvalInfo->RelcacheInitFileInval) |
| RelationCacheInitFileInvalidate(true); |
| |
| AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs, |
| &transInvalInfo->CurrentCmdInvalidMsgs); |
| |
| ProcessInvalidationMessageMulti(&transInvalInfo->PriorCmdInvalidMsgs, |
| SendSharedInvalidMessages); |
| |
| if (transInvalInfo->RelcacheInitFileInval) |
| { |
| RelationCacheInitFileInvalidate(false); |
| } |
| |
| } |
| else if (transInvalInfo != NULL) |
| { |
| /* Must be at top of stack */ |
| Assert(transInvalInfo->parent == NULL); |
| |
| ProcessInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs, |
| LocalExecuteInvalidationMessage); |
| } |
| |
| transInvalInfo = NULL; |
| } |
| |
| /* |
| * AtEOSubXact_Inval |
| * Process queued-up invalidation messages at end of subtransaction. |
| * |
| * If isCommit, process CurrentCmdInvalidMsgs if any (there probably aren't), |
| * and then attach both CurrentCmdInvalidMsgs and PriorCmdInvalidMsgs to the |
| * parent's PriorCmdInvalidMsgs list. |
| * |
| * If not isCommit, we are aborting, and must locally process the messages |
| * in PriorCmdInvalidMsgs. No messages need be sent to other backends. |
| * We can forget about CurrentCmdInvalidMsgs too, since those changes haven't |
| * touched the caches yet. |
| * |
| * In any case, pop the transaction stack. We need not physically free memory |
| * here, since CurTransactionContext is about to be emptied anyway |
| * (if aborting). Beware of the possibility of aborting the same nesting |
| * level twice, though. |
| */ |
| void |
| AtEOSubXact_Inval(bool isCommit) |
| { |
| int my_level = GetCurrentTransactionNestLevel(); |
| TransInvalidationInfo *myInfo = transInvalInfo; |
| |
| if (isCommit) |
| { |
| /* Must be at non-top of stack */ |
| Assert(myInfo != NULL && myInfo->parent != NULL); |
| Assert(myInfo->my_level == my_level); |
| |
| /* If CurrentCmdInvalidMsgs still has anything, fix it */ |
| CommandEndInvalidationMessages(); |
| |
| /* Pass up my inval messages to parent */ |
| AppendInvalidationMessages(&myInfo->parent->PriorCmdInvalidMsgs, |
| &myInfo->PriorCmdInvalidMsgs); |
| |
| /* Pending relcache inval becomes parent's problem too */ |
| if (myInfo->RelcacheInitFileInval) |
| myInfo->parent->RelcacheInitFileInval = true; |
| |
| /* Pop the transaction state stack */ |
| transInvalInfo = myInfo->parent; |
| |
| /* Need not free anything else explicitly */ |
| pfree(myInfo); |
| } |
| else if (myInfo != NULL && myInfo->my_level == my_level) |
| { |
| /* Must be at non-top of stack */ |
| Assert(myInfo->parent != NULL); |
| |
| ProcessInvalidationMessages(&myInfo->PriorCmdInvalidMsgs, |
| LocalExecuteInvalidationMessage); |
| |
| /* Pop the transaction state stack */ |
| transInvalInfo = myInfo->parent; |
| |
| /* Need not free anything else explicitly */ |
| pfree(myInfo); |
| } |
| |
| } |
| |
| /* |
| * CommandEndInvalidationMessages |
| * Process queued-up invalidation messages at end of one command |
| * in a transaction. |
| * |
| * Here, we send no messages to the shared queue, since we don't know yet if |
| * we will commit. We do need to locally process the CurrentCmdInvalidMsgs |
| * list, so as to flush our caches of any entries we have outdated in the |
| * current command. We then move the current-cmd list over to become part |
| * of the prior-cmds list. |
| * |
| * Note: |
| * This should be called during CommandCounterIncrement(), |
| * after we have advanced the command ID. |
| */ |
| void |
| CommandEndInvalidationMessages(void) |
| { |
| /* |
| * You might think this shouldn't be called outside any transaction, but |
| * bootstrap does it, and also ABORT issued when not in a transaction. So |
| * just quietly return if no state to work on. |
| */ |
| if (transInvalInfo == NULL) |
| return; |
| |
| ProcessInvalidationMessages(&transInvalInfo->CurrentCmdInvalidMsgs, |
| LocalExecuteInvalidationMessage); |
| AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs, |
| &transInvalInfo->CurrentCmdInvalidMsgs); |
| } |
| |
| /* |
| * CacheInvalidateHeapTuple |
| * Register the given tuple for invalidation at end of command |
| * (ie, current command is creating or outdating this tuple). |
| */ |
| void |
| CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple, SysCacheInvalidateAction action) |
| { |
| PrepareForTupleInvalidation(relation, tuple, action); |
| } |
| |
| /* |
| * CacheInvalidateRelcache |
| * Register invalidation of the specified relation's relcache entry |
| * at end of command. |
| * |
| * This is used in places that need to force relcache rebuild but aren't |
| * changing any of the tuples recognized as contributors to the relcache |
| * entry by PrepareForTupleInvalidation. (An example is dropping an index.) |
| * We assume in particular that relfilenode/reltablespace aren't changing |
| * (so the rd_node value is still good). |
| * |
| * XXX most callers of this probably don't need to force an smgr flush. |
| */ |
| void |
| CacheInvalidateRelcache(Relation relation) |
| { |
| Oid databaseId; |
| Oid relationId; |
| |
| relationId = RelationGetRelid(relation); |
| if (relation->rd_rel->relisshared) |
| databaseId = InvalidOid; |
| else |
| databaseId = MyDatabaseId; |
| |
| RegisterRelcacheInvalidation(databaseId, relationId); |
| RegisterSmgrInvalidation(relation->rd_node); |
| } |
| |
| /* |
| * CacheInvalidateRelcacheByTuple |
| * As above, but relation is identified by passing its pg_class tuple. |
| */ |
| void |
| CacheInvalidateRelcacheByTuple(HeapTuple classTuple) |
| { |
| Form_pg_class classtup = (Form_pg_class) GETSTRUCT(classTuple); |
| Oid databaseId; |
| Oid relationId; |
| RelFileNode rnode; |
| |
| relationId = HeapTupleGetOid(classTuple); |
| if (classtup->relisshared) |
| databaseId = InvalidOid; |
| else |
| databaseId = MyDatabaseId; |
| |
| if (classtup->reltablespace) |
| rnode.spcNode = classtup->reltablespace; |
| else if (relstorage_is_ao(classtup->relstorage)) |
| rnode.spcNode = get_database_dts(databaseId); |
| else |
| rnode.spcNode = MyDatabaseTableSpace; |
| |
| rnode.dbNode = databaseId; |
| rnode.relNode = classtup->relfilenode; |
| |
| RegisterRelcacheInvalidation(databaseId, relationId); |
| RegisterSmgrInvalidation(rnode); |
| } |
| |
| /* |
| * CacheInvalidateRelcacheByRelid |
| * As above, but relation is identified by passing its OID. |
| * This is the least efficient of the three options; use one of |
| * the above routines if you have a Relation or pg_class tuple. |
| */ |
| void |
| CacheInvalidateRelcacheByRelid(Oid relid) |
| { |
| HeapTuple tup; |
| |
| tup = SearchSysCache(RELOID, |
| ObjectIdGetDatum(relid), |
| 0, 0, 0); |
| if (!HeapTupleIsValid(tup)) |
| elog(ERROR, "cache lookup failed for relation %u", relid); |
| CacheInvalidateRelcacheByTuple(tup); |
| ReleaseSysCache(tup); |
| } |
| |
| /* |
| * CacheRegisterSyscacheCallback |
| * Register the specified function to be called for all future |
| * invalidation events in the specified cache. |
| * |
| * NOTE: currently, the OID argument to the callback routine is not |
| * provided for syscache callbacks; the routine doesn't really get any |
| * useful info as to exactly what changed. It should treat every call |
| * as a "cache flush" request. |
| */ |
| void |
| CacheRegisterSyscacheCallback(int cacheid, |
| CacheCallbackFunction func, |
| Datum arg) |
| { |
| if (cache_callback_count >= MAX_CACHE_CALLBACKS) |
| elog(FATAL, "out of cache_callback_list slots"); |
| |
| cache_callback_list[cache_callback_count].id = cacheid; |
| cache_callback_list[cache_callback_count].function = func; |
| cache_callback_list[cache_callback_count].arg = arg; |
| |
| ++cache_callback_count; |
| } |
| |
| /* |
| * CacheRegisterRelcacheCallback |
| * Register the specified function to be called for all future |
| * relcache invalidation events. The OID of the relation being |
| * invalidated will be passed to the function. |
| * |
| * NOTE: InvalidOid will be passed if a cache reset request is received. |
| * In this case the called routines should flush all cached state. |
| */ |
| void |
| CacheRegisterRelcacheCallback(CacheCallbackFunction func, |
| Datum arg) |
| { |
| if (cache_callback_count >= MAX_CACHE_CALLBACKS) |
| elog(FATAL, "out of cache_callback_list slots"); |
| |
| cache_callback_list[cache_callback_count].id = SHAREDINVALRELCACHE_ID; |
| cache_callback_list[cache_callback_count].function = func; |
| cache_callback_list[cache_callback_count].arg = arg; |
| |
| ++cache_callback_count; |
| } |