| /*------------------------------------------------------------------------- |
| * |
| * runaway_cleaner.c |
| * Implementation of the runaway cleaner that checks if a session is marked |
| * as runaway (i.e., consuming too much vmem) by the red-zone handler |
| * (redzone_handler.c). The runaway cleaner cleans up such session by triggering |
| * an elog(ERROR, ...) which rolls back transaction and releases memory. Once |
| * cleanup is finished, the runaway cleaner also informs the red zone handler |
| * so that a new runaway session can be chosen if necessary. |
| * |
| * Copyright (c) 2014-Present VMware, Inc. or its affiliates. |
| * |
| * |
| * IDENTIFICATION |
| * src/backend/utils/mmgr/runaway_cleaner.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| |
| #include "postgres.h" |
| |
| #include "access/xact.h" |
| #include "cdb/cdbvars.h" |
| #include "miscadmin.h" |
| #include "port/atomics.h" |
| #include "utils/faultinjector.h" |
| #include "utils/resgroup.h" |
| #include "utils/resource_manager.h" |
| #include "utils/session_state.h" |
| #include "utils/vmem_tracker.h" |
| |
| /* External dependencies within the runaway cleanup framework */ |
| extern bool vmemTrackerInited; |
| extern bool isProcessActive; |
| extern EventVersion activationVersion; |
| extern EventVersion deactivationVersion; |
| extern volatile uint32 *isRunawayDetector; |
| extern volatile EventVersion *latestRunawayVersion; |
| |
| /* |
| * The cleanupCountdown in the SessionState determines how many |
| * processes we need to cleanup to declare a session clean. If it |
| * reaches 0, we mark the session clean. However, -1 indicates |
| * that the session is either done cleaning previous runaway event |
| * or it never started a cleaning. |
| */ |
| #define CLEANUP_COUNTDOWN_BEFORE_RUNAWAY -1 |
| |
| /* The runaway version for which this process started cleaning up */ |
| static EventVersion beginCleanupRunawayVersion = 0; |
| |
| /* The runaway version for which this process finished cleaning up */ |
| static EventVersion endCleanupRunawayVersion = 0; |
| |
| void RunawayCleaner_Init(void); |
| void RunawayCleaner_StartCleanup(void); |
| bool RunawayCleaner_IsCleanupInProgress(void); |
| |
| /* |
| * Initializes the per-process states of the runaway cleaner. |
| */ |
| void |
| RunawayCleaner_Init() |
| { |
| beginCleanupRunawayVersion = 0; |
| endCleanupRunawayVersion = 0; |
| } |
| |
| /* Returns true if the current process should start a runaway cleanup */ |
| static bool |
| RunawayCleaner_ShouldStartRunawayCleanup() |
| { |
| if (NULL != MySessionState && MySessionState->runawayStatus != RunawayStatus_NotRunaway && |
| beginCleanupRunawayVersion != *latestRunawayVersion) |
| { |
| AssertImply(isProcessActive, activationVersion >= deactivationVersion); |
| AssertImply(!isProcessActive, deactivationVersion >= activationVersion); |
| |
| /* |
| * We are marked as runaway. Therefore, if the runaway event happened before deactivation, |
| * we must have a version counter increment |
| */ |
| AssertImply(*latestRunawayVersion < deactivationVersion && !isProcessActive, activationVersion < deactivationVersion); |
| |
| if (isProcessActive && *latestRunawayVersion > activationVersion) |
| { |
| /* Active process and the runaway event came after the activation */ |
| return true; |
| } |
| else if (!isProcessActive && *latestRunawayVersion < deactivationVersion && |
| *latestRunawayVersion > activationVersion) |
| { |
| /* |
| * The process is deactivated, but there is a pending runaway event before |
| * the deactivation for which this process never cleaned up |
| */ |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| /* |
| * Determine if the runaway cleanup should be handled by aborting the current |
| * query or must be ignored. Since the cleanup can be attempted from multiple |
| * places, it is important to first validate if calling elog(ERROR) is safe and |
| * of value. |
| */ |
| static bool |
| RunawayCleaner_ShouldCancelQuery() |
| { |
| /* VMEM tracker not being used */ |
| if (!vmemTrackerInited) |
| return false; |
| |
| /* In critical section or when holding off on handling interrupts */ |
| if (CritSectionCount != 0 || InterruptHoldoffCount != 0) |
| return false; |
| |
| /* |
| * Cleaning up QEs that are not executing a valid command may cause the QD to |
| * get stuck [MPP-24950] |
| */ |
| if (gp_command_count <= 0) |
| return false; |
| |
| /* |
| * If not currently executing a transaction, aborting it won't release any |
| * more resources. |
| */ |
| if (!IsTransactionState()) |
| return false; |
| |
| /* Ok, we are actively executing a query */ |
| |
| if (MySessionState->runawayStatus == RunawayStatus_PrimaryRunawaySession) |
| { |
| /* |
| * Abort the query if it is actively executing and has been flagged as |
| * consuming the most memory |
| */ |
| return true; |
| } |
| else |
| { |
| Assert(MySessionState->runawayStatus == RunawayStatus_SecondaryRunawaySession); |
| |
| /* |
| * If this process was flagged as a runaway session inspite another session |
| * using more memory, only abort this query if the current user is not a |
| * superuser. This is to ensure that critical administrative commands (such |
| * as database restarts), which are done as superuser, are not interrupted |
| * by the runaway cleaner. |
| */ |
| return !superuser(); |
| } |
| } |
| |
| /* |
| * Starts a runaway cleanup by triggering an ERROR if the VMEM tracker is active |
| * and a commit is not already in progress. Otherwise, it marks the process as clean |
| */ |
| void |
| RunawayCleaner_StartCleanup() |
| { |
| /* |
| * Cleanup can be attempted from multiple places, such as before deactivating |
| * a process (if a pending runaway event) or periodically from CHECK_FOR_INTERRUPTS |
| * (indirectly via RedZoneHandler_DetectRunaway). We don't carry multiple cleanup |
| * for a single runaway event. Every time we *start* a cleanup process, we set the |
| * beginCleanupRunawayVersion to the runaway version for which we started cleaning |
| * up. Later on, if we reenter this method (e.g., another CHECK_FOR_INTERRUPTS() |
| * during cleanup), we can observe that the cleanup already started from this runaway |
| * event, and therefore we skip duplicate cleanup |
| */ |
| if (RunawayCleaner_ShouldStartRunawayCleanup()) |
| { |
| Assert(beginCleanupRunawayVersion < *latestRunawayVersion); |
| Assert(endCleanupRunawayVersion < *latestRunawayVersion); |
| |
| /* We don't want to clean up multiple times for same runaway event */ |
| beginCleanupRunawayVersion = *latestRunawayVersion; |
| |
| if (RunawayCleaner_ShouldCancelQuery()) |
| { |
| SIMPLE_FAULT_INJECTOR("runaway_cleanup"); |
| |
| ereport(ERROR, (errmsg("Canceling query because of high VMEM usage. Used: %dMB, available %dMB, red zone: %dMB", |
| VmemTracker_ConvertVmemChunksToMB(MySessionState->sessionVmem), VmemTracker_GetAvailableVmemMB(), |
| RedZoneHandler_GetRedZoneLimitMB()), errprintstack(true))); |
| } |
| |
| /* |
| * If we cannot error out because of a critical section or because we are a super user |
| * or for some other reason (such as the QE is not running any valid command, i.e., |
| * gp_command_count is not positive) simply declare this process as clean |
| */ |
| RunawayCleaner_RunawayCleanupDoneForProcess(true /* ignoredCleanup */); |
| } |
| } |
| |
| /* |
| * Resets the runaway flag and enables runaway detector. |
| * |
| * Note: this method should not need any additional locks. |
| * Either the MySessionState entry is being released, and |
| * we already have a lock on SessionState, and therefore, |
| * no new runaway detector can run until the lock is released. |
| * |
| * Alternatively, we may reset this while still in a live |
| * session. In such case, our runaway event versioning should |
| * ensure that every process of this session would do another round |
| * of cleanup if it is detected as a runaway session again. |
| */ |
| void |
| RunawayCleaner_RunawayCleanupDoneForSession() |
| { |
| Assert(NULL != MySessionState); |
| if (MySessionState->runawayStatus != RunawayStatus_NotRunaway) |
| { |
| /* The last runaway cleanup should have finished */ |
| Assert(endCleanupRunawayVersion == beginCleanupRunawayVersion); |
| Assert(endCleanupRunawayVersion == *latestRunawayVersion); |
| Assert(CLEANUP_COUNTDOWN_BEFORE_RUNAWAY == MySessionState->cleanupCountdown); |
| |
| MySessionState->runawayStatus = RunawayStatus_NotRunaway; |
| MySessionState->sessionVmemRunaway = 0; |
| MySessionState->commandCountRunaway = 0; |
| |
| /* |
| * Reset the exclusive runaway detector flag so that |
| * another runaway detector can be chosen |
| */ |
| *isRunawayDetector = 0; |
| } |
| } |
| |
| /* |
| * Marks the current process as clean. If all the processes are marked |
| * as clean for this session (i.e., cleanupCountdown == 0 in the |
| * MySessionState) then we reset session's runaway status as well as |
| * the runaway detector flag (i.e., a new runaway detector can run). |
| * |
| * Parameters: |
| * ignoredCleanup: whether the cleanup was ignored, i.e., no elog(ERROR, ...) |
| * was thrown. In such case a deactivated process is not reactivated as the |
| * deactivation didn't get interrupted. |
| */ |
| void |
| RunawayCleaner_RunawayCleanupDoneForProcess(bool ignoredCleanup) |
| { |
| /* |
| * We don't do anything if we don't have an ongoing cleanup, or we already finished |
| * cleanup once for the current runaway event |
| */ |
| if (beginCleanupRunawayVersion != *latestRunawayVersion || |
| endCleanupRunawayVersion == beginCleanupRunawayVersion) |
| { |
| /* Either we never started cleanup, or we already finished */ |
| return; |
| } |
| |
| /* Disable repeating call */ |
| endCleanupRunawayVersion = beginCleanupRunawayVersion; |
| |
| Assert(NULL != MySessionState); |
| /* |
| * As the current cleanup holds leverage on the cleanupCountdown, |
| * the session must stay as runaway at least until the current |
| * process marks itself clean |
| */ |
| Assert(MySessionState->runawayStatus != RunawayStatus_NotRunaway); |
| |
| /* We only cleanup if we were active when the runaway event happened */ |
| Assert((!isProcessActive && *latestRunawayVersion < deactivationVersion && |
| *latestRunawayVersion > activationVersion) || |
| (*latestRunawayVersion > activationVersion && |
| (activationVersion >= deactivationVersion && isProcessActive))); |
| |
| /* |
| * We don't reactivate if the process is already active or a deactivated |
| * process never errored out during deactivation (i.e., failed to complete |
| * deactivation) |
| */ |
| if (!isProcessActive && !ignoredCleanup) |
| { |
| Assert(1 == *isRunawayDetector); |
| Assert(0 < MySessionState->cleanupCountdown); |
| /* |
| * As the process threw ERROR instead of going into ReadCommand() blocking |
| * state, we have to reactivate the process from its current Deactivated |
| * state |
| */ |
| IdleTracker_ActivateProcess(); |
| } |
| |
| Assert(0 < MySessionState->cleanupCountdown); |
| #if USE_ASSERT_CHECKING |
| int cleanProgress = |
| #endif |
| pg_atomic_add_fetch_u32((pg_atomic_uint32 *)&MySessionState->cleanupCountdown, -1); |
| Assert(0 <= cleanProgress); |
| |
| uint32 expected = 0; |
| bool finalCleaner = pg_atomic_compare_exchange_u32((pg_atomic_uint32 *) &MySessionState->cleanupCountdown, |
| &expected, CLEANUP_COUNTDOWN_BEFORE_RUNAWAY); |
| |
| if (finalCleaner) |
| { |
| /* |
| * The final cleaner is responsible to reset the runaway flag, |
| * and enable the runaway detection process. |
| */ |
| RunawayCleaner_RunawayCleanupDoneForSession(); |
| } |
| |
| /* |
| * Finally we are done with all critical cleanup, which includes releasing all our memory and |
| * releasing our cleanup counter so that another session can be marked as runaway, if needed. |
| * Now, we have some head room to actually record our usage. |
| */ |
| write_stderr("Logging memory usage because of runaway cleanup. Note, this is a post-cleanup logging and may be incomplete."); |
| MemoryContextStats(TopMemoryContext); |
| } |
| |
| /* |
| * Returns true if a cleanup is in progress (i.e., endCleanupRunawayVersion |
| * is smaller than beginCleanupRunawayVersion). |
| */ |
| bool |
| RunawayCleaner_IsCleanupInProgress() |
| { |
| Assert(endCleanupRunawayVersion <= beginCleanupRunawayVersion); |
| return endCleanupRunawayVersion < beginCleanupRunawayVersion; |
| } |