| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.sysds.runtime.instructions.gpu.context; |
| |
| import java.util.ArrayList; |
| import java.util.List; |
| |
| import org.apache.sysds.api.DMLScript; |
| import org.apache.sysds.runtime.lineage.LineageCacheEntry; |
| |
/**
 * Background task meant to evict lineage-cached GPU objects.
 *
 * <p>The eviction logic is currently disabled: it is retained inside
 * {@link #run()} as a block comment, so executing this task does nothing.
 * The FIXME inside the disabled block records the reason, namely that the
 * background eviction needs to be synchronized with instructions that free
 * GPU memory and add to the rmVar cache in parallel.
 */
public class GPUMemoryEviction implements Runnable
{
	/**
	 * Requested number of entries to evict. The disabled eviction loop only
	 * applies this limit when the value is greater than zero.
	 */
	int numEvicts;

	/**
	 * Creates an eviction task with a requested number of evictions.
	 *
	 * @param num requested number of entries to evict
	 */
	public GPUMemoryEviction(int num) {
		numEvicts = num;
	}

	/**
	 * Creates an eviction task without a requested number of evictions
	 * ({@code numEvicts} is set to 0).
	 */
	public GPUMemoryEviction() {
		this(0);
	}

	/**
	 * Currently a no-op. The original implementation is kept below, together
	 * with the local state it needs, so that it can be restored as a unit.
	 */
	@Override
	public void run() {
		// Nothing is executed while the background eviction is disabled. The
		// locals used by the loop live inside the disabled block as well, so
		// that no dead work (list allocation, timer read) happens per call.

		/*
		//long currentAvailableMemory = allocator.getAvailableMemory();
		List<LineageCacheEntry> lockedOrLiveEntries = new ArrayList<>();
		int count = 0;

		// Stop if 1) the requested number of entries has been evicted, 2) the
		// parallel CPU instruction has ended, or 3) no non-live entries are
		// left in the cache.
		long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
		while (!LineageGPUCacheEviction.isGPUCacheEmpty())
		{
			if (LineageCacheConfig.STOPBACKGROUNDEVICTION)
				// This logic reduces #evictions if the CPU instruction is so small
				// that it ends before the background thread reaches this condition.
				// However, this check decreases race conditions.
				break;

			// NOTE(review): count is incremented after each eviction, so this
			// condition appears to allow numEvicts + 1 evictions; confirm
			// whether 'count >= numEvicts' was intended before re-enabling.
			if (numEvicts > 0 && count > numEvicts)
				break;

			LineageCacheEntry le = LineageGPUCacheEviction.pollFirstEntry();
			GPUObject cachedGpuObj = le.getGPUObject();
			GPUObject headGpuObj = cachedGpuObj.lineageCachedChainHead != null
				? cachedGpuObj.lineageCachedChainHead : cachedGpuObj;
			// Check and continue if any object in the linked list is locked
			boolean lockedOrLive = false;
			GPUObject nextgpuObj = headGpuObj;
			while (nextgpuObj != null) {
				if (!nextgpuObj.isrmVarPending() || nextgpuObj.isLocked()) // live or locked
					lockedOrLive = true;
				nextgpuObj = nextgpuObj.nextLineageCachedEntry;
			}
			if (lockedOrLive) {
				lockedOrLiveEntries.add(le);
				continue;
			}

			// TODO: First remove the gobj chains that don't contain any live and dirty objects.
			//currentAvailableMemory += headGpuObj.getSizeOnDevice();

			// Copy from device to host for all live and dirty objects
			boolean copied = false;
			nextgpuObj = headGpuObj;
			while (nextgpuObj != null) {
				// Keeping isLinCached as True here will save data deletion by copyFromDeviceToHost
				if (!nextgpuObj.isrmVarPending() && nextgpuObj.isDirty()) { //live and dirty
					nextgpuObj.copyFromDeviceToHost(null, true, true);
					copied = true;
				}
				nextgpuObj.setIsLinCached(false);
				nextgpuObj = nextgpuObj.nextLineageCachedEntry;
			}

			// Copy from device cache to CPU lineage cache if not already copied
			LineageGPUCacheEviction.copyToHostCache(le, null, copied);

			// For all the other objects, remove and clear data (only once)
			nextgpuObj = headGpuObj;
			boolean freed = false;
			synchronized (nextgpuObj.getGPUContext().getMemoryManager().getGPUMatrixMemoryManager().gpuObjects) {
				while (nextgpuObj != null) {
					// If not live or live but not dirty
					if (nextgpuObj.isrmVarPending() || !nextgpuObj.isDirty()) {
						if (!freed) {
							nextgpuObj.clearData(null, true);
							//FIXME: adding to rmVar cache causes multiple failures due to concurrent
							//access to the rmVar cache and other data structures. VariableCP instruction
							//and other instructions free memory and add to rmVar cache in parallel to
							//the background eviction task, which needs to be synchronized.
							freed = true;
						}
						else
							nextgpuObj.clearGPUObject();
					}
					nextgpuObj = nextgpuObj.nextLineageCachedEntry;
				}
			}
			// Clear the GPUObjects chain
			GPUObject currgpuObj = headGpuObj;
			while (currgpuObj.nextLineageCachedEntry != null) {
				nextgpuObj = currgpuObj.nextLineageCachedEntry;
				currgpuObj.lineageCachedChainHead = null;
				currgpuObj.nextLineageCachedEntry = null;
				nextgpuObj.lineageCachedChainHead = null;
				currgpuObj = nextgpuObj;
			}

			//if(currentAvailableMemory >= size)
			// This doesn't guarantee allocation due to fragmented freed memory
			// A = cudaMallocNoWarn(tmpA, size, null);
			if (DMLScript.STATISTICS) {
				LineageCacheStatistics.incrementGpuAsyncEvicts();
			}
			count++;
		}

		// Add the locked entries back to the eviction queue
		if (!lockedOrLiveEntries.isEmpty())
			LineageGPUCacheEviction.addEntryList(lockedOrLiveEntries);

		if (DMLScript.STATISTICS) //TODO: dedicated statistics for lineage
			GPUStatistics.cudaEvictTime.add(System.nanoTime() - t0);
		*/
	}
}