blob: d12f4dde227a75f351c152677a45dd0ecefbf236 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.utils;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.LongAdder;
import org.apache.sysml.api.DMLScript;
/**
* Measures performance numbers when GPU mode is enabled
* Printed as part of {@link Statistics}.
*/
public class GPUStatistics {
private static int iNoOfExecutedGPUInst = 0;
public static long cudaInitTime = 0;
public static long cudaLibrariesInitTime = 0;
public static LongAdder cudaSparseToDenseTime = new LongAdder(); // time spent in converting sparse matrix block to dense
public static LongAdder cudaDenseToSparseTime = new LongAdder(); // time spent in converting dense matrix block to sparse
public static LongAdder cudaSparseConversionTime = new LongAdder(); // time spent in converting between sparse block types
public static LongAdder cudaSparseToDenseCount = new LongAdder();
public static LongAdder cudaDenseToSparseCount = new LongAdder();
public static LongAdder cudaSparseConversionCount = new LongAdder();
public static LongAdder cudaAllocTime = new LongAdder(); // time spent in allocating memory on the GPU
public static LongAdder cudaDeAllocTime = new LongAdder(); // time spent in deallocating memory on the GPU
public static LongAdder cudaMemSet0Time = new LongAdder(); // time spent in setting memory to 0 on the GPU (part of reusing and for new allocates)
public static LongAdder cudaToDevTime = new LongAdder(); // time spent in copying data from host (CPU) to device (GPU) memory
public static LongAdder cudaFromDevTime = new LongAdder(); // time spent in copying data from device to host
public static LongAdder cudaEvictTime = new LongAdder(); // time spent in eviction
public static LongAdder cudaForcedClearLazyFreedEvictTime = new LongAdder(); // time spent in forced lazy eviction
public static LongAdder cudaForcedClearUnpinnedEvictTime = new LongAdder(); // time spent in forced unpinned eviction
public static LongAdder cudaAllocCount = new LongAdder();
public static LongAdder cudaDeAllocCount = new LongAdder();
public static LongAdder cudaMemSet0Count = new LongAdder();
public static LongAdder cudaToDevCount = new LongAdder();
public static LongAdder cudaFromDevCount = new LongAdder();
public static LongAdder cudaEvictionCount = new LongAdder();
// Per instruction miscellaneous timers.
// Used to record events in a CP Heavy Hitter instruction and
// provide a breakdown of how time was spent in that instruction
private static HashMap<String, HashMap<String, Long>> _cpInstMiscTime = new HashMap<> ();
private static HashMap<String, HashMap<String, Long>> _cpInstMiscCount = new HashMap<> ();
/**
* Resets the miscellaneous timers & counters
*/
public static void resetMiscTimers(){
_cpInstMiscTime.clear();
_cpInstMiscCount.clear();
}
/**
* Resets all the cuda counters and timers, including the misc timers & counters
*/
public static void reset(){
cudaInitTime = 0;
cudaLibrariesInitTime = 0;
cudaAllocTime.reset();
cudaDeAllocTime.reset();
cudaMemSet0Time.reset();
cudaMemSet0Count.reset();
cudaToDevTime.reset();
cudaFromDevTime.reset();
cudaEvictTime.reset();
cudaForcedClearLazyFreedEvictTime.reset();
cudaForcedClearUnpinnedEvictTime.reset();
cudaAllocCount.reset();
cudaDeAllocCount.reset();
cudaToDevCount.reset();
cudaFromDevCount.reset();
cudaEvictionCount.reset();
resetMiscTimers();
}
public static synchronized void setNoOfExecutedGPUInst(int numJobs) {
iNoOfExecutedGPUInst = numJobs;
}
public static synchronized void incrementNoOfExecutedGPUInst() {
iNoOfExecutedGPUInst ++;
}
public static synchronized int getNoOfExecutedGPUInst() {
return iNoOfExecutedGPUInst;
}
/**
* "Maintains" or adds time to miscellaneous timers per instruction/op, also increments associated count
* @param instructionName name of the instruction/op
* @param miscTimer name of the miscellaneous timer
* @param timeNanos time in nano seconds
* @param incrementCount how much to increment the count of the miscTimer by
*/
public synchronized static void maintainCPMiscTimes( String instructionName, String miscTimer, long timeNanos, long incrementCount)
{
if (!(DMLScript.FINEGRAINED_STATISTICS))
return;
HashMap<String, Long> miscTimesMap = _cpInstMiscTime.get(instructionName);
if (miscTimesMap == null) {
miscTimesMap = new HashMap<>();
_cpInstMiscTime.put(instructionName, miscTimesMap);
}
Long oldVal = miscTimesMap.get(miscTimer);
Long newVal = timeNanos + ((oldVal!=null) ? oldVal : 0);
miscTimesMap.put(miscTimer, newVal);
HashMap<String, Long> miscCountMap = _cpInstMiscCount.get(instructionName);
if (miscCountMap == null){
miscCountMap = new HashMap<>();
_cpInstMiscCount.put(instructionName, miscCountMap);
}
Long oldCnt = miscCountMap.get(miscTimer);
Long newCnt = incrementCount + ((oldCnt!=null) ? oldCnt : 0);
miscCountMap.put(miscTimer, newCnt);
}
/**
* "Maintains" or adds time to miscellaneous timers per instruction/op, also increments associated count by 1
* @param instructionName name of the instruction/op
* @param miscTimer name of the miscellaneous timer
* @param timeNanos time in nano seconds
*/
public synchronized static void maintainCPMiscTimes( String instructionName, String miscTimer, long timeNanos){
maintainCPMiscTimes(instructionName, miscTimer, timeNanos, 1);
}
/**
* Used to print misc timers (and their counts) for a given instruction/op
* @param instructionName name of the instruction/op
* @return a formatted string of misc timers for a given instruction/op
*/
public static String getStringForCPMiscTimesPerInstruction(String instructionName) {
StringBuffer sb = new StringBuffer();
HashMap<String, Long> miscTimerMap = _cpInstMiscTime.get(instructionName);
if (miscTimerMap != null) {
List<Map.Entry<String, Long>> sortedList = new ArrayList<>(miscTimerMap.entrySet());
// Sort the times to display by the most expensive first
Collections.sort(sortedList, new Comparator<Map.Entry<String, Long>>() {
@Override
public int compare(Map.Entry<String, Long> o1, Map.Entry<String, Long> o2) {
return (int) (o1.getValue() - o2.getValue());
}
});
Iterator<Map.Entry<String, Long>> miscTimeIter = sortedList.iterator();
HashMap<String, Long> miscCountMap = _cpInstMiscCount.get(instructionName);
while (miscTimeIter.hasNext()) {
Map.Entry<String, Long> e = miscTimeIter.next();
String miscTimerName = e.getKey();
Long miscTimerTime = e.getValue();
Long miscCount = miscCountMap.get(miscTimerName);
sb.append(miscTimerName + "[" + String.format("%.3f", (double) miscTimerTime / 1000000000.0) + "s," + miscCount + "]");
if (miscTimeIter.hasNext())
sb.append(", ");
}
}
return sb.toString();
}
/**
* Used to print out cuda timers & counters
* @return a formatted string of cuda timers & counters
*/
public static String getStringForCudaTimers() {
StringBuffer sb = new StringBuffer();
sb.append("CUDA/CuLibraries init time:\t" + String.format("%.3f", cudaInitTime*1e-9) + "/"
+ String.format("%.3f", cudaLibrariesInitTime*1e-9) + " sec.\n");
sb.append("Number of executed GPU inst:\t" + getNoOfExecutedGPUInst() + ".\n");
sb.append("GPU mem tx time (alloc/dealloc/set0/toDev/fromDev/evict):\t"
+ String.format("%.3f", cudaAllocTime.longValue()*1e-9) + "/"
+ String.format("%.3f", cudaDeAllocTime.longValue()*1e-9) + "/"
+ String.format("%.3f", cudaMemSet0Time.longValue()*1e-9) + "/"
+ String.format("%.3f", cudaToDevTime.longValue()*1e-9) + "/"
+ String.format("%.3f", cudaFromDevTime.longValue()*1e-9) + "/"
+ String.format("%.3f", cudaEvictTime.longValue()*1e-9) + " sec.\n");
sb.append("GPU mem tx count (alloc/dealloc/set0/toDev/fromDev/evict):\t"
+ cudaAllocCount.longValue() + "/"
+ cudaDeAllocCount.longValue() + "/"
+ cudaMemSet0Count.longValue() + "/"
+ cudaSparseConversionCount.longValue() + "/"
+ cudaToDevCount.longValue() + "/"
+ cudaFromDevCount.longValue() + "/"
+ cudaEvictionCount.longValue() + ".\n");
sb.append("GPU conversion time (sparseConv/sp2dense/dense2sp):\t"
+ String.format("%.3f", cudaSparseConversionTime.longValue()*1e-9) + "/"
+ String.format("%.3f", cudaSparseToDenseTime.longValue()*1e-9) + "/"
+ String.format("%.3f", cudaDenseToSparseTime.longValue()*1e-9) + " sec.\n");
sb.append("GPU conversion count (sparseConv/sp2dense/dense2sp):\t"
+ cudaSparseConversionCount.longValue() + "/"
+ cudaSparseToDenseCount.longValue() + "/"
+ cudaDenseToSparseCount.longValue() + ".\n");
return sb.toString();
}
}