Add debug timing logs for GPU memory copy and kernel computation
diff --git a/processing/src/main/java/io/druid/query/aggregation/gpu/AbstractFloatKernelAggregator.java b/processing/src/main/java/io/druid/query/aggregation/gpu/AbstractFloatKernelAggregator.java
index 5723cdf..52f3ec0 100644
--- a/processing/src/main/java/io/druid/query/aggregation/gpu/AbstractFloatKernelAggregator.java
+++ b/processing/src/main/java/io/druid/query/aggregation/gpu/AbstractFloatKernelAggregator.java
@@ -41,6 +41,7 @@
protected final CLBuffer<Float> totalBuffer;
protected List<CLEvent> copyEvents;
private int totalBufferOffset = 0;
+ protected long t = 0;
public AbstractFloatKernelAggregator(
CLQueue queue,
@@ -64,10 +65,13 @@
CLBuffer<Float> buf = context.createFloatBuffer(CLMem.Usage.Input, Pointer.pointerToFloats(currentBuffer));
int bufRemaining = currentBuffer.remaining();
+
+ long t0 = System.nanoTime();
CLEvent copyEvent = buf.copyTo(queue, 0, bufRemaining, totalBuffer, totalBufferOffset);
- totalBufferOffset += bufRemaining;
- copyEvents.add(copyEvent);
copyEvent.waitFor();
+ t += System.nanoTime() - t0;
+ copyEvents.add(copyEvent);
+ totalBufferOffset += bufRemaining;
}
@Override
diff --git a/processing/src/main/java/io/druid/query/aggregation/gpu/FloatKernelAggregator.java b/processing/src/main/java/io/druid/query/aggregation/gpu/FloatKernelAggregator.java
index a8b98df..bbaf21b 100644
--- a/processing/src/main/java/io/druid/query/aggregation/gpu/FloatKernelAggregator.java
+++ b/processing/src/main/java/io/druid/query/aggregation/gpu/FloatKernelAggregator.java
@@ -19,6 +19,7 @@
package io.druid.query.aggregation.gpu;
+import com.metamx.common.logger.Logger;
import com.nativelibs4java.opencl.CLBuffer;
import com.nativelibs4java.opencl.CLContext;
import com.nativelibs4java.opencl.CLEvent;
@@ -37,6 +38,8 @@
private final CLProgram program;
private final CLKernel kernel;
+ private static final Logger log = new Logger(FloatKernelAggregator.class);
+
public FloatKernelAggregator(
FloatBufferSelector selector,
CLContext context,
@@ -53,6 +56,7 @@
@Override
public void run(IntBuffer buckets, ByteBuffer out, int position)
{
+ long t0 = System.nanoTime();
final int nBuckets = buckets.remaining() / 2;
final int n = (int)totalBuffer.getElementCount();
@@ -66,6 +70,9 @@
final Pointer<Float> outPtr = Pointer.pointerToFloats(out.asFloatBuffer());
CLEvent readEvt = kernelOut.read(queue, outPtr, false, addEvt);
readEvt.waitFor();
+
+ log.debug("Memory copy took: %d ns for %d bytes", this.t, totalBuffer.getByteCount());
+ log.debug("Computation took: %d ns", System.nanoTime() - t0);
}
@Override