/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysds.runtime.matrix.data;

import static jcuda.jcudnn.JCudnn.cudnnCreatePoolingDescriptor;
import static jcuda.jcudnn.JCudnn.cudnnCreateTensorDescriptor;
import static jcuda.jcudnn.JCudnn.cudnnDestroyTensorDescriptor;
import static jcuda.jcudnn.JCudnn.cudnnSetPooling2dDescriptor;
import static jcuda.jcudnn.JCudnn.cudnnSetTensor4dDescriptor;
import static jcuda.jcudnn.cudnnNanPropagation.CUDNN_PROPAGATE_NAN;
import static jcuda.jcudnn.cudnnPoolingMode.CUDNN_POOLING_MAX;
import static jcuda.jcudnn.cudnnPoolingMode.CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
import static jcuda.jcudnn.cudnnTensorFormat.CUDNN_TENSOR_NCHW;
import org.apache.sysds.runtime.instructions.gpu.context.GPUContext;
import org.apache.sysds.runtime.matrix.data.LibMatrixDNN.PoolingType;
import jcuda.jcudnn.cudnnPoolingDescriptor;
import jcuda.jcudnn.cudnnTensorDescriptor;

/**
 * This class is a wrapper that contains the data structures needed to invoke
 * the cudnn pooling functions (such as cudnnPoolingForward and cudnnPoolingBackward).
 *
 * It implements {@link java.lang.AutoCloseable} to simplify the LibMatrixCuDNN code and to avoid potential memory leaks.
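 *
 * A minimal usage sketch (hypothetical variable names; assumes a valid {@link GPUContext} and
 * a pooling configuration are in scope):
 * <pre>{@code
 * try (LibMatrixCuDNNPoolingDescriptors desc = LibMatrixCuDNNPoolingDescriptors
 *         .cudnnPoolingDescriptors(gCtx, instName, N, C, H, W, K, R, S,
 *             pad_h, pad_w, stride_h, stride_w, P, Q, PoolingType.MAX)) {
 *     // desc.xDesc, desc.yDesc and desc.poolingDesc can be passed to cudnnPoolingForward;
 *     // all descriptors are destroyed automatically when the try block exits.
 * }
 * }</pre>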
*/
public class LibMatrixCuDNNPoolingDescriptors implements java.lang.AutoCloseable {
	public cudnnTensorDescriptor xDesc;
	public cudnnTensorDescriptor yDesc;
	public cudnnTensorDescriptor dxDesc;
	public cudnnTensorDescriptor dyDesc;
	public cudnnPoolingDescriptor poolingDesc;

	@Override
	public void close() {
		if(xDesc != null)
			cudnnDestroyTensorDescriptor(xDesc);
		if(yDesc != null)
			cudnnDestroyTensorDescriptor(yDesc);
		if(dxDesc != null)
			cudnnDestroyTensorDescriptor(dxDesc);
		if(dyDesc != null)
			cudnnDestroyTensorDescriptor(dyDesc);
		if(poolingDesc != null)
			jcuda.jcudnn.JCudnn.cudnnDestroyPoolingDescriptor(poolingDesc);
	}

	/**
	 * Get descriptors for the pooling backward operation
	 *
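	 * For example (hypothetical values), a 2x2 pooling window applied with stride 2 and no
	 * padding to a 28x28 input yields P = (28 + 2*0 - 2)/2 + 1 = 14 and likewise Q = 14.
	 *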
	 * @param gCtx gpu context
	 * @param instName instruction name
	 * @param N batch size
	 * @param C number of channels
	 * @param H height of image
	 * @param W width of image
	 * @param K number of filters (not used for pooling)
	 * @param R height of pooling window
	 * @param S width of pooling window
	 * @param pad_h vertical padding
	 * @param pad_w horizontal padding
	 * @param stride_h vertical stride
	 * @param stride_w horizontal stride
	 * @param P output height, (H + 2*pad_h - R)/stride_h + 1
	 * @param Q output width, (W + 2*pad_w - S)/stride_w + 1
	 * @param poolingType type of pooling
	 * @return descriptor wrapper
	 */
	public static LibMatrixCuDNNPoolingDescriptors cudnnPoolingBackwardDescriptors(GPUContext gCtx,
			String instName, int N, int C, int H, int W, int K, int R,
			int S, int pad_h, int pad_w, int stride_h, int stride_w, int P,
			int Q, PoolingType poolingType) {
		LibMatrixCuDNNPoolingDescriptors ret = new LibMatrixCuDNNPoolingDescriptors();
		ret.xDesc = allocateTensorDescriptor(N, C, H, W);
		ret.yDesc = allocateTensorDescriptor(N, C, P, Q);
		ret.dxDesc = allocateTensorDescriptor(N, C, H, W);
		ret.dyDesc = allocateTensorDescriptor(N, C, P, Q);
		ret.poolingDesc = allocatePoolingDescriptor(R, S, pad_h, pad_w, stride_h, stride_w, poolingType);
		return ret;
	}

	/**
	 * Get descriptors for the pooling forward operation
	 *
	 * @param gCtx gpu context
	 * @param instName instruction name
	 * @param N batch size
	 * @param C number of channels
	 * @param H height of image
	 * @param W width of image
	 * @param K number of filters (not used for pooling)
	 * @param R height of pooling window
	 * @param S width of pooling window
	 * @param pad_h vertical padding
	 * @param pad_w horizontal padding
	 * @param stride_h vertical stride
	 * @param stride_w horizontal stride
	 * @param P output height, (H + 2*pad_h - R)/stride_h + 1
	 * @param Q output width, (W + 2*pad_w - S)/stride_w + 1
	 * @param poolingType type of pooling
	 * @return descriptor wrapper
	 */
	public static LibMatrixCuDNNPoolingDescriptors cudnnPoolingDescriptors(GPUContext gCtx,
			String instName, int N, int C, int H, int W, int K, int R,
			int S, int pad_h, int pad_w, int stride_h, int stride_w, int P,
			int Q, PoolingType poolingType) {
		LibMatrixCuDNNPoolingDescriptors ret = new LibMatrixCuDNNPoolingDescriptors();
		ret.xDesc = allocateTensorDescriptor(N, C, H, W);
		ret.yDesc = allocateTensorDescriptor(N, C, P, Q);
		ret.poolingDesc = allocatePoolingDescriptor(R, S, pad_h, pad_w, stride_h, stride_w, poolingType);
		return ret;
	}

	/**
	 * Convenience method to allocate and initialize a 4d tensor descriptor in NCHW format
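	 *
	 * Note: with the {@code CUDNN_TENSOR_NCHW} format used here, the element at (n, c, h, w)
	 * maps to linear offset ((n*C + c)*H + h)*W + w in the fully-packed buffer.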
	 * @param N number of images
	 * @param C number of channels
	 * @param H height
	 * @param W width
	 * @return cudnn tensor descriptor
	 */
	private static cudnnTensorDescriptor allocateTensorDescriptor(int N, int C, int H, int W) {
		cudnnTensorDescriptor tensorDescriptor = new cudnnTensorDescriptor();
		cudnnCreateTensorDescriptor(tensorDescriptor);
		cudnnSetTensor4dDescriptor(tensorDescriptor, CUDNN_TENSOR_NCHW, LibMatrixCUDA.CUDNN_DATA_TYPE, N, C, H, W);
		return tensorDescriptor;
	}

	/**
	 * Allocates a pooling descriptor, used in poolingForward and poolingBackward
	 * @param R pooling window height
	 * @param S pooling window width
	 * @param pad_h vertical padding
	 * @param pad_w horizontal padding
	 * @param stride_h pooling vertical stride
	 * @param stride_w pooling horizontal stride
	 * @param poolingType type of pooling
	 * @return cudnn pooling descriptor
	 */
	private static cudnnPoolingDescriptor allocatePoolingDescriptor(int R, int S, int pad_h, int pad_w,
			int stride_h, int stride_w, PoolingType poolingType) {
		cudnnPoolingDescriptor poolingDesc = new cudnnPoolingDescriptor();
		cudnnCreatePoolingDescriptor(poolingDesc);
		// Select the cudnn pooling mode: max pooling, or average pooling that includes padded cells in the count
		int poolingMode = (poolingType == PoolingType.MAX) ? CUDNN_POOLING_MAX : CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
		cudnnSetPooling2dDescriptor(poolingDesc, poolingMode, CUDNN_PROPAGATE_NAN, R, S, pad_h, pad_w, stride_h, stride_w);
		return poolingDesc;
	}
}