/*!
* Copyright (c) 2015 by Contributors
* \file cudnn_activation-inl.h
* \brief cuDNN activation operator
* \author Bing Xu
*/
#ifndef MXNET_OPERATOR_CUDNN_ACTIVATION_INL_H_
#define MXNET_OPERATOR_CUDNN_ACTIVATION_INL_H_
#include <algorithm>
#include <vector>
#include "./activation-inl.h"
namespace mxnet {
namespace op {
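// cuDNN-backed implementation of the Activation operator (ReLU, sigmoid, tanh).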
template<typename DType>
class CuDNNActivationOp : public Operator {
public:
explicit CuDNNActivationOp(ActivationParam param) {
param_ = param;
init_cudnn_ = false;
dtype_ = mshadow::DataType<DType>::kCudnnFlag;
switch (param_.act_type) {
case activation::kReLU:
mode_ = CUDNN_ACTIVATION_RELU;
break;
case activation::kSigmoid:
mode_ = CUDNN_ACTIVATION_SIGMOID;
break;
case activation::kTanh:
mode_ = CUDNN_ACTIVATION_TANH;
break;
default:
LOG(FATAL) << "Not implemented";
break;
}
#if CUDNN_MAJOR >= 5
nan_prop_ = CUDNN_NOT_PROPAGATE_NAN;
// relu_ceil_ is only consumed for clipped ReLU; initialize it so an
// indeterminate value is never passed to cuDNN.
relu_ceil_ = 0.0;
CUDNN_CALL(cudnnCreateActivationDescriptor(&desc_));
CUDNN_CALL(cudnnSetActivationDescriptor(desc_, mode_, nan_prop_, relu_ceil_));
#endif
}
~CuDNNActivationOp() {
if (init_cudnn_) {
CUDNN_CALL(cudnnDestroyTensorDescriptor(shape_desc_));
}
#if CUDNN_MAJOR >= 5
// The activation descriptor is created unconditionally in the constructor,
// so it must be destroyed even if Forward() was never called.
CUDNN_CALL(cudnnDestroyActivationDescriptor(desc_));
#endif
}
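// Forward pass: reshape the input to the 4D (NCHW) layout cuDNN expects,
// then apply the activation with cudnnActivationForward.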
virtual void Forward(const OpContext &ctx,
const std::vector<TBlob> &in_data,
const std::vector<OpReqType> &req,
const std::vector<TBlob> &out_data,
const std::vector<TBlob> &aux_args) {
using namespace mshadow;
using namespace mshadow::expr;
CHECK_EQ(in_data.size(), 1U);
CHECK_EQ(out_data.size(), 1U);
Stream<gpu> *s = ctx.get_stream<gpu>();
Tensor<gpu, 4, DType> data;
Tensor<gpu, 4, DType> out;
if (in_data[activation::kData].ndim() == 2) {
Shape<4> dshape = Shape4(in_data[activation::kData].shape_[0],
in_data[activation::kData].shape_[1], 1, 1);
data = in_data[activation::kData].get_with_shape<gpu, 4, DType>(dshape, s);
out = out_data[activation::kOut].get_with_shape<gpu, 4, DType>(dshape, s);
} else {
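// Fold an N-dimensional input into 4D: keep up to the first three axes
// and collapse everything that remains into the last axis.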
Shape<4> dshape;
index_t size_left = in_data[activation::kData].Size();
for (int i = 0; i < 3; ++i) {
if (i < in_data[activation::kData].ndim()) {
dshape[i] = in_data[activation::kData].shape_[i];
} else {
dshape[i] = 1;
}
size_left /= dshape[i];
}
dshape[3] = size_left;
data = in_data[activation::kData].get_with_shape<gpu, 4, DType>(dshape, s);
out = out_data[activation::kOut].get_with_shape<gpu, 4, DType>(dshape, s);
}
typename DataType<DType>::ScaleType alpha = 1.0f;
typename DataType<DType>::ScaleType beta = 0.0f;
CHECK_EQ(s->dnn_handle_ownership_, mshadow::Stream<gpu>::OwnHandle);
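// Lazily create the shared tensor descriptor on the first Forward() call,
// once the concrete 4D shape is known.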
if (!init_cudnn_) {
init_cudnn_ = true;
CUDNN_CALL(cudnnCreateTensorDescriptor(&shape_desc_));
CUDNN_CALL(cudnnSetTensor4dDescriptor(shape_desc_,
CUDNN_TENSOR_NCHW,
dtype_,
data.shape_[0],
data.shape_[1],
data.shape_[2],
data.shape_[3]));
}
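// cuDNN 4 and earlier take the activation mode directly; cuDNN 5 and later
// take the activation descriptor created in the constructor.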
#if CUDNN_MAJOR <= 4
CUDNN_CALL(cudnnActivationForward(s->dnn_handle_,
mode_,
&alpha,
shape_desc_,
data.dptr_,
&beta,
shape_desc_,
out.dptr_));
#elif CUDNN_MAJOR >= 5
CUDNN_CALL(cudnnActivationForward(s->dnn_handle_,
desc_,
&alpha,
shape_desc_,
data.dptr_,
&beta,
shape_desc_,
out.dptr_));
#endif
}
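// Backward pass: compute the input gradient from the output value, the
// output gradient, and the original input via cudnnActivationBackward.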
virtual void Backward(const OpContext &ctx,
const std::vector<TBlob> &out_grad,
const std::vector<TBlob> &in_data,
const std::vector<TBlob> &out_data,
const std::vector<OpReqType> &req,
const std::vector<TBlob> &in_grad,
const std::vector<TBlob> &aux_args) {
using namespace mshadow;
using namespace mshadow::expr;
CHECK_EQ(out_grad.size(), 1U);
CHECK_EQ(in_data.size(), 1U);
CHECK_EQ(out_data.size(), 1U);
CHECK_EQ(req.size(), 1U);
CHECK_EQ(in_grad.size(), 1U);
typename DataType<DType>::ScaleType alpha = 1.0f;
typename DataType<DType>::ScaleType beta = 0.0f;
Stream<gpu> *s = ctx.get_stream<gpu>();
Tensor<gpu, 4, DType> grad;
Tensor<gpu, 4, DType> data;
Tensor<gpu, 4, DType> output_data;
Tensor<gpu, 4, DType> input_grad;
if (in_grad[activation::kData].ndim() == 2) {
Shape<4> dshape = Shape4(in_grad[activation::kData].shape_[0],
in_grad[activation::kData].shape_[1], 1, 1);
data = in_data[activation::kData].get_with_shape<gpu, 4, DType>(dshape, s);
grad = out_grad[activation::kOut].get_with_shape<gpu, 4, DType>(dshape, s);
output_data = out_data[activation::kOut].get_with_shape<gpu, 4, DType>(dshape, s);
input_grad = in_grad[activation::kData].get_with_shape<gpu, 4, DType>(dshape, s);
} else {
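// Same 4D folding as in Forward().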
Shape<4> dshape;
index_t size_left = in_grad[activation::kData].Size();
for (int i = 0; i < 3; ++i) {
if (i < in_grad[activation::kData].ndim()) {
dshape[i] = in_grad[activation::kData].shape_[i];
} else {
dshape[i] = 1;
}
size_left /= dshape[i];
}
dshape[3] = size_left;
data = in_data[activation::kData].get_with_shape<gpu, 4, DType>(dshape, s);
output_data = out_data[activation::kOut].get_with_shape<gpu, 4, DType>(dshape, s);
grad = out_grad[activation::kOut].get_with_shape<gpu, 4, DType>(dshape, s);
input_grad = in_grad[activation::kData].get_with_shape<gpu, 4, DType>(dshape, s);
}
CHECK_EQ(s->dnn_handle_ownership_, mshadow::Stream<gpu>::OwnHandle);
#if CUDNN_MAJOR <= 4
CUDNN_CALL(cudnnActivationBackward(s->dnn_handle_,
mode_,
&alpha,
shape_desc_,
output_data.dptr_,
shape_desc_,
grad.dptr_,
shape_desc_,
data.dptr_,
&beta,
shape_desc_,
input_grad.dptr_));
#elif CUDNN_MAJOR >= 5
CUDNN_CALL(cudnnActivationBackward(s->dnn_handle_,
desc_,
&alpha,
shape_desc_,
output_data.dptr_,
shape_desc_,
grad.dptr_,
shape_desc_,
data.dptr_,
&beta,
shape_desc_,
input_grad.dptr_));
#endif
}
private:
bool init_cudnn_;  // true once shape_desc_ has been created
cudnnDataType_t dtype_;  // cuDNN data type corresponding to DType
cudnnActivationMode_t mode_;  // ReLU, sigmoid, or tanh
cudnnTensorDescriptor_t shape_desc_;  // shared 4D descriptor for input and output
ActivationParam param_;
#if CUDNN_MAJOR >= 5
cudnnActivationDescriptor_t desc_;  // activation descriptor required by cuDNN 5+
cudnnNanPropagation_t nan_prop_;  // NaN propagation policy
double relu_ceil_;  // clipping threshold, only meaningful for clipped ReLU
#endif
}; // class CuDNNActivationOp
} // namespace op
} // namespace mxnet
#endif // MXNET_OPERATOR_CUDNN_ACTIVATION_INL_H_