src/operator/cudnn_bilinear_sampler-inl.h - mxnet - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 /*!
  * Copyright (c) 2016 by Contributors
  * \file cudnn_bilinear_sampler-inl.h
  * \brief
  * \author Xu Dong
 */
 #ifndef MXNET_OPERATOR_CUDNN_BILINEAR_SAMPLER_INL_H_
 #define MXNET_OPERATOR_CUDNN_BILINEAR_SAMPLER_INL_H_

 #include <algorithm>
 #include <vector>
 #include "./bilinear_sampler-inl.h"
 namespace mxnet {
 namespace op {
 #if defined(__CUDACC__) && MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 5
 template<typename DType>
 class CuDNNBilinearSamplerOp : public Operator {
  public:
   explicit CuDNNBilinearSamplerOp(BilinearSamplerParam param) {
     this->param_ = param;
     init_cudnn_ = false;
     dtype_ = mshadow::DataType<DType>::kCudnnFlag;
     sampler_ = CUDNN_SAMPLER_BILINEAR;
   }

   ~CuDNNBilinearSamplerOp() {
     if (init_cudnn_) {
       CUDNN_CALL(cudnnDestroySpatialTransformerDescriptor(st_desc_));
       CUDNN_CALL(cudnnDestroyTensorDescriptor(in_desc_));
       CUDNN_CALL(cudnnDestroyTensorDescriptor(out_desc_));
     }
   }

   virtual void Forward(const OpContext &ctx,
                        const std::vector<TBlob> &in_data,
                        const std::vector<OpReqType> &req,
                        const std::vector<TBlob> &out_data,
                        const std::vector<TBlob> &aux_args) {
     using namespace mshadow;
     CHECK_EQ(req[bs::kOut], kWriteTo);
     CHECK_EQ(in_data.size(), 2U);
     CHECK_EQ(out_data.size(), 2U);
     Stream<gpu> *s = ctx.get_stream<gpu>();

     Tensor<gpu, 4, DType> data = in_data[bs::kData].get<gpu, 4, DType>(s);
     Tensor<gpu, 4, DType> grid = in_data[bs::kGrid].get<gpu, 4, DType>(s);
     Tensor<gpu, 4, DType> grid_tmp = out_data[bs::kTmp].get<gpu, 4, DType>(s);
     Tensor<gpu, 4, DType> out = out_data[bs::kOut].get<gpu, 4, DType>(s);
     // grid_tmp : (batch, h, w, 2)
     grid_tmp = transpose(grid, Shape4(0, 2, 3, 1));
     if (!init_cudnn_) {
      Init(s, in_data, out_data);
     }
     CHECK_EQ(data.CheckContiguous(), true);
     CHECK_EQ(out.CheckContiguous(), true);
     CHECK_EQ(grid_tmp.CheckContiguous(), true);
     typename DataType<DType>::ScaleType alpha = 1.0f;
     typename DataType<DType>::ScaleType beta = 0.0f;
     CUDNN_CALL(cudnnSpatialTfSamplerForward(s->dnn_handle_,
                                             st_desc_,
                                             &alpha,
                                             in_desc_,
                                             data.dptr_,
                                             grid_tmp.dptr_,
                                             &beta,
                                             out_desc_,
                                             out.dptr_));
   }

   virtual void Backward(const OpContext &ctx,
                         const std::vector<TBlob> &out_grad,
                         const std::vector<TBlob> &in_data,
                         const std::vector<TBlob> &out_data,
                         const std::vector<OpReqType> &req,
                         const std::vector<TBlob> &in_grad,
                         const std::vector<TBlob> &aux_args) {
     using namespace mshadow;
     CHECK_NE(req[bs::kData], kWriteInplace);
     CHECK_NE(req[bs::kGrid], kWriteInplace);
     CHECK_EQ(in_data.size(), 2U);
     CHECK_EQ(out_data.size(), 2U);
     CHECK_EQ(out_grad.size(), 1U);
     Stream<gpu> *s = ctx.get_stream<gpu>();
     Tensor<gpu, 4, DType> data = in_data[bs::kData].get<gpu, 4, DType>(s);
     Tensor<gpu, 4, DType> grid_tmp = out_data[bs::kTmp].get<gpu, 4, DType>(s);
     Tensor<gpu, 4, DType> gdata = in_grad[bs::kData].get<gpu, 4, DType>(s);
     Tensor<gpu, 4, DType> ggrid = in_grad[bs::kGrid].get<gpu, 4, DType>(s);
     Tensor<gpu, 4, DType> grad = out_grad[bs::kOut].get<gpu, 4, DType>(s);

     typename DataType<DType>::ScaleType alpha = (req[bs::kData] == kNullOp) ? 0.0f : 1.0f;
     typename DataType<DType>::ScaleType beta = (req[bs::kData] == kAddTo) ? 1.0f : 0.0f;
     typename DataType<DType>::ScaleType alpha_dgrid = 1.0f;
     typename DataType<DType>::ScaleType beta_dgrid = 0.0f;
     CUDNN_CALL(cudnnSpatialTfSamplerBackward(s->dnn_handle_,
                                              st_desc_,
                                              &alpha,
                                              in_desc_,
                                              data.dptr_,
                                              &beta,
                                              in_desc_/*reuse in_desc_*/,
                                              gdata.dptr_/*output*/,
                                              &alpha_dgrid,
                                              out_desc_/*reuse out_desc_*/,
                                              grad.dptr_,
                                              grid_tmp.dptr_,
                                              &beta_dgrid,
                                              grid_tmp.dptr_));
     Assign(ggrid, req[bs::kGrid], transpose(grid_tmp, Shape4(0, 3, 1, 2)));
   }

  private:
   inline void Init(mshadow::Stream<gpu> *s,
                    const std::vector<TBlob> &in_data,
                    const std::vector<TBlob> &out_data) {
     using namespace mshadow;
     #if CUDNN_MAJOR >= 5
     format_ = CUDNN_TENSOR_NCHW;
     #endif
     CHECK_EQ(in_data.size(), 2U);
     CHECK_EQ(out_data.size(), 2U);
     if (!init_cudnn_) {
       init_cudnn_ = true;
       Tensor<gpu, 4, DType> data = in_data[bs::kData].get<gpu, 4, DType>(s);
       Tensor<gpu, 4, DType> out = out_data[bs::kOut].get<gpu, 4, DType>(s);
       CUDNN_CALL(cudnnCreateSpatialTransformerDescriptor(&st_desc_));
       CUDNN_CALL(cudnnCreateTensorDescriptor(&in_desc_));
       CUDNN_CALL(cudnnCreateTensorDescriptor(&out_desc_));
       CUDNN_CALL(cudnnSetTensor4dDescriptor(in_desc_,
                                             format_,
                                             dtype_,
                                             data.size(0),
                                             data.size(1),
                                             data.size(2),
                                             data.size(3)));
       CUDNN_CALL(cudnnSetTensor4dDescriptor(out_desc_,
                                             format_,
                                             dtype_,
                                             out.size(0),
                                             out.size(1),
                                             out.size(2),
                                             out.size(3)));
       int dim[] = {static_cast<int>(out.size(0)), static_cast<int>(out.size(1)),
                    static_cast<int>(out.size(2)), static_cast<int>(out.size(3))};
       CUDNN_CALL(cudnnSetSpatialTransformerNdDescriptor(st_desc_,
                                                         sampler_,
                                                         dtype_,
                                                         4,
                                                         dim));
     }
   }

   bool init_cudnn_;
   cudnnDataType_t dtype_;
   cudnnSpatialTransformerDescriptor_t st_desc_;
   cudnnTensorDescriptor_t in_desc_;
   cudnnTensorDescriptor_t out_desc_;
   cudnnSamplerType_t sampler_;
   #if CUDNN_MAJOR >= 5
   cudnnTensorFormat_t format_;
   #endif
   BilinearSamplerParam param_;
 };
 #endif  // __CUDACC__ && CUDNN
 }  // namespace op
 }  // namespace mxnet

 #endif  // MXNET_OPERATOR_CUDNN_BILINEAR_SAMPLER_INL_H_
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/

	/*!
	* Copyright (c) 2016 by Contributors
	* \file cudnn_bilinear_sampler-inl.h
	* \brief
	* \author Xu Dong
	*/
	#ifndef MXNET_OPERATOR_CUDNN_BILINEAR_SAMPLER_INL_H_
	#define MXNET_OPERATOR_CUDNN_BILINEAR_SAMPLER_INL_H_

	#include <algorithm>
	#include <vector>
	#include "./bilinear_sampler-inl.h"
	namespace mxnet {
	namespace op {
	#if defined(__CUDACC__) && MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 5
	template<typename DType>
	class CuDNNBilinearSamplerOp : public Operator {
	public:
	explicit CuDNNBilinearSamplerOp(BilinearSamplerParam param) {
	this->param_ = param;
	init_cudnn_ = false;
	dtype_ = mshadow::DataType<DType>::kCudnnFlag;
	sampler_ = CUDNN_SAMPLER_BILINEAR;
	}

	~CuDNNBilinearSamplerOp() {
	if (init_cudnn_) {
	CUDNN_CALL(cudnnDestroySpatialTransformerDescriptor(st_desc_));
	CUDNN_CALL(cudnnDestroyTensorDescriptor(in_desc_));
	CUDNN_CALL(cudnnDestroyTensorDescriptor(out_desc_));
	}
	}

	virtual void Forward(const OpContext &ctx,
	const std::vector<TBlob> &in_data,
	const std::vector<OpReqType> &req,
	const std::vector<TBlob> &out_data,
	const std::vector<TBlob> &aux_args) {
	using namespace mshadow;
	CHECK_EQ(req[bs::kOut], kWriteTo);
	CHECK_EQ(in_data.size(), 2U);
	CHECK_EQ(out_data.size(), 2U);
	Stream<gpu> *s = ctx.get_stream<gpu>();

	Tensor<gpu, 4, DType> data = in_data[bs::kData].get<gpu, 4, DType>(s);
	Tensor<gpu, 4, DType> grid = in_data[bs::kGrid].get<gpu, 4, DType>(s);
	Tensor<gpu, 4, DType> grid_tmp = out_data[bs::kTmp].get<gpu, 4, DType>(s);
	Tensor<gpu, 4, DType> out = out_data[bs::kOut].get<gpu, 4, DType>(s);
	// grid_tmp : (batch, h, w, 2)
	grid_tmp = transpose(grid, Shape4(0, 2, 3, 1));
	if (!init_cudnn_) {
	Init(s, in_data, out_data);
	}
	CHECK_EQ(data.CheckContiguous(), true);
	CHECK_EQ(out.CheckContiguous(), true);
	CHECK_EQ(grid_tmp.CheckContiguous(), true);
	typename DataType<DType>::ScaleType alpha = 1.0f;
	typename DataType<DType>::ScaleType beta = 0.0f;
	CUDNN_CALL(cudnnSpatialTfSamplerForward(s->dnn_handle_,
	st_desc_,
	&alpha,
	in_desc_,
	data.dptr_,
	grid_tmp.dptr_,
	&beta,
	out_desc_,
	out.dptr_));
	}

	virtual void Backward(const OpContext &ctx,
	const std::vector<TBlob> &out_grad,
	const std::vector<TBlob> &in_data,
	const std::vector<TBlob> &out_data,
	const std::vector<OpReqType> &req,
	const std::vector<TBlob> &in_grad,
	const std::vector<TBlob> &aux_args) {
	using namespace mshadow;
	CHECK_NE(req[bs::kData], kWriteInplace);
	CHECK_NE(req[bs::kGrid], kWriteInplace);
	CHECK_EQ(in_data.size(), 2U);
	CHECK_EQ(out_data.size(), 2U);
	CHECK_EQ(out_grad.size(), 1U);
	Stream<gpu> *s = ctx.get_stream<gpu>();
	Tensor<gpu, 4, DType> data = in_data[bs::kData].get<gpu, 4, DType>(s);
	Tensor<gpu, 4, DType> grid_tmp = out_data[bs::kTmp].get<gpu, 4, DType>(s);
	Tensor<gpu, 4, DType> gdata = in_grad[bs::kData].get<gpu, 4, DType>(s);
	Tensor<gpu, 4, DType> ggrid = in_grad[bs::kGrid].get<gpu, 4, DType>(s);
	Tensor<gpu, 4, DType> grad = out_grad[bs::kOut].get<gpu, 4, DType>(s);

	typename DataType<DType>::ScaleType alpha = (req[bs::kData] == kNullOp) ? 0.0f : 1.0f;
	typename DataType<DType>::ScaleType beta = (req[bs::kData] == kAddTo) ? 1.0f : 0.0f;
	typename DataType<DType>::ScaleType alpha_dgrid = 1.0f;
	typename DataType<DType>::ScaleType beta_dgrid = 0.0f;
	CUDNN_CALL(cudnnSpatialTfSamplerBackward(s->dnn_handle_,
	st_desc_,
	&alpha,
	in_desc_,
	data.dptr_,
	&beta,
	in_desc_/reuse in_desc_/,
	gdata.dptr_/output/,
	&alpha_dgrid,
	out_desc_/reuse out_desc_/,
	grad.dptr_,
	grid_tmp.dptr_,
	&beta_dgrid,
	grid_tmp.dptr_));
	Assign(ggrid, req[bs::kGrid], transpose(grid_tmp, Shape4(0, 3, 1, 2)));
	}

	private:
	inline void Init(mshadow::Stream<gpu> *s,
	const std::vector<TBlob> &in_data,
	const std::vector<TBlob> &out_data) {
	using namespace mshadow;
	#if CUDNN_MAJOR >= 5
	format_ = CUDNN_TENSOR_NCHW;
	#endif
	CHECK_EQ(in_data.size(), 2U);
	CHECK_EQ(out_data.size(), 2U);
	if (!init_cudnn_) {
	init_cudnn_ = true;
	Tensor<gpu, 4, DType> data = in_data[bs::kData].get<gpu, 4, DType>(s);
	Tensor<gpu, 4, DType> out = out_data[bs::kOut].get<gpu, 4, DType>(s);
	CUDNN_CALL(cudnnCreateSpatialTransformerDescriptor(&st_desc_));
	CUDNN_CALL(cudnnCreateTensorDescriptor(&in_desc_));
	CUDNN_CALL(cudnnCreateTensorDescriptor(&out_desc_));
	CUDNN_CALL(cudnnSetTensor4dDescriptor(in_desc_,
	format_,
	dtype_,
	data.size(0),
	data.size(1),
	data.size(2),
	data.size(3)));
	CUDNN_CALL(cudnnSetTensor4dDescriptor(out_desc_,
	format_,
	dtype_,
	out.size(0),
	out.size(1),
	out.size(2),
	out.size(3)));
	int dim[] = {static_cast<int>(out.size(0)), static_cast<int>(out.size(1)),
	static_cast<int>(out.size(2)), static_cast<int>(out.size(3))};
	CUDNN_CALL(cudnnSetSpatialTransformerNdDescriptor(st_desc_,
	sampler_,
	dtype_,
	4,
	dim));
	}
	}

	bool init_cudnn_;
	cudnnDataType_t dtype_;
	cudnnSpatialTransformerDescriptor_t st_desc_;
	cudnnTensorDescriptor_t in_desc_;
	cudnnTensorDescriptor_t out_desc_;
	cudnnSamplerType_t sampler_;
	#if CUDNN_MAJOR >= 5
	cudnnTensorFormat_t format_;
	#endif
	BilinearSamplerParam param_;
	};
	#endif // __CUDACC__ && CUDNN
	} // namespace op
	} // namespace mxnet

	#endif // MXNET_OPERATOR_CUDNN_BILINEAR_SAMPLER_INL_H_