/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * Copyright (c) 2015 by Contributors
 * \file deconvolution.cu
 * \brief GPU implementation of the Deconvolution operator, with an optional cuDNN path
 * \author Wei Wu, Da Zheng
 */

#include "./deconvolution-inl.h"
#if MXNET_USE_CUDNN == 1
#include "./cudnn/cudnn_deconvolution-inl.h"
#endif  // MXNET_USE_CUDNN

namespace mxnet {
namespace op {

#if MXNET_USE_CUDNN == 1
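// Returns a cached CuDNNDeconvolutionOp matching the given configuration, creating and
// initializing one on first use. Operators are cached per thread and keyed by the operator
// parameters, the forward/backward compute types, the input/output shapes, the device id,
// and the add_to_weight flag, so repeated calls with the same configuration reuse a fully
// initialized operator instead of constructing a new one on every invocation.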
template<typename DType>
static CuDNNDeconvolutionOp<DType> &GetCuDNNDeconvOp(const DeconvolutionParam& param,
                                                     int forward_compute_type,
                                                     int backward_compute_type,
                                                     const mxnet::ShapeVector& in_shape,
                                                     const mxnet::ShapeVector& out_shape,
                                                     const RunContext& rctx,
                                                     bool add_to_weight) {
#if DMLC_CXX11_THREAD_LOCAL
  static thread_local std::unordered_map<DeconvSignature,
                                         std::shared_ptr<CuDNNDeconvolutionOp<DType> >,
                                         OpHash> ops;
#else
  static MX_THREAD_LOCAL std::unordered_map<DeconvSignature,
                                            std::shared_ptr<CuDNNDeconvolutionOp<DType> >,
                                            OpHash> ops;
#endif
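  // Build a signature that uniquely identifies this deconvolution configuration: the
  // operator parameters plus everything else below that affects how the op is set up.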
  DeconvSignature key(param);
  size_t ndim = 0;
  for (auto &s : in_shape)
    ndim += s.ndim();
  for (auto &s : out_shape)
    ndim += s.ndim();
  key.Reserve(1 /* for forward_compute_type */ +
              1 /* for backward_compute_type */ +
              ndim /* for in and out shapes */ +
              1 /* for dev_id */ +
              1 /* for add_to_weight */);

  key.AddSign(forward_compute_type);
  key.AddSign(backward_compute_type);
  key.AddSign(in_shape);
  key.AddSign(out_shape);
  key.AddSign(rctx.ctx.dev_id);
  key.AddSign(add_to_weight ? 1 : 0);

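  // Reuse the cached operator for this signature if one exists; otherwise create a new
  // operator, insert it into the cache, and initialize it for these shapes and types.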
  auto it = ops.find(key);
  if (it == ops.end()) {
    std::shared_ptr<CuDNNDeconvolutionOp<DType>> op(new CuDNNDeconvolutionOp<DType>());
    auto ins_ret = ops.insert(
        std::pair<DeconvSignature, std::shared_ptr<CuDNNDeconvolutionOp<DType>>>(key, op));
    CHECK(ins_ret.second);
    it = ins_ret.first;
    it->second->Init(param, forward_compute_type, backward_compute_type, in_shape,
                     out_shape, rctx, add_to_weight);
  }
  return *it->second;
}
#endif

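// Forward pass on the GPU. When built with cuDNN, this dispatches three ways: the native
// DeconvolutionOp if the user set cudnn_off, the native op (with a warning) if cuDNN does
// not support this parameter/type combination, and otherwise a cached CuDNNDeconvolutionOp.
// Without cuDNN, the native DeconvolutionOp is always used.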
template<>
void DeconvolutionCompute<gpu>(const nnvm::NodeAttrs& attrs,
                               const OpContext& ctx,
                               const std::vector<TBlob>& inputs,
                               const std::vector<OpReqType>& req,
                               const std::vector<TBlob>& outputs) {
  const DeconvolutionParam& param = nnvm::get<DeconvolutionParam>(attrs.parsed);
  int dtype = inputs[0].type_flag_;

#if MXNET_USE_CUDNN == 1
  // For fp16 inputs/outputs, compute in fp32 (i.e. pseudo-fp16).
  int compute_type = (dtype == mshadow::kFloat16) ? mshadow::kFloat32 : dtype;

  MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
    if (param.cudnn_off) {
      DeconvolutionOp<gpu, DType> op;
      op.Init(param);
      op.Forward(ctx, inputs, req, outputs);
    } else if (!CuDNNDeconvolutionOp<DType>::Supports(param,
               compute_type, compute_type, ctx.run_ctx.ctx.dev_id)) {
      LOG(WARNING) <<
        "This deconvolution is not supported by cuDNN; using the MXNet implementation instead.";
      DeconvolutionOp<gpu, DType> op;
      op.Init(param);
      op.Forward(ctx, inputs, req, outputs);
    } else {
      mxnet::ShapeVector in_shape(inputs.size());
      mxnet::ShapeVector out_shape(1, outputs[0].shape_);
      for (size_t i = 0; i < in_shape.size(); i++) {
        in_shape[i] = inputs[i].shape_;
      }
      // req[deconv::kWeight] is only meaningful in the backward pass, so assume the
      // typical 'write' request here.
      auto add_to_weight = false;
      GetCuDNNDeconvOp<DType>(param, compute_type, compute_type,
          in_shape, out_shape, ctx.run_ctx, add_to_weight).Forward(ctx, inputs, req, outputs);
    }
  })
#else
  MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
    DeconvolutionOp<gpu, DType> op;
    op.Init(param);
    op.Forward(ctx, inputs, req, outputs);
  })
#endif  // MXNET_USE_CUDNN
}

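// Backward pass on the GPU. inputs[0] is the output gradient and the remaining inputs are
// the forward-pass data; outputs receive the input gradients. Dispatch mirrors the forward
// pass: the native DeconvolutionOp when cudnn_off is set or cuDNN does not support the
// configuration, otherwise a cached CuDNNDeconvolutionOp.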
template<>
void DeconvolutionGradCompute<gpu>(const nnvm::NodeAttrs& attrs,
                                   const OpContext& ctx,
                                   const std::vector<TBlob>& inputs,
                                   const std::vector<OpReqType>& req,
                                   const std::vector<TBlob>& outputs) {
  const DeconvolutionParam& param = nnvm::get<DeconvolutionParam>(attrs.parsed);
  std::vector<TBlob> in_data(inputs.begin() + 1, inputs.end());
  const TBlob &out_grad = inputs[0];
  const std::vector<TBlob> &in_grad = outputs;
  int dtype = out_grad.type_flag_;

#if MXNET_USE_CUDNN == 1
  // For fp16 inputs/outputs, compute in fp32 (i.e. pseudo-fp16).
  int compute_type = (dtype == mshadow::kFloat16) ? mshadow::kFloat32 : dtype;

  MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
    if (param.cudnn_off) {
      DeconvolutionOp<gpu, DType> op;
      op.Init(param);
      op.Backward(ctx, std::vector<TBlob>{out_grad}, in_data, req, in_grad);
    } else if (!CuDNNDeconvolutionOp<DType>::Supports(param,
               compute_type, compute_type, ctx.run_ctx.ctx.dev_id)) {
      LOG(WARNING) <<
        "This deconvolution is not supported by cuDNN; using the MXNet implementation instead.";
      DeconvolutionOp<gpu, DType> op;
      op.Init(param);
      op.Backward(ctx, std::vector<TBlob>{out_grad}, in_data, req, in_grad);
    } else {
      mxnet::ShapeVector in_shape(in_data.size());
      mxnet::ShapeVector out_shape(1, out_grad.shape_);
      for (size_t i = 0; i < in_shape.size(); i++) {
        in_shape[i] = in_data[i].shape_;
      }
      // Accumulate into the weight gradient when requested rather than overwriting it.
      auto add_to_weight = req[deconv::kWeight] == kAddTo;
      GetCuDNNDeconvOp<DType>(param, compute_type, compute_type,
          in_shape, out_shape, ctx.run_ctx, add_to_weight).Backward(ctx,
              std::vector<TBlob>{out_grad}, in_data, req, in_grad);
    }
  })
#else
  MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
    DeconvolutionOp<gpu, DType> op;
    op.Init(param);
    op.Backward(ctx, std::vector<TBlob>{out_grad}, in_data, req, in_grad);
  })
#endif  // MXNET_USE_CUDNN
}

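// Register the GPU compute functions for the Deconvolution operator and its backward pass.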
NNVM_REGISTER_OP(Deconvolution)
.set_attr<FCompute>("FCompute<gpu>", DeconvolutionCompute<gpu>);

NNVM_REGISTER_OP(_backward_Deconvolution)
.set_attr<FCompute>("FCompute<gpu>", DeconvolutionGradCompute<gpu>);

}  // namespace op
}  // namespace mxnet