/*!
 * Copyright (c) 2015 by Contributors
 * \file deconvolution.cu
 * \brief GPU registration of the deconvolution operator
 * \author Wei Wu
 */
#include "./deconvolution-inl.h"
#if MXNET_USE_CUDNN == 1
#include "./cudnn_deconvolution-inl.h"
#endif // MXNET_USE_CUDNN
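
// When built with MXNET_USE_CUDNN, CreateOp<gpu> below prefers the
// cuDNN-backed CuDNNDeconvolutionOp and falls back to the native
// DeconvolutionOp whenever cuDNN cannot handle the requested configuration.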
namespace mxnet {
namespace op {
template<>
Operator* CreateOp<gpu>(DeconvolutionParam param, int dtype,
                        std::vector<TShape> *in_shape,
                        std::vector<TShape> *out_shape,
                        Context ctx) {
  // Logic here parallels that in convolution.cu.
  Operator *op = NULL;
  // For 1D deconvolution, always use the native MXNet implementation.
  if (param.kernel.ndim() == 1) {
    MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
      op = new DeconvolutionOp<gpu, DType>(param);
    })
    return op;
  }
#if MXNET_USE_CUDNN == 1
  // The NVIDIA Pascal architecture was the first to include 16-bit ALUs.
  // Thus, when the framework is compiled with MSHADOW_USE_PASCAL == 1, we
  // perform the deconvolution calculation in 16-bit precision when the tensor
  // type is also 16-bit. For NVIDIA architectures earlier than Pascal (e.g.,
  // Maxwell and Kepler), the computation precision is always at least 32 bits.
#if MSHADOW_USE_PASCAL == 1
  // true fp16: compute in the same 16-bit precision as the tensor type
  int desired_forward_compute_type = dtype;
  int desired_backward_compute_type = dtype;
#else
  // pseudo fp16: fp16 tensors, but compute in fp32
  int desired_forward_compute_type =
    (dtype == mshadow::kFloat16) ? mshadow::kFloat32 : dtype;
  int desired_backward_compute_type =
    (dtype == mshadow::kFloat16) ? mshadow::kFloat32 : dtype;
#endif // MSHADOW_USE_PASCAL == 1
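
  // Illustration of the distinction above: with dtype == mshadow::kFloat16,
  // "true fp16" keeps the compute types at kFloat16 (fp16 math throughout),
  // while "pseudo fp16" stores tensors in fp16 but computes in kFloat32.
  // The selection below first asks cuDNN whether it supports the desired
  // compute types, then relaxes the backward and forward compute types to
  // fp32 in turn, and finally falls back to the native implementation.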
  MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
    if (param.cudnn_off) {
      // cuDNN was disabled explicitly for this operator instance.
      op = new DeconvolutionOp<gpu, DType>(param);
    } else {
      int forward_compute_type = desired_forward_compute_type;
      int backward_compute_type = desired_backward_compute_type;
      bool deconvolutionIsSupported = CuDNNDeconvolutionOp<DType>::Supports(param,
                                          forward_compute_type,
                                          backward_compute_type);
      // If cuDNN can't handle this case with fp16 backprop kernels, try fp32 backprop.
      if (!deconvolutionIsSupported && backward_compute_type == mshadow::kFloat16) {
        backward_compute_type = mshadow::kFloat32;
        deconvolutionIsSupported = CuDNNDeconvolutionOp<DType>::Supports(param,
                                       forward_compute_type,
                                       backward_compute_type);
      }
      // If cuDNN can't handle this case with fp16 forward kernels, try fp32 forward.
      if (!deconvolutionIsSupported && forward_compute_type == mshadow::kFloat16) {
        forward_compute_type = mshadow::kFloat32;
        deconvolutionIsSupported = CuDNNDeconvolutionOp<DType>::Supports(param,
                                       forward_compute_type,
                                       backward_compute_type);
      }
      if (!deconvolutionIsSupported) {
        LOG(WARNING) <<
          "This deconvolution is not supported by cuDNN; using the MXNet implementation instead.";
        op = new DeconvolutionOp<gpu, DType>(param);
      } else {
        if ((forward_compute_type != desired_forward_compute_type) ||
            (backward_compute_type != desired_backward_compute_type)) {
          LOG(WARNING) <<
            "True fp16 deconvolution by cuDNN is not supported in this configuration. " <<
            "Falling back to pseudo fp16.";
        }
        op = new CuDNNDeconvolutionOp<DType>(param,
                                             forward_compute_type,
                                             backward_compute_type,
                                             *in_shape, *out_shape, ctx);
      }
    }
  })
#else
  MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
    op = new DeconvolutionOp<gpu, DType>(param);
  })
#endif // MXNET_USE_CUDNN
  return op;
}

} // namespace op
} // namespace mxnet
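
// Illustrative usage sketch (comment only, not part of the build; the param
// setup below is hypothetical, and only the CreateOp signature comes from
// this file):
//
//   DeconvolutionParam param;                  // assume a 2-D kernel, making
//   ...                                        // the cuDNN path eligible
//   Operator *op = CreateOp<gpu>(param, mshadow::kFloat16,
//                                &in_shape, &out_shape, ctx);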