| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| /*! |
| * Copyright (c) 2017 by Contributors |
| * \file convolution.cu |
| * \brief GPU FCompute implementations of the Convolution operator and its gradient |
| * \author Bing Xu, Jun Wu, Da Zheng |
| */ |
| |
| #include "./convolution-inl.h" |
| #include <vector> |
| #include "./depthwise_convolution-inl.h" |
| #if MXNET_USE_CUDNN == 1 |
| #include "./cudnn/cudnn_convolution-inl.h" |
| #endif // MXNET_USE_CUDNN |
| |
| namespace mxnet { |
| namespace op { |
| |
| #if MXNET_USE_CUDNN == 1 |
| template<typename DType> |
| static CuDNNConvolutionOp<DType> &GetCuDNNConvOp(const ConvolutionParam& param, |
| int forward_compute_type, int backward_compute_type, |
| const std::vector<TShape>& in_shape, const std::vector<TShape>& out_shape, |
| const Context& ctx) { |
| #if DMLC_CXX11_THREAD_LOCAL |
| static thread_local CuDNNConvolutionOp<DType> op; |
| #else |
| static MX_THREAD_LOCAL CuDNNConvolutionOp<DType> op; |
| #endif |
| op.Init(param, forward_compute_type, backward_compute_type, |
| in_shape, out_shape, ctx); |
| return op; |
| } |
| #endif |
| |
| template<> |
| void ConvolutionCompute<gpu>(const nnvm::NodeAttrs& attrs, |
| const OpContext& ctx, const std::vector<TBlob>& inputs, |
| const std::vector<OpReqType>& req, |
| const std::vector<TBlob>& outputs) { |
| const ConvolutionParam& param = nnvm::get<ConvolutionParam>(attrs.parsed); |
| int dtype = inputs[conv::kData].type_flag_; |
| |
| // If 1D convolution, use MXNet implementation |
| if (param.kernel.ndim() == 1) { |
| MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { |
| ConvolutionOp<gpu, DType> op; |
| op.Init(param); |
| op.Forward(ctx, inputs, req, outputs); |
| }) |
| return; |
| } else if (param.num_filter == param.num_group && |
| param.layout.value() == mshadow::kNCHW && |
| param.num_filter == inputs[conv::kData].shape_[1] && |
| param.kernel.ndim() == 2 && |
| param.dilate == mshadow::Shape2(1, 1) && |
| dtype == mshadow::kFloat32) { |
| std::vector<TShape> in_shape(inputs.size()); |
| std::vector<TShape> out_shape(1, outputs[0].shape_); |
| for (size_t i = 0; i < in_shape.size(); i++) |
| in_shape[i] = inputs[i].shape_; |
| DepthwiseConvolutionOp<float> op; |
| op.Init(param, in_shape, out_shape); |
| op.Forward(ctx, inputs, req, outputs); |
| return; |
| } |
| |
| #if MXNET_USE_CUDNN == 1 |
| // On fp16-I/O instances, use fp32 compute (i.e. pseudo-fp16). |
| int compute_type = (dtype == mshadow::kFloat16) ? mshadow::kFloat32 : dtype; |
| |
| MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { |
| if (param.cudnn_off) { |
| ConvolutionOp<gpu, DType> op; |
| op.Init(param); |
| op.Forward(ctx, inputs, req, outputs); |
| } else if (!CuDNNConvolutionOp<DType>::Supports(param, |
| compute_type, compute_type, ctx.run_ctx.ctx)) { |
| LOG(WARNING) << "This convolution is not supported by cudnn, MXNET convolution is applied."; |
| ConvolutionOp<gpu, DType> op; |
| op.Init(param); |
| op.Forward(ctx, inputs, req, outputs); |
| } else { |
| std::vector<TShape> in_shape(inputs.size()); |
| std::vector<TShape> out_shape(1, outputs[0].shape_); |
| for (size_t i = 0; i < in_shape.size(); i++) |
| in_shape[i] = inputs[i].shape_; |
| CuDNNConvolutionOp<DType> &op = GetCuDNNConvOp<DType>(param, |
| compute_type, compute_type, in_shape, out_shape, ctx.run_ctx.ctx); |
| op.Forward(ctx, inputs, req, outputs); |
| } |
| }) |
| #else |
| MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { |
| ConvolutionOp<gpu, DType> op; |
| op.Init(param); |
| op.Forward(ctx, inputs, req, outputs); |
| }) |
| #endif // MXNET_USE_CUDNN |
| } |
| |
| template<> |
| void ConvolutionGradCompute<gpu>(const nnvm::NodeAttrs& attrs, |
| const OpContext& ctx, const std::vector<TBlob>& inputs, |
| const std::vector<OpReqType>& req, |
| const std::vector<TBlob>& outputs) { |
| const ConvolutionParam& param = nnvm::get<ConvolutionParam>(attrs.parsed); |
| std::vector<TBlob> in_data(inputs.begin() + 1, inputs.end()); |
| const TBlob &out_grad = inputs[0]; |
| const std::vector<TBlob> &in_grad = outputs; |
| int dtype = out_grad.type_flag_; |
| |
| // If 1D convolution, use MXNet implementation |
| if (param.kernel.ndim() == 1) { |
| MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { |
| ConvolutionOp<gpu, DType> op; |
| op.Init(param); |
| op.Backward(ctx, std::vector<TBlob>{out_grad}, in_data, req, in_grad); |
| }) |
| return; |
| } else if (param.num_filter == param.num_group && |
| param.layout.value() == mshadow::kNCHW && |
| param.num_filter == in_data[conv::kData].shape_[1] && |
| param.kernel.ndim() == 2 && |
| param.dilate == mshadow::Shape2(1, 1) && |
| dtype == mshadow::kFloat32) { |
| // The first element stores out grad. |
| std::vector<TShape> in_shape(in_data.size()); |
| std::vector<TShape> out_shape(1, out_grad.shape_); |
| for (size_t i = 0; i < in_shape.size(); i++) |
| in_shape[i] = in_data[i].shape_; |
| DepthwiseConvolutionOp<float> op; |
| op.Init(param, in_shape, out_shape); |
| op.Backward(ctx, std::vector<TBlob>{out_grad}, in_data, req, in_grad); |
| return; |
| } |
| |
| #if MXNET_USE_CUDNN == 1 |
| // On fp16-I/O instances, use fp32 compute (i.e. pseudo-fp16). |
| int compute_type = (dtype == mshadow::kFloat16) ? mshadow::kFloat32 : dtype; |
| |
| MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { |
| if (param.cudnn_off) { |
| ConvolutionOp<gpu, DType> op; |
| op.Init(param); |
| op.Backward(ctx, std::vector<TBlob>{out_grad}, in_data, req, in_grad); |
| } else if (!CuDNNConvolutionOp<DType>::Supports(param, |
| compute_type, compute_type, ctx.run_ctx.ctx)) { |
| LOG(WARNING) << "This convolution is not supported by cudnn, MXNET convolution is applied."; |
| ConvolutionOp<gpu, DType> op; |
| op.Init(param); |
| op.Backward(ctx, std::vector<TBlob>{out_grad}, in_data, req, in_grad); |
| } else { |
| // The first element stores out grad. |
| std::vector<TShape> in_shape(in_data.size()); |
| std::vector<TShape> out_shape(1, out_grad.shape_); |
| for (size_t i = 0; i < in_shape.size(); i++) |
| in_shape[i] = in_data[i].shape_; |
| CuDNNConvolutionOp<DType> &op = GetCuDNNConvOp<DType>(param, |
| compute_type, compute_type, in_shape, out_shape, ctx.run_ctx.ctx); |
| op.Backward(ctx, std::vector<TBlob>{out_grad}, in_data, req, in_grad); |
| } |
| }) |
| #else |
| MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { |
| ConvolutionOp<gpu, DType> op; |
| op.Init(param); |
| op.Backward(ctx, std::vector<TBlob>{out_grad}, in_data, req, in_grad); |
| }) |
| #endif // MXNET_USE_CUDNN |
| } |
| |
| NNVM_REGISTER_OP(Convolution) |
| .set_attr<FCompute>("FCompute<gpu>", ConvolutionCompute<gpu>); |
| |
| NNVM_REGISTER_OP(_backward_Convolution) |
| .set_attr<FCompute>("FCompute<gpu>", ConvolutionGradCompute<gpu>); |
| |
| } // namespace op |
| } // namespace mxnet |
| |