| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| /*! |
| * Copyright (c) 2016 by Contributors |
| * \file cudnn_bilinear_sampler-inl.h |
| * \brief |
| * \author Xu Dong |
| */ |
| #ifndef MXNET_OPERATOR_CUDNN_BILINEAR_SAMPLER_INL_H_ |
| #define MXNET_OPERATOR_CUDNN_BILINEAR_SAMPLER_INL_H_ |
| |
| #include <algorithm> |
| #include <vector> |
| #include "./bilinear_sampler-inl.h" |
| namespace mxnet { |
| namespace op { |
| #if defined(__CUDACC__) && MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 5 |
| template<typename DType> |
| class CuDNNBilinearSamplerOp : public Operator { |
| public: |
| explicit CuDNNBilinearSamplerOp(BilinearSamplerParam param) { |
| this->param_ = param; |
| init_cudnn_ = false; |
| dtype_ = mshadow::DataType<DType>::kCudnnFlag; |
| sampler_ = CUDNN_SAMPLER_BILINEAR; |
| } |
| |
| ~CuDNNBilinearSamplerOp() { |
| if (init_cudnn_) { |
| CUDNN_CALL(cudnnDestroySpatialTransformerDescriptor(st_desc_)); |
| CUDNN_CALL(cudnnDestroyTensorDescriptor(in_desc_)); |
| CUDNN_CALL(cudnnDestroyTensorDescriptor(out_desc_)); |
| } |
| } |
| |
| virtual void Forward(const OpContext &ctx, |
| const std::vector<TBlob> &in_data, |
| const std::vector<OpReqType> &req, |
| const std::vector<TBlob> &out_data, |
| const std::vector<TBlob> &aux_args) { |
| using namespace mshadow; |
| CHECK_EQ(req[bs::kOut], kWriteTo); |
| CHECK_EQ(in_data.size(), 2U); |
| CHECK_EQ(out_data.size(), 2U); |
| Stream<gpu> *s = ctx.get_stream<gpu>(); |
| |
| Tensor<gpu, 4, DType> data = in_data[bs::kData].get<gpu, 4, DType>(s); |
| Tensor<gpu, 4, DType> grid = in_data[bs::kGrid].get<gpu, 4, DType>(s); |
| Tensor<gpu, 4, DType> grid_tmp = out_data[bs::kTmp].get<gpu, 4, DType>(s); |
| Tensor<gpu, 4, DType> out = out_data[bs::kOut].get<gpu, 4, DType>(s); |
| // grid_tmp : (batch, h, w, 2) |
| grid_tmp = transpose(grid, Shape4(0, 2, 3, 1)); |
| if (!init_cudnn_) { |
| Init(s, in_data, out_data); |
| } |
| CHECK_EQ(data.CheckContiguous(), true); |
| CHECK_EQ(out.CheckContiguous(), true); |
| CHECK_EQ(grid_tmp.CheckContiguous(), true); |
| typename DataType<DType>::ScaleType alpha = 1.0f; |
| typename DataType<DType>::ScaleType beta = 0.0f; |
| CUDNN_CALL(cudnnSpatialTfSamplerForward(s->dnn_handle_, |
| st_desc_, |
| &alpha, |
| in_desc_, |
| data.dptr_, |
| grid_tmp.dptr_, |
| &beta, |
| out_desc_, |
| out.dptr_)); |
| } |
| |
| virtual void Backward(const OpContext &ctx, |
| const std::vector<TBlob> &out_grad, |
| const std::vector<TBlob> &in_data, |
| const std::vector<TBlob> &out_data, |
| const std::vector<OpReqType> &req, |
| const std::vector<TBlob> &in_grad, |
| const std::vector<TBlob> &aux_args) { |
| using namespace mshadow; |
| CHECK_NE(req[bs::kData], kWriteInplace); |
| CHECK_NE(req[bs::kGrid], kWriteInplace); |
| CHECK_EQ(in_data.size(), 2U); |
| CHECK_EQ(out_data.size(), 2U); |
| CHECK_EQ(out_grad.size(), 1U); |
| Stream<gpu> *s = ctx.get_stream<gpu>(); |
| Tensor<gpu, 4, DType> data = in_data[bs::kData].get<gpu, 4, DType>(s); |
| Tensor<gpu, 4, DType> grid_tmp = out_data[bs::kTmp].get<gpu, 4, DType>(s); |
| Tensor<gpu, 4, DType> gdata = in_grad[bs::kData].get<gpu, 4, DType>(s); |
| Tensor<gpu, 4, DType> ggrid = in_grad[bs::kGrid].get<gpu, 4, DType>(s); |
| Tensor<gpu, 4, DType> grad = out_grad[bs::kOut].get<gpu, 4, DType>(s); |
| |
| typename DataType<DType>::ScaleType alpha = (req[bs::kData] == kNullOp) ? 0.0f : 1.0f; |
| typename DataType<DType>::ScaleType beta = (req[bs::kData] == kAddTo) ? 1.0f : 0.0f; |
| typename DataType<DType>::ScaleType alpha_dgrid = 1.0f; |
| typename DataType<DType>::ScaleType beta_dgrid = 0.0f; |
| CUDNN_CALL(cudnnSpatialTfSamplerBackward(s->dnn_handle_, |
| st_desc_, |
| &alpha, |
| in_desc_, |
| data.dptr_, |
| &beta, |
| in_desc_/*reuse in_desc_*/, |
| gdata.dptr_/*output*/, |
| &alpha_dgrid, |
| out_desc_/*reuse out_desc_*/, |
| grad.dptr_, |
| grid_tmp.dptr_, |
| &beta_dgrid, |
| grid_tmp.dptr_)); |
| Assign(ggrid, req[bs::kGrid], transpose(grid_tmp, Shape4(0, 3, 1, 2))); |
| } |
| |
| private: |
| inline void Init(mshadow::Stream<gpu> *s, |
| const std::vector<TBlob> &in_data, |
| const std::vector<TBlob> &out_data) { |
| using namespace mshadow; |
| #if CUDNN_MAJOR >= 5 |
| format_ = CUDNN_TENSOR_NCHW; |
| #endif |
| CHECK_EQ(in_data.size(), 2U); |
| CHECK_EQ(out_data.size(), 2U); |
| if (!init_cudnn_) { |
| init_cudnn_ = true; |
| Tensor<gpu, 4, DType> data = in_data[bs::kData].get<gpu, 4, DType>(s); |
| Tensor<gpu, 4, DType> out = out_data[bs::kOut].get<gpu, 4, DType>(s); |
| CUDNN_CALL(cudnnCreateSpatialTransformerDescriptor(&st_desc_)); |
| CUDNN_CALL(cudnnCreateTensorDescriptor(&in_desc_)); |
| CUDNN_CALL(cudnnCreateTensorDescriptor(&out_desc_)); |
| CUDNN_CALL(cudnnSetTensor4dDescriptor(in_desc_, |
| format_, |
| dtype_, |
| data.size(0), |
| data.size(1), |
| data.size(2), |
| data.size(3))); |
| CUDNN_CALL(cudnnSetTensor4dDescriptor(out_desc_, |
| format_, |
| dtype_, |
| out.size(0), |
| out.size(1), |
| out.size(2), |
| out.size(3))); |
| int dim[] = {static_cast<int>(out.size(0)), static_cast<int>(out.size(1)), |
| static_cast<int>(out.size(2)), static_cast<int>(out.size(3))}; |
| CUDNN_CALL(cudnnSetSpatialTransformerNdDescriptor(st_desc_, |
| sampler_, |
| dtype_, |
| 4, |
| dim)); |
| } |
| } |
| |
| bool init_cudnn_; |
| cudnnDataType_t dtype_; |
| cudnnSpatialTransformerDescriptor_t st_desc_; |
| cudnnTensorDescriptor_t in_desc_; |
| cudnnTensorDescriptor_t out_desc_; |
| cudnnSamplerType_t sampler_; |
| #if CUDNN_MAJOR >= 5 |
| cudnnTensorFormat_t format_; |
| #endif |
| BilinearSamplerParam param_; |
| }; |
| #endif // __CUDACC__ && CUDNN |
| } // namespace op |
| } // namespace mxnet |
| |
| #endif // MXNET_OPERATOR_CUDNN_BILINEAR_SAMPLER_INL_H_ |