| /*! |
| * Copyright (c) 2017 by Contributors |
| * \file mxnet_op.h |
| * \brief |
| * \author Junyuan Xie |
| */ |
| #ifndef MXNET_OPERATOR_MXNET_OP_H_ |
| #define MXNET_OPERATOR_MXNET_OP_H_ |
| |
| #include <mxnet/base.h> |
| #include <algorithm> |
| |
| namespace mxnet { |
| namespace op { |
| namespace mxnet_op { |
#ifdef __CUDA_ARCH__
// Device-side compilation: place PI in GPU constant memory so reads go
// through the broadcast constant cache.
__constant__ const float PI = 3.14159265358979323846;
#else
// Host-side compilation: a plain namespace-scope constant.
const float PI = 3.14159265358979323846;
// Make unqualified isnan() valid in host code, mirroring the CUDA built-in
// that device code resolves to.
using std::isnan;
#endif
| |
| |
/*!
 * \brief Device-dispatched launcher for elementwise operators; specialized
 *        for cpu and gpu below.
 * \tparam OP  operator type providing a static Map(i, args...) per-index function
 * \tparam xpu device type tag (cpu or gpu)
 */
template<typename OP, typename xpu>
struct Kernel;
| |
/*! \brief CPU specialization: applies OP::Map sequentially (or via OpenMP) over [0, N). */
template<typename OP>
struct Kernel<OP, cpu> {
  /*!
   * \brief Invoke OP::Map(i, args...) for every index i in [0, N).
   * \param s    CPU stream handle (unused here; kept for parity with the GPU path)
   * \param N    number of indices to process
   * \param args extra arguments forwarded unchanged to OP::Map
   */
  template<typename ...Args>
  inline static void Launch(mshadow::Stream<cpu> *s, int N, Args... args) {
#if (MXNET_USE_CUDA == 0)
// OpenMP parallelization is only enabled for CUDA-free builds in this file.
#pragma omp parallel for
#endif
    for (int idx = 0; idx < N; ++idx) {
      OP::Map(idx, args...);
    }
  }
};
| |
| #ifdef __CUDACC__ |
/*!
 * \brief Generic elementwise GPU kernel: each thread walks [0, N) with a
 *        grid-stride loop and applies OP::Map at every index it owns, so any
 *        grid size covers all N elements.
 */
template<typename OP, typename ...Args>
__global__ void mxnet_generic_kernel(int N, Args... args) {
  const int start = blockIdx.x * blockDim.x + threadIdx.x;
  const int step = blockDim.x * gridDim.x;
  for (int idx = start; idx < N; idx += step) {
    OP::Map(idx, args...);
  }
}
| |
/*! \brief GPU specialization: launches mxnet_generic_kernel on the stream's CUDA stream. */
template<typename OP>
struct Kernel<OP, gpu> {
  /*!
   * \brief Invoke OP::Map(i, args...) for every index i in [0, N) on the GPU.
   * \param s    GPU stream whose underlying cudaStream_t receives the launch
   * \param N    number of indices to process
   * \param args extra arguments forwarded unchanged to OP::Map
   */
  template<typename ...Args>
  inline static void Launch(mshadow::Stream<gpu> *s, int N, Args... args) {
    using namespace mshadow::cuda;
    // ceil(N / kBaseThreadNum) blocks, clamped to the maximum grid size;
    // the grid-stride loop inside the kernel covers any remainder.
    int ngrid = std::min(kMaxGridNum, (N + kBaseThreadNum - 1) / kBaseThreadNum);
    mxnet_generic_kernel<OP, Args...>
        <<<ngrid, kBaseThreadNum, 0, mshadow::Stream<gpu>::GetStream(s)>>>(
            N, args...);
    // Kernel launches return no status directly: without this check a bad
    // launch configuration or in-kernel fault surfaces only at some later,
    // unrelated CUDA call. Surface it here instead.
    MSHADOW_CUDA_POST_KERNEL_CHECK(mxnet_generic_kernel);
  }
};
| #endif // __CUDACC__ |
| |
/*!
 * \brief Operator request type switch: dispatches on a runtime OpRequestType
 *        and binds the matched value as a compile-time-usable const int.
 * \param req     runtime request value (kNullOp / kWriteTo / kWriteInplace / kAddTo)
 * \param ReqType name of the const int visible inside the expanded body;
 *                kWriteInplace is folded into kWriteTo
 * \param ...     statements executed with ReqType in scope; kNullOp and
 *                unrecognized values execute nothing
 */
#define MXNET_ASSIGN_REQ_SWITCH(req, ReqType, ...)  \
  switch (req) {                                    \
  case kNullOp:                                     \
    break;                                          \
  case kWriteInplace:                               \
  case kWriteTo:                                    \
    {                                               \
      const int ReqType = kWriteTo;                 \
      {__VA_ARGS__}                                 \
    }                                               \
    break;                                          \
  case kAddTo:                                      \
    {                                               \
      const int ReqType = kAddTo;                   \
      {__VA_ARGS__}                                 \
    }                                               \
    break;                                          \
  default:                                          \
    break;                                          \
  }
| |
| /*! |
| * \brief assign the val to out according |
| * to request in Kernel::Launch |
| * \param out the data to be assigned |
| * \param req the assignment request |
| * \param val the value to be assigned to out |
| * \tparam OType output type |
| * \tparam VType value type |
| */ |
| #define KERNEL_ASSIGN(out, req, val) \ |
| { \ |
| switch (req) { \ |
| case kNullOp: \ |
| break; \ |
| case kWriteTo: \ |
| case kWriteInplace: \ |
| (out) = (val); \ |
| break; \ |
| case kAddTo: \ |
| (out) += (val); \ |
| break; \ |
| default: \ |
| break; \ |
| } \ |
| } |
| |
/*! \brief Elementwise clamp kernel: out[i] = datas[i] limited to [a_min, a_max]. */
struct clip {
  /*!
   * \brief Write datas[i] clamped to [a_min, a_max] into out[i].
   * \param i     element index
   * \param out   output array
   * \param datas input array
   * \param a_min lower clamp bound
   * \param a_max upper clamp bound
   */
  template<typename DType>
  MSHADOW_XINLINE static void Map(int i, DType* out, const DType* datas,
                                  DType a_min, DType a_max) {
    const DType v = datas[i];
    out[i] = (v > a_max) ? a_max : ((v < a_min) ? a_min : v);
  }
};
| |
/*! \brief Backward of clip: pass gradient through inside [a_min, a_max], zero outside. */
struct clip_grad {
  /*!
   * \brief Write grad[i] to out[i] if datas[i] lies within [a_min, a_max], else 0.
   * \param i     element index
   * \param out   output gradient array
   * \param grad  incoming gradient array
   * \param datas forward-pass input array
   * \param a_min lower clamp bound
   * \param a_max upper clamp bound
   */
  template<typename DType>
  MSHADOW_XINLINE static void Map(int i, DType* out, const DType* grad, const DType* datas,
                                  DType a_min, DType a_max) {
    const DType v = datas[i];
    const bool clipped = (v > a_max) || (v < a_min);
    out[i] = clipped ? static_cast<DType>(0) : grad[i];
  }
};
| |
/*! \brief Upper bound on reversed dimensions; sizes the shared-memory caches below. */
#define REVERSE_MAX_DIM 10U

/*! \brief Kernel operator that copies src into dst with selected axes reversed. */
struct reverse {
  /*!
   * \brief Compute the destination linear index for a source linear index,
   *        mirroring the coordinate along each reversed dimension
   *        (x becomes stride_[i] - 1 - x).
   * \param idx         source linear index
   * \param nreversedim number of reversed dimensions described by the arrays
   * \param stride_     per-reversed-dim extents used as the mirror modulus
   * \param trailing_   per-reversed-dim trailing element counts (divisor that
   *                    isolates the coordinate); presumably products of the
   *                    dims after each reversed axis — set up by the caller
   * \return the mirrored linear index
   */
  MSHADOW_XINLINE static int ReverseIndex(index_t idx,
                                          index_t nreversedim,
                                          const index_t * stride_,
                                          const index_t * trailing_) {
    index_t outputIndex = idx;
    for (index_t i = 0; i < nreversedim; ++i) {
      // Split idx into (high dims, coordinate x along this axis, low dims),
      // then reassemble with x mirrored to stride_[i] - 1 - x.
      const index_t low = outputIndex % trailing_[i];
      index_t high = outputIndex / trailing_[i];
      const index_t x = high%stride_[i];
      high /= stride_[i];
      outputIndex = (high*stride_[i] + stride_[i] - 1 - x)*trailing_[i] + low;
    }
    return outputIndex;
  }
#ifdef __CUDACC__
  /*!
   * \brief GPU Map: first REVERSE_MAX_DIM threads of each block stage the
   *        stride/trailing tables into shared memory, then every thread copies
   *        its element to the mirrored destination.
   * \note assumes blockDim.x >= REVERSE_MAX_DIM and that stride_/trailing_
   *       are readable up to REVERSE_MAX_DIM entries even when
   *       nreversedim < REVERSE_MAX_DIM — TODO(review): confirm with callers.
   */
  template<typename DType>
  __device__ static void Map(int index, index_t nreversedim, const DType *src, DType *dst,
                             const index_t * stride_,
                             const index_t * trailing_) {
    __shared__ index_t stride_share[REVERSE_MAX_DIM];
    __shared__ index_t trailing_share[REVERSE_MAX_DIM];
    if (threadIdx.x < REVERSE_MAX_DIM) {
      stride_share[threadIdx.x] = stride_[threadIdx.x];
      trailing_share[threadIdx.x] = trailing_[threadIdx.x];
    }
    // Barrier before any thread reads the shared copies.
    __syncthreads();
    index_t new_idx = ReverseIndex(index, nreversedim, stride_share, trailing_share);
    dst[new_idx] = src[index];
  }
#else
  /*! \brief CPU Map: copy src[index] to its mirrored position in dst. */
  template<typename DType>
  MSHADOW_XINLINE static void Map(int index, index_t nreversedim, const DType *src, DType *dst,
                                  const index_t * stride_,
                                  const index_t * trailing_) {
    index_t new_idx = ReverseIndex(index, nreversedim, stride_, trailing_);
    dst[new_idx] = src[index];
  }
#endif
};
| |
| } // namespace mxnet_op |
| } // namespace op |
| } // namespace mxnet |
| #endif // MXNET_OPERATOR_MXNET_OP_H_ |