| /*! |
| * Copyright (c) 2017 by Contributors |
| * \file mxnet_op.h |
| * \brief |
| * \author Junyuan Xie |
| */ |
| #ifndef MXNET_OPERATOR_MXNET_OP_H_ |
| #define MXNET_OPERATOR_MXNET_OP_H_ |
| |
| #include <mxnet/base.h> |
| #include <algorithm> |
| |
| namespace mxnet { |
| namespace op { |
| namespace mxnet_op { |
#ifdef __CUDA_ARCH__
// Device-side compilation: place PI in GPU constant memory so reads go
// through the broadcast constant cache.
__constant__ const float PI = 3.14159265358979323846;
#else
// Host-side compilation: a plain namespace-scope constant.
const float PI = 3.14159265358979323846;
// Make unqualified isnan() valid in host code, mirroring the CUDA built-in
// that device code resolves to.
using std::isnan;
#endif
| |
| |
/*!
 * \brief Device-dispatched launcher for elementwise operators; specialized
 *        for cpu and gpu below.
 * \tparam OP  operator type providing a static Map(i, args...) per-index function
 * \tparam xpu device type tag (cpu or gpu)
 */
template<typename OP, typename xpu>
struct Kernel;
| |
/*! \brief CPU specialization: applies OP::Map sequentially (or via OpenMP) over [0, N). */
template<typename OP>
struct Kernel<OP, cpu> {
  /*!
   * \brief Invoke OP::Map(i, args...) for every index i in [0, N).
   * \param s    CPU stream handle (unused here; kept for parity with the GPU path)
   * \param N    number of indices to process
   * \param args extra arguments forwarded unchanged to OP::Map
   */
  template<typename ...Args>
  inline static void Launch(mshadow::Stream<cpu> *s, int N, Args... args) {
#if (MXNET_USE_CUDA == 0)
// OpenMP parallelization is only enabled for CUDA-free builds in this file.
#pragma omp parallel for
#endif
    for (int idx = 0; idx < N; ++idx) {
      OP::Map(idx, args...);
    }
  }
};
| |
| #ifdef __CUDACC__ |
/*!
 * \brief Generic elementwise GPU kernel: each thread walks [0, N) with a
 *        grid-stride loop and applies OP::Map at every index it owns, so any
 *        grid size covers all N elements.
 */
template<typename OP, typename ...Args>
__global__ void mxnet_generic_kernel(int N, Args... args) {
  const int start = blockIdx.x * blockDim.x + threadIdx.x;
  const int step = blockDim.x * gridDim.x;
  for (int idx = start; idx < N; idx += step) {
    OP::Map(idx, args...);
  }
}
| |
/*! \brief GPU specialization: launches mxnet_generic_kernel on the stream's CUDA stream. */
template<typename OP>
struct Kernel<OP, gpu> {
  /*!
   * \brief Invoke OP::Map(i, args...) for every index i in [0, N) on the GPU.
   * \param s    GPU stream whose underlying cudaStream_t receives the launch
   * \param N    number of indices to process
   * \param args extra arguments forwarded unchanged to OP::Map
   */
  template<typename ...Args>
  inline static void Launch(mshadow::Stream<gpu> *s, int N, Args... args) {
    using namespace mshadow::cuda;
    // ceil(N / kBaseThreadNum) blocks, clamped to the maximum grid size;
    // the grid-stride loop inside the kernel covers any remainder.
    int ngrid = std::min(kMaxGridNum, (N + kBaseThreadNum - 1) / kBaseThreadNum);
    mxnet_generic_kernel<OP, Args...>
        <<<ngrid, kBaseThreadNum, 0, mshadow::Stream<gpu>::GetStream(s)>>>(
            N, args...);
    // Kernel launches return no status directly: without this check a bad
    // launch configuration or in-kernel fault surfaces only at some later,
    // unrelated CUDA call. Surface it here instead.
    MSHADOW_CUDA_POST_KERNEL_CHECK(mxnet_generic_kernel);
  }
};
| #endif // __CUDACC__ |
| |
/*!
 * \brief Operator request type switch: dispatches on a runtime OpRequestType
 *        and binds the matched value as a compile-time-usable const int.
 * \param req     runtime request value (kNullOp / kWriteTo / kWriteInplace / kAddTo)
 * \param ReqType name of the const int visible inside the expanded body;
 *                kWriteInplace is folded into kWriteTo
 * \param ...     statements executed with ReqType in scope; kNullOp and
 *                unrecognized values execute nothing
 */
#define MXNET_ASSIGN_REQ_SWITCH(req, ReqType, ...)  \
  switch (req) {                                    \
  case kNullOp:                                     \
    break;                                          \
  case kWriteInplace:                               \
  case kWriteTo:                                    \
    {                                               \
      const int ReqType = kWriteTo;                 \
      {__VA_ARGS__}                                 \
    }                                               \
    break;                                          \
  case kAddTo:                                      \
    {                                               \
      const int ReqType = kAddTo;                   \
      {__VA_ARGS__}                                 \
    }                                               \
    break;                                          \
  default:                                          \
    break;                                          \
  }
| |
| /*! |
| * \brief assign the val to out according |
| * to request in Kernel::Launch |
| * \param out the data to be assigned |
| * \param req the assignment request |
| * \param val the value to be assigned to out |
| * \tparam OType output type |
| * \tparam VType value type |
| */ |
| #define KERNEL_ASSIGN(out, req, val) \ |
| { \ |
| switch (req) { \ |
| case kNullOp: \ |
| break; \ |
| case kWriteTo: \ |
| case kWriteInplace: \ |
| (out) = (val); \ |
| break; \ |
| case kAddTo: \ |
| (out) += (val); \ |
| break; \ |
| default: \ |
| break; \ |
| } \ |
| } |
| |
/*! \brief Elementwise clamp kernel: out[i] = datas[i] limited to [a_min, a_max]. */
struct clip {
  /*!
   * \brief Write datas[i] clamped to [a_min, a_max] into out[i].
   * \param i     element index
   * \param out   output array
   * \param datas input array
   * \param a_min lower clamp bound
   * \param a_max upper clamp bound
   */
  template<typename DType>
  MSHADOW_XINLINE static void Map(int i, DType* out, const DType* datas,
                                  DType a_min, DType a_max) {
    const DType v = datas[i];
    out[i] = (v > a_max) ? a_max : ((v < a_min) ? a_min : v);
  }
};
| |
/*! \brief Backward of clip: pass gradient through inside [a_min, a_max], zero outside. */
struct clip_grad {
  /*!
   * \brief Write grad[i] to out[i] if datas[i] lies within [a_min, a_max], else 0.
   * \param i     element index
   * \param out   output gradient array
   * \param grad  incoming gradient array
   * \param datas forward-pass input array
   * \param a_min lower clamp bound
   * \param a_max upper clamp bound
   */
  template<typename DType>
  MSHADOW_XINLINE static void Map(int i, DType* out, const DType* grad, const DType* datas,
                                  DType a_min, DType a_max) {
    const DType v = datas[i];
    const bool clipped = (v > a_max) || (v < a_min);
    out[i] = clipped ? static_cast<DType>(0) : grad[i];
  }
};
| |
/*! \brief Upper bound on reversed dimensions; sizes the shared-memory caches below. */
#define REVERSE_MAX_DIM 10U

/*! \brief Kernel operator that copies src into dst with selected axes reversed. */
struct reverse {
  /*!
   * \brief Compute the destination linear index for a source linear index,
   *        mirroring the coordinate along each reversed dimension
   *        (x becomes stride_[i] - 1 - x).
   * \param idx         source linear index
   * \param nreversedim number of reversed dimensions described by the arrays
   * \param stride_     per-reversed-dim extents used as the mirror modulus
   * \param trailing_   per-reversed-dim trailing element counts (divisor that
   *                    isolates the coordinate); presumably products of the
   *                    dims after each reversed axis — set up by the caller
   * \return the mirrored linear index
   */
  MSHADOW_XINLINE static int ReverseIndex(index_t idx,
                                          index_t nreversedim,
                                          const index_t * stride_,
                                          const index_t * trailing_) {
    index_t outputIndex = idx;
    for (index_t i = 0; i < nreversedim; ++i) {
      // Split idx into (high dims, coordinate x along this axis, low dims),
      // then reassemble with x mirrored to stride_[i] - 1 - x.
      const index_t low = outputIndex % trailing_[i];
      index_t high = outputIndex / trailing_[i];
      const index_t x = high%stride_[i];
      high /= stride_[i];
      outputIndex = (high*stride_[i] + stride_[i] - 1 - x)*trailing_[i] + low;
    }
    return outputIndex;
  }
#ifdef __CUDACC__
  /*!
   * \brief GPU Map: first REVERSE_MAX_DIM threads of each block stage the
   *        stride/trailing tables into shared memory, then every thread copies
   *        its element to the mirrored destination.
   * \note assumes blockDim.x >= REVERSE_MAX_DIM and that stride_/trailing_
   *       are readable up to REVERSE_MAX_DIM entries even when
   *       nreversedim < REVERSE_MAX_DIM — TODO(review): confirm with callers.
   */
  template<typename DType>
  __device__ static void Map(int index, index_t nreversedim, const DType *src, DType *dst,
                             const index_t * stride_,
                             const index_t * trailing_) {
    __shared__ index_t stride_share[REVERSE_MAX_DIM];
    __shared__ index_t trailing_share[REVERSE_MAX_DIM];
    if (threadIdx.x < REVERSE_MAX_DIM) {
      stride_share[threadIdx.x] = stride_[threadIdx.x];
      trailing_share[threadIdx.x] = trailing_[threadIdx.x];
    }
    // Barrier before any thread reads the shared copies.
    __syncthreads();
    index_t new_idx = ReverseIndex(index, nreversedim, stride_share, trailing_share);
    dst[new_idx] = src[index];
  }
#else
  /*! \brief CPU Map: copy src[index] to its mirrored position in dst. */
  template<typename DType>
  MSHADOW_XINLINE static void Map(int index, index_t nreversedim, const DType *src, DType *dst,
                                  const index_t * stride_,
                                  const index_t * trailing_) {
    index_t new_idx = ReverseIndex(index, nreversedim, stride_, trailing_);
    dst[new_idx] = src[index];
  }
#endif
};
| |
| } // namespace mxnet_op |
| } // namespace op |
| } // namespace mxnet |
| #endif // MXNET_OPERATOR_MXNET_OP_H_ |