src/operator/nn/sequence_mask-inl.h - mxnet - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

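 /*!
  * \file sequence_mask-inl.h
  * \brief Sequence masking helpers (CPU loop and CUDA kernel): for every index b,
  *        overwrite dst[s][b][*] with a constant for all s >= length[b].
 */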
 #ifndef MXNET_OPERATOR_NN_SEQUENCE_MASK_INL_H_
 #define MXNET_OPERATOR_NN_SEQUENCE_MASK_INL_H_

 namespace mxnet {
 namespace op {
 namespace mxnet_op {

 /*!
  * \brief Overwrite the tail of each sequence in a CPU tensor with a constant.
  *
  * dst is indexed as dst[s][b][r] (presumably sequence step, batch entry and
  * feature -- confirm against the caller). For every b, each element with
  * s >= label[b] is set to value; elements with s < label[b] are untouched.
  *
  * \param dst   3-D tensor modified in place
  * \param label 1-D tensor indexed by b; label[b] is converted to index_t and
  *              used as the first s that gets overwritten
  * \param value constant written into the masked positions
  */
 template <typename DType, typename LType>
 inline void SequenceMask(const mshadow::Tensor<cpu, 3, DType> &dst,
                          const mshadow::Tensor<cpu, 1, LType> label, DType value) {
   const index_t num_steps = dst.size(0);
   const index_t num_batches = dst.size(1);
   const index_t num_inner = dst.size(2);

   for (index_t batch = 0; batch < num_batches; ++batch) {
     // Start at the first step past the valid part of this sequence.
     index_t step = label[batch];
     while (step < num_steps) {
       for (index_t inner = 0; inner < num_inner; ++inner) {
         dst[step][batch][inner] = value;
       }
       ++step;
     }
   }
 }

 #ifdef __CUDACC__
 /*!
  * \brief CUDA kernel that overwrites the tail of each sequence with a constant.
  *
  * Launch layout: 1-D grid of 1-D blocks; the global thread index
  * (threadIdx.x + blockIdx.x * blockDim.x) selects one index `batch` along
  * dst's second axis. Threads whose index is out of range return immediately,
  * so the grid may be larger than needed. No shared memory is used.
  *
  * Each in-range thread sets dst[s][batch][r] = value for every
  * s >= lengths[batch] and every r.
  *
  * \tparam n_bits unused by the kernel body; kept so existing launches compile
  * \param dst     3-D tensor modified in place
  * \param lengths 1-D tensor; lengths[batch] is converted to index_t and used
  *                as the first s that gets overwritten
  * \param value   constant written into the masked positions
  */
 template<int n_bits, typename DType, typename LType>
 __global__ void SequenceMaskKernel(mshadow::Tensor<gpu, 3, DType> dst,
                                    const mshadow::Tensor<gpu, 1, LType> lengths, DType value) {
   const index_t smax = dst.size(0);
   // lengths is 1-D, so it has no dimension 1. Bound the thread index by both
   // the axis of dst that is indexed below and the number of entries in
   // lengths, so neither tensor is accessed out of range.
   const index_t dst_batches = dst.size(1);
   const index_t num_lengths = lengths.size(0);
   const index_t bmax = dst_batches < num_lengths ? dst_batches : num_lengths;
   const index_t nmax = dst.size(2);
   const unsigned int batch = threadIdx.x + blockIdx.x * blockDim.x;

   // early return if out of bounds
   if (batch >= bmax)
     return;

   // this thread handles one batch index: fill every step from
   // lengths[batch] onward, across the whole innermost axis
   for (index_t s = lengths[batch]; s < smax; ++s)
     for (index_t r = 0; r < nmax; ++r)
       dst[s][batch][r] = value;
 }

 /*!
  * \brief Launch SequenceMaskKernel to overwrite the tail of each sequence in
  *        a GPU tensor with a constant.
  *
  * The kernel uses one thread per index along dst's second axis, so the grid
  * is sized as ceil(dst.size(1) / kBaseThreadNum) blocks of kBaseThreadNum
  * threads. The launch is issued on the CUDA stream obtained from dst.stream_.
  *
  * \param dst     3-D tensor modified in place
  * \param lengths 1-D tensor of per-batch start positions for the fill
  * \param value   constant written into the masked positions
  */
 template<typename DType, typename LType>
 inline void SequenceMask(const mshadow::Tensor<gpu, 3, DType> &dst,
                          const mshadow::Tensor<gpu, 1, LType> &lengths, DType value) {
   using namespace mshadow;
   using namespace mshadow::cuda;
   const index_t batch_size = dst.size(1);
   // A zero-sized grid is an invalid launch configuration, and with no batch
   // entries there is nothing to mask.
   if (batch_size == 0)
     return;
   const index_t threads_per_block = static_cast<index_t>(kBaseThreadNum);
   // One thread per batch entry: round the block count up so the last,
   // partially filled block is still launched (the kernel bounds-checks).
   const index_t num_blocks = (batch_size + threads_per_block - 1) / threads_per_block;
   dim3 dimBlock(kBaseThreadNum);
   dim3 dimGrid(static_cast<unsigned int>(num_blocks));
   CheckLaunchParam(dimGrid, dimBlock, "SequenceMask");
   cudaStream_t stream = Stream<gpu>::GetStream(dst.stream_);
   SequenceMaskKernel<kBaseThreadBits, DType><<<dimGrid, dimBlock, 0, stream>>>(dst, lengths, value);
   MSHADOW_CUDA_POST_KERNEL_CHECK(SequenceMaskKernel);
 }
 #endif

 }  // namespace mxnet_op

 }  // namespace op
 }  // namespace mxnet

 #endif  // MXNET_OPERATOR_NN_SEQUENCE_MASK_INL_H_
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/

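	/*!
	* \file sequence_mask-inl.h
	* \brief Sequence masking helpers (CPU loop and CUDA kernel): for every index b,
	*        overwrite dst[s][b][*] with a constant for all s >= length[b].
	*/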
	#ifndef MXNET_OPERATOR_NN_SEQUENCE_MASK_INL_H_
	#define MXNET_OPERATOR_NN_SEQUENCE_MASK_INL_H_

	namespace mxnet {
	namespace op {
	namespace mxnet_op {

	/*!
	 * \brief Overwrite the tail of each sequence in a CPU tensor with a constant.
	 *
	 * dst is indexed as dst[s][b][r] (presumably sequence step, batch entry and
	 * feature -- confirm against the caller). For every b, each element with
	 * s >= label[b] is set to value; elements with s < label[b] are untouched.
	 *
	 * \param dst   3-D tensor modified in place
	 * \param label 1-D tensor indexed by b; label[b] is converted to index_t and
	 *              used as the first s that gets overwritten
	 * \param value constant written into the masked positions
	 */
	template <typename DType, typename LType>
	inline void SequenceMask(const mshadow::Tensor<cpu, 3, DType> &dst,
	                         const mshadow::Tensor<cpu, 1, LType> label, DType value) {
	  const index_t num_steps = dst.size(0);
	  const index_t num_batches = dst.size(1);
	  const index_t num_inner = dst.size(2);

	  for (index_t batch = 0; batch < num_batches; ++batch) {
	    // Start at the first step past the valid part of this sequence.
	    index_t step = label[batch];
	    while (step < num_steps) {
	      for (index_t inner = 0; inner < num_inner; ++inner) {
	        dst[step][batch][inner] = value;
	      }
	      ++step;
	    }
	  }
	}

	#ifdef __CUDACC__
	/*!
	 * \brief CUDA kernel that overwrites the tail of each sequence with a constant.
	 *
	 * Launch layout: 1-D grid of 1-D blocks; the global thread index
	 * (threadIdx.x + blockIdx.x * blockDim.x) selects one index `batch` along
	 * dst's second axis. Threads whose index is out of range return immediately,
	 * so the grid may be larger than needed. No shared memory is used.
	 *
	 * Each in-range thread sets dst[s][batch][r] = value for every
	 * s >= lengths[batch] and every r.
	 *
	 * \tparam n_bits unused by the kernel body; kept so existing launches compile
	 * \param dst     3-D tensor modified in place
	 * \param lengths 1-D tensor; lengths[batch] is converted to index_t and used
	 *                as the first s that gets overwritten
	 * \param value   constant written into the masked positions
	 */
	template<int n_bits, typename DType, typename LType>
	__global__ void SequenceMaskKernel(mshadow::Tensor<gpu, 3, DType> dst,
	                                   const mshadow::Tensor<gpu, 1, LType> lengths, DType value) {
	  const index_t smax = dst.size(0);
	  // lengths is 1-D, so it has no dimension 1. Bound the thread index by both
	  // the axis of dst that is indexed below and the number of entries in
	  // lengths, so neither tensor is accessed out of range.
	  const index_t dst_batches = dst.size(1);
	  const index_t num_lengths = lengths.size(0);
	  const index_t bmax = dst_batches < num_lengths ? dst_batches : num_lengths;
	  const index_t nmax = dst.size(2);
	  const unsigned int batch = threadIdx.x + blockIdx.x * blockDim.x;

	  // early return if out of bounds
	  if (batch >= bmax)
	    return;

	  // this thread handles one batch index: fill every step from
	  // lengths[batch] onward, across the whole innermost axis
	  for (index_t s = lengths[batch]; s < smax; ++s)
	    for (index_t r = 0; r < nmax; ++r)
	      dst[s][batch][r] = value;
	}

	/*!
	 * \brief Launch SequenceMaskKernel to overwrite the tail of each sequence in
	 *        a GPU tensor with a constant.
	 *
	 * The kernel uses one thread per index along dst's second axis, so the grid
	 * is sized as ceil(dst.size(1) / kBaseThreadNum) blocks of kBaseThreadNum
	 * threads. The launch is issued on the CUDA stream obtained from dst.stream_.
	 *
	 * \param dst     3-D tensor modified in place
	 * \param lengths 1-D tensor of per-batch start positions for the fill
	 * \param value   constant written into the masked positions
	 */
	template<typename DType, typename LType>
	inline void SequenceMask(const mshadow::Tensor<gpu, 3, DType> &dst,
	                         const mshadow::Tensor<gpu, 1, LType> &lengths, DType value) {
	  using namespace mshadow;
	  using namespace mshadow::cuda;
	  const index_t batch_size = dst.size(1);
	  // A zero-sized grid is an invalid launch configuration, and with no batch
	  // entries there is nothing to mask.
	  if (batch_size == 0)
	    return;
	  const index_t threads_per_block = static_cast<index_t>(kBaseThreadNum);
	  // One thread per batch entry: round the block count up so the last,
	  // partially filled block is still launched (the kernel bounds-checks).
	  const index_t num_blocks = (batch_size + threads_per_block - 1) / threads_per_block;
	  dim3 dimBlock(kBaseThreadNum);
	  dim3 dimGrid(static_cast<unsigned int>(num_blocks));
	  CheckLaunchParam(dimGrid, dimBlock, "SequenceMask");
	  cudaStream_t stream = Stream<gpu>::GetStream(dst.stream_);
	  SequenceMaskKernel<kBaseThreadBits, DType><<<dimGrid, dimBlock, 0, stream>>>(dst, lengths, value);
	  MSHADOW_CUDA_POST_KERNEL_CHECK(SequenceMaskKernel);
	}
	#endif

	} // namespace mxnet_op

	} // namespace op
	} // namespace mxnet

	#endif // MXNET_OPERATOR_NN_SEQUENCE_MASK_INL_H_