/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "./cudnn_pooling.h"
#ifdef USE_CUDNN
#include <cudnn.h>
#include <chrono>
#include "./cudnn_utils.h"
#include "singa/utils/logging.h"
namespace singa {
RegisterLayerClass(cudnn_pooling, CudnnPooling);
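// Release the cuDNN descriptors created in InitCudnn(), if any.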
CudnnPooling::~CudnnPooling() {
if (pool_desc_ != nullptr)
CUDNN_CHECK(cudnnDestroyPoolingDescriptor(pool_desc_));
if (x_desc_ != nullptr) CUDNN_CHECK(cudnnDestroyTensorDescriptor(x_desc_));
if (y_desc_ != nullptr) CUDNN_CHECK(cudnnDestroyTensorDescriptor(y_desc_));
}
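// Read the pooling configuration and record whether cuDNN should propagate
// NaN values through the pooling window.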
void CudnnPooling::Setup(const Shape &in_sample, const LayerConf &conf) {
Pooling::Setup(in_sample, conf);
PoolingConf pool_conf = conf.pooling_conf();
if (pool_conf.nan_prop())
nan_prop_ = CUDNN_PROPAGATE_NAN;
else
nan_prop_ = CUDNN_NOT_PROPAGATE_NAN;
}
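// Create (on the first call) and configure the cuDNN tensor and pooling
// descriptors for the given input; called again from Forward() whenever the
// batch size changes so the descriptors match the new input shape.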
void CudnnPooling::InitCudnn(const Tensor &input) {
DataType dtype = input.data_type();
size_t batchsize = input.shape(0);
if (!has_init_cudnn_) {
CUDNN_CHECK(cudnnCreateTensorDescriptor(&x_desc_));
CUDNN_CHECK(cudnnCreateTensorDescriptor(&y_desc_));
CUDNN_CHECK(cudnnCreatePoolingDescriptor(&pool_desc_));
}
CUDNN_CHECK(cudnnSetTensor4dDescriptor(x_desc_, CUDNN_TENSOR_NCHW,
GetCudnnDataType(dtype), batchsize,
channels_, height_, width_));
CUDNN_CHECK(cudnnSetTensor4dDescriptor(
y_desc_, CUDNN_TENSOR_NCHW, GetCudnnDataType(dtype), batchsize, channels_,
pooled_height_, pooled_width_));
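  // Map the layer's pooling method onto the corresponding cuDNN mode;
  // average pooling excludes the padded region from the divisor.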
auto pool_method = CUDNN_POOLING_MAX;
if (pool_ == PoolingConf_PoolMethod_MAX)
pool_method = CUDNN_POOLING_MAX;
else if (pool_ == PoolingConf_PoolMethod_AVE)
pool_method = CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING;
else
LOG(FATAL) << "Not implemented!";
CUDNN_CHECK(cudnnSetPooling2dDescriptor(pool_desc_, pool_method, nan_prop_,
kernel_h_, kernel_w_, pad_h_, pad_w_,
stride_h_, stride_w_));
has_init_cudnn_ = true;
}
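// Run cuDNN pooling on the GPU. The input and output tensors are cached
// during training so that Backward() can compute the gradient.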
const Tensor CudnnPooling::Forward(int flag, const Tensor &input) {
CHECK(buf_.empty());
CHECK_EQ(input.device()->lang(), kCuda);
CHECK_EQ(input.nDim(), 4u);
size_t batchsize = input.shape(0);
DataType dtype = input.data_type();
auto dev = input.device();
if (!has_init_cudnn_) {
InitCudnn(input);
} else {
int n, c, h, w, s;
cudnnDataType_t type;
CUDNN_CHECK(cudnnGetTensor4dDescriptor(x_desc_, &type, &n, &c, &h, &w,
&s, &s, &s, &s));
if (batchsize != static_cast<size_t>(n))
InitCudnn(input);
    CHECK(input.shape(1) == static_cast<size_t>(c)
          && input.shape(2) == static_cast<size_t>(h)
          && input.shape(3) == static_cast<size_t>(w))
        << "input sample shape should not change; previous shape " << c << ", "
        << h << ", " << w << "; current shape " << input.shape(1) << ", "
        << input.shape(2) << ", " << input.shape(3);
}
Shape shape{batchsize, channels_, pooled_height_, pooled_width_};
Tensor output = Tensor(shape, dev, dtype);
output.device()->Exec([input, output, this](Context *ctx) {
Block *inblock = input.block(), *outblock = output.block();
float alpha = 1.0f, beta = 0.0f;
    CUDNN_CHECK(cudnnPoolingForward(ctx->cudnn_handle, this->pool_desc_,
                                    &alpha, this->x_desc_, inblock->data(),
                                    &beta, this->y_desc_,
                                    outblock->mutable_data()));
}, {input.block()}, {output.block()});
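  // Cache input and output for the backward pass (needed only in training).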
if (flag & kTrain) {
buf_.push(input);
buf_.push(output);
}
return output;
}
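// Compute the gradient w.r.t. the input using the tensors cached by
// Forward(); pooling has no parameters, so param_grad stays empty.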
const std::pair<Tensor, vector<Tensor>> CudnnPooling::Backward(
int flag, const Tensor &grad) {
CHECK_EQ(grad.device()->lang(), kCuda);
CHECK_EQ(grad.nDim(), 4u);
vector<Tensor> param_grad;
CHECK(!buf_.empty());
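  // buf_ is a stack: the output was pushed last, so it is popped first.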
Tensor y = buf_.top();
buf_.pop();
Tensor x = buf_.top();
buf_.pop();
Tensor dx;
dx.ResetLike(x);
dx.device()->Exec([dx, grad, x, y, this](Context *ctx) {
Block *dyblock = grad.block(), *dxblock = dx.block(), *yblock = y.block(),
*xblock = x.block();
float alpha = 1.0f, beta = 0.0f;
    CUDNN_CHECK(cudnnPoolingBackward(
        ctx->cudnn_handle, this->pool_desc_, &alpha, this->y_desc_,
        yblock->data(), this->y_desc_, dyblock->data(), this->x_desc_,
        xblock->data(), &beta, this->x_desc_, dxblock->mutable_data()));
}, {grad.block(), y.block(), x.block()}, {dx.block()});
return std::make_pair(dx, param_grad);
}
} // namespace singa
#endif // USE_CUDNN