blob: 560ee6312dcd0ab6cfb56e3e2be1d8bdf82001c2 [file] [log] [blame]
/************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*************************************************************/
#include "singa/neuralnet/neuron_layer.h"
namespace singa {
/**
 * Destroys the bias, filter and convolution descriptors held by this layer.
 * Nothing is destroyed when has_init_cudnn_ is false.
 */
CudnnConvLayer::~CudnnConvLayer() {
  // Without initialisation there is nothing to tear down.
  if (!has_init_cudnn_)
    return;

  CHECK_CUDNN(cudnnDestroyTensorDescriptor(bias_desc_));
  CHECK_CUDNN(cudnnDestroyFilterDescriptor(filter_desc_));
  CHECK_CUDNN(cudnnDestroyConvolutionDescriptor(conv_desc_));
}
/**
 * Creates and configures the cuDNN descriptors for this convolution layer,
 * asks cuDNN to pick the forward, backward-filter and backward-data
 * algorithms under the configured workspace limit, and stores in
 * workspace_count_ the number of floats needed to cover the largest of the
 * three workspaces.
 *
 * Reads the geometry members (pad_*, stride_*, kernel_*, channels_,
 * num_filters_, batchsize_, height_, width_, conv_height_, conv_width_).
 * NOTE(review): these are presumably filled in by the layer's setup code
 * before this runs -- confirm against the caller.
 */
void CudnnConvLayer::InitCudnn() {
  // Base-class initialisation. NOTE(review): not visible here; presumably it
  // prepares handle_, src_desc_ and my_desc_, which are used below -- confirm.
  CudnnBase::InitCudnn();

  // The configured limit is given in MB; convert it to bytes. The value is
  // widened to size_t *before* the shift so that a limit of 2048 MB or more
  // cannot overflow a narrower integer type while being shifted.
  // NOTE(review): assumes the configured value is non-negative -- confirm.
  workspace_byte_limit_ = static_cast<size_t>(
      layer_conf_.convolution_conf().workspace_byte_limit()) << 20;

  CHECK_CUDNN(cudnnCreateTensorDescriptor(&bias_desc_));
  CHECK_CUDNN(cudnnCreateFilterDescriptor(&filter_desc_));
  CHECK_CUDNN(cudnnCreateConvolutionDescriptor(&conv_desc_));

  // Padding and stride come from the layer members; the two literal 1s are
  // the upscale arguments of the cuDNN API version this file targets.
  CHECK_CUDNN(cudnnSetConvolution2dDescriptor(conv_desc_,
                                              pad_y_,
                                              pad_x_,
                                              stride_y_,
                                              stride_x_,
                                              1,
                                              1,
                                              CUDNN_CROSS_CORRELATION));
  // Filter bank: num_filters_ x channels_ x kernel_y_ x kernel_x_ floats.
  CHECK_CUDNN(cudnnSetFilter4dDescriptor(filter_desc_,
                                         CUDNN_DATA_FLOAT,
                                         num_filters_,
                                         channels_,
                                         kernel_y_,
                                         kernel_x_));
  if (bias_) {
    // Bias: one value per filter, described as a 1 x num_filters_ x 1 x 1
    // tensor.
    CHECK_CUDNN(cudnnSetTensor4dDescriptor(bias_desc_,
                                           CUDNN_TENSOR_NCHW,
                                           CUDNN_DATA_FLOAT,
                                           1,
                                           num_filters_,
                                           1,
                                           1));
  }
  // Input tensor: batchsize_ x channels_ x height_ x width_.
  CHECK_CUDNN(cudnnSetTensor4dDescriptor(src_desc_,
                                         CUDNN_TENSOR_NCHW,
                                         CUDNN_DATA_FLOAT,
                                         batchsize_,
                                         channels_,
                                         height_,
                                         width_));
  // Output tensor: batchsize_ x num_filters_ x conv_height_ x conv_width_.
  CHECK_CUDNN(cudnnSetTensor4dDescriptor(my_desc_,
                                         CUDNN_TENSOR_NCHW,
                                         CUDNN_DATA_FLOAT,
                                         batchsize_,
                                         num_filters_,
                                         conv_height_,
                                         conv_width_));

  // Algorithm selection for each of the three passes, using the
  // PREFER_FASTEST preference and the byte limit computed above.
  CHECK_CUDNN(cudnnGetConvolutionForwardAlgorithm(
      handle_,
      src_desc_,
      filter_desc_,
      conv_desc_,
      my_desc_,
      CUDNN_CONVOLUTION_FWD_PREFER_FASTEST,
      workspace_byte_limit_,
      &fp_alg_));
  CHECK_CUDNN(cudnnGetConvolutionBackwardFilterAlgorithm(
      handle_,
      src_desc_,
      my_desc_,
      conv_desc_,
      filter_desc_,
      CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST,
      workspace_byte_limit_,
      &bp_filter_alg_));
  CHECK_CUDNN(cudnnGetConvolutionBackwardDataAlgorithm(
      handle_,
      filter_desc_,
      my_desc_,
      conv_desc_,
      src_desc_,
      CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST,
      workspace_byte_limit_,
      &bp_data_alg_));

  // Query the workspace requirement (in bytes) of each chosen algorithm.
  // Zero-initialised so the values are defined even before cuDNN fills them.
  size_t fp_byte = 0;
  size_t bp_data_byte = 0;
  size_t bp_filter_byte = 0;
  CHECK_CUDNN(cudnnGetConvolutionForwardWorkspaceSize(handle_,
                                                      src_desc_,
                                                      filter_desc_,
                                                      conv_desc_,
                                                      my_desc_,
                                                      fp_alg_,
                                                      &fp_byte));
  CHECK_CUDNN(cudnnGetConvolutionBackwardDataWorkspaceSize(handle_,
                                                           filter_desc_,
                                                           my_desc_,
                                                           conv_desc_,
                                                           src_desc_,
                                                           bp_data_alg_,
                                                           &bp_data_byte));
  CHECK_CUDNN(cudnnGetConvolutionBackwardFilterWorkspaceSize(handle_,
                                                             src_desc_,
                                                             my_desc_,
                                                             conv_desc_,
                                                             filter_desc_,
                                                             bp_filter_alg_,
                                                             &bp_filter_byte));

  // One buffer size serves all three passes: take the largest requirement,
  // express it in floats, and add one element so the integer division never
  // leaves the buffer short.
  workspace_count_ = std::max(std::max(fp_byte, bp_data_byte), bp_filter_byte)
    / sizeof(float) + 1;
}
/**
 * Forward pass: convolves the data of srclayers[0] with this layer's weights
 * and writes the result into data_. When a bias parameter is present it is
 * then added onto data_.
 *
 * @param flag       not used by this function.
 * @param srclayers  source layers; only srclayers[0] is read.
 */
void CudnnConvLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
  // Set up the cuDNN state on demand if it has not been initialised yet.
  if (!has_init_cudnn_)
    InitCudnn();
  float alpha = 1.f, beta = 0.f;
  // Scratch buffer of workspace_count_ floats, created for this call only.
  Blob<float> workspace(vector<int>{static_cast<int>(workspace_count_)});
  // alpha = 1, beta = 0: data_ receives the convolution result.
  CHECK_CUDNN(cudnnConvolutionForward(handle_,
                                      &alpha,
                                      src_desc_,
                                      srclayers[0]->data(this).gpu_data(),
                                      filter_desc_,
                                      weight_->data().gpu_data(),
                                      conv_desc_,
                                      fp_alg_,
                                      workspace.mutable_gpu_data(),
                                      workspace_count_ * sizeof(float),
                                      &beta,
                                      my_desc_,
                                      data_.mutable_gpu_data()));
  if (bias_) {
    // beta = 1 so that the bias is added onto the existing contents of data_.
    beta = 1.f;
    // The mode-less cudnnAddTensor signature exists from cuDNN v4 onwards;
    // only older releases take the CUDNN_ADD_SAME_C mode argument. Testing
    // with ">= 4" (rather than "== 4") keeps newer majors out of the legacy
    // branch.
#if CUDNN_MAJOR >= 4
    CHECK_CUDNN(cudnnAddTensor(handle_,
                               &alpha,
                               bias_desc_,
                               bias_->data().gpu_data(),
                               &beta,
                               my_desc_,
                               data_.mutable_gpu_data()));
#else
    CHECK_CUDNN(cudnnAddTensor(handle_,
                               CUDNN_ADD_SAME_C,
                               &alpha,
                               bias_desc_,
                               bias_->data().gpu_data(),
                               &beta,
                               my_desc_,
                               data_.mutable_gpu_data()));
#endif
  }
}
void
CudnnConvLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) {
float alpha = 1.f, beta = 0.f;
Blob<float> workspace(vector<int>{static_cast<int>(workspace_count_)});
// LOG(ERROR) << "backward bias";
if (bias_) {
CHECK_CUDNN(cudnnConvolutionBackwardBias(handle_,
&alpha,
my_desc_,
grad_.gpu_data(),
&beta,
bias_desc_,
bias_->mutable_grad()->mutable_gpu_data()));
}
// LOG(ERROR) << "backward w";
CHECK_CUDNN(cudnnConvolutionBackwardFilter_v3(handle_,
&alpha,
src_desc_,
srclayers[0]->data(this).gpu_data(),
my_desc_,
grad_.gpu_data(),
conv_desc_,
bp_filter_alg_,
workspace.mutable_gpu_data(),
workspace_count_ * sizeof(float),
&beta,
filter_desc_,
weight_->mutable_grad()->mutable_gpu_data()));
// LOG(ERROR) << "backward src";
if (srclayers[0]->mutable_grad(this) != nullptr) {
CHECK_CUDNN(cudnnConvolutionBackwardData_v3(handle_,
&alpha,
filter_desc_,
weight_->data().gpu_data(),
my_desc_,
grad_.gpu_data(),
conv_desc_,
bp_data_alg_,
workspace.mutable_gpu_data(),
workspace_count_ * sizeof(float),
&beta,
src_desc_,
srclayers[0]->mutable_grad(this)->mutable_gpu_data()));
}
}
} // namespace singa