blob: 0f813bc4b3d9bc1a963c04ce8349f047d29b0531 [file] [log] [blame]
/*******************************************************************************
* Copyright 2016 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* \file mkl_pooling-inl.h
* \brief Pooling operator (max / average, 2-D kernels) implemented on top of the MKL DNN primitives
* \author zhenlin.luo@intel.com
* lingyan.guo@intel.com
*
*******************************************************************************/
#ifndef MXNET_OPERATOR_MKL_MKL_POOLING_INL_H_
#define MXNET_OPERATOR_MKL_MKL_POOLING_INL_H_
#include <vector>
#include <string>
#include <utility>
#include "../operator_common.h"
#include "../pooling-inl.h"
#include "./mkl_util-inl.h"
namespace mxnet {
namespace op {
template<typename xpu, typename DType>
class MKLPoolingOp : public Operator {
public:
static std::string getName() {
return "MKLPoolingOp";
}
explicit MKLPoolingOp(PoolingParam p) {
poolingFwd = static_cast<dnnPrimitive_t>(NULL);
poolingBwd = static_cast<dnnPrimitive_t>(NULL);
max_idx_data = static_cast<DType*>(NULL);
fwd_top_data = MKLData<DType>::create();
fwd_bottom_data = MKLData<DType>::create();
bwd_top_diff = MKLData<DType>::create();
bwd_bottom_diff = MKLData<DType>::create();
this->param_ = p;
init_mkldnn_ = false;
}
virtual ~MKLPoolingOp() {
if (poolingFwd != NULL) {
dnnDelete<DType>(poolingFwd);
poolingFwd = NULL;
}
if (poolingBwd != NULL) {
dnnDelete<DType>(poolingBwd);
poolingBwd = NULL;
}
if (max_idx_data != NULL) {
dnnReleaseBuffer<DType>(max_idx_data);
max_idx_data = NULL;
}
}
private:
void LayerSetUp(const mshadow::Tensor<xpu, 4, DType> &data,
const mshadow::Tensor<xpu, 4, DType> &out) {
channels_ = data.shape_[1];
height_ = data.shape_[2];
width_ = data.shape_[3];
num_ = data.shape_[0];
global_pooling_ = param_.global_pool;
if (global_pooling_) {
kernel_h_ = height_;
kernel_w_ = width_;
} else {
kernel_h_ = param_.kernel[0];
kernel_w_ = param_.kernel[1];
}
CHECK_GT(kernel_h_, 0) << "Filter dimensions cannot be zero.";
CHECK_GT(kernel_w_, 0) << "Filter dimensions cannot be zero.";
pad_h_ = param_.pad[0];
pad_w_ = param_.pad[1];
if (global_pooling_) {
stride_h_ = stride_w_ = 1;
} else {
stride_h_ = param_.stride[0];
stride_w_ = param_.stride[1];
}
if (global_pooling_) {
CHECK(pad_h_ == 0 && pad_w_ == 0 && stride_h_ == 1 && stride_w_ == 1)
<< "With Global_pooling: true; only pad = 0 and stride = 1";
}
if (pad_h_ != 0 || pad_w_ != 0) {
CHECK(param_.pool_type == pool_enum::kAvgPooling
|| param_.pool_type == pool_enum::kMaxPooling)
<< "Padding implemented only for average and max pooling.";
CHECK_LT(pad_h_, kernel_h_);
CHECK_LT(pad_w_, kernel_w_);
}
pooled_height_ = out.shape_[2];
pooled_width_ = out.shape_[3];
size_t dim = 4;
size_t src_sizes[4], src_strides[4];
size_t dst_sizes[4], dst_strides[4];
src_sizes[0] = width_;
src_sizes[1] = height_;
src_sizes[2] = channels_;
src_sizes[3] = num_;
src_strides[0] = 1;
src_strides[1] = src_sizes[0];
src_strides[2] = src_sizes[0] * src_sizes[1];
src_strides[3] = src_sizes[0] * src_sizes[1] * src_sizes[2];
dst_sizes[0] = pooled_width_;
dst_sizes[1] = pooled_height_;
dst_sizes[2] = src_sizes[2];
dst_sizes[3] = src_sizes[3];
dst_strides[0] = 1;
dst_strides[1] = dst_sizes[0];
dst_strides[2] = dst_sizes[0] * dst_sizes[1];
dst_strides[3] = dst_sizes[0] * dst_sizes[1] * dst_sizes[2];
src_offset[0] = -pad_w_;
src_offset[1] = -pad_h_;
src_offset[2] = -pad_w_;
src_offset[3] = -pad_h_;
kernel_stride[0] = stride_w_;
kernel_stride[1] = stride_h_;
kernel_size[0] = kernel_w_;
kernel_size[1] = kernel_h_;
// Names are for debugging only
fwd_bottom_data->name = "fwd_bottom_data @ " + getName();
fwd_top_data->name = "fwd_top_data @ " + getName();
bwd_top_diff->name = "bwd_top_diff @ " + getName();
bwd_bottom_diff->name = "bwd_bottom_diff @ " + getName();
fwd_bottom_data->create_user_layout(dim, src_sizes, src_strides);
fwd_top_data->create_user_layout(dim, dst_sizes, dst_strides);
bwd_bottom_diff->create_user_layout(dim, src_sizes, src_strides);
bwd_top_diff->create_user_layout(dim, dst_sizes, dst_strides);
// Primitives will be allocated during the first fwd pass
poolingFwd = NULL;
poolingBwd = NULL;
max_idx_data = NULL;
}
public:
virtual void Forward(const OpContext &ctx,
const std::vector<TBlob> &in_data,
const std::vector<OpReqType> &req,
const std::vector<TBlob> &out_data,
const std::vector<TBlob> &aux_args) {
using namespace mshadow;
using namespace mshadow::expr;
CHECK_EQ(in_data.size(), 1);
CHECK_EQ(out_data.size(), 1);
Stream<xpu> *s = ctx.get_stream<xpu>();
if (param_.kernel.ndim() >= 3) {
LOG(FATAL) << "Not implmented";
}
Tensor<xpu, 4, DType> data = mkl_experimental_direct_get<xpu, 4, DType>(
in_data[pool_enum::kData], s);
Tensor<xpu, 4, DType> out = mkl_experimental_direct_get<xpu, 4, DType>(
out_data[pool_enum::kOut], s);
if (!init_mkldnn_) {
LayerSetUp(data, out);
init_mkldnn_ = true;
}
auto first_pass = false;
if (poolingFwd == NULL) first_pass = true;
dnnAlgorithm_t algorithm = dnnAlgorithmPoolingMax;
switch (param_.pool_type) {
case pool_enum::kMaxPooling:
algorithm = dnnAlgorithmPoolingMax;
break;
case pool_enum::kAvgPooling:
algorithm = (param_.pooling_convention == pool_enum::kValid) ?
dnnAlgorithmPoolingAvgIncludePadding : dnnAlgorithmPoolingAvg;
break;
default:
LOG(FATAL) << "Unknown pooling method.";
}
dnnError_t status;
void* pooling_res[dnnResourceNumber];
void* bottom_data = NULL;
#if MKL_EXPERIMENTAL == 1
bottom_data =
reinterpret_cast<void *>(mkl_prv_data<DType>(in_data[pool_enum::kData]));
#endif
dnnBorder_t border_type = dnnBorderZerosAsymm;
switch (param_.pooling_convention) {
case pool_enum::kFull:
border_type = dnnBorderZeros;
break;
case pool_enum::kValid:
border_type = dnnBorderZerosAsymm;
break;
default:
border_type = dnnBorderZerosAsymm;
break;
}
if (NULL == bottom_data) {
bottom_data = data.dptr_;
if (NULL == poolingFwd) {
status = dnnPoolingCreateForward<DType>(&poolingFwd, NULL,
algorithm, fwd_bottom_data->layout_usr,
kernel_size, kernel_stride,
src_offset, border_type);
CHECK_EQ(status, E_SUCCESS);
// Now create poolingBwd
status = dnnPoolingCreateBackward<DType>(&poolingBwd, NULL,
algorithm, fwd_bottom_data->layout_usr,
kernel_size, kernel_stride,
src_offset, border_type);
CHECK_EQ(status, E_SUCCESS);
}
}
#if MKL_EXPERIMENTAL == 1
if (NULL != bottom_data) {
if (NULL == poolingFwd) {
std::shared_ptr<MKLMemHolder> bottom_data_mem = in_data[pool_enum::kData].Mkl_mem_;
std::shared_ptr<PrvMemDescr> bottom_prv_descriptor =
bottom_data_mem->get_prv_descriptor();
CHECK_EQ(bottom_prv_descriptor->get_descr_type(),
PrvMemDescr::PRV_DESCR_MKL2017);
std::shared_ptr<MKLData<DType> > mem_descr
= std::static_pointer_cast<MKLData<DType>>(bottom_prv_descriptor);
CHECK(mem_descr != nullptr);
fwd_bottom_data = mem_descr;
status = dnnPoolingCreateForward<DType>(&poolingFwd, NULL,
algorithm, fwd_bottom_data->layout_int,
kernel_size, kernel_stride,
src_offset, border_type);
CHECK_EQ(status, E_SUCCESS);
fwd_top_data->create_internal_layout(poolingFwd, dnnResourceDst);
// Now create poolingBwd
status = dnnPoolingCreateBackward<DType>(&poolingBwd, NULL,
algorithm, fwd_bottom_data->layout_int,
kernel_size, kernel_stride,
src_offset, border_type);
CHECK_EQ(status, E_SUCCESS);
bwd_top_diff->create_internal_layout(poolingFwd, dnnResourceDst);
bwd_bottom_diff->create_internal_layout(poolingFwd, dnnResourceSrc);
}
}
#endif
if (first_pass) {
dnnLayout_t max_idx_datal = NULL;
status = dnnLayoutCreateFromPrimitive<DType>(
&max_idx_datal, poolingFwd, dnnResourceWorkspace);
CHECK_EQ(status, E_SUCCESS);
status = dnnAllocateBuffer<DType>(reinterpret_cast<void**>(&max_idx_data), max_idx_datal);
CHECK_EQ(status, E_SUCCESS);
#if MKL_EXPERIMENTAL == 0
fwd_bottom_data->create_internal_layout(poolingFwd, dnnResourceSrc);
fwd_top_data->create_internal_layout(poolingFwd, dnnResourceDst);
bwd_top_diff->create_internal_layout(poolingBwd, dnnResourceDiffDst);
bwd_bottom_diff->create_internal_layout(poolingBwd, dnnResourceDiffSrc);
#endif
dnnLayoutDelete<DType>(max_idx_datal);
first_pass = false;
}
pooling_res[dnnResourceSrc] = bottom_data;
pooling_res[dnnResourceWorkspace] = max_idx_data;
std::shared_ptr<MKLMemHolder> top_mem = NULL;
#if MKL_EXPERIMENTAL == 1
top_mem = out_data[pool_enum::kOut].Mkl_mem_;
#endif
pooling_res[dnnResourceDst] = fwd_top_data->get_output_ptr(
out.dptr_, fwd_top_data, top_mem);
status = dnnExecute<DType>(poolingFwd, pooling_res);
CHECK_EQ(status, E_SUCCESS);
#if MKL_EXPERIMENTAL == 0
if (fwd_top_data->conversion_needed()) {
fwd_top_data->convert_from_prv(out.dptr_);
}
#endif
}
virtual void Backward(const OpContext &ctx,
const std::vector<TBlob> &out_grad,
const std::vector<TBlob> &in_data,
const std::vector<TBlob> &out_data,
const std::vector<OpReqType> &req,
const std::vector<TBlob> &in_grad,
const std::vector<TBlob> &aux_args) {
if (!req[0]) {
return;
}
using namespace mshadow;
using namespace mshadow::expr;
CHECK_EQ(out_grad.size(), 1);
CHECK_EQ(in_data.size(), 1);
CHECK_EQ(out_data.size(), 1);
CHECK_EQ(req.size(), 1);
CHECK_EQ(in_grad.size(), 1);
if (param_.kernel.ndim() >= 3) {
LOG(FATAL) << "Not implmented";
}
Stream<xpu> *s = ctx.get_stream<xpu>();
Tensor<xpu, 4, DType> grad = mkl_experimental_direct_get<xpu, 4, DType>(
out_grad[pool_enum::kOut], s);
Tensor<xpu, 4, DType> input_grad = mkl_experimental_direct_get<xpu, 4, DType>(
in_grad[pool_enum::kData], s);
dnnError_t e;
void* pooling_res[dnnResourceNumber];
pooling_res[dnnResourceWorkspace] = reinterpret_cast<void *>(max_idx_data);
pooling_res[dnnResourceDiffDst] =
bwd_top_diff->get_converted_prv(grad.dptr_, true, out_grad[pool_enum::kOut]);
std::shared_ptr<MKLMemHolder> bottom_diff_mem = NULL;
#if MKL_EXPERIMENTAL == 1
bottom_diff_mem = in_grad[pool_enum::kData].Mkl_mem_;
#endif
pooling_res[dnnResourceDiffSrc] = bwd_bottom_diff->get_output_ptr(
input_grad.dptr_, bwd_bottom_diff, bottom_diff_mem);
e = dnnExecute<DType>(poolingBwd, pooling_res);
CHECK_EQ(e, E_SUCCESS);
#if MKL_EXPERIMENTAL == 0
if (bwd_bottom_diff->conversion_needed()) {
bwd_bottom_diff->convert_from_prv(input_grad.dptr_);
}
#endif
}
private:
PoolingParam param_;
int kernel_h_, kernel_w_;
int stride_h_, stride_w_;
int pad_h_, pad_w_;
int channels_, num_;
int height_, width_;
int pooled_height_, pooled_width_;
bool global_pooling_;
private:
size_t kernel_size[2],
kernel_stride[4];
int src_offset[4]; // 2*(dimension-2)
dnnPrimitive_t poolingFwd, poolingBwd;
DType *max_idx_data;
std::shared_ptr<MKLData<DType> > fwd_top_data;
std::shared_ptr<MKLData<DType> > fwd_bottom_data;
std::shared_ptr<MKLData<DType> > bwd_top_diff;
std::shared_ptr<MKLData<DType> > bwd_bottom_diff;
bool init_mkldnn_;
}; // class MKLPoolingOp
} // namespace op
} // namespace mxnet
#endif // MXNET_OPERATOR_MKL_MKL_POOLING_INL_H_