| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "opencl_pooling.h" |
| |
| #ifdef USE_OPENCL |
| |
| namespace singa { |
| |
| RegisterLayerClass(singacl_pooling, OpenclPooling); |
| |
| const Tensor OpenclPooling::Forward(int flag, const Tensor &input) { |
| CHECK(buf_.empty()); |
| CHECK_EQ(input.device()->lang(), kOpencl); |
| CHECK_EQ(input.nDim(), 4u); |
| |
| auto batchsize = input.shape(0); |
| auto data_type = input.data_type(); |
| auto device = input.device(); |
| // TODO(wangwei) update the layer config if the input sample shape changes |
| CHECK(input.shape(1) == channels_ && input.shape(2) == height_ && |
| input.shape(3) == width_) << "input sample shape should not change"; |
| |
| Shape shape{batchsize, channels_, pooled_height_, pooled_width_}; |
| Tensor output = Tensor(shape, device, data_type); |
| |
| output.device()->Exec([input, output, flag, this](Context *ctx) { |
| Block* in_block = input.block(); |
| Block* outblock = output.block(); |
| |
| if (pool_ == PoolingConf_PoolMethod_MAX) { |
| Tensor mask; |
| mask.ResetLike(output); |
| |
| Pooling_Forward_Max((int)output.Size(), in_block, mask.block(), |
| height_, width_, |
| pooled_height_, pooled_width_, |
| kernel_h_, kernel_w_, |
| stride_h_, stride_w_, |
| pad_h_, pad_w_, |
| outblock, channels_, ctx); |
| |
| if (flag & kTrain) |
| buf_.push(mask); |
| |
| } else if (pool_ == PoolingConf_PoolMethod_AVE) { |
| Pooling_Forward_Ave((int)output.Size(), in_block, outblock, |
| height_, width_, pooled_height_, pooled_width_, |
| kernel_h_, kernel_w_, stride_h_, stride_w_, |
| pad_h_, pad_w_, channels_, ctx); |
| } else |
| LOG(FATAL) << "Unknown pooling method."; |
| |
| }, {input.block()}, {output.block()}); |
| |
| return output; |
| } |
| |
| |
| const std::pair<Tensor, std::vector<Tensor>> |
| OpenclPooling::Backward(int flag, const Tensor &grad) { |
| CHECK_EQ(grad.device()->lang(), kOpencl); |
| CHECK_EQ(grad.nDim(), 4u); |
| |
| std::vector<Tensor> param_grad; |
| |
| auto batchsize = grad.shape(0); |
| auto data_type = grad.data_type(); |
| auto device = grad.device(); |
| Shape shape{batchsize, channels_, height_, width_}; |
| |
| Tensor dx(shape, device, data_type); |
| |
| dx.device()->Exec([dx, grad, this](Context *ctx) { |
| if (pool_ == PoolingConf_PoolMethod_MAX) { |
| CHECK(!buf_.empty()); |
| Tensor mask = buf_.top(); |
| buf_.pop(); |
| |
| Pooling_Backward_Max(grad.block(), mask.block(), |
| dx.Size(), channels_, |
| height_, width_, |
| pooled_height_, pooled_width_, |
| kernel_h_, kernel_w_, |
| pad_h_, pad_w_, |
| stride_h_, stride_w_, |
| dx.block(), ctx); |
| |
| } else if (pool_ == PoolingConf_PoolMethod_AVE) { |
| Pooling_Backward_Ave(grad.block(), grad.shape(0), channels_, |
| height_, width_, |
| pooled_height_, pooled_width_, |
| kernel_h_, kernel_w_, |
| pad_h_, pad_w_, |
| stride_h_, stride_w_, |
| dx.block(), ctx); |
| |
| } else |
| LOG(FATAL) << "Unknown pooling method."; |
| |
| }, {grad.block()}, {dx.block()}); |
| |
| return std::make_pair(dx, param_grad); |
| } |
| |
| |
| void OpenclPooling::Setup(const Shape& in_sample, const LayerConf &conf) { |
| Pooling::Setup(in_sample, conf); |
| auto pool_conf = conf.pooling_conf(); |
| } |
| |
| |
| void OpenclPooling::Pooling_Forward_Max(const int num, Block* src, Block* mask, |
| const int height, const int width, |
| const int pooled_h, const int pooled_w, |
| const int kernel_h, const int kernel_w, |
| const int stride_h, const int stride_w, |
| const int pad_h, const int pad_w, |
| Block* dst, const int channels, |
| Context* ctx) { |
| auto ocl_ctx = viennacl::ocl::get_context(ctx->vcl_ctx_id); |
| auto kernel = ocl_ctx.get_kernel("opencl_pooling", "max_pool_forward"); |
| |
| auto src_buf = WrapHandle(static_cast<cl_mem>(src->mutable_data()), ocl_ctx); |
| auto dst_buf = WrapHandle(static_cast<cl_mem>(dst->mutable_data()), ocl_ctx); |
| auto maskbuf = WrapHandle(static_cast<cl_mem>(mask->mutable_data()), ocl_ctx); |
| |
| viennacl::ocl::enqueue(kernel(num, src_buf, channels, |
| height, width, pooled_h, pooled_w, |
| kernel_h, kernel_w, stride_h, stride_w, |
| pad_h, pad_w, dst_buf, maskbuf)); |
| } |
| |
| |
| void OpenclPooling::Pooling_Forward_Ave(const int num, Block* src, Block* dst, |
| const int height, const int width, |
| const int pooled_h, const int pooled_w, |
| const int kernel_h, const int kernel_w, |
| const int stride_h, const int stride_w, |
| const int pad_h, const int pad_w, |
| const int channels, Context* ctx) { |
| auto ocl_ctx = viennacl::ocl::get_context(ctx->vcl_ctx_id); |
| auto kernel = ocl_ctx.get_kernel("opencl_pooling", "ave_pool_forward"); |
| |
| auto src_buf = WrapHandle(static_cast<cl_mem>(src->mutable_data()), ocl_ctx); |
| auto dst_buf = WrapHandle(static_cast<cl_mem>(dst->mutable_data()), ocl_ctx); |
| |
| viennacl::ocl::enqueue(kernel(num, src_buf, channels, |
| height, width, pooled_h, pooled_w, |
| kernel_h, kernel_w, stride_h, stride_w, |
| pad_h, pad_w, dst_buf)); |
| } |
| |
| |
| void OpenclPooling::Pooling_Forward_Sto_Train(Block* src, Block* rand, |
| const int height, const int width, |
| const int pooled_h, const int pooled_w, |
| const int kernel_h, const int kernel_w, |
| const int stride_h, const int stride_w, |
| const int channels, |
| Block* dst, Context* ctx) { |
| auto ocl_ctx = viennacl::ocl::get_context(ctx->vcl_ctx_id); |
| auto kernel = ocl_ctx.get_kernel("opencl_pooling", "sto_pool_forward_train"); |
| |
| auto src_buf = WrapHandle(static_cast<cl_mem>(src->mutable_data()), ocl_ctx); |
| auto dst_buf = WrapHandle(static_cast<cl_mem>(dst->mutable_data()), ocl_ctx); |
| auto randbuf = WrapHandle(static_cast<cl_mem>(rand->mutable_data()), ocl_ctx); |
| |
| viennacl::ocl::enqueue(kernel(height * width, src_buf, channels, |
| height, width, pooled_h, pooled_w, |
| kernel_h, kernel_w, stride_h, stride_w, |
| randbuf, dst_buf)); |
| } |
| |
| |
| void OpenclPooling::Pooling_Forward_Sto_Test(Block* src, Block* dst, |
| const int height, const int width, |
| const int pooled_h, const int pooled_w, |
| const int kernel_h, const int kernel_w, |
| const int stride_h, const int stride_w, |
| const int channels, Context* ctx) { |
| auto ocl_ctx = viennacl::ocl::get_context(ctx->vcl_ctx_id); |
| auto kernel = ocl_ctx.get_kernel("opencl_pooling", "sto_pool_forward_test"); |
| |
| auto src_buf = WrapHandle(static_cast<cl_mem>(src->mutable_data()), ocl_ctx); |
| auto dst_buf = WrapHandle(static_cast<cl_mem>(dst->mutable_data()), ocl_ctx); |
| |
| viennacl::ocl::enqueue(kernel(height * width, src_buf, channels, |
| height, width, pooled_h, pooled_w, |
| kernel_h, kernel_w, stride_h, stride_w, |
| dst_buf)); |
| } |
| |
| |
| void OpenclPooling::Pooling_Backward_Max(Block* top, Block* mask, |
| const int num, const int channels, |
| const int height, const int width, |
| const int pooled_h, const int pooled_w, |
| const int kernel_h, const int kernel_w, |
| const int pad_h, const int pad_w, |
| const int stride_h, const int stride_w, |
| Block* bottom, Context* ctx) { |
| auto ocl_ctx = viennacl::ocl::get_context(ctx->vcl_ctx_id); |
| auto kernel = ocl_ctx.get_kernel("opencl_pooling", "max_pool_backward"); |
| |
| auto src_buf = WrapHandle(static_cast<cl_mem>(top->mutable_data()), ocl_ctx); |
| auto dst_buf = WrapHandle(static_cast<cl_mem>(bottom->mutable_data()), ocl_ctx); |
| auto mask_buf = WrapHandle(static_cast<cl_mem>(mask->mutable_data()), ocl_ctx); |
| |
| viennacl::ocl::enqueue(kernel(num, src_buf, mask_buf, channels, |
| height, width, pooled_h, pooled_w, |
| kernel_h, kernel_w, stride_h, stride_w, |
| pad_h, pad_w, dst_buf)); |
| } |
| |
| |
| void OpenclPooling::Pooling_Backward_Ave(Block* bottom, |
| const int num, const int channels, |
| const int height, const int width, |
| const int pooled_h, const int pooled_w, |
| const int kernel_h, const int kernel_w, |
| const int pad_h, const int pad_w, |
| const int stride_h, const int stride_w, |
| Block* top, Context* ctx) { |
| auto ocl_ctx = viennacl::ocl::get_context(ctx->vcl_ctx_id); |
| auto kernel = ocl_ctx.get_kernel("opencl_pooling", "ave_pool_backward"); |
| |
| auto src_buf = WrapHandle(static_cast<cl_mem>(bottom->mutable_data()), ocl_ctx); |
| auto dst_buf = WrapHandle(static_cast<cl_mem>(top->mutable_data()), ocl_ctx); |
| |
| viennacl::ocl::enqueue(kernel(num, src_buf, channels, |
| height, width, pooled_h, pooled_w, |
| kernel_h, kernel_w, stride_h, stride_w, |
| pad_h, pad_w, dst_buf)); |
| } |
| |
| |
| void OpenclPooling::Pooling_Backward_Sto(Block* src, Block* rand, Block* dst, |
| const int height, const int width, |
| const int pooled_h, const int pooled_w, |
| const int kernel_h, const int kernel_w, |
| const int stride_h, const int stride_w, |
| const int channels, Context* ctx) { |
| auto ocl_ctx = viennacl::ocl::get_context(ctx->vcl_ctx_id); |
| auto kernel = ocl_ctx.get_kernel("opencl_pooling", "sto_pool_backward"); |
| |
| auto src_buf = WrapHandle(static_cast<cl_mem>(src->mutable_data()), ocl_ctx); |
| auto dst_buf = WrapHandle(static_cast<cl_mem>(dst->mutable_data()), ocl_ctx); |
| auto randbuf = WrapHandle(static_cast<cl_mem>(rand->mutable_data()), ocl_ctx); |
| |
| viennacl::ocl::enqueue(kernel(height * width, randbuf, src_buf, channels, |
| height, width, pooled_h, pooled_w, |
| kernel_h, kernel_w, stride_h, stride_w, |
| dst_buf)); |
| } |
| |
| |
| } // namespace singa |
| |
| #endif // USE_OPENCL |