| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| /*! |
| * \file roi_pooling.cc |
| * \brief roi pooling operator |
| * \author Ross Girshick, Kye-Hyeon Kim, Jian Guo, Xinyu Chen |
| */ |
| #include "./roi_pooling-inl.h" |
| #include <mshadow/base.h> |
| #include <mshadow/tensor.h> |
| #include <mshadow/packet-inl.h> |
| #include <mshadow/dot_engine-inl.h> |
| #include <cassert> |
| |
| using std::ceil; |
| using std::floor; |
| using std::max; |
| using std::min; |
| |
| namespace mshadow { |
/*!
 * \brief CPU forward pass of ROI max pooling.
 *
 * For every ROI row `[batch_index, x1, y1, x2, y2]` in \p bbox, the matching
 * sub-window of \p data (scaled by \p spatial_scale_) is divided into a
 * `pooled_height_ x pooled_width_` grid and max-pooled per channel.
 *
 * \param out      output tensor, shape (num_rois, channels, pooled_h, pooled_w).
 *                 NOTE(review): the max comparison below reads
 *                 `top_data_c[pool_index]` before ever writing it, so this
 *                 routine relies on the caller pre-initializing `out` to a
 *                 sufficiently small value — confirm against roi_pooling-inl.h.
 * \param data     input feature map, shape (batch, channels, height, width).
 * \param bbox     ROIs, shape (num_rois, 5): [batch_index, x1, y1, x2, y2].
 * \param max_idx  per-output argmax buffer, same shape as \p out; each entry
 *                 receives the flat offset into `data` of the winning element,
 *                 or -1 when the pooling bin is empty / the ROI is invalid.
 * \param spatial_scale_  factor mapping ROI coordinates to feature-map pixels.
 */
template <typename Dtype>
inline void ROIPoolForward(const Tensor<cpu, 4, Dtype>& out,
                           const Tensor<cpu, 4, Dtype>& data,
                           const Tensor<cpu, 2, Dtype>& bbox,
                           const Tensor<cpu, 4, index_t>& max_idx,
                           const float spatial_scale_) {
  // Raw pointers into the (row-major, contiguous) tensor storage.
  const Dtype* bottom_data = data.dptr_;
  const Dtype* bottom_rois = bbox.dptr_;
  Dtype* top_data          = out.dptr_;
  index_t* argmax_data     = max_idx.dptr_;
  const int batch_size     = data.size(0);
  const int channels_      = data.size(1);
  const int height_        = data.size(2);
  const int width_         = data.size(3);
  const int pooled_height_ = out.size(2);
  const int pooled_width_  = out.size(3);

  const int num_rois = bbox.size(0);
  // Precomputed strides: per-image, per-channel (input) and per-ROI,
  // per-channel (output / argmax) element counts.
  const index_t data_size      = data.size(1) * data.size(2) * data.size(3);
  const index_t data_size_c    = data.size(2) * data.size(3);
  const index_t out_size_c     = out.size(2) * out.size(3);
  const index_t out_size       = channels_ * out_size_c;
  const index_t max_idx_size_c = max_idx.size(2) * max_idx.size(3);
  const index_t max_idx_size   = channels_ * max_idx_size_c;
  // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R
  for (int n = 0; n < num_rois; ++n) {
    // Increment ROI data pointer
    const Dtype* bottom_rois_n = bottom_rois + n * bbox.size(1);
    Dtype* top_data_n          = top_data + n * out_size;
    index_t* argmax_data_n     = argmax_data + n * max_idx_size;
    // ROI corners in feature-map pixel coordinates (rounded to nearest int).
    int roi_start_w = std::round(bottom_rois_n[1] * spatial_scale_);
    int roi_start_h = std::round(bottom_rois_n[2] * spatial_scale_);
    int roi_end_w   = std::round(bottom_rois_n[3] * spatial_scale_);
    int roi_end_h   = std::round(bottom_rois_n[4] * spatial_scale_);

    // An out-of-range batch index marks the whole ROI invalid; its outputs
    // are zeroed below rather than reading out-of-bounds memory.
    int roi_batch_ind   = static_cast<int>(bottom_rois_n[0]);
    bool is_ind_invalid = (roi_batch_ind < 0) || (roi_batch_ind >= batch_size);

    // force malformed ROIs to be 1 * 1
    int roi_height = max(roi_end_h - roi_start_h + 1, 1);
    int roi_width  = max(roi_end_w - roi_start_w + 1, 1);
    const Dtype bin_size_h = static_cast<Dtype>(roi_height) / static_cast<Dtype>(pooled_height_);
    const Dtype bin_size_w = static_cast<Dtype>(roi_width) / static_cast<Dtype>(pooled_width_);

    // Start of the selected image inside the flat input buffer.
    index_t offset_batch_data = data_size * roi_batch_ind;

// Channels are independent, so parallelize the per-ROI work across them.
#pragma omp parallel for
    for (int c = 0; c < channels_; ++c) {
      // Increment all data pointers
      index_t offset_batch_data_c = offset_batch_data + c * data_size_c;
      const Dtype* batch_data_c   = bottom_data + offset_batch_data_c;
      Dtype* top_data_c           = top_data_n + c * out_size_c;
      index_t* argmax_data_c      = argmax_data_n + c * max_idx_size_c;

      for (int ph = 0; ph < pooled_height_; ++ph) {
        for (int pw = 0; pw < pooled_width_; ++pw) {
          // Compute pooling region for this output unit:
          // start (included) = floor(ph * roi_height / pooled_height_)
          // end (excluded) = ceil((ph + 1) * roi_height / pooled_height_)
          int hstart = static_cast<int>(floor(static_cast<Dtype>(ph) * bin_size_h));
          int wstart = static_cast<int>(floor(static_cast<Dtype>(pw) * bin_size_w));
          int hend   = static_cast<int>(ceil(static_cast<Dtype>(ph + 1) * bin_size_h));
          int wend   = static_cast<int>(ceil(static_cast<Dtype>(pw + 1) * bin_size_w));

          // Shift the bin by the ROI origin and clip it to the feature map.
          hstart = min(max(hstart + roi_start_h, 0), height_);
          hend   = min(max(hend + roi_start_h, 0), height_);
          wstart = min(max(wstart + roi_start_w, 0), width_);
          wend   = min(max(wend + roi_start_w, 0), width_);

          bool is_empty = (hend <= hstart) || (wend <= wstart);

          const index_t pool_index = ph * pooled_width_ + pw;
          // Empty bins and invalid ROIs produce 0 with argmax sentinel -1,
          // so the backward pass can skip them.
          if (is_empty || is_ind_invalid) {
            top_data_c[pool_index]    = 0;
            argmax_data_c[pool_index] = -1;
            continue;
          }

          // Max over the clipped bin; argmax stores the FLAT offset into the
          // whole `data` buffer (offset_batch_data_c + index), not a local one.
          for (int h = hstart; h < hend; ++h) {
            for (int w = wstart; w < wend; ++w) {
              const index_t index = h * width_ + w;
              if (batch_data_c[index] > top_data_c[pool_index]) {
                top_data_c[pool_index]    = batch_data_c[index];
                argmax_data_c[pool_index] = offset_batch_data_c + index;
              }
            }
          }
        }
      }
    }
  }
  return;
}
| |
| template <typename Dtype> |
| inline void ROIPoolBackwardAcc(const Tensor<cpu, 4, Dtype>& in_grad, |
| const Tensor<cpu, 4, Dtype>& out_grad, |
| const Tensor<cpu, 2, Dtype>& bbox, |
| const Tensor<cpu, 4, index_t>& max_idx, |
| const float spatial_scale_) { |
| const Dtype* top_diff = out_grad.dptr_; |
| Dtype* bottom_diff = in_grad.dptr_; |
| index_t* argmax_data = max_idx.dptr_; |
| |
| const index_t count = out_grad.shape_.Size(); |
| |
| for (int index = 0; index < count; ++index) { |
| index_t max_idx = argmax_data[index]; |
| if (max_idx >= 0) { |
| bottom_diff[max_idx] += top_diff[index]; |
| } |
| } |
| |
| return; |
| } |
| } // namespace mshadow |
| |
| namespace mxnet { |
| namespace op { |
| |
| template <> |
| Operator* CreateOp<cpu>(ROIPoolingParam param, int dtype) { |
| Operator* op = nullptr; |
| MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { op = new ROIPoolingOp<cpu, DType>(param); }); |
| return op; |
| } |
| |
// Shape/type-aware operator factory: forwards the inferred input dtype to the
// device-specific CreateOp via the DO_BIND_DISPATCH macro (which also supplies
// the return statement).
Operator* ROIPoolingProp::CreateOperatorEx(Context ctx,
                                           mxnet::ShapeVector* in_shape,
                                           std::vector<int>* in_type) const {
  DO_BIND_DISPATCH(CreateOp, param_, in_type->at(0));
}
| |
| DMLC_REGISTER_PARAMETER(ROIPoolingParam); |
| |
| MXNET_REGISTER_OP_PROPERTY(ROIPooling, ROIPoolingProp) |
| .describe(R"code(Performs region of interest(ROI) pooling on the input array. |
| |
| ROI pooling is a variant of a max pooling layer, in which the output size is fixed and |
| region of interest is a parameter. Its purpose is to perform max pooling on the inputs |
| of non-uniform sizes to obtain fixed-size feature maps. ROI pooling is a neural-net |
| layer mostly used in training a `Fast R-CNN` network for object detection. |
| |
| This operator takes a 4D feature map as an input array and region proposals as `rois`, |
| then it pools over sub-regions of input and produces a fixed-sized output array |
| regardless of the ROI size. |
| |
| To crop the feature map accordingly, you can resize the bounding box coordinates |
| by changing the parameters `rois` and `spatial_scale`. |
| |
| The cropped feature maps are pooled by standard max pooling operation to a fixed size output |
| indicated by a `pooled_size` parameter. batch_size will change to the number of region |
| bounding boxes after `ROIPooling`. |
| |
| The size of each region of interest doesn't have to be perfectly divisible by |
| the number of pooling sections(`pooled_size`). |
| |
| Example:: |
| |
| x = [[[[ 0., 1., 2., 3., 4., 5.], |
| [ 6., 7., 8., 9., 10., 11.], |
| [ 12., 13., 14., 15., 16., 17.], |
| [ 18., 19., 20., 21., 22., 23.], |
| [ 24., 25., 26., 27., 28., 29.], |
| [ 30., 31., 32., 33., 34., 35.], |
| [ 36., 37., 38., 39., 40., 41.], |
| [ 42., 43., 44., 45., 46., 47.]]]] |
| |
| // region of interest i.e. bounding box coordinates. |
| y = [[0,0,0,4,4]] |
| |
| // returns array of shape (2,2) according to the given roi with max pooling. |
| ROIPooling(x, y, (2,2), 1.0) = [[[[ 14., 16.], |
| [ 26., 28.]]]] |
| |
| // region of interest is changed due to the change in `spacial_scale` parameter. |
| ROIPooling(x, y, (2,2), 0.7) = [[[[ 7., 9.], |
| [ 19., 21.]]]] |
| |
| )code" ADD_FILELINE) |
| .add_argument("data", |
| "NDArray-or-Symbol", |
| "The input array to the pooling operator, " |
| " a 4D Feature maps ") |
| .add_argument("rois", |
| "NDArray-or-Symbol", |
| "Bounding box coordinates, a 2D array of " |
| "[[batch_index, x1, y1, x2, y2]], where (x1, y1) and (x2, y2) are top left and " |
| "bottom right " |
| "corners of designated region of interest. `batch_index` indicates the index of " |
| "corresponding " |
| "image in the input array") |
| .add_arguments(ROIPoolingParam::__FIELDS__()); |
| |
| NNVM_REGISTER_OP(ROIPooling).add_alias("_npx_roi_pooling"); |
| |
| } // namespace op |
| } // namespace mxnet |