| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| import numpy as np |
| |
| |
def bbox_flip(bbox, width, flip_x=False):
    """
    Mirror boxes horizontally, in place.

    The new x1 is computed from the old x2 and vice versa, so that x1 <= x2
    still holds after the flip (an inverted box would produce invalid values
    downstream in bbox_transform). Both new columns are computed before either
    one is written back, because the first write would otherwise clobber a
    value the second computation still needs.
    :param bbox: [n][x1, y1, x2, y2]; modified in place when flip_x is true
    :param width: image width; each flipped coordinate is ``width - x``
    :param flip_x: will flip x1 and x2
    :return: the same bbox object that was passed in
    """
    if not flip_x:
        return bbox

    mirrored_left = width - bbox[:, 2]
    mirrored_right = width - bbox[:, 0]
    bbox[:, 0], bbox[:, 2] = mirrored_left, mirrored_right
    return bbox
| |
| |
def bbox_overlaps(boxes, query_boxes):
    """
    Compute the pairwise intersection-over-union between two sets of boxes.

    Boxes are [x1, y1, x2, y2] with inclusive coordinates, so a side length is
    measured as ``x2 - x1 + 1``. Only the first four columns are read.
    :param boxes: n * 4 bounding boxes
    :param query_boxes: k * 4 bounding boxes
    :return: overlaps: n * k float64 array; 0 where a pair does not intersect
    """
    boxes = np.asarray(boxes, dtype=np.float64)
    query_boxes = np.asarray(query_boxes, dtype=np.float64)
    n_ = boxes.shape[0]
    k_ = query_boxes.shape[0]
    # np.float was removed in NumPy 1.24; np.float64 is the dtype it produced.
    overlaps = np.zeros((n_, k_), dtype=np.float64)
    if n_ == 0 or k_ == 0:
        # Nothing to compare; also avoids column indexing on a 1-D empty array.
        return overlaps

    # Columns of `boxes` become (n, 1) and columns of `query_boxes` become
    # (1, k), so every binary operation below broadcasts to the (n, k) grid.
    bx1, by1, bx2, by2 = (boxes[:, i, np.newaxis] for i in range(4))
    qx1, qy1, qx2, qy2 = (query_boxes[np.newaxis, :, i] for i in range(4))

    iw = np.minimum(bx2, qx2) - np.maximum(bx1, qx1) + 1
    ih = np.minimum(by2, qy2) - np.maximum(by1, qy1) + 1
    # A pair overlaps only when the intersection is positive in both axes.
    intersects = (iw > 0) & (ih > 0)

    inter_area = iw * ih
    box_area = (bx2 - bx1 + 1) * (by2 - by1 + 1)
    query_area = (qx2 - qx1 + 1) * (qy2 - qy1 + 1)
    union_area = box_area + query_area - inter_area

    # Divide only where the pair intersects; other entries keep their 0 and
    # no division is attempted on them.
    np.divide(inter_area, union_area, out=overlaps, where=intersects)
    return overlaps
| |
| |
def clip_boxes(boxes, im_shape):
    """
    Clamp box coordinates into the image, in place.

    x coordinates (columns 0 and 2 of every group of four) are limited to
    [0, im_shape[1] - 1]; y coordinates (columns 1 and 3) are limited to
    [0, im_shape[0] - 1].
    :param boxes: [N, 4* num_classes]; modified in place
    :param im_shape: tuple of 2, indexed as (height, width)
    :return: [N, 4* num_classes], the same boxes object that was passed in
    """
    x_limit = im_shape[1] - 1
    y_limit = im_shape[0] - 1

    # Column offsets 0..3 within each group of four are x1, y1, x2, y2.
    for offset, upper in enumerate((x_limit, y_limit, x_limit, y_limit)):
        coords = boxes[:, offset::4]
        boxes[:, offset::4] = np.maximum(np.minimum(coords, upper), 0)
    return boxes
| |
| |
def bbox_transform(ex_rois, gt_rois, box_stds):
    """
    Encode gt_rois as regression targets relative to ex_rois.

    For each row the centre offset is divided by the ex box size, the size
    ratio is taken in log space, and each of the four components is divided by
    the matching entry of box_stds.
    :param ex_rois: [N, 4]
    :param gt_rois: [N, 4]
    :param box_stds: indexable with entries 0..3, the divisors for (dx, dy, dw, dh)
    :return: [N, 4]
    """
    assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number'

    def _extent_and_center(rois):
        # Coordinates are inclusive, hence the +1 in the side lengths.
        w = rois[:, 2] - rois[:, 0] + 1.0
        h = rois[:, 3] - rois[:, 1] + 1.0
        cx = rois[:, 0] + 0.5 * (w - 1.0)
        cy = rois[:, 1] + 0.5 * (h - 1.0)
        return w, h, cx, cy

    src_w, src_h, src_cx, src_cy = _extent_and_center(ex_rois)
    dst_w, dst_h, dst_cx, dst_cy = _extent_and_center(gt_rois)

    # The small epsilon keeps the centre-offset division finite for a zero-size
    # ex box; the log-ratio terms below carry no such guard.
    shift_x = (dst_cx - src_cx) / (src_w + 1e-14) / box_stds[0]
    shift_y = (dst_cy - src_cy) / (src_h + 1e-14) / box_stds[1]
    scale_w = np.log(dst_w / src_w) / box_stds[2]
    scale_h = np.log(dst_h / src_h) / box_stds[3]

    return np.vstack((shift_x, shift_y, scale_w, scale_h)).T
| |
| |
def bbox_pred(boxes, box_deltas, box_stds):
    """
    Decode per-class regression deltas into boxes.

    Each delta is first multiplied by the matching entry of box_stds, then
    applied to the centre and size of the corresponding row of boxes. The
    output is a freshly allocated array; boxes and box_deltas are not modified.
    :param boxes: !important [N 4]
    :param box_deltas: [N, 4 * num_classes]
    :param box_stds: indexable with entries 0..3, the multipliers for (dx, dy, dw, dh)
    :return: [N 4 * num_classes]
    """
    if boxes.shape[0] == 0:
        return np.zeros((0, box_deltas.shape[1]))

    # Size and centre of every input box, kept as column vectors so they
    # broadcast across the per-class delta columns.
    base_w = boxes[:, 2] - boxes[:, 0] + 1.0
    base_h = boxes[:, 3] - boxes[:, 1] + 1.0
    base_cx = (boxes[:, 0] + 0.5 * (base_w - 1.0))[:, np.newaxis]
    base_cy = (boxes[:, 1] + 0.5 * (base_h - 1.0))[:, np.newaxis]
    base_w = base_w[:, np.newaxis]
    base_h = base_h[:, np.newaxis]

    shift_x = box_deltas[:, 0::4] * box_stds[0]
    shift_y = box_deltas[:, 1::4] * box_stds[1]
    log_w = box_deltas[:, 2::4] * box_stds[2]
    log_h = box_deltas[:, 3::4] * box_stds[3]

    new_cx = shift_x * base_w + base_cx
    new_cy = shift_y * base_h + base_cy
    half_w = 0.5 * (np.exp(log_w) * base_w - 1.0)
    half_h = 0.5 * (np.exp(log_h) * base_h - 1.0)

    decoded = np.zeros(box_deltas.shape)
    decoded[:, 0::4] = new_cx - half_w  # x1
    decoded[:, 1::4] = new_cy - half_h  # y1
    decoded[:, 2::4] = new_cx + half_w  # x2
    decoded[:, 3::4] = new_cy + half_h  # y2
    return decoded
| |
| |
def nms(dets, thresh):
    """
    Greedy non-maximum suppression.

    Repeatedly take the highest-scoring remaining box, keep it, and drop every
    other remaining box whose IoU with it is greater than thresh. Boxes with
    IoU equal to thresh survive.
    :param dets: [[x1, y1, x2, y2 score]]
    :param thresh: retain overlap <= thresh
    :return: indexes to keep, in descending score order
    """
    left, top, right, bottom = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    confidences = dets[:, 4]

    # Inclusive coordinates, hence the +1 on each side length.
    box_areas = (right - left + 1) * (bottom - top + 1)
    remaining = confidences.argsort()[::-1]

    keep = []
    while remaining.size > 0:
        best = remaining[0]
        others = remaining[1:]
        keep.append(best)

        # Intersection rectangle of the best box with every other candidate.
        inter_left = np.maximum(left[best], left[others])
        inter_top = np.maximum(top[best], top[others])
        inter_right = np.minimum(right[best], right[others])
        inter_bottom = np.minimum(bottom[best], bottom[others])

        inter_w = np.maximum(0.0, inter_right - inter_left + 1)
        inter_h = np.maximum(0.0, inter_bottom - inter_top + 1)
        inter_area = inter_w * inter_h
        iou = inter_area / (box_areas[best] + box_areas[others] - inter_area)

        remaining = others[iou <= thresh]

    return keep
| |
| |
def im_detect(rois, scores, bbox_deltas, im_info,
              bbox_stds, nms_thresh, conf_thresh):
    """
    Turn raw network outputs into per-class detections.

    rois (nroi, 4), scores (nrois, nclasses), bbox_deltas (nrois, 4 * nclasses), im_info (3)
    The first four arguments must expose ``asnumpy()``; im_info unpacks to
    (height, width, scale). Class index 0 is skipped.
    :param bbox_stds: passed through to bbox_pred to rescale the deltas
    :param nms_thresh: overlap threshold passed to nms for each class
    :param conf_thresh: only scores strictly greater than this are considered
    :return: array with one row per kept detection: [class id, score, x1, y1, x2, y2]
    """
    roi_arr = rois.asnumpy()
    score_arr = scores.asnumpy()
    delta_arr = bbox_deltas.asnumpy()
    height, width, scale = im_info.asnumpy()

    # Decode and clip in the scaled-image frame, then divide by scale because
    # the network saw a rescaled image and the rois refer to that image.
    decoded = clip_boxes(bbox_pred(roi_arr, delta_arr, bbox_stds), (height, width))
    decoded = decoded / scale

    per_class = []
    num_classes = score_arr.shape[-1]
    for cls in range(1, num_classes):
        picked = np.where(score_arr[:, cls] > conf_thresh)[0]
        picked_scores = score_arr[picked, cls, np.newaxis]
        picked_boxes = decoded[picked, cls * 4:(cls + 1) * 4]

        # nms expects rows of [x1, y1, x2, y2, score].
        survivors = nms(np.hstack((picked_boxes, picked_scores)), thresh=nms_thresh)

        class_column = np.ones_like(picked_scores) * cls
        rows = np.hstack((class_column, picked_scores, picked_boxes))
        per_class.append(rows[survivors, :])

    # assemble all classes
    return np.concatenate(per_class, axis=0)