# example/rcnn/symdata/bbox.py - mxnet - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 import numpy as np


 def bbox_flip(bbox, width, flip_x=False):
     """
     Mirror boxes horizontally within the given width.

     Columns 0 and 2 (x1 and x2) are overwritten in place with ``width - x2``
     and ``width - x1``.  Both replacement columns are computed before either
     one is stored, so the first write cannot feed into the second.
     NOTE(review): elsewhere in this file coordinates are inclusive pixel
     indexes; confirm whether ``width - 1 - x`` was intended here.
     :param bbox: [n][x1, y1, x2, y2], modified in place when flip_x is set
     :param width: horizontal extent the boxes are mirrored within
     :param flip_x: when False the boxes are returned untouched
     :return: the same bbox object that was passed in
     """
     if not flip_x:
         return bbox

     mirrored_x1 = width - bbox[:, 2]
     mirrored_x2 = width - bbox[:, 0]
     bbox[:, 0] = mirrored_x1
     bbox[:, 2] = mirrored_x2
     return bbox


 def bbox_overlaps(boxes, query_boxes):
     """
     Compute the pairwise intersection-over-union (IoU) of two box sets.

     Coordinates are treated as inclusive pixel indexes, so a box covers
     ``x2 - x1 + 1`` by ``y2 - y1 + 1`` pixels.  Pairs that do not overlap
     get an IoU of 0.  Only the first four columns of each input are read.
     :param boxes: n * 4 bounding boxes [x1, y1, x2, y2]
     :param query_boxes: k * 4 bounding boxes [x1, y1, x2, y2]
     :return: overlaps: n * k float64 array, overlaps[i, j] is the IoU of
         boxes[i] and query_boxes[j]
     """
     n_ = boxes.shape[0]
     k_ = query_boxes.shape[0]
     # np.float64 replaces the np.float alias, which NumPy 1.24 removed.
     overlaps = np.zeros((n_, k_), dtype=np.float64)
     if n_ == 0 or k_ == 0:
         return overlaps

     boxes = np.asarray(boxes, dtype=np.float64)
     query_boxes = np.asarray(query_boxes, dtype=np.float64)

     # (n, 1) columns broadcast against (k,) rows to cover every pair at once.
     bx1, by1, bx2, by2 = (boxes[:, i:i + 1] for i in range(4))
     qx1, qy1, qx2, qy2 = (query_boxes[:, i] for i in range(4))

     box_areas = (bx2 - bx1 + 1) * (by2 - by1 + 1)
     query_areas = (qx2 - qx1 + 1) * (qy2 - qy1 + 1)

     iw = np.minimum(bx2, qx2) - np.maximum(bx1, qx1) + 1
     ih = np.minimum(by2, qy2) - np.maximum(by1, qy1) + 1

     # Only pairs with a positive intersection in both axes get a value;
     # every other entry keeps its initial 0.
     hit = (iw > 0) & (ih > 0)
     inter = iw * ih
     union = box_areas + query_areas - inter
     overlaps[hit] = inter[hit] / union[hit]
     return overlaps


 def clip_boxes(boxes, im_shape):
     """
     Clamp box coordinates in place so they lie inside the image.

     x coordinates (columns 0 and 2 of every group of four) are limited to
     [0, im_shape[1] - 1] and y coordinates (columns 1 and 3) to
     [0, im_shape[0] - 1].
     :param boxes: [N, 4 * num_classes], overwritten with the clipped values
     :param im_shape: tuple of 2, indexed as (y extent, x extent)
     :return: [N, 4 * num_classes], the same array object as ``boxes``
     """
     last_col = im_shape[1] - 1
     last_row = im_shape[0] - 1
     # Column offsets 0..3 hold x1, y1, x2, y2 within each group of four.
     for offset, upper in enumerate((last_col, last_row, last_col, last_row)):
         coords = boxes[:, offset::4]
         boxes[:, offset::4] = np.maximum(np.minimum(coords, upper), 0)
     return boxes


 def bbox_transform(ex_rois, gt_rois, box_stds):
     """
     Encode gt_rois relative to ex_rois as normalised regression targets.

     For each pair the target is (dx, dy, dw, dh): the center offset divided
     by the ex box size, and the log of the size ratio, each divided by the
     matching entry of box_stds.
     :param ex_rois: [N, 4] source boxes [x1, y1, x2, y2]
     :param gt_rois: [N, 4] target boxes [x1, y1, x2, y2]
     :param box_stds: four divisors applied to dx, dy, dw, dh in that order
     :return: [N, 4]
     """
     assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number'

     def _size_and_center(rois):
         # Inclusive pixel coordinates: extent is (max - min + 1).
         w = rois[:, 2] - rois[:, 0] + 1.0
         h = rois[:, 3] - rois[:, 1] + 1.0
         cx = rois[:, 0] + 0.5 * (w - 1.0)
         cy = rois[:, 1] + 0.5 * (h - 1.0)
         return w, h, cx, cy

     src_w, src_h, src_cx, src_cy = _size_and_center(ex_rois)
     dst_w, dst_h, dst_cx, dst_cy = _size_and_center(gt_rois)

     # The small epsilon is added only in the center-offset denominators.
     dx = (dst_cx - src_cx) / (src_w + 1e-14) / box_stds[0]
     dy = (dst_cy - src_cy) / (src_h + 1e-14) / box_stds[1]
     dw = np.log(dst_w / src_w) / box_stds[2]
     dh = np.log(dst_h / src_h) / box_stds[3]

     return np.vstack((dx, dy, dw, dh)).transpose()


 def bbox_pred(boxes, box_deltas, box_stds):
     """
     Decode regression deltas into absolute boxes, one box per class.

     Each group of four delta columns (dx, dy, dw, dh) is scaled by box_stds
     and applied to the same source box: the center is shifted by a fraction
     of the box size and the size is multiplied by exp(dw) / exp(dh).
     :param boxes: !important [N 4] class-agnostic source boxes
     :param box_deltas: [N, 4 * num_classes]
     :param box_stds: four multipliers applied to dx, dy, dw, dh in that order
     :return: [N 4 * num_classes]
     """
     if boxes.shape[0] == 0:
         return np.zeros((0, box_deltas.shape[1]))

     base_w = boxes[:, 2] - boxes[:, 0] + 1.0
     base_h = boxes[:, 3] - boxes[:, 1] + 1.0
     base_cx = boxes[:, 0] + 0.5 * (base_w - 1.0)
     base_cy = boxes[:, 1] + 0.5 * (base_h - 1.0)

     # Reshape to (N, 1) so each source box broadcasts across all classes.
     base_w = base_w[:, np.newaxis]
     base_h = base_h[:, np.newaxis]
     base_cx = base_cx[:, np.newaxis]
     base_cy = base_cy[:, np.newaxis]

     # Stride-4 slices pick one delta component for every class.
     dx, dy, dw, dh = (box_deltas[:, i::4] * box_stds[i] for i in range(4))

     new_cx = dx * base_w + base_cx
     new_cy = dy * base_h + base_cy
     half_w = 0.5 * (np.exp(dw) * base_w - 1.0)
     half_h = 0.5 * (np.exp(dh) * base_h - 1.0)

     decoded = np.zeros(box_deltas.shape)
     decoded[:, 0::4] = new_cx - half_w  # x1
     decoded[:, 1::4] = new_cy - half_h  # y1
     decoded[:, 2::4] = new_cx + half_w  # x2
     decoded[:, 3::4] = new_cy + half_h  # y2
     return decoded


 def nms(dets, thresh):
     """
     Greedy non-maximum suppression.

     Repeatedly take the highest-scoring remaining box, keep it, and discard
     every other remaining box whose IoU with it is strictly greater than
     thresh (boxes with IoU <= thresh stay in the running).
     :param dets: [[x1, y1, x2, y2 score]]
     :param thresh: largest IoU with a kept box that a candidate may have
     :return: indexes to keep, ordered by descending score
     """
     x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
     scores = dets[:, 4]

     # Inclusive pixel coordinates, hence the +1 on each side length.
     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
     remaining = scores.argsort()[::-1]

     keep = []
     while remaining.size > 0:
         top = remaining[0]
         rest = remaining[1:]
         keep.append(top)

         # Intersection of the selected box with every other candidate.
         left = np.maximum(x1[top], x1[rest])
         upper = np.maximum(y1[top], y1[rest])
         right = np.minimum(x2[top], x2[rest])
         lower = np.minimum(y2[top], y2[rest])
         inter = np.maximum(0.0, right - left + 1) * np.maximum(0.0, lower - upper + 1)

         iou = inter / (areas[top] + areas[rest] - inter)
         remaining = rest[iou <= thresh]

     return keep


 def im_detect(rois, scores, bbox_deltas, im_info,
               bbox_stds, nms_thresh, conf_thresh):
     """
     Turn raw network outputs for one image into per-class detections.

     The four array inputs must expose ``asnumpy()``.  Boxes are decoded with
     bbox_pred, clipped to (height, width), divided by scale, then filtered
     per class by score and NMS.  Class column 0 is skipped (presumably the
     background class -- confirm against the model).
     :param rois: (nroi, 4)
     :param scores: (nrois, nclasses)
     :param bbox_deltas: (nrois, 4 * nclasses)
     :param im_info: (3) holding height, width, scale
     :param bbox_stds: passed through to bbox_pred
     :param nms_thresh: passed to nms as thresh
     :param conf_thresh: a score must be strictly greater than this to be kept
     :return: array whose rows are [class id, score, x1, y1, x2, y2]
     """
     rois, scores, bbox_deltas, im_info = (
         blob.asnumpy() for blob in (rois, scores, bbox_deltas, im_info))
     height, width, scale = im_info

     # Decode and clip in the scaled image frame, then map back by dividing
     # out the scale.
     decoded = clip_boxes(bbox_pred(rois, bbox_deltas, bbox_stds), (height, width))
     pred_boxes = decoded / scale

     per_class = []
     for cls in range(1, scores.shape[-1]):
         picked = np.where(scores[:, cls] > conf_thresh)[0]
         conf = scores[picked, cls, np.newaxis]
         coords = pred_boxes[picked, cls * 4:(cls + 1) * 4]
         keep = nms(np.hstack((coords, conf)), thresh=nms_thresh)

         labels = np.ones_like(conf) * cls
         per_class.append(np.hstack((labels, conf, coords))[keep, :])

     # Stack the per-class results into a single array.
     return np.concatenate(per_class, axis=0)
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	import numpy as np


	def bbox_flip(bbox, width, flip_x=False):
	    """
	    Mirror boxes horizontally within the given width.

	    Columns 0 and 2 (x1 and x2) are overwritten in place with ``width - x2``
	    and ``width - x1``.  Both replacement columns are computed before either
	    one is stored, so the first write cannot feed into the second.
	    :param bbox: [n][x1, y1, x2, y2], modified in place when flip_x is set
	    :param width: horizontal extent the boxes are mirrored within
	    :param flip_x: when False the boxes are returned untouched
	    :return: the same bbox object that was passed in
	    """
	    if flip_x:
	        xmax = width - bbox[:, 0]
	        xmin = width - bbox[:, 2]
	        bbox[:, 0] = xmin
	        bbox[:, 2] = xmax
	    return bbox


	def bbox_overlaps(boxes, query_boxes):
	    """
	    Compute the pairwise intersection-over-union (IoU) of two box sets.

	    Coordinates are treated as inclusive pixel indexes, so a box covers
	    ``x2 - x1 + 1`` by ``y2 - y1 + 1`` pixels.  Pairs that do not overlap
	    get an IoU of 0.  Only the first four columns of each input are read.
	    :param boxes: n * 4 bounding boxes [x1, y1, x2, y2]
	    :param query_boxes: k * 4 bounding boxes [x1, y1, x2, y2]
	    :return: overlaps: n * k float64 array, overlaps[i, j] is the IoU of
	        boxes[i] and query_boxes[j]
	    """
	    n_ = boxes.shape[0]
	    k_ = query_boxes.shape[0]
	    # np.float64 replaces the np.float alias, which NumPy 1.24 removed.
	    overlaps = np.zeros((n_, k_), dtype=np.float64)
	    if n_ == 0 or k_ == 0:
	        return overlaps

	    boxes = np.asarray(boxes, dtype=np.float64)
	    query_boxes = np.asarray(query_boxes, dtype=np.float64)

	    # (n, 1) columns broadcast against (k,) rows to cover every pair at once.
	    bx1, by1, bx2, by2 = (boxes[:, i:i + 1] for i in range(4))
	    qx1, qy1, qx2, qy2 = (query_boxes[:, i] for i in range(4))

	    box_areas = (bx2 - bx1 + 1) * (by2 - by1 + 1)
	    query_areas = (qx2 - qx1 + 1) * (qy2 - qy1 + 1)

	    iw = np.minimum(bx2, qx2) - np.maximum(bx1, qx1) + 1
	    ih = np.minimum(by2, qy2) - np.maximum(by1, qy1) + 1

	    # Only pairs with a positive intersection in both axes get a value;
	    # every other entry keeps its initial 0.
	    hit = (iw > 0) & (ih > 0)
	    inter = iw * ih
	    union = box_areas + query_areas - inter
	    overlaps[hit] = inter[hit] / union[hit]
	    return overlaps


	def clip_boxes(boxes, im_shape):
	    """
	    Clamp box coordinates in place so they lie inside the image.

	    x coordinates (columns 0 and 2 of every group of four) are limited to
	    [0, im_shape[1] - 1] and y coordinates (columns 1 and 3) to
	    [0, im_shape[0] - 1].
	    :param boxes: [N, 4 * num_classes], overwritten with the clipped values
	    :param im_shape: tuple of 2, indexed as (y extent, x extent)
	    :return: [N, 4 * num_classes], the same array object as ``boxes``
	    """
	    # x1 >= 0
	    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
	    # y1 >= 0
	    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
	    # x2 < im_shape[1]
	    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
	    # y2 < im_shape[0]
	    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
	    return boxes


	def bbox_transform(ex_rois, gt_rois, box_stds):
	    """
	    Encode gt_rois relative to ex_rois as normalised regression targets.

	    For each pair the target is (dx, dy, dw, dh): the center offset divided
	    by the ex box size, and the log of the size ratio, each divided by the
	    matching entry of box_stds.
	    :param ex_rois: [N, 4] source boxes [x1, y1, x2, y2]
	    :param gt_rois: [N, 4] target boxes [x1, y1, x2, y2]
	    :param box_stds: four divisors applied to dx, dy, dw, dh in that order
	    :return: [N, 4]
	    """
	    assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number'

	    # Inclusive pixel coordinates: extent is (max - min + 1).
	    ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
	    ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
	    ex_ctr_x = ex_rois[:, 0] + 0.5 * (ex_widths - 1.0)
	    ex_ctr_y = ex_rois[:, 1] + 0.5 * (ex_heights - 1.0)

	    gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
	    gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
	    gt_ctr_x = gt_rois[:, 0] + 0.5 * (gt_widths - 1.0)
	    gt_ctr_y = gt_rois[:, 1] + 0.5 * (gt_heights - 1.0)

	    # The small epsilon is added only in the center-offset denominators.
	    targets_dx = (gt_ctr_x - ex_ctr_x) / (ex_widths + 1e-14) / box_stds[0]
	    targets_dy = (gt_ctr_y - ex_ctr_y) / (ex_heights + 1e-14) / box_stds[1]
	    targets_dw = np.log(gt_widths / ex_widths) / box_stds[2]
	    targets_dh = np.log(gt_heights / ex_heights) / box_stds[3]

	    targets = np.vstack((targets_dx, targets_dy, targets_dw, targets_dh)).transpose()
	    return targets


	def bbox_pred(boxes, box_deltas, box_stds):
	    """
	    Decode regression deltas into absolute boxes, one box per class.

	    Each group of four delta columns (dx, dy, dw, dh) is scaled by box_stds
	    and applied to the same source box: the center is shifted by a fraction
	    of the box size and the size is multiplied by exp(dw) / exp(dh).
	    :param boxes: !important [N 4] class-agnostic source boxes
	    :param box_deltas: [N, 4 * num_classes]
	    :param box_stds: four multipliers applied to dx, dy, dw, dh in that order
	    :return: [N 4 * num_classes]
	    """
	    if boxes.shape[0] == 0:
	        return np.zeros((0, box_deltas.shape[1]))

	    widths = boxes[:, 2] - boxes[:, 0] + 1.0
	    heights = boxes[:, 3] - boxes[:, 1] + 1.0
	    ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0)
	    ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0)

	    # Stride-4 slices pick one delta component for every class.
	    dx = box_deltas[:, 0::4] * box_stds[0]
	    dy = box_deltas[:, 1::4] * box_stds[1]
	    dw = box_deltas[:, 2::4] * box_stds[2]
	    dh = box_deltas[:, 3::4] * box_stds[3]

	    # The (N, 1) views let each source box broadcast across all classes.
	    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
	    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
	    pred_w = np.exp(dw) * widths[:, np.newaxis]
	    pred_h = np.exp(dh) * heights[:, np.newaxis]

	    pred_boxes = np.zeros(box_deltas.shape)
	    # x1
	    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * (pred_w - 1.0)
	    # y1
	    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * (pred_h - 1.0)
	    # x2
	    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * (pred_w - 1.0)
	    # y2
	    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * (pred_h - 1.0)

	    return pred_boxes


	def nms(dets, thresh):
	    """
	    Greedy non-maximum suppression.

	    Repeatedly take the highest-scoring remaining box, keep it, and discard
	    every other remaining box whose IoU with it is strictly greater than
	    thresh (boxes with IoU <= thresh stay in the running).
	    :param dets: [[x1, y1, x2, y2 score]]
	    :param thresh: largest IoU with a kept box that a candidate may have
	    :return: indexes to keep, ordered by descending score
	    """
	    x1 = dets[:, 0]
	    y1 = dets[:, 1]
	    x2 = dets[:, 2]
	    y2 = dets[:, 3]
	    scores = dets[:, 4]

	    # Inclusive pixel coordinates, hence the +1 on each side length.
	    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
	    order = scores.argsort()[::-1]

	    keep = []
	    while order.size > 0:
	        i = order[0]
	        keep.append(i)
	        # Intersection of the selected box with every other candidate.
	        xx1 = np.maximum(x1[i], x1[order[1:]])
	        yy1 = np.maximum(y1[i], y1[order[1:]])
	        xx2 = np.minimum(x2[i], x2[order[1:]])
	        yy2 = np.minimum(y2[i], y2[order[1:]])

	        w = np.maximum(0.0, xx2 - xx1 + 1)
	        h = np.maximum(0.0, yy2 - yy1 + 1)
	        inter = w * h
	        ovr = inter / (areas[i] + areas[order[1:]] - inter)

	        # inds index into order[1:], so shift by one to index order itself.
	        inds = np.where(ovr <= thresh)[0]
	        order = order[inds + 1]

	    return keep


	def im_detect(rois, scores, bbox_deltas, im_info,
	              bbox_stds, nms_thresh, conf_thresh):
	    """
	    Turn raw network outputs for one image into per-class detections.

	    The four array inputs must expose ``asnumpy()``.  Boxes are decoded with
	    bbox_pred, clipped to (height, width), divided by scale, then filtered
	    per class by score and NMS.  Class column 0 is skipped (presumably the
	    background class -- confirm against the model).
	    :param rois: (nroi, 4)
	    :param scores: (nrois, nclasses)
	    :param bbox_deltas: (nrois, 4 * nclasses)
	    :param im_info: (3) holding height, width, scale
	    :param bbox_stds: passed through to bbox_pred
	    :param nms_thresh: passed to nms as thresh
	    :param conf_thresh: a score must be strictly greater than this to be kept
	    :return: array whose rows are [class id, score, x1, y1, x2, y2]
	    """
	    rois = rois.asnumpy()
	    scores = scores.asnumpy()
	    bbox_deltas = bbox_deltas.asnumpy()

	    im_info = im_info.asnumpy()
	    height, width, scale = im_info

	    # post processing
	    pred_boxes = bbox_pred(rois, bbox_deltas, bbox_stds)
	    pred_boxes = clip_boxes(pred_boxes, (height, width))

	    # we used scaled image & roi to train, so it is necessary to transform them back
	    pred_boxes = pred_boxes / scale

	    # convert to per class detection results
	    det = []
	    for j in range(1, scores.shape[-1]):
	        indexes = np.where(scores[:, j] > conf_thresh)[0]
	        cls_scores = scores[indexes, j, np.newaxis]
	        cls_boxes = pred_boxes[indexes, j * 4:(j + 1) * 4]
	        cls_dets = np.hstack((cls_boxes, cls_scores))
	        keep = nms(cls_dets, thresh=nms_thresh)

	        cls_id = np.ones_like(cls_scores) * j
	        det.append(np.hstack((cls_id, cls_scores, cls_boxes))[keep, :])

	    # assemble all classes
	    det = np.concatenate(det, axis=0)
	    return det