example/rcnn/symnet/proposal_target.py - mxnet - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 """
 Proposal Target Operator selects foreground and background roi and assigns label, bbox_transform to them.
 """

 import mxnet as mx
 import numpy as np

 from symdata.bbox import bbox_overlaps, bbox_transform


 def sample_rois(rois, gt_boxes, num_classes, rois_per_image, fg_rois_per_image, fg_overlap, box_stds):
     """
     generate random sample of ROIs comprising foreground and background examples
     :param rois: [n, 5] (batch_index, x1, y1, x2, y2)
     :param gt_boxes: [n, 5] (x1, y1, x2, y2, cls)
     :param num_classes: number of classes
     :param rois_per_image: total roi number
     :param fg_rois_per_image: foreground roi number
     :param fg_overlap: overlap threshold for fg rois
     :param box_stds: std var of bbox reg
     :return: (rois, labels, bbox_targets, bbox_weights)
     """
     overlaps = bbox_overlaps(rois[:, 1:], gt_boxes[:, :4])
     gt_assignment = overlaps.argmax(axis=1)
     labels = gt_boxes[gt_assignment, 4]
     max_overlaps = overlaps.max(axis=1)

     # select foreground RoI with FG_THRESH overlap
     fg_indexes = np.where(max_overlaps >= fg_overlap)[0]
     # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
     fg_rois_this_image = min(fg_rois_per_image, len(fg_indexes))
     # sample foreground regions without replacement
     if len(fg_indexes) > fg_rois_this_image:
         fg_indexes = np.random.choice(fg_indexes, size=fg_rois_this_image, replace=False)

     # select background RoIs as those within [0, FG_THRESH)
     bg_indexes = np.where(max_overlaps < fg_overlap)[0]
     # compute number of background RoIs to take from this image (guarding against there being fewer than desired)
     bg_rois_this_image = rois_per_image - fg_rois_this_image
     bg_rois_this_image = min(bg_rois_this_image, len(bg_indexes))
     # sample bg rois without replacement
     if len(bg_indexes) > bg_rois_this_image:
         bg_indexes = np.random.choice(bg_indexes, size=bg_rois_this_image, replace=False)

     # indexes selected
     keep_indexes = np.append(fg_indexes, bg_indexes)
     # pad more bg rois to ensure a fixed minibatch size
     while len(keep_indexes) < rois_per_image:
         gap = min(len(bg_indexes), rois_per_image - len(keep_indexes))
         gap_indexes = np.random.choice(range(len(bg_indexes)), size=gap, replace=False)
         keep_indexes = np.append(keep_indexes, bg_indexes[gap_indexes])

     # sample rois and labels
     rois = rois[keep_indexes]
     labels = labels[keep_indexes]
     # set labels of bg rois to be 0
     labels[fg_rois_this_image:] = 0

     # load or compute bbox_target
     targets = bbox_transform(rois[:, 1:], gt_boxes[gt_assignment[keep_indexes], :4], box_stds=box_stds)
     bbox_targets = np.zeros((rois_per_image, 4 * num_classes), dtype=np.float32)
     bbox_weights = np.zeros((rois_per_image, 4 * num_classes), dtype=np.float32)
     for i in range(fg_rois_this_image):
         cls_ind = int(labels[i])
         bbox_targets[i, cls_ind * 4:(cls_ind + 1) * 4] = targets[i]
         bbox_weights[i, cls_ind * 4:(cls_ind + 1) * 4] = 1

     return rois, labels, bbox_targets, bbox_weights


 class ProposalTargetOperator(mx.operator.CustomOp):
     def __init__(self, num_classes, batch_images, batch_rois, fg_fraction, fg_overlap, box_stds):
         super(ProposalTargetOperator, self).__init__()
         self._num_classes = num_classes
         self._batch_images = batch_images
         self._batch_rois = batch_rois
         self._rois_per_image = int(batch_rois / batch_images)
         self._fg_rois_per_image = int(round(fg_fraction * self._rois_per_image))
         self._fg_overlap = fg_overlap
         self._box_stds = box_stds

     def forward(self, is_train, req, in_data, out_data, aux):
         assert self._batch_images == in_data[1].shape[0], 'check batch size of gt_boxes'

         all_rois = in_data[0].asnumpy()
         all_gt_boxes = in_data[1].asnumpy()

         rois = np.empty((0, 5), dtype=np.float32)
         labels = np.empty((0, ), dtype=np.float32)
         bbox_targets = np.empty((0, 4 * self._num_classes), dtype=np.float32)
         bbox_weights = np.empty((0, 4 * self._num_classes), dtype=np.float32)
         for batch_idx in range(self._batch_images):
             b_rois = all_rois[np.where(all_rois[:, 0] == batch_idx)[0]]
             b_gt_boxes = all_gt_boxes[batch_idx]
             b_gt_boxes = b_gt_boxes[np.where(b_gt_boxes[:, -1] > 0)[0]]

             # Include ground-truth boxes in the set of candidate rois
             batch_pad = batch_idx * np.ones((b_gt_boxes.shape[0], 1), dtype=b_gt_boxes.dtype)
             b_rois = np.vstack((b_rois, np.hstack((batch_pad, b_gt_boxes[:, :-1]))))

             b_rois, b_labels, b_bbox_targets, b_bbox_weights = \
                 sample_rois(b_rois, b_gt_boxes, num_classes=self._num_classes, rois_per_image=self._rois_per_image,
                             fg_rois_per_image=self._fg_rois_per_image, fg_overlap=self._fg_overlap, box_stds=self._box_stds)

             rois = np.vstack((rois, b_rois))
             labels = np.hstack((labels, b_labels))
             bbox_targets = np.vstack((bbox_targets, b_bbox_targets))
             bbox_weights = np.vstack((bbox_weights, b_bbox_weights))

         self.assign(out_data[0], req[0], rois)
         self.assign(out_data[1], req[1], labels)
         self.assign(out_data[2], req[2], bbox_targets)
         self.assign(out_data[3], req[3], bbox_weights)

     def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
         self.assign(in_grad[0], req[0], 0)
         self.assign(in_grad[1], req[1], 0)


 @mx.operator.register('proposal_target')
 class ProposalTargetProp(mx.operator.CustomOpProp):
     def __init__(self, num_classes='21', batch_images='1', batch_rois='128', fg_fraction='0.25',
                  fg_overlap='0.5', box_stds='(0.1, 0.1, 0.2, 0.2)'):
         super(ProposalTargetProp, self).__init__(need_top_grad=False)
         self._num_classes = int(num_classes)
         self._batch_images = int(batch_images)
         self._batch_rois = int(batch_rois)
         self._fg_fraction = float(fg_fraction)
         self._fg_overlap = float(fg_overlap)
         self._box_stds = tuple(np.fromstring(box_stds[1:-1], dtype=float, sep=','))

     def list_arguments(self):
         return ['rois', 'gt_boxes']

     def list_outputs(self):
         return ['rois_output', 'label', 'bbox_target', 'bbox_weight']

     def infer_shape(self, in_shape):
         assert self._batch_rois % self._batch_images == 0, \
             'BATCHIMAGES {} must devide BATCH_ROIS {}'.format(self._batch_images, self._batch_rois)

         rpn_rois_shape = in_shape[0]
         gt_boxes_shape = in_shape[1]

         output_rois_shape = (self._batch_rois, 5)
         label_shape = (self._batch_rois, )
         bbox_target_shape = (self._batch_rois, self._num_classes * 4)
         bbox_weight_shape = (self._batch_rois, self._num_classes * 4)

         return [rpn_rois_shape, gt_boxes_shape], \
                [output_rois_shape, label_shape, bbox_target_shape, bbox_weight_shape]

     def create_operator(self, ctx, shapes, dtypes):
         return ProposalTargetOperator(self._num_classes, self._batch_images, self._batch_rois, self._fg_fraction,
                                       self._fg_overlap, self._box_stds)

     def declare_backward_dependency(self, out_grad, in_data, out_data):
         return []
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	"""
	Proposal Target Operator selects foreground and background roi and assigns label, bbox_transform to them.
	"""

	import mxnet as mx
	import numpy as np

	from symdata.bbox import bbox_overlaps, bbox_transform


	def sample_rois(rois, gt_boxes, num_classes, rois_per_image, fg_rois_per_image, fg_overlap, box_stds):
	"""
	generate random sample of ROIs comprising foreground and background examples
	:param rois: [n, 5] (batch_index, x1, y1, x2, y2)
	:param gt_boxes: [n, 5] (x1, y1, x2, y2, cls)
	:param num_classes: number of classes
	:param rois_per_image: total roi number
	:param fg_rois_per_image: foreground roi number
	:param fg_overlap: overlap threshold for fg rois
	:param box_stds: std var of bbox reg
	:return: (rois, labels, bbox_targets, bbox_weights)
	"""
	overlaps = bbox_overlaps(rois[:, 1:], gt_boxes[:, :4])
	gt_assignment = overlaps.argmax(axis=1)
	labels = gt_boxes[gt_assignment, 4]
	max_overlaps = overlaps.max(axis=1)

	# select foreground RoI with FG_THRESH overlap
	fg_indexes = np.where(max_overlaps >= fg_overlap)[0]
	# guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
	fg_rois_this_image = min(fg_rois_per_image, len(fg_indexes))
	# sample foreground regions without replacement
	if len(fg_indexes) > fg_rois_this_image:
	fg_indexes = np.random.choice(fg_indexes, size=fg_rois_this_image, replace=False)

	# select background RoIs as those within [0, FG_THRESH)
	bg_indexes = np.where(max_overlaps < fg_overlap)[0]
	# compute number of background RoIs to take from this image (guarding against there being fewer than desired)
	bg_rois_this_image = rois_per_image - fg_rois_this_image
	bg_rois_this_image = min(bg_rois_this_image, len(bg_indexes))
	# sample bg rois without replacement
	if len(bg_indexes) > bg_rois_this_image:
	bg_indexes = np.random.choice(bg_indexes, size=bg_rois_this_image, replace=False)

	# indexes selected
	keep_indexes = np.append(fg_indexes, bg_indexes)
	# pad more bg rois to ensure a fixed minibatch size
	while len(keep_indexes) < rois_per_image:
	gap = min(len(bg_indexes), rois_per_image - len(keep_indexes))
	gap_indexes = np.random.choice(range(len(bg_indexes)), size=gap, replace=False)
	keep_indexes = np.append(keep_indexes, bg_indexes[gap_indexes])

	# sample rois and labels
	rois = rois[keep_indexes]
	labels = labels[keep_indexes]
	# set labels of bg rois to be 0
	labels[fg_rois_this_image:] = 0

	# load or compute bbox_target
	targets = bbox_transform(rois[:, 1:], gt_boxes[gt_assignment[keep_indexes], :4], box_stds=box_stds)
	bbox_targets = np.zeros((rois_per_image, 4 * num_classes), dtype=np.float32)
	bbox_weights = np.zeros((rois_per_image, 4 * num_classes), dtype=np.float32)
	for i in range(fg_rois_this_image):
	cls_ind = int(labels[i])
	bbox_targets[i, cls_ind * 4:(cls_ind + 1) * 4] = targets[i]
	bbox_weights[i, cls_ind * 4:(cls_ind + 1) * 4] = 1

	return rois, labels, bbox_targets, bbox_weights


	class ProposalTargetOperator(mx.operator.CustomOp):
	def __init__(self, num_classes, batch_images, batch_rois, fg_fraction, fg_overlap, box_stds):
	super(ProposalTargetOperator, self).__init__()
	self._num_classes = num_classes
	self._batch_images = batch_images
	self._batch_rois = batch_rois
	self._rois_per_image = int(batch_rois / batch_images)
	self._fg_rois_per_image = int(round(fg_fraction * self._rois_per_image))
	self._fg_overlap = fg_overlap
	self._box_stds = box_stds

	def forward(self, is_train, req, in_data, out_data, aux):
	assert self._batch_images == in_data[1].shape[0], 'check batch size of gt_boxes'

	all_rois = in_data[0].asnumpy()
	all_gt_boxes = in_data[1].asnumpy()

	rois = np.empty((0, 5), dtype=np.float32)
	labels = np.empty((0, ), dtype=np.float32)
	bbox_targets = np.empty((0, 4 * self._num_classes), dtype=np.float32)
	bbox_weights = np.empty((0, 4 * self._num_classes), dtype=np.float32)
	for batch_idx in range(self._batch_images):
	b_rois = all_rois[np.where(all_rois[:, 0] == batch_idx)[0]]
	b_gt_boxes = all_gt_boxes[batch_idx]
	b_gt_boxes = b_gt_boxes[np.where(b_gt_boxes[:, -1] > 0)[0]]

	# Include ground-truth boxes in the set of candidate rois
	batch_pad = batch_idx * np.ones((b_gt_boxes.shape[0], 1), dtype=b_gt_boxes.dtype)
	b_rois = np.vstack((b_rois, np.hstack((batch_pad, b_gt_boxes[:, :-1]))))

	b_rois, b_labels, b_bbox_targets, b_bbox_weights = \
	sample_rois(b_rois, b_gt_boxes, num_classes=self._num_classes, rois_per_image=self._rois_per_image,
	fg_rois_per_image=self._fg_rois_per_image, fg_overlap=self._fg_overlap, box_stds=self._box_stds)

	rois = np.vstack((rois, b_rois))
	labels = np.hstack((labels, b_labels))
	bbox_targets = np.vstack((bbox_targets, b_bbox_targets))
	bbox_weights = np.vstack((bbox_weights, b_bbox_weights))

	self.assign(out_data[0], req[0], rois)
	self.assign(out_data[1], req[1], labels)
	self.assign(out_data[2], req[2], bbox_targets)
	self.assign(out_data[3], req[3], bbox_weights)

	def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
	self.assign(in_grad[0], req[0], 0)
	self.assign(in_grad[1], req[1], 0)


	@mx.operator.register('proposal_target')
	class ProposalTargetProp(mx.operator.CustomOpProp):
	def __init__(self, num_classes='21', batch_images='1', batch_rois='128', fg_fraction='0.25',
	fg_overlap='0.5', box_stds='(0.1, 0.1, 0.2, 0.2)'):
	super(ProposalTargetProp, self).__init__(need_top_grad=False)
	self._num_classes = int(num_classes)
	self._batch_images = int(batch_images)
	self._batch_rois = int(batch_rois)
	self._fg_fraction = float(fg_fraction)
	self._fg_overlap = float(fg_overlap)
	self._box_stds = tuple(np.fromstring(box_stds[1:-1], dtype=float, sep=','))

	def list_arguments(self):
	return ['rois', 'gt_boxes']

	def list_outputs(self):
	return ['rois_output', 'label', 'bbox_target', 'bbox_weight']

	def infer_shape(self, in_shape):
	assert self._batch_rois % self._batch_images == 0, \
	'BATCHIMAGES {} must devide BATCH_ROIS {}'.format(self._batch_images, self._batch_rois)

	rpn_rois_shape = in_shape[0]
	gt_boxes_shape = in_shape[1]

	output_rois_shape = (self._batch_rois, 5)
	label_shape = (self._batch_rois, )
	bbox_target_shape = (self._batch_rois, self._num_classes * 4)
	bbox_weight_shape = (self._batch_rois, self._num_classes * 4)

	return [rpn_rois_shape, gt_boxes_shape], \
	[output_rois_shape, label_shape, bbox_target_shape, bbox_weight_shape]

	def create_operator(self, ctx, shapes, dtypes):
	return ProposalTargetOperator(self._num_classes, self._batch_images, self._batch_rois, self._fg_fraction,
	self._fg_overlap, self._box_stds)

	def declare_backward_dependency(self, out_grad, in_data, out_data):
	return []