# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import os

import cv2
import numpy as np

def get_image(roi_rec, short, max_size, mean, std):
    """
    read, resize, transform image, return im_tensor, im_info, gt_boxes
    roi_rec should have keys: ["image", "boxes", "gt_classes", "flipped"]
    0 --- x (width, second dim of im)
    |
    y (height, first dim of im)
    """
    im = imdecode(roi_rec['image'])
    if roi_rec['flipped']:
        im = im[:, ::-1, :]
    im, im_scale = resize(im, short, max_size)
    height, width = im.shape[:2]
    im_info = np.array([height, width, im_scale], dtype=np.float32)
    im_tensor = transform(im, mean, std)
    # gt boxes: (x1, y1, x2, y2, cls)
    if roi_rec['gt_classes'].size > 0:
        gt_inds = np.where(roi_rec['gt_classes'] != 0)[0]
        gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32)
        gt_boxes[:, 0:4] = roi_rec['boxes'][gt_inds, :]
        gt_boxes[:, 4] = roi_rec['gt_classes'][gt_inds]
        # scale gt_boxes
        gt_boxes[:, 0:4] *= im_scale
    else:
        gt_boxes = np.empty((0, 5), dtype=np.float32)
    return im_tensor, im_info, gt_boxes
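
# Illustrative sketch of how get_image is typically called; the path, box, class id
# and mean/std values below are hypothetical examples, not part of this module:
#   roi_rec = {'image': 'data/000001.jpg',
#              'boxes': np.array([[48., 240., 195., 371.]], dtype=np.float32),
#              'gt_classes': np.array([12]), 'flipped': False}
#   im_tensor, im_info, gt_boxes = get_image(roi_rec, short=600, max_size=1000,
#                                            mean=(123.68, 116.779, 103.939), std=(1., 1., 1.))
#   im_tensor: (3, height, width); im_info: [height, width, im_scale];
#   gt_boxes: (num_objects, 5) with columns (x1, y1, x2, y2, cls), already scaled by im_scale.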

def imdecode(image_path):
    """Return BGR image read by opencv"""
    assert os.path.exists(image_path), image_path + ' not found'
    im = cv2.imread(image_path)
    return im

def resize(im, short, max_size):
    """
    resize image so that its short side equals `short`, capping the long side at `max_size`
    :param im: BGR image input by opencv
    :param short: one dimensional size (the short side)
    :param max_size: one dimensional max size (the long side)
    :return: resized image (numpy array) and scale (float)
    """
    im_shape = im.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    im_scale = float(short) / float(im_size_min)
    # prevent bigger axis from being more than max_size:
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
    return im, im_scale
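
# Worked example (illustrative numbers only): for a 480x640 image with short=600 and
# max_size=1000, im_scale = 600 / 480 = 1.25 and the long side becomes 640 * 1.25 = 800,
# which fits under max_size, so the output is 600x800. For a 480x1000 image the scaled
# long side would be 1250 > 1000, so im_scale is clamped to 1000 / 1000 = 1.0 instead.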

def transform(im, mean, std):
    """
    transform into a CHW tensor:
    subtract pixel mean, divide by pixel std and convert BGR to RGB channel order
    :param im: [height, width, channel] in BGR
    :param mean: [RGB pixel mean]
    :param std: [RGB pixel std var]
    :return: [channel, height, width]
    """
    im_tensor = np.zeros((3, im.shape[0], im.shape[1]))
    for i in range(3):
        # channel index 2 - i flips BGR (opencv) to RGB before normalization
        im_tensor[i, :, :] = (im[:, :, 2 - i] - mean[i]) / std[i]
    return im_tensor

def transform_inverse(im_tensor, mean, std):
    """
    transform from im_tensor back to an ordinary RGB image
    im_tensor is limited to one image
    :param im_tensor: [channel, height, width]
    :param mean: [RGB pixel mean]
    :param std: [RGB pixel std var]
    :return: im [height, width, channel(RGB)]
    """
    assert im_tensor.shape[0] == 3
    im = im_tensor.transpose((1, 2, 0))
    im = im * std + mean
    im = im.astype(np.uint8)
    return im
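
# Round-trip sketch (assuming RGB-ordered mean/std): transform converts a BGR uint8 image
# to a normalized (3, height, width) float tensor in RGB order, and
# transform_inverse(transform(im, mean, std), mean, std) recovers the same pixels as an
# RGB image, up to rounding from the uint8 cast.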

def tensor_vstack(tensor_list, pad=0):
    """
    vertically stack tensors by adding a new axis
    expand dims if only 1 tensor
    :param tensor_list: list of tensors to be stacked vertically
    :param pad: label to pad with
    :return: tensor padded to the max shape along each dimension
    """
    if len(tensor_list) == 1:
        return tensor_list[0][np.newaxis, :]
    ndim = len(tensor_list[0].shape)
    dimensions = [len(tensor_list)]  # first dim is batch size
    for dim in range(ndim):
        dimensions.append(max([tensor.shape[dim] for tensor in tensor_list]))
    dtype = tensor_list[0].dtype
    if pad == 0:
        all_tensor = np.zeros(tuple(dimensions), dtype=dtype)
    elif pad == 1:
        all_tensor = np.ones(tuple(dimensions), dtype=dtype)
    else:
        all_tensor = np.full(tuple(dimensions), pad, dtype=dtype)
    if ndim == 1:
        for ind, tensor in enumerate(tensor_list):
            all_tensor[ind, :tensor.shape[0]] = tensor
    elif ndim == 2:
        for ind, tensor in enumerate(tensor_list):
            all_tensor[ind, :tensor.shape[0], :tensor.shape[1]] = tensor
    elif ndim == 3:
        for ind, tensor in enumerate(tensor_list):
            all_tensor[ind, :tensor.shape[0], :tensor.shape[1], :tensor.shape[2]] = tensor
    else:
        raise NotImplementedError('tensor_vstack only supports 1, 2 and 3 dimensional tensors')
    return all_tensor
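

if __name__ == '__main__':
    # Minimal usage sketch, not part of the library API: build one padded mini-batch from
    # two records. The image paths, boxes and class ids below are hypothetical placeholders;
    # point 'image' at real files before running.
    example_recs = [
        {'image': 'images/a.jpg',
         'boxes': np.array([[10., 20., 110., 220.]], dtype=np.float32),
         'gt_classes': np.array([1]), 'flipped': False},
        {'image': 'images/b.jpg',
         'boxes': np.array([[5., 5., 60., 80.]], dtype=np.float32),
         'gt_classes': np.array([2]), 'flipped': True},
    ]
    rgb_mean = (123.68, 116.779, 103.939)  # commonly used RGB pixel means (assumed values)
    rgb_std = (1.0, 1.0, 1.0)
    tensors, infos, boxes = [], [], []
    for rec in example_recs:
        im_tensor, im_info, gt_boxes = get_image(rec, short=600, max_size=1000,
                                                 mean=rgb_mean, std=rgb_std)
        tensors.append(im_tensor)
        infos.append(im_info)
        boxes.append(gt_boxes)
    # Pad every per-image tensor to the largest shape and stack along a new batch axis.
    batch_images = tensor_vstack(tensors)            # (2, 3, max_height, max_width)
    batch_infos = tensor_vstack(infos)               # (2, 3)
    batch_gt_boxes = tensor_vstack(boxes, pad=-1)    # (2, max_num_boxes, 5), padded with -1
    print(batch_images.shape, batch_infos.shape, batch_gt_boxes.shape)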