| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| import os |
| from collections import namedtuple |
| from uuid import uuid4 |
| import numpy as _np |
| import mxnet as mx |
| from mxnet import gluon, autograd, np, npx |
| from mxnet.test_utils import use_np, assert_almost_equal, check_gluon_hybridize_consistency, same, check_symbolic_backward |
| from common import assertRaises, xfail_when_nonstandard_decimal_separator |
| import random |
| from mxnet.base import MXNetError |
| from mxnet.gluon.data.vision import transforms |
| from mxnet import image |
| import pytest |
| |
| @use_np |
| def test_to_tensor(): |
| # 3D Input |
| data_in = np.random.uniform(0, 255, (300, 300, 3)).astype(dtype=np.uint8) |
| out_nd = transforms.ToTensor()(np.array(data_in, dtype='uint8')) |
| assert_almost_equal(out_nd.asnumpy(), np.transpose( |
| data_in.astype(dtype=np.float32) / 255.0, (2, 0, 1))) |
| |
| # 4D Input |
| data_in = np.random.uniform(0, 255, (5, 300, 300, 3)).astype(dtype=np.uint8) |
| out_nd = transforms.ToTensor()(np.array(data_in, dtype='uint8')) |
| assert_almost_equal(out_nd.asnumpy(), np.transpose( |
| data_in.astype(dtype=np.float32) / 255.0, (0, 3, 1, 2))) |
| |
| # Invalid Input |
| invalid_data_in = np.random.uniform(0, 255, (5, 5, 300, 300, 3)).astype(dtype=np.uint8) |
| transformer = transforms.ToTensor() |
| assertRaises(MXNetError, transformer, invalid_data_in) |
| |
| # Bounds (0->0, 255->1) |
| data_in = np.zeros((10, 20, 3)).astype(dtype=np.uint8) |
| out_nd = transforms.ToTensor()(np.array(data_in, dtype='uint8')) |
| assert same(out_nd.asnumpy(), np.transpose(np.zeros(data_in.shape, dtype=np.float32), (2, 0, 1))) |
| |
| data_in = np.full((10, 20, 3), 255).astype(dtype=np.uint8) |
| out_nd = transforms.ToTensor()(np.array(data_in, dtype='uint8')) |
| assert same(out_nd.asnumpy(), np.transpose(np.ones(data_in.shape, dtype=np.float32), (2, 0, 1))) |
| |
| |
| @use_np |
| def test_normalize(): |
| # 3D Input |
| data_in_3d = np.random.uniform(0, 1, (3, 300, 300)) |
| out_nd_3d = transforms.Normalize(mean=(0, 1, 2), std=(3, 2, 1))(data_in_3d) |
| data_expected_3d = data_in_3d.asnumpy() |
| data_expected_3d[:][:][0] = data_expected_3d[:][:][0] / 3.0 |
| data_expected_3d[:][:][1] = (data_expected_3d[:][:][1] - 1.0) / 2.0 |
| data_expected_3d[:][:][2] = data_expected_3d[:][:][2] - 2.0 |
| assert_almost_equal(data_expected_3d, out_nd_3d.asnumpy()) |
| |
| # 4D Input |
| data_in_4d = np.random.uniform(0, 1, (2, 3, 300, 300)) |
| out_nd_4d = transforms.Normalize(mean=(0, 1, 2), std=(3, 2, 1))(data_in_4d) |
| data_expected_4d = data_in_4d.asnumpy() |
| data_expected_4d[0][:][:][0] = data_expected_4d[0][:][:][0] / 3.0 |
| data_expected_4d[0][:][:][1] = (data_expected_4d[0][:][:][1] - 1.0) / 2.0 |
| data_expected_4d[0][:][:][2] = data_expected_4d[0][:][:][2] - 2.0 |
| data_expected_4d[1][:][:][0] = data_expected_4d[1][:][:][0] / 3.0 |
| data_expected_4d[1][:][:][1] = (data_expected_4d[1][:][:][1] - 1.0) / 2.0 |
| data_expected_4d[1][:][:][2] = data_expected_4d[1][:][:][2] - 2.0 |
| assert_almost_equal(data_expected_4d, out_nd_4d.asnumpy()) |
| |
| # Invalid Input - Neither 3D or 4D input |
| invalid_data_in = np.random.uniform(0, 1, (5, 5, 3, 300, 300)) |
| normalize_transformer = transforms.Normalize(mean=(0, 1, 2), std=(3, 2, 1)) |
| assertRaises(MXNetError, normalize_transformer, invalid_data_in) |
| |
| # Invalid Input - Channel neither 1 or 3 |
| invalid_data_in = np.random.uniform(0, 1, (5, 4, 300, 300)) |
| normalize_transformer = transforms.Normalize(mean=(0, 1, 2), std=(3, 2, 1)) |
| assertRaises(MXNetError, normalize_transformer, invalid_data_in) |
| |
| |
| @use_np |
| def test_resize(): |
| def _test_resize_with_diff_type(dtype): |
| # test normal case |
| data_in = np.random.uniform(0, 255, (300, 200, 3)).astype(dtype) |
| out_nd = transforms.Resize(200)(data_in) |
| data_expected = mx.image.imresize(data_in, 200, 200, 1) |
| assert_almost_equal(out_nd.asnumpy(), data_expected.asnumpy()) |
| # test 4D input |
| data_bath_in = np.random.uniform(0, 255, (3, 300, 200, 3)).astype(dtype) |
| out_batch_nd = transforms.Resize(200)(data_bath_in) |
| for i in range(len(out_batch_nd)): |
| assert_almost_equal(mx.image.imresize(data_bath_in[i], 200, 200, 1).asnumpy(), |
| out_batch_nd[i].asnumpy()) |
| # test interp = 2 |
| out_nd = transforms.Resize(200, interpolation=2)(data_in) |
| data_expected = mx.image.imresize(data_in, 200, 200, 2) |
| assert_almost_equal(out_nd.asnumpy(), data_expected.asnumpy()) |
| # test height not equals to width |
| out_nd = transforms.Resize((200, 100))(data_in) |
| data_expected = mx.image.imresize(data_in, 200, 100, 1) |
| assert_almost_equal(out_nd.asnumpy(), data_expected.asnumpy()) |
| # test keep_ratio |
| out_nd = transforms.Resize(150, keep_ratio=True)(data_in) |
| data_expected = mx.image.imresize(data_in, 150, 225, 1) |
| assert_almost_equal(out_nd.asnumpy(), data_expected.asnumpy()) |
| # test size below zero |
| invalid_transform = transforms.Resize(-150, keep_ratio=True) |
| assertRaises(MXNetError, invalid_transform, data_in) |
| # test size more than 2: |
| invalid_transform = transforms.Resize((100, 100, 100), keep_ratio=True) |
| assertRaises(MXNetError, invalid_transform, data_in) |
| |
| for dtype in ['uint8', 'float32', 'float64']: |
| _test_resize_with_diff_type(dtype) |
| |
| |
| @use_np |
| def test_crop_resize(): |
| def _test_crop_resize_with_diff_type(dtype): |
| # test normal case |
| data_in = np.arange(60).reshape((5, 4, 3)).astype(dtype) |
| out_nd = transforms.CropResize(0, 0, 3, 2)(data_in) |
| out_np = out_nd.asnumpy() |
| assert(out_np.sum() == 180) |
| assert((out_np[0:2,1,1].flatten() == [4, 16]).all()) |
| # test 4D input |
| data_bath_in = np.arange(180).reshape((2, 6, 5, 3)).astype(dtype) |
| out_batch_nd = transforms.CropResize(1, 2, 3, 4)(data_bath_in) |
| out_batch_np = out_batch_nd.asnumpy() |
| assert(out_batch_np.sum() == 7524) |
| assert((out_batch_np[0:2,0:4,1,1].flatten() == [37, 52, 67, 82, 127, 142, 157, 172]).all()) |
| # test normal case with resize |
| data_in = np.random.uniform(0, 255, (300, 200, 3)).astype(dtype) |
| out_nd = transforms.CropResize(0, 0, 100, 50, (25, 25), 1)(data_in) |
| data_expected = transforms.Resize(size=25, interpolation=1)(data_in[:50, :100, :3]) #nd.slice(data_in, (0, 0, 0), (50, 100, 3))) |
| assert_almost_equal(out_nd.asnumpy(), data_expected.asnumpy()) |
| # test 4D input with resize |
| data_bath_in = np.random.uniform(0, 255, (3, 300, 200, 3)).astype(dtype) |
| out_batch_nd = transforms.CropResize(0, 0, 100, 50, (25, 25), 1)(data_bath_in) |
| for i in range(len(out_batch_nd)): |
| actual = transforms.Resize(size=25, interpolation=1)(data_bath_in[i][:50, :100, :3]).asnumpy() #(nd.slice(data_bath_in[i], (0, 0, 0), (50, 100, 3))).asnumpy() |
| expected = out_batch_nd[i].asnumpy() |
| assert_almost_equal(expected, actual) |
| # test with resize height and width should be greater than 0 |
| transformer = transforms.CropResize(0, 0, 100, 50, (-25, 25), 1) |
| assertRaises(MXNetError, transformer, data_in) |
| # test height and width should be greater than 0 |
| transformer = transforms.CropResize(0, 0, -100, -50) |
| assertRaises(MXNetError, transformer, data_in) |
| # test cropped area is bigger than input data |
| transformer = transforms.CropResize(150, 200, 200, 500) |
| assertRaises(MXNetError, transformer, data_in) |
| assertRaises(MXNetError, transformer, data_bath_in) |
| |
| for dtype in ['uint8', 'float32', 'float64']: |
| _test_crop_resize_with_diff_type(dtype) |
| |
| # test npx.image.crop backward |
| def test_crop_backward(test_nd_arr, TestCase): |
| a_np = test_nd_arr.asnumpy() |
| b_np = a_np[(slice(TestCase.y, TestCase.y + TestCase.height), slice(TestCase.x, TestCase.x + TestCase.width), slice(0, 3))] |
| |
| data = mx.sym.Variable('data') |
| crop_sym = mx.sym.image.crop(data, TestCase.x, TestCase.y, TestCase.width, TestCase.height) |
| |
| expected_in_grad = np.zeros_like(np.array(a_np)) |
| expected_in_grad[(slice(TestCase.y, TestCase.y + TestCase.height), slice(TestCase.x, TestCase.x + TestCase.width), slice(0, 3))] = b_np |
| check_symbolic_backward(crop_sym, [a_np], [b_np], [expected_in_grad]) |
| |
| TestCase = namedtuple('TestCase', ['x', 'y', 'width', 'height']) |
| test_list = [TestCase(0, 0, 3, 3), TestCase(2, 1, 1, 2), TestCase(0, 1, 3, 2)] |
| |
| for dtype in ['uint8', 'float32', 'float64']: |
| data_in = np.arange(60).reshape((5, 4, 3)).astype(dtype) |
| for test_case in test_list: |
| test_crop_backward(data_in, test_case) |
| |
| |
| @use_np |
| def test_flip_left_right(): |
| data_in = np.random.uniform(0, 255, (300, 300, 3)).astype(dtype=np.uint8) |
| flip_in = data_in[:, ::-1, :] |
| data_trans = npx.image.flip_left_right(np.array(data_in, dtype='uint8')) |
| assert_almost_equal(flip_in, data_trans.asnumpy()) |
| |
| |
| @use_np |
| def test_flip_top_bottom(): |
| data_in = np.random.uniform(0, 255, (300, 300, 3)).astype(dtype=np.uint8) |
| flip_in = data_in[::-1, :, :] |
| data_trans = npx.image.flip_top_bottom(np.array(data_in, dtype='uint8')) |
| assert_almost_equal(flip_in, data_trans.asnumpy()) |
| |
| |
| @use_np |
| def test_transformer(): |
| from mxnet.gluon.data.vision import transforms |
| |
| transform = transforms.Compose([ |
| transforms.Resize(300), |
| transforms.Resize(300, keep_ratio=True), |
| transforms.CenterCrop(256), |
| transforms.RandomCrop(256, pad=16), |
| transforms.RandomResizedCrop(224), |
| transforms.RandomFlipLeftRight(), |
| transforms.RandomColorJitter(0.1, 0.1, 0.1, 0.1), |
| transforms.RandomBrightness(0.1), |
| transforms.RandomContrast(0.1), |
| transforms.RandomSaturation(0.1), |
| transforms.RandomHue(0.1), |
| transforms.RandomLighting(0.1), |
| transforms.ToTensor(), |
| transforms.RandomRotation([-10., 10.]), |
| transforms.Normalize([0, 0, 0], [1, 1, 1])]) |
| |
| transform(mx.np.ones((245, 480, 3), dtype='uint8')).wait_to_read() |
| |
| @use_np |
| def test_random_crop(): |
| x = mx.np.ones((245, 480, 3), dtype='uint8') |
| y = mx.npx.image.random_crop(x, width=100, height=100) |
| assert y.shape == (100, 100, 3) |
| |
| @use_np |
| def test_random_resize_crop(): |
| x = mx.np.ones((245, 480, 3), dtype='uint8') |
| y = mx.npx.image.random_resized_crop(x, width=100, height=100) |
| assert y.shape == (100, 100, 3) |
| |
| @use_np |
| def test_hybrid_transformer(): |
| from mxnet.gluon.data.vision import transforms |
| |
| transform = transforms.HybridCompose([ |
| transforms.Resize(300), |
| transforms.Resize(300, keep_ratio=True), |
| transforms.CenterCrop(256), |
| transforms.RandomCrop(256, pad=16), |
| transforms.RandomFlipLeftRight(), |
| transforms.RandomColorJitter(0.1, 0.1, 0.1, 0.1), |
| transforms.RandomBrightness(0.1), |
| transforms.RandomContrast(0.1), |
| transforms.RandomSaturation(0.1), |
| transforms.RandomHue(0.1), |
| transforms.RandomLighting(0.1), |
| transforms.ToTensor(), |
| transforms.Normalize([0, 0, 0], [1, 1, 1])]) |
| |
| transform(mx.np.ones((245, 480, 3), dtype='uint8')).wait_to_read() |
| |
| @xfail_when_nonstandard_decimal_separator |
| @use_np |
| def test_rotate(): |
| transformer = transforms.Rotate(10.) |
| assertRaises(TypeError, transformer, mx.np.ones((3, 30, 60), dtype='uint8')) |
| single_image = mx.np.ones((3, 30, 60), dtype='float32') |
| single_output = transformer(single_image) |
| assert same(single_output.shape, (3, 30, 60)) |
| batch_image = mx.np.ones((3, 3, 30, 60), dtype='float32') |
| batch_output = transformer(batch_image) |
| assert same(batch_output.shape, (3, 3, 30, 60)) |
| |
| input_image = np.array([[[0., 0., 0.], |
| [0., 0., 1.], |
| [0., 0., 0.]]]) |
| rotation_angles_expected_outs = [ |
| (90., np.array([[[0., 1., 0.], |
| [0., 0., 0.], |
| [0., 0., 0.]]])), |
| (180., np.array([[[0., 0., 0.], |
| [1., 0., 0.], |
| [0., 0., 0.]]])), |
| (270., np.array([[[0., 0., 0.], |
| [0., 0., 0.], |
| [0., 1., 0.]]])), |
| (360., np.array([[[0., 0., 0.], |
| [0., 0., 1.], |
| [0., 0., 0.]]])), |
| ] |
| for rot_angle, expected_result in rotation_angles_expected_outs: |
| transformer = transforms.Rotate(rot_angle) |
| ans = transformer(input_image) |
| print(type(ans), ans, type(expected_result), expected_result) |
| assert_almost_equal(ans.asnumpy(), expected_result.asnumpy(), atol=1e-6) |
| |
| |
| @use_np |
| def test_random_rotation(): |
| # test exceptions for probability input outside of [0,1] |
| assertRaises(ValueError, transforms.RandomRotation, [-10, 10.], rotate_with_proba=1.1) |
| assertRaises(ValueError, transforms.RandomRotation, [-10, 10.], rotate_with_proba=-0.3) |
| # test `forward` |
| transformer = transforms.RandomRotation([-10, 10.]) |
| assertRaises(TypeError, transformer, mx.np.ones((3, 30, 60), dtype='uint8')) |
| single_image = mx.np.ones((3, 30, 60), dtype='float32') |
| single_output = transformer(single_image) |
| assert same(single_output.shape, (3, 30, 60)) |
| batch_image = mx.np.ones((3, 3, 30, 60), dtype='float32') |
| batch_output = transformer(batch_image) |
| assert same(batch_output.shape, (3, 3, 30, 60)) |
| # test identity (rotate_with_proba = 0) |
| transformer = transforms.RandomRotation([-100., 100.], rotate_with_proba=0.0) |
| data = mx.np.random.normal(size=(3, 30, 60)) |
| assert_almost_equal(data.asnumpy(), transformer(data).asnumpy()) |
| |
| |
| @use_np |
| def test_random_transforms(): |
| from mxnet.gluon.data.vision import transforms |
| |
| tmp_t = transforms.Compose([transforms.Resize(300), transforms.RandomResizedCrop(224)]) |
| counter = 0 |
| def transform_fn(x): |
| nonlocal counter |
| counter += 1 |
| return x |
| transform = transforms.Compose([transforms.RandomApply(transform_fn, 0.5)]) |
| |
| img = mx.np.ones((10, 10, 3), dtype='uint8') |
| iteration = 10000 |
| num_apply = 0 |
| for _ in range(iteration): |
| out = transform(img) |
| assert counter == pytest.approx(5000, 1e-1) |
| |
| @xfail_when_nonstandard_decimal_separator |
| @use_np |
| @pytest.mark.flaky |
| def test_random_gray(): |
| from mxnet.gluon.data.vision import transforms |
| |
| transform = transforms.RandomGray(0.5) |
| img = mx.np.ones((4, 4, 3), dtype='uint8') |
| pixel = img[0, 0, 0].asnumpy() |
| iteration = 1000 |
| num_apply = 0 |
| for _ in range(iteration): |
| out = transform(img) |
| if out[0][0][0].asnumpy() != pixel: |
| num_apply += 1 |
| assert_almost_equal(num_apply/float(iteration), 0.5, 0.1) |
| |
| transform = transforms.RandomGray(0.5) |
| transform.hybridize() |
| img = mx.np.ones((4, 4, 3), dtype='uint8') |
| pixel = img[0, 0, 0].asnumpy() |
| iteration = 1000 |
| num_apply = 0 |
| for _ in range(iteration): |
| out = transform(img) |
| if out[0][0][0].asnumpy() != pixel: |
| num_apply += 1 |
| assert_almost_equal(num_apply/float(iteration), 0.5, 0.1) |
| |
| @use_np |
| def test_bbox_random_flip(): |
| from mxnet.gluon.contrib.data.vision.transforms.bbox import ImageBboxRandomFlipLeftRight |
| |
| transform = ImageBboxRandomFlipLeftRight(0.5) |
| iteration = 200 |
| num_apply = 0 |
| for _ in range(iteration): |
| img = mx.np.ones((10, 10, 3), dtype='uint8') |
| img[0, 0, 0] = 10 |
| bbox = mx.np.array([[1, 2, 3, 4, 0]]) |
| im_out, im_bbox = transform(img, bbox) |
| if im_bbox[0][0].asnumpy() != 1 and im_out[0, 0, 0].asnumpy() != 10: |
| num_apply += 1 |
| assert_almost_equal(np.array([num_apply])/float(iteration), 0.5, 0.5) |
| |
| @use_np |
| def test_bbox_crop(): |
| from mxnet.gluon.contrib.data.vision.transforms.bbox import ImageBboxCrop |
| |
| transform = ImageBboxCrop((0, 0, 3, 3)) |
| img = mx.np.ones((10, 10, 3), dtype='uint8') |
| bbox = mx.np.array([[0, 1, 3, 4, 0]]) |
| im_out, im_bbox = transform(img, bbox) |
| assert im_out.shape == (3, 3, 3) |
| assert im_bbox[0][2] == 3 |