| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| import os |
| import sys |
| import tempfile |
| import math |
| import numpy as _np |
| import mxnet as mx |
| |
| curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) |
| sys.path.append(os.path.join(curr_path, '../python/unittest/')) |
| |
| from mxnet.test_utils import rand_ndarray, assert_almost_equal, rand_coord_2d, default_device, check_symbolic_forward, create_2d_np_tensor, use_np |
| from mxnet import gluon, np, npx |
| import pytest |
| from tests.python.unittest.common import assertRaises |
| from mxnet.base import MXNetError |
| |
# dimension constants
# INT_OVERFLOW = 2**31 is the smallest size that no longer fits in a signed
# 32-bit index, so tensors at or above it exercise int64 (large tensor) support
| MEDIUM_X = 10000 |
| LARGE_X = 100000000 |
| SMALL_X = 100 |
| SMALL_Y = 50 |
| INT_OVERFLOW = 2**31 |
| HALF_INT_OVERFLOW = 2**30 |
| DOUBLE_INT_OVERFLOW = 2**32 |
| |
| |
| @use_np |
| def test_gluon_embedding(): |
| m = gluon.nn.Embedding(SMALL_Y, MEDIUM_X) |
| m.initialize() |
| a = np.zeros((MEDIUM_X, SMALL_Y)) |
| b = m(a) |
| assert b.shape == (MEDIUM_X, SMALL_Y, MEDIUM_X) |
| assert b.asnumpy().size == MEDIUM_X * SMALL_Y * MEDIUM_X |
| |
| |
| @use_np |
| def test_fully_connected(): |
| a = np.ones(shape=(LARGE_X, SMALL_Y)) |
| b = np.ones(shape=(SMALL_Y, SMALL_Y)) |
| c = np.ones(shape=(b.shape[0],)) |
| |
| # w/o bias |
| res = mx.npx.fully_connected(a, b, num_hidden=b.shape[0], no_bias=True) |
| assert np.sum(res[-1] == a.shape[1]) == b.shape[0] |
| |
| # w/ bias |
| res = mx.npx.fully_connected(a, b, c, num_hidden=b.shape[0], no_bias=False) |
| assert np.sum(res[-1] == a.shape[1] + 1) == b.shape[0] |
| |
| |
| @use_np |
| def test_dense(): |
| data = np.ones(shape=(LARGE_X, SMALL_X)) |
| linear = gluon.nn.Dense(SMALL_Y) |
| linear.initialize() |
| res = linear(data) |
| assert res.shape == (LARGE_X, SMALL_Y) |
| |
| |
| @use_np |
| def test_softmax(): |
| input_data = np.ones((SMALL_Y, LARGE_X)) |
| for axis in [0, 1]: |
| true_output = np.full((SMALL_Y, LARGE_X), (1 / input_data.shape[axis])) |
| output = npx.softmax(input_data, axis=axis) |
| assert_almost_equal(output.asnumpy(), true_output, rtol=1e-5, atol=1e-5) |
| |
| |
| ''' |
| _ _ _ _ _ __ _ __ _ _ |
| | ' \ || | ' \| '_ \ || | |
| |_||_\_,_|_|_|_| .__/\_, | |
| |_| |__/ |
| ''' |
| |
| @use_np |
| def test_ones(): |
| A = np.ones((INT_OVERFLOW, 2)) |
| assert A.shape == (INT_OVERFLOW, 2) |
| assert A[0][0] == 1 |
| |
| |
| @use_np |
| def test_zeros(): |
| A = np.zeros((INT_OVERFLOW, 2)) |
| assert A.shape == (INT_OVERFLOW, 2) |
| assert A[0][0] == 0 |
| |
| |
| @use_np |
| def test_ones_like(): |
| inp = np.ones((2, INT_OVERFLOW)) |
| out = np.ones_like(inp) |
| assert out.shape == inp.shape |
| assert out[0, 0] == 1 and out[-1, -1] == 1 |
| |
| |
| @use_np |
| def test_zeros_like(): |
| inp = np.ones((INT_OVERFLOW, 2)) |
| out = np.zeros_like(inp) |
| assert out.shape == inp.shape |
| assert out[0, 0] == 0 and out[-1, -1] == 0 |
| |
| @use_np |
| def test_abs(): |
    # np.abs, np.absolute, and np.fabs are the same operator
| inp = np.zeros((INT_OVERFLOW, 2)) |
| inp[-1, -1] = -1 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.abs(inp) |
| out.backward() |
| assert out.shape == (INT_OVERFLOW, 2) |
| assert out[-1, -1] == 1 |
| assert inp.grad.shape == (INT_OVERFLOW, 2) |
| assert inp.grad[-1, -1] == -1 |
| |
| |
| @use_np |
| def test_binary_broadcast(): |
| A = np.ones((INT_OVERFLOW, 2)) |
| B = np.ones((INT_OVERFLOW, 1)) |
| C = np.add(A, B) |
| assert C.shape == (INT_OVERFLOW, 2) |
| assert C[0][0] == 2 |
| |
| |
| @use_np |
| def test_all(): |
| A = np.ones((INT_OVERFLOW, 2)) |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = np.all(A) |
| assert B == True |
| B.backward() |
| assert A.grad.shape == (INT_OVERFLOW, 2) |
| assert A.grad[0][0] == 0 |
| |
| |
| @use_np |
| def test_amin(): |
| inp = np.ones((INT_OVERFLOW, 2)) |
| inp[-1, -1] = -1 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.amin(inp) |
| out.backward() |
| assert out == -1.0 |
| assert inp.grad.shape == (INT_OVERFLOW, 2) |
| assert inp.grad[0, 0] == 0 and inp.grad[-1, -1] == 1 |
| |
| |
| @use_np |
| def test_amax(): |
| inp = np.zeros((INT_OVERFLOW, 2)) |
| inp[-1, -1] = 1 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.amax(inp) |
| out.backward() |
| assert out == 1.0 |
| assert inp.grad.shape == (INT_OVERFLOW, 2) |
| assert inp.grad[0, 0] == 0 and inp.grad[-1, -1] == 1 |
| |
| |
| @use_np |
| def test_argmin(): |
| A = np.ones((INT_OVERFLOW, 2)) |
| A[10][1] = -1 |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = np.argmin(A) |
| assert B == 21 |
| B.backward() |
| assert A.grad.shape == (INT_OVERFLOW, 2) |
| assert A.grad[0][0] == 0 |
| |
| |
| @use_np |
| def test_argmax(): |
| A = np.zeros((INT_OVERFLOW, 2)) |
| A[10][1] = 1 |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = np.argmax(A) |
| assert B == 21 |
| B.backward() |
| assert A.grad.shape == (INT_OVERFLOW, 2) |
| assert A.grad[0][0] == 0 |
| |
| |
| @use_np |
| @pytest.mark.skip(reason='times out (20 mins)') |
| def test_trigonometric_family(): |
| def batch_check(x, funcs): |
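        # evaluate each op on the oversized tensor and on a unit-size
        # reference, then check that forward values and gradients agree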
| for f in funcs: |
| one = np.ones((1)) |
| x.attach_grad() |
| one.attach_grad() |
| with mx.autograd.record(): |
| y = f(x) |
                y_ref = f(one)
            assert y.shape == (INT_OVERFLOW, 2)
            assert y[0][0] == y_ref
            y.backward()
            y_ref.backward()
| assert x.grad.shape == (INT_OVERFLOW, 2) |
| assert x.grad[0][0] == one.grad |
| A = np.ones((INT_OVERFLOW, 2)) |
    batch_check(A, [np.arccos, np.arccosh, np.arcsin, \
            np.arcsinh, np.arctan, np.arctanh, np.sin, np.cos, \
            np.tan, np.sinh, np.cosh, np.tanh])
| |
| |
| @use_np |
| def test_any(): |
| A = np.zeros((INT_OVERFLOW, 2)) |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = np.any(A) |
| assert B == False |
| B.backward() |
| assert A.grad.shape == (INT_OVERFLOW, 2) |
| assert A.grad[0][0] == 0 |
| |
| |
| @use_np |
| def test_append(): |
| A = np.ones((1, INT_OVERFLOW)) |
| B = np.ones((2, INT_OVERFLOW)) |
| A.attach_grad() |
| with mx.autograd.record(): |
| C = np.append(A, B, axis=0) |
| assert C.shape == (3, INT_OVERFLOW) |
| assert C[2][0] == 1 |
| C.backward() |
| assert A.grad.shape == (1, INT_OVERFLOW) |
    assert A.grad[0][0] == 1
| |
| |
| @use_np |
| def test_arange(): |
| A = np.arange(INT_OVERFLOW, dtype='int32') |
| assert A.shape == (INT_OVERFLOW, ) |
| assert A[100] == 100 |
| |
| |
| @use_np |
| def test_argsort(): |
| A = np.ones((INT_OVERFLOW, 2)) |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = np.argsort(A) |
| assert B.shape == (INT_OVERFLOW, 2) |
| assert B[0][0] == 0 |
| B.backward() |
| assert A.grad.shape == (INT_OVERFLOW, 2) |
    assert A.grad[0][0] == 0
| |
| |
| @use_np |
| def test_atleast_xd_family(): |
| def batch_check(x, funcs, shapes): |
| for f, s in zip(funcs, shapes): |
| x.attach_grad() |
| with mx.autograd.record(): |
| y = f(x) |
| assert y.shape == s |
| y.backward() |
| assert x.grad.shape == (INT_OVERFLOW, ) |
| assert x.grad[0] == 0 |
| A = np.zeros((INT_OVERFLOW)) |
| batch_check(A, [np.atleast_1d, np.atleast_2d, np.atleast_3d], \ |
| [(INT_OVERFLOW, ), (1, INT_OVERFLOW), (1, INT_OVERFLOW, 1)]) |
| |
| |
| @use_np |
| def test_average(): |
| A = np.ones((INT_OVERFLOW, 2)) |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = np.average(A) |
| assert B == 1 |
| B.backward() |
| assert A.grad.shape == (INT_OVERFLOW, 2) |
| assert_almost_equal(A.grad[0][0], np.array([1.0 / DOUBLE_INT_OVERFLOW]), \ |
| rtol=1e-3, atol=1e-5) |
| |
| |
| @use_np |
| def test_bincount(): |
| A = np.ones((INT_OVERFLOW), dtype='int32') |
| A[0] = 0 |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = np.bincount(A) |
| assert B.shape == (2,) |
| assert B[-1] == INT_OVERFLOW - 1 |
| B.backward() |
| assert A.grad.shape == (INT_OVERFLOW, ) |
| assert A.grad[0] == 0 |
| |
| |
| @use_np |
| def test_bitwise_family(): |
| def batch_check(x1, x2, funcs): |
| x1.attach_grad() |
| for f in funcs: |
| with mx.autograd.record(): |
| y = f(x1, x2) |
| y.backward() |
| one = np.ones((1), dtype='int32') |
| assert y.shape == (INT_OVERFLOW, 2) |
| assert y[-1, -1] == f(one, one) |
| assert x1.grad.shape == x1.shape |
| assert x1.grad[-1, -1] == 0 |
| # test on broadcast input |
| inp1 = np.ones((INT_OVERFLOW, 1), dtype='int32') |
| inp2 = np.ones((INT_OVERFLOW, 2), dtype='int32') |
| batch_check(inp1, inp2, [np.bitwise_and, np.bitwise_or, np.bitwise_xor]) |
| out = np.bitwise_not(inp1) |
| assert out.shape == (INT_OVERFLOW, 1) |
| assert out[0] == np.bitwise_not(np.ones((1), dtype='int32')) |
| |
| |
| @use_np |
| def test_blackman(): |
| data = np.blackman(INT_OVERFLOW) |
| ind = int(INT_OVERFLOW / 6) |
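    # closed-form Blackman window value
    # w(n) = 0.42 - 0.5*cos(2*pi*n/N) + 0.08*cos(4*pi*n/N) at n = ind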
| ref = 0.42 - 0.5*math.cos(2*math.pi*ind/INT_OVERFLOW) \ |
| + 0.08*math.cos(4*math.pi*ind/INT_OVERFLOW) |
| assert_almost_equal(data[ind], ref, rtol=1e-3, atol=1e-5) |
| |
| |
| @use_np |
| def test_broadcast_to(): |
| A = np.ones((2)) |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = np.broadcast_to(A, (INT_OVERFLOW, 2)) |
| assert B.shape == (INT_OVERFLOW, 2) |
| assert B[0][0] == 1 |
| B.backward() |
| assert A.grad.shape == (2, ) |
| with mx.autograd.record(): |
| B = np.broadcast_to(A.reshape(2, 1), (2, INT_OVERFLOW)) |
| assert B.shape == (2, INT_OVERFLOW) |
| assert B[0][0] == 1 |
| B.backward() |
| assert A.grad.shape == (2, ) |
| |
| |
| @use_np |
| def test_root_family(): |
| def batch_check(x, funcs, grads): |
| for f, g in zip(funcs, grads): |
| x.attach_grad() |
| with mx.autograd.record(): |
| y = f(x) |
| assert y.shape == (INT_OVERFLOW, 2) |
| assert y[0][0] == 1 |
| y.backward() |
| assert x.grad.shape == (INT_OVERFLOW, 2) |
            assert_almost_equal(x.grad[0][0], np.array(g), \
                    rtol=1e-3, atol=1e-5)
| A = np.ones((INT_OVERFLOW, 2)) |
| batch_check(A, [np.sqrt, np.cbrt], [0.5, 1.0 / 3]) |
| |
| |
| @use_np |
| def test_ceil_floor(): |
| def batch_check(x, funcs): |
| for f in funcs: |
| x.attach_grad() |
| with mx.autograd.record(): |
| y = f(x) |
| assert y.shape == (INT_OVERFLOW, 2) |
| assert y[0][0] == 1 |
| y.backward() |
| assert x.grad.shape == (INT_OVERFLOW, 2) |
| assert x.grad[0][0] == 0 |
| A = np.ones((INT_OVERFLOW, 2)) |
| batch_check(A, [np.ceil, np.floor]) |
| |
| |
| @use_np |
| def test_clip(): |
| A = np.ones((INT_OVERFLOW, 2)) |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = np.clip(A, 1, 1) |
| assert B.shape == (INT_OVERFLOW, 2) |
| assert B[0][0] == 1 |
| B.backward() |
| assert A.grad.shape == (INT_OVERFLOW, 2) |
| assert A.grad[0][0] == 1 |
| |
| |
| @use_np |
| def test_column_stack(): |
| A = np.ones(INT_OVERFLOW) |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = np.column_stack((A, A)) |
| assert B.shape == (INT_OVERFLOW, 2) |
| assert B[0][0] == 1 |
| B.backward() |
| assert A.grad.shape == (INT_OVERFLOW, ) |
| assert A.grad[0] == 2 |
| |
| |
| @use_np |
| def test_concatenate(): |
    def batch_check(x1, x2, axes, shapes):
        for a, s in zip(axes, shapes):
| x1.attach_grad() |
| with mx.autograd.record(): |
| y = np.concatenate((x1, x2), axis=a) |
| assert y.shape == s |
| y.backward() |
| assert x1.grad.shape == (2, INT_OVERFLOW) |
| assert x1.grad[0][0] == 1 |
| A = np.ones((2, INT_OVERFLOW)) |
| B = np.ones((1, INT_OVERFLOW)) |
| batch_check(A, B, [0, None], \ |
| [(3, INT_OVERFLOW), (int(INT_OVERFLOW * 3), )]) |
| |
| @use_np |
| def test_copysign(): |
| inp1 = np.ones((INT_OVERFLOW, 2)) |
| inp1[-1, -1] = 2 |
| inp1.attach_grad() |
| inp2 = np.array([-1]) |
| with mx.autograd.record(): |
| out = np.copysign(inp1, inp2) |
| out.backward() |
| assert out.shape == (INT_OVERFLOW, 2) |
| assert out[-1 ,-1] == -2 |
| assert inp1.grad.shape == (INT_OVERFLOW, 2) |
| assert inp1.grad[-1, -1] == -1 |
| |
| |
| @use_np |
| def test_random_uniform(): |
| A = np.random.uniform(low=0, high=1.0, size=(INT_OVERFLOW)) |
| assert A[0] <= 1 and A[0] >= 0 |
| |
| |
| @use_np |
| def test_random_normal(): |
| A = np.random.normal(loc=0, scale=1.0, size=(INT_OVERFLOW)) |
| assert type(A[0]).__name__ == 'ndarray' |
| |
| @use_np |
| @pytest.mark.skip(reason='times out (20 mins)') |
| def test_random_gamma(): |
| A = np.random.gamma(shape=1.0, size=(INT_OVERFLOW)) |
| assert type(A[0]).__name__ == 'ndarray' |
| |
| |
| @use_np |
| def test_random_exponential(): |
| A = np.random.exponential(size=(INT_OVERFLOW)) |
| assert type(A[0]).__name__ == 'ndarray' |
| |
| |
| @use_np |
| def test_random_laplace(): |
| A = np.random.laplace(loc=0, scale=1.0, size=(INT_OVERFLOW)) |
| assert type(A[0]).__name__ == 'ndarray' |
| |
| |
| @use_np |
| def test_random_choice(): |
| A = np.random.choice(a=10, size=(INT_OVERFLOW)) |
    assert A[0] < 10 and A[0] >= 0
| |
| |
| @use_np |
| def test_random_gumbel(): |
| A = np.random.gumbel(loc=0, scale=1.0, size=(INT_OVERFLOW)) |
| assert type(A[0]).__name__ == 'ndarray' |
| |
| |
| @use_np |
| def test_random_logistic(): |
| A = np.random.logistic(loc=0, scale=1.0, size=(INT_OVERFLOW)) |
| assert type(A[0]).__name__ == 'ndarray' |
| |
| @use_np |
| @pytest.mark.skip(reason='times out (20 mins)') |
| def test_random_multinomial(): |
| A = np.random.multinomial(pvals=np.zeros(INT_OVERFLOW), n=1) |
| assert A[-1] == 1 |
| |
| @use_np |
| def test_random_pareto(): |
| A = np.random.pareto(a=1.0, size=(INT_OVERFLOW)) |
| assert type(A[0]).__name__ == 'ndarray' |
| |
| |
| @use_np |
| def test_random_power(): |
| A = np.random.power(a=1.0, size=(INT_OVERFLOW)) |
| assert type(A[0]).__name__ == 'ndarray' |
| |
| |
| @use_np |
| def test_random_rayleigh(): |
| A = np.random.rayleigh(scale=1.0, size=(INT_OVERFLOW)) |
| assert type(A[0]).__name__ == 'ndarray' |
| |
| |
| @use_np |
| def test_random_weibull(): |
| A = np.random.weibull(a=1.0, size=(INT_OVERFLOW)) |
| assert type(A[0]).__name__ == 'ndarray' |
| |
| |
| @use_np |
| def test_random_shuffle(): |
| A = np.ones((INT_OVERFLOW, 2)) |
| np.random.shuffle(A) |
| assert type(A[0]).__name__ == 'ndarray' |
| |
| |
| @use_np |
| def test_random_lognormal(): |
    A = np.random.lognormal(mean=0, sigma=1.0, size=(INT_OVERFLOW))
| assert type(A[0]).__name__ == 'ndarray' |
| |
| |
| @use_np |
| def test_random_randint(): |
    A = np.random.randint(low=0, high=5, size=(2, INT_OVERFLOW))
| assert A[0][0] < 5 and A[0][0] >= 0 |
| |
| |
| @use_np |
| def test_slice_assign(): |
| # test _slice_assign |
| A = np.zeros((INT_OVERFLOW, 2)) |
| A[-1] = np.ones((1)) |
| assert A[-1, 0] == 1 and A[-1, 1] == 1 |
| # test _slice_assign_scalar |
| B = np.zeros((INT_OVERFLOW, 2)) |
| B[-1] = 2 |
| assert B[-1, 0] == 2 and B[-1, 1] == 2 |
| |
| @use_np |
| def test_flatnonzero(): |
| inp = np.zeros((2, INT_OVERFLOW)) |
| inp[-1, -1] = 1 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.flatnonzero(inp) |
| out.backward() |
| assert out.shape == (1, ) |
| assert out[0] == int(2 * INT_OVERFLOW - 1) |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1] == 0 |
| |
| @use_np |
| def test_ravel(): |
| inp = np.zeros((2, INT_OVERFLOW)) |
| inp[0, -1], inp[-1, -1] = 1, 2 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.ravel(inp) |
| out.backward() |
| assert out.shape == (DOUBLE_INT_OVERFLOW, ) |
| assert out[INT_OVERFLOW-1] == 1 and out[-1] == 2 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1] == 1 |
| |
| @use_np |
| def test_mean(): |
| inp = np.arange(DOUBLE_INT_OVERFLOW).reshape((2, INT_OVERFLOW)) |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.mean(inp, axis=1) |
| out.backward() |
| assert out.shape == (2, ) |
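    # row 0 holds 0 .. INT_OVERFLOW-1, so its mean is
    # (INT_OVERFLOW-1) / 2 = HALF_INT_OVERFLOW - 0.5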
| assert_almost_equal(out[0], np.array((HALF_INT_OVERFLOW-0.5)), \ |
| rtol=1e-3, atol=1e-5) |
| assert inp.grad.shape == inp.shape |
| assert_almost_equal(inp.grad[-1, -1], np.array((1.0/INT_OVERFLOW)), \ |
| rtol=1e-3, atol=1e-5) |
| |
| @use_np |
| def test_median(): |
| inp = np.arange(DOUBLE_INT_OVERFLOW).reshape((2, INT_OVERFLOW)) |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.median(inp, axis=1) |
| out.backward() |
| assert out.shape == (2, ) |
| assert_almost_equal(out[0], np.array((HALF_INT_OVERFLOW-0.5)), \ |
| rtol=1e-3, atol=1e-5) |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1] == 0 |
| |
| @use_np |
| def test_percentile(): |
| # np.percentile and np.quantile share the same implementation |
| inp = np.arange(DOUBLE_INT_OVERFLOW).reshape((2, INT_OVERFLOW)) |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.percentile(inp, 50, axis=1) |
| out.backward() |
| assert out.shape == (2, ) |
| assert_almost_equal(out[0], np.array((HALF_INT_OVERFLOW-0.5)), \ |
| rtol=1e-3, atol=1e-5) |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1] == 0 |
| |
| @use_np |
| def test_shares_memory(): |
| # np.shares_memory and np.may_share_memory share the same implementation |
| inp = np.ones((2, INT_OVERFLOW)) |
| out = np.shares_memory(inp[0,:100], inp[0,100:]) |
| out2 = np.shares_memory(inp[1,:101], inp[1,100:]) |
| assert out == False and out2 == True |
| |
| @use_np |
| @pytest.mark.skip(reason='times out (20 mins)') |
| def test_where(): |
| inp1 = np.zeros((2, INT_OVERFLOW)) |
| inp1[-1, -1] = 1 |
| inp2 = inp1 + 1 |
| inp1.attach_grad() |
| inp2.attach_grad() |
| with mx.autograd.record(): |
| out = np.where(inp1==0, inp1, inp2) |
| out.backward() |
| assert out.shape == inp1.shape |
| assert out[0, 0] == 0 and out[-1, -1] == 2 |
| assert inp1.grad.shape == inp1.shape |
| assert inp1.grad[0, 0] == 1 and inp1.grad[-1, -1] == 0 |
| assert inp2.grad.shape == inp2.shape |
| assert inp2.grad[0, 0] == 0 and inp2.grad[-1, -1] == 1 |
| # one side is scalar |
| with mx.autograd.record(): |
| out = np.where(inp1==0, inp1, 2) |
| out.backward() |
| assert out.shape == inp1.shape |
| assert out[0, 0] == 0 and out[-1, -1] == 2 |
| assert inp1.grad.shape == inp1.shape |
| assert inp1.grad[0, 0] == 1 and inp1.grad[-1, -1] == 0 |
    # both sides are scalars
| with mx.autograd.record(): |
| out = np.where(inp1==0, 0, 2) |
| out.backward() |
| assert out.shape == inp1.shape |
| assert out[0, 0] == 0 and out[-1, -1] == 2 |
| assert inp1.grad.shape == inp1.shape |
| assert inp1.grad[-1, -1] == 0 |
| |
| @use_np |
| def test_logical_family(): |
| def batch_check(x1, x2, funcs): |
| x1.attach_grad() |
| for f in funcs: |
| with mx.autograd.record(): |
| y = f(x1, x2) |
| y.backward() |
| assert y.shape == x1.shape |
| assert y[0] == f(x1[0], x2[0]) |
| assert x1.grad.shape == x1.shape |
| assert x1.grad[0] == 0 |
| |
| inp1 = np.zeros((INT_OVERFLOW), dtype='int32') |
| inp2 = np.ones((INT_OVERFLOW), dtype='int32') |
| batch_check(inp1, inp2, [np.logical_and, np.logical_or, np.logical_xor]) |
| inp2.attach_grad() |
| with mx.autograd.record(): |
| out = np.logical_not(inp2) |
| out.backward() |
| assert out.shape == inp2.shape |
| assert out[0] == 0 |
| assert inp2.grad.shape == inp2.shape |
| assert inp2.grad[0] == 0 |
| |
| |
| @use_np |
| def test_deg_rad(): |
| # deg2rad is the same thing as radians |
| # rad2deg is the same thing as degrees |
| inp = np.zeros((INT_OVERFLOW, 2)) |
| inp[-1, -1] = 180 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.deg2rad(inp) |
| out.backward() |
| assert out.shape == inp.shape |
| assert out[0, 0] == 0 |
| assert_almost_equal(out[-1, -1], np.array([np.pi]), rtol=1e-5, atol=1e-5) |
| assert inp.grad.shape == inp.shape |
| assert_almost_equal(inp.grad[0, 0], np.array([1.0 / 180 * np.pi]), rtol=1e-5, atol=1e-5) |
| out.attach_grad() |
| with mx.autograd.record(): |
| out2 = np.rad2deg(out) |
| out2.backward() |
| assert out2.shape == out.shape |
| assert out2[0, 0] == 0 and out2[-1, -1] == 180 |
| assert out.grad.shape == out.shape |
| assert_almost_equal(out.grad[0, 0], np.array([180.0 / np.pi]), rtol=1e-5, atol=1e-5) |
| |
| |
| @use_np |
| def test_divide(): |
| # np.divide and np.true_divide are the same thing |
| inp = np.ones((INT_OVERFLOW, 2)) |
| inp[-1, -1] = 10 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.divide(inp, np.array([2, 3])) |
| out.backward() |
| assert out.shape == inp.shape |
| assert_almost_equal(out[-1, -1], np.array([10 / 3]), rtol=1e-5, atol=1e-5) |
| assert inp.grad.shape == inp.shape |
| assert_almost_equal(inp.grad[-1, -1], np.array([1.0 / 3]), rtol=1e-5, atol=1e-5) |
| |
| |
| @use_np |
| def test_minimum(): |
| inp1 = np.ones((INT_OVERFLOW, 2)) |
| inp1[-1, -1] = -1 |
| inp2 = np.zeros((INT_OVERFLOW, 1)) |
| inp1.attach_grad() |
| inp2.attach_grad() |
| with mx.autograd.record(): |
| out = np.minimum(inp1, inp2) |
| out.backward() |
| assert out.shape == inp1.shape |
| assert out[-1, -1] == -1 |
| assert inp1.grad.shape == inp1.shape |
| assert inp1.grad[-1, -1] == 1 and inp1.grad[0, 0] == 0 |
| assert inp2.grad.shape == inp2.shape |
| assert inp2.grad[-1] == 1 and inp2.grad[0] == 2 |
| |
| |
| @use_np |
| def test_maximum(): |
| inp1 = np.ones((INT_OVERFLOW, 2)) |
| inp1[-1, -1] = -1 |
| inp2 = np.zeros((INT_OVERFLOW, 1)) |
| inp1.attach_grad() |
| inp2.attach_grad() |
| with mx.autograd.record(): |
| out = np.maximum(inp1, inp2) |
| out.backward() |
| assert out.shape == inp1.shape |
| assert out[-1, -1] == 0 |
| assert inp1.grad.shape == inp1.shape |
| assert inp1.grad[-1, -1] == 0 and inp1.grad[0, 0] == 1 |
| assert inp2.grad.shape == inp2.shape |
| assert inp2.grad[-1] == 1 and inp2.grad[0] == 0 |
| |
| |
| @use_np |
| def test_eye(): |
| N = 2**16 |
| data1 = np.eye(N) |
| assert data1.shape == (N, N) |
| for i in range(N): |
| assert data1[i, i] == 1 |
| assert data1[-1, -2] == 0 and data1[0, 1] == 0 |
| data2 = np.eye(N, M=N-1, k=-1) |
| assert data2.shape == (N, N-1) |
| for i in range(1, N): |
| assert data2[i, i-1] == 1 |
| assert data2[0, 0] == 0 and data2[-1, -2] == 0 |
| |
| |
| @use_np |
| def test_fix(): |
| inp = np.ones((2, INT_OVERFLOW)) |
| inp[-1, -1] = -2.9 |
| inp[0, 0] = 2.9 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.fix(inp) |
| out.backward() |
| assert out.shape == inp.shape |
| assert out[0, 0] == 2 and out[-1, -1] == -2 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1] == 0 |
| |
| |
| @use_np |
| def test_flip(): |
| inp = np.zeros((2, INT_OVERFLOW)) |
| inp[0, 0] = 2 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.flip(inp, axis=0) |
| out.backward() |
| assert out.shape == inp.shape |
| assert out[1, 0] == 2 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[0, 0] == 1 |
| out2 = np.flip(inp, axis=1) |
| assert out2[0, -1] == 2 |
| |
| |
| @use_np |
| def test_fliplr(): |
| inp = np.zeros((1, 2, INT_OVERFLOW)) |
| inp[0, 0, 0] = 2 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.fliplr(inp) |
| out.backward() |
| assert out.shape == inp.shape |
| assert out[0, 1, 0] == 2 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[0, 0, 0] == 1 |
| |
| |
| @use_np |
| def test_flipud(): |
| inp = np.zeros((2, 1, INT_OVERFLOW)) |
| inp[0, 0, 0] = 2 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.flipud(inp) |
| out.backward() |
| assert out.shape == inp.shape |
| assert out[1, 0, 0] == 2 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[0, 0, 0] == 1 |
| |
| |
| @use_np |
| def test_full(): |
| data1 = np.full((INT_OVERFLOW, 2), np.array([1, 2])) |
| assert data1.shape == (INT_OVERFLOW, 2) |
| assert data1[-1, 0] == 1 and data1[-1, 1] == 2 |
| data2 = np.full((2, INT_OVERFLOW), 3) |
| assert data2.shape == (2, INT_OVERFLOW) |
| assert data2[-1, -1] == 3 |
| |
| |
| @use_np |
| def test_full_like(): |
| inp = np.zeros((INT_OVERFLOW, 2)) |
| out = np.full_like(inp, 2) |
| assert out.shape == inp.shape |
| assert out[-1, -1] == 2 |
| |
| |
| @use_np |
| def test_comparison_family(): |
| def batch_check(funcs, exp): |
| inp1.attach_grad() |
| for f, e in zip(funcs, exp): |
| with mx.autograd.record(): |
| out = f(inp1, inp2) |
| out.backward() |
| assert out.shape == inp1.shape |
| assert (out[0, 0], out[-1, -1]) == e |
| assert inp1.grad.shape == inp1.shape |
| assert inp1.grad[-1, -1] == 0 |
| |
| inp1 = np.ones((INT_OVERFLOW, 2)) |
| inp2 = np.zeros((INT_OVERFLOW, 2)) |
| inp2[-1, -1] = 1 |
| batch_check([np.greater, np.greater_equal, \ |
| np.less, np.less_equal, np.equal, np.not_equal], \ |
| [(True, False), (True, True), \ |
| (False, False), (False, True), (False, True), (True, False)]) |
| |
| |
| @use_np |
| def test_lcm(): |
| inp1 = np.ones((2, INT_OVERFLOW), dtype='int32') |
| inp2 = np.ones((2, INT_OVERFLOW), dtype='int32') |
| inp1[-1, -1] = 3 |
| inp2[-1, -1] = 5 |
| inp1.attach_grad() |
| with mx.autograd.record(): |
| out = np.lcm(inp1, inp2) |
| out.backward() |
| assert out.shape == inp1.shape |
| assert out[-1, -1] == 15 |
| assert inp1.grad.shape == inp1.shape |
| assert inp1.grad[-1, -1] == 0 |
| |
| |
| @use_np |
| def test_gcd(): |
| inp1 = np.ones((2, INT_OVERFLOW), dtype='int32') |
| inp2 = np.ones((2, INT_OVERFLOW), dtype='int32') |
| inp1[-1, -1] = 12 |
| inp2[-1, -1] = 20 |
| inp1.attach_grad() |
| with mx.autograd.record(): |
| out = np.gcd(inp1, inp2) |
| out.backward() |
| assert out.shape == inp1.shape |
| assert out[-1, -1] == 4 |
| assert inp1.grad.shape == inp1.shape |
| assert inp1.grad[-1, -1] == 0 |
| |
| |
| @use_np |
| def test_log_family(): |
| def batch_check(funcs, exp): |
| inp.attach_grad() |
| for f, e in zip(funcs, exp): |
| with mx.autograd.record(): |
| out = f(inp) |
| out.backward() |
| assert out.shape == inp.shape |
| assert_almost_equal(out[-1, -1], np.array([e[0]]), \ |
| rtol=1e-5, atol=1e-5) |
| assert inp.grad.shape == inp.shape |
| assert_almost_equal(inp.grad[-1, -1], np.array([e[1]]), \ |
| rtol=1e-5, atol=1e-5) |
| |
| inp = np.ones((INT_OVERFLOW, 2)) |
| inp[-1, -1] = 100 |
| batch_check([np.log, np.log10, np.log2, np.log1p], \ |
| [(4.6051702, 0.01), (2, 0.00434294), \ |
| (6.643856, 0.01442695), (4.6151204, 0.00990099)]) |
| |
| |
| @use_np |
| def test_expand_dims(): |
| inp = np.zeros((INT_OVERFLOW)) |
| inp[-1] = 1 |
| out1 = np.expand_dims(inp, axis=0) |
| out2 = np.expand_dims(out1, axis=2) |
| assert out1.shape == (1, INT_OVERFLOW) |
| assert out2.shape == (1, INT_OVERFLOW, 1) |
| assert out1[0, -1] == 1 |
| assert out2[0, -1, 0] == 1 |
| |
| |
| @use_np |
| def test_hamming(): |
| data = np.hamming((INT_OVERFLOW)) |
| ind = int(INT_OVERFLOW / 6) |
| ref = 0.54 - 0.46*math.cos(2*math.pi*ind/(INT_OVERFLOW-1)) |
| assert data.shape == (INT_OVERFLOW, ) |
| assert_almost_equal(data[ind], ref, rtol=1e-3, atol=1e-5) |
| |
| |
| @use_np |
| def test_hanning(): |
| data = np.hanning((INT_OVERFLOW)) |
| ind = int(INT_OVERFLOW / 6) |
| ref = 0.5 - 0.5*math.cos(2*math.pi*ind/(INT_OVERFLOW-1)) |
| assert data.shape == (INT_OVERFLOW, ) |
| assert_almost_equal(data[ind], ref, rtol=1e-3, atol=1e-5) |
| |
| |
| @use_np |
| def test_fmax(): |
| inp1 = np.ones((INT_OVERFLOW, 2)) |
| inp1[-1, -1] = -1 |
| inp2 = np.zeros((INT_OVERFLOW, 1)) |
| inp1.attach_grad() |
| inp2.attach_grad() |
| with mx.autograd.record(): |
| out = np.fmax(inp1, inp2) |
| out.backward() |
| assert out.shape == inp1.shape |
| assert out[-1, -1] == 0 |
| assert inp1.grad.shape == inp1.shape |
| assert inp1.grad[-1, -1] == 0 and inp1.grad[0, 0] == 1 |
| assert inp2.grad.shape == inp2.shape |
| assert inp2.grad[-1] == 1 and inp2.grad[0] == 0 |
| |
| |
| @use_np |
| def test_fmin(): |
| inp1 = np.ones((INT_OVERFLOW, 2)) |
| inp1[-1, -1] = -1 |
| inp2 = np.zeros((INT_OVERFLOW, 1)) |
| inp1.attach_grad() |
| inp2.attach_grad() |
| with mx.autograd.record(): |
| out = np.fmin(inp1, inp2) |
| out.backward() |
| assert out.shape == inp1.shape |
| assert out[-1, -1] == -1 |
| assert inp1.grad.shape == inp1.shape |
| assert inp1.grad[-1, -1] == 1 and inp1.grad[0, 0] == 0 |
| assert inp2.grad.shape == inp2.shape |
| assert inp2.grad[-1] == 1 and inp2.grad[0] == 2 |
| |
| |
| @use_np |
| def test_fmod(): |
| inp1 = np.ones((INT_OVERFLOW, 2)) |
| inp2 = np.ones((INT_OVERFLOW, 1)) |
| inp1[-1, -1], inp2[-1, -1] = 11, 7 |
| inp1.attach_grad() |
| inp2.attach_grad() |
| with mx.autograd.record(): |
| out = np.fmod(inp1, inp2) |
| out.backward() |
| assert out.shape == inp1.shape |
| assert out[-1, -1] == 4 |
| assert inp1.grad.shape == inp1.shape |
| assert inp1.grad[0, 0] == 1 |
| assert inp2.grad.shape == inp2.shape |
| assert inp2.grad[-1] == -1 and inp2.grad[0] == -2 |
| |
| |
| @use_np |
| def test_mod(): |
| # np.mod and np.remainder are the same thing |
| inp1 = np.ones((INT_OVERFLOW, 2)) |
| inp2 = np.ones((INT_OVERFLOW, 1)) |
| inp1[-1, -1], inp2[-1, -1] = 11, 7 |
| inp1.attach_grad() |
| inp2.attach_grad() |
| with mx.autograd.record(): |
| out = np.mod(inp1, inp2) |
| out.backward() |
| assert out.shape == inp1.shape |
| assert out[-1, -1] == 4 |
| assert inp1.grad.shape == inp1.shape |
| assert inp1.grad[0, 0] == 1 |
| assert inp2.grad.shape == inp2.shape |
| assert inp2.grad[-1] == -1 and inp2.grad[0] == -2 |
| |
| |
| @use_np |
| def test_value_check_family(): |
| def batch_check(funcs, ref): |
| inp.attach_grad() |
| for f, r in zip(funcs, ref): |
| with mx.autograd.record(): |
| out = f(inp) |
| out.backward() |
| assert out.shape == inp.shape |
| for i in range(4): |
| assert out[i, -1] == r[i] |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1] == 0 |
| |
| inp = np.zeros((4, INT_OVERFLOW)) |
| inp[1:, -1] = np.array([np.inf, -np.inf, np.nan]) |
| batch_check([np.isinf, np.isneginf, np.isposinf, np.isnan, np.isfinite], \ |
| [(False, True, True, False), (False, False, True, False), \ |
| (False, True, False, False), (False, False, False, True), \ |
| (True, False, False, False)]) |
| |
| |
| @use_np |
| def test_rint(): |
| inp = np.zeros((INT_OVERFLOW, 2)) |
| inp[0, 0], inp[-1, -1] = 2.1, 2.9 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.rint(inp) |
| out.backward() |
| assert out.shape == inp.shape |
| assert out[0, 0] == 2 and out[-1, -1] == 3 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1] == 0 |
| |
| |
| @use_np |
| def test_invert(): |
| inp = np.zeros((2, INT_OVERFLOW), dtype='uint8') |
| inp[-1, -1] = 1 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.invert(inp) |
| out.backward() |
| assert out.shape == inp.shape |
| assert out[0, 0] == 255 and out[-1, -1] == 254 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1] == 0 |
| |
| |
| @use_np |
| def test_exp(): |
| inp = np.ones((2, INT_OVERFLOW)) |
| inp[-1, -1] = 2 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.exp(inp) |
| out.backward() |
| assert out.shape == inp.shape |
| assert_almost_equal(out[0, 0], np.array(np.e**1), rtol=1e-5, atol=1e-5) |
| assert_almost_equal(out[-1, -1], np.array(np.e**2), rtol=1e-5, atol=1e-5) |
| assert inp.grad.shape == inp.shape |
| assert_almost_equal(inp.grad[-1, -1], out[-1, -1], rtol=1e-5, atol=1e-5) |
| |
| |
| @use_np |
| def test_expm1(): |
| inp = np.ones((2, INT_OVERFLOW)) |
| inp[-1, -1] = 2 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.expm1(inp) |
| out.backward() |
| assert out.shape == inp.shape |
| assert_almost_equal(out[0, 0], np.array(np.e**1 - 1), rtol=1e-5, atol=1e-5) |
| assert_almost_equal(out[-1, -1], np.array(np.e**2 - 1), rtol=1e-5, atol=1e-5) |
| assert inp.grad.shape == inp.shape |
| assert_almost_equal(inp.grad[-1, -1], np.array(np.e**2), rtol=1e-5, atol=1e-5) |
| |
| |
| @use_np |
@pytest.mark.skip(reason='to be moved to new file and run separately as it takes a lot of memory')
| def test_frexp(): |
| inp = np.ones((2, INT_OVERFLOW)) |
| inp[-1, -1] = 9 |
| out1, out2 = np.frexp(inp) |
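    # frexp splits x into mantissa and exponent with x = mantissa * 2**exponent;
    # the assertion below reconstructs the input from the two outputs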
| assert_almost_equal(inp[-1, -1], out1[-1, -1] * 2 ** out2[-1, -1], \ |
| rtol=1e-5, atol=1e-5) |
| |
| |
| @use_np |
| def test_reciprocal(): |
| inp = np.ones((2, INT_OVERFLOW)) |
| inp[-1, -1] = 3 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.reciprocal(inp) |
| out.backward() |
| assert out.shape == inp.shape |
| assert_almost_equal(out[-1, -1], np.array([1.0/3]), rtol=1e-5, atol=1e-5) |
| assert inp.grad.shape == inp.shape |
| assert_almost_equal(inp.grad[-1, -1], np.array([-1.0/3**2]), \ |
| rtol=1e-5, atol=1e-5) |
| |
| |
| @use_np |
| def test_sum(): |
| inp = np.zeros((2, INT_OVERFLOW)) |
| inp[-1, -1] = 10 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out1 = np.sum(inp, axis=1) |
| out1.backward() |
| assert out1.shape == (2, ) |
| assert out1[0] == 0 and out1[1] == 10 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1] == 1 |
| with mx.autograd.record(): |
| out2 = np.sum(inp, axis=0) |
| out2.backward() |
| assert out2.shape == (INT_OVERFLOW, ) |
| assert out2[0] == 0 and out2[-1] == 10 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1] == 1 |
| |
| |
| @use_np |
| def test_negative(): |
| inp = np.ones((2, INT_OVERFLOW)) |
| inp[-1, -1] = -2 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.negative(inp) |
| out.backward() |
| assert out.shape == inp.shape |
| assert out[0, 0] == -1 and out[-1, -1] == 2 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1] == -1 |
| |
| |
| @use_np |
| def test_identity(): |
| M = 2**16 |
| data = np.identity(M) |
| assert data.shape == (M, M) |
| assert data[0, 0] == 1 and data[-1, -1] == 1 and data[-1, -2] == 0 |
| |
| |
| @use_np |
| def test_square(): |
| inp = np.ones((INT_OVERFLOW, 2)) |
| inp[-1, -1] = 3 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.square(inp) |
| out.backward() |
| assert out.shape == inp.shape |
| assert out[-1, -1] == 9 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1] == 6 |
| |
| |
| @use_np |
| def test_sign(): |
| inp = np.zeros((INT_OVERFLOW, 2)) |
| inp[-1, -1], inp[-2, -1] = 2, -2 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.sign(inp) |
| out.backward() |
| assert out.shape == inp.shape |
| assert out[0, 0] == 0 and out[-1, -1] == 1 and out[-2, -1] == -1 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1] == 0 |
| |
| |
| @use_np |
| def test_prod(): |
| inp = np.ones((2, INT_OVERFLOW)) |
| inp[0, 0], inp[-1, -1] = 2, 10 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out1 = np.prod(inp, axis=1) |
| out1.backward() |
| assert out1.shape == (2, ) |
| assert out1[0] == 2 and out1[1] == 10 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1] == 1 |
| with mx.autograd.record(): |
| out2 = np.prod(inp, axis=0) |
| out2.backward() |
| assert out2.shape == (INT_OVERFLOW, ) |
| assert out2[0] == 2 and out2[-1] == 10 |
| assert inp.grad.shape == inp.shape |
| |
| |
| @use_np |
| def test_add(): |
| A = np.ones((INT_OVERFLOW, 2)) |
| B = np.ones((INT_OVERFLOW, 2)) |
| A[-1, -1] = 2 |
| A.attach_grad() |
| with mx.autograd.record(): |
| C = np.add(A, B) |
| C.backward() |
| assert C.shape == (INT_OVERFLOW, 2) |
| assert C[-1, -1] == 3 |
| assert A.grad.shape == (INT_OVERFLOW, 2) |
| assert A.grad[-1, -1] == 1 |
| |
| |
| @use_np |
| def test_hypot(): |
| A = np.ones((INT_OVERFLOW, 2)) |
| B = np.ones((INT_OVERFLOW, 2)) |
| A[-1, -1], B[-1, -1] = 3, 4 |
| A.attach_grad() |
| with mx.autograd.record(): |
| C = np.hypot(A, B) |
| C.backward() |
| assert C.shape == A.shape |
| assert C[-1, -1] == 5 |
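    # dC/dA = A / hypot(A, B) = 3 / 5 = 0.6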
| assert A.grad.shape == A.shape |
| assert_almost_equal(A.grad[-1, -1], np.array([0.6]), rtol=1e-5, atol=1e-5) |
| |
| |
| @use_np |
| def test_power(): |
| A = np.full((2, INT_OVERFLOW), 2) |
| B = np.ones((2, INT_OVERFLOW)) |
| B[-1, -1] = 3 |
| A.attach_grad() |
| B.attach_grad() |
| with mx.autograd.record(): |
| C = np.power(A, B) |
| C.backward() |
| assert C.shape == A.shape |
| assert C[-1, -1] == 8 |
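    # dC/dA = B * A**(B-1) = 3 * 2**2 = 12; dC/dB = A**B * ln(A) = 8 * ln(2)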
| assert A.grad.shape == A.shape |
| assert A.grad[-1, -1] == 12 |
| assert B.grad.shape == B.shape |
| assert_almost_equal(B.grad[-1, -1], 2**3 * np.log(2), rtol=1e-5, atol=1e-5) |
| |
| |
| @use_np |
| def test_ldexp(): |
| A = np.ones((2, INT_OVERFLOW)) |
| B = np.ones((2, INT_OVERFLOW)) |
| A[-1, -1], B[-1, -1] = 5, 2 |
| A.attach_grad() |
| B.attach_grad() |
| with mx.autograd.record(): |
| C = np.ldexp(A, B) |
| C.backward() |
| assert C.shape == A.shape |
| assert C[-1, -1] == 20 |
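    # ldexp(A, B) = A * 2**B, so dC/dA = 2**B = 4
    # and dC/dB = A * 2**B * ln(2)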
| assert A.grad.shape == A.shape |
| assert A.grad[-1, -1] == 4 |
| assert B.grad.shape == B.shape |
| assert_almost_equal(B.grad[-1, -1], A[-1, -1] * 2**B[-1, -1] * np.log(2), \ |
| rtol=1e-5, atol=1e-5) |
| |
| |
| @use_np |
| def test_multiply(): |
| A = np.ones((2, INT_OVERFLOW)) |
| B = np.ones((2, INT_OVERFLOW)) |
| A[-1, -1], B[-1, -1] = 2, 3 |
| A.attach_grad() |
| B.attach_grad() |
| with mx.autograd.record(): |
| C = np.multiply(A, B) |
| C.backward() |
| assert C.shape == A.shape |
| assert C[0, 0] == 1 and C[-1, -1] == 6 |
| assert A.grad.shape == A.shape |
| assert A.grad[-1, -1] == B[-1, -1] |
| assert B.grad.shape == B.shape |
| assert B.grad[-1, -1] == A[-1, -1] |
| |
| |
| @use_np |
| def test_subtract(): |
| A = np.zeros((INT_OVERFLOW, 2)) |
| B = np.ones((INT_OVERFLOW, 2)) |
| A[-1, -1] = 3 |
| A.attach_grad() |
| B.attach_grad() |
| with mx.autograd.record(): |
| C = np.subtract(A, B) |
| C.backward() |
| assert C.shape == (INT_OVERFLOW, 2) |
| assert C[0, 0] == -1 and C[-1][-1] == 2 |
| assert A.grad.shape == (INT_OVERFLOW, 2) |
| assert A.grad[0][0] == 1 |
| assert B.grad.shape == (INT_OVERFLOW, 2) |
| assert B.grad[0][0] == -1 |
| |
| |
| @use_np |
| def test_diag(): |
| # test diag extraction |
| inp = np.zeros((2, INT_OVERFLOW+2)) |
| inp[-1, -1] = 1 |
| inp.attach_grad() |
| with mx.autograd.record(): |
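        # k=INT_OVERFLOW selects the diagonal through inp[0, -2] and inp[1, -1]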
| out = np.diag(inp, k=INT_OVERFLOW) |
| out.backward() |
| assert out.shape == (2, ) |
| assert out[1] == 1 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[1, -1] == 1 and inp.grad[0, -2] == 1 |
| # now test mat generation |
| N = 2**16 |
| inp = np.ones((N)) |
| inp[-1] = 2 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.diag(inp) |
| out.backward() |
| assert out.shape == (N, N) |
| assert out[-1, -1] == 2 and out[0, 0] == 1 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1] == 1 |
| |
| |
| @use_np |
| def test_diag_indices_from(): |
| N = 2**16 |
| inp = np.zeros((N, N)) |
| inp.attach_grad() |
| with mx.autograd.record(): |
| dim1, dim2 = np.diag_indices_from(inp) |
| dim1.backward() |
| assert dim1.shape == (N, ) and dim2.shape == (N, ) |
| assert dim1[-1] == N-1 and dim2[-1] == N-1 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[0, 0] == 0 |
| |
| |
| @use_np |
| def test_diagflat(): |
| N = 2**15 |
| inp = np.ones((2, N)) |
| inp[-1, -1] = 2 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.diagflat(inp) |
| out.backward() |
| assert out.shape == (N*2, N*2) |
| assert out[-1, -1] == 2 and out[-1, -2] == 0 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1] == 1 |
| |
| |
| @use_np |
| def test_diagonal(): |
| inp = np.zeros((2, INT_OVERFLOW+2)) |
| inp[-1, -1] = 1 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.diagonal(inp, offset=INT_OVERFLOW) |
| out.backward() |
| assert out.shape == (2, ) |
| assert out[1] == 1 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[1, -1] == 1 and inp.grad[0, -2] == 1 |
| # now test with axes specified |
| N = 2**16 |
| inp = np.zeros((N, N, 2)) |
| inp[-1, -1] = np.array([1, 2]) |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.diagonal(inp, offset=0, axis1=0, axis2=1) |
| out.backward() |
| assert out.shape == (2, N) |
| assert out[0, -1] == 1 and out[1, -1] == 2 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1, 0] == 1 and inp.grad[-1, -1, 1] == 1 |
| |
| |
@use_np
def test_roll():
| inp = np.zeros((2, INT_OVERFLOW)) |
| inp[-1, -1] = 1 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.roll(inp, 1) |
| # equivalent but slower |
| # out = np.roll(inp, shift=(1, 1), axis=(0, 1)) |
| out.backward() |
| assert out.shape == (2, INT_OVERFLOW) |
    assert out[0, 0] == 1 and out[-1, -1] == 0
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1] == 1 |
| |
| |
@use_np
def test_polyval():
| poly = np.array([1, 1, 5]) |
| inp = np.zeros((2, INT_OVERFLOW)) |
| inp[-1, -1] = 2 |
| poly.attach_grad() |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.polyval(poly, inp) |
| out.backward() |
| assert out.shape == inp.shape |
| assert out[-1, -1] == 11 and out[0, 0] == 5 |
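    # p(x) = x**2 + x + 5, so p'(2) = 2*2 + 1 = 5 and the gradient of the
    # leading coefficient is the sum of x**2 over all elements, i.e. 4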
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1] == 5 |
| assert poly.grad.shape == poly.shape |
| assert poly.grad[0] == 4 |
| |
| |
| @use_np |
| def test_rot90(): |
| inp = np.zeros((1, 2, INT_OVERFLOW)) |
| inp[-1, -1, -1] = 1 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.rot90(inp, axes=(1,2)) |
| out.backward() |
| assert out.shape == (1, INT_OVERFLOW, 2) |
| assert out[0, 0, 1] == 1 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1, -1] == 1 |
| |
| |
| @use_np |
| def test_squeeze(): |
| inp = np.zeros((2, 1, INT_OVERFLOW)) |
| inp[-1, -1, -1] = 1 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.squeeze(inp, axis=1) |
| out.backward() |
| assert out.shape == (2, INT_OVERFLOW) |
| assert out[-1, -1] == 1 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1, -1] == 1 |
| |
| |
| @use_np |
| def test_tile(): |
| inp = np.array([[0, 1],[2, 3]]) |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.tile(inp, (1, HALF_INT_OVERFLOW)) |
| out.backward() |
| assert out.shape == (2, INT_OVERFLOW) |
| assert out[-1, -1] == 3 |
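    # every source element is replicated HALF_INT_OVERFLOW times, so its
    # gradient accumulates to HALF_INT_OVERFLOW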
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1] == HALF_INT_OVERFLOW |
| |
| |
| @use_np |
| def test_trace(): |
| N = 2**16 |
| inp = np.eye(N) |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.trace(inp) |
| out.backward() |
| assert out == N |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[0, 0] == 1 and inp.grad[-1, -1] == 1 |
| inp = np.zeros((2, INT_OVERFLOW)) |
| inp[-1, -1] = 1 |
| inp.attach_grad() |
| with mx.autograd.record(): |
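        # offset=INT_OVERFLOW-2 traces through inp[0, -2] and inp[1, -1]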
| out = np.trace(inp, offset=INT_OVERFLOW-2) |
| out.backward() |
| assert out == 1 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[0, -2] == 1 and inp.grad[-1, -1] == 1 |
| |
| |
| @use_np |
| def test_tri(): |
| N = 2**16 |
| data = np.tri(N) |
| assert data.shape == (N, N) |
| assert data[0, 0] == 1 and data[-1, -1] == 1 |
| assert data[0, -1] == 0 and data[-1, 0] == 1 |
| data = np.tri(2, INT_OVERFLOW, INT_OVERFLOW-2) |
| assert data.shape == (2, INT_OVERFLOW) |
| assert data[0, -1] == 0 and data[-1, -1] == 1 |
| |
| |
| @use_np |
| def test_tril(): |
| N = 2**16 |
| inp = np.ones((N, N)) |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.tril(inp) |
| out.backward() |
| assert out.shape == (N, N) |
| assert out[-1, -1] == 1 and out[0, -1] == 0 and out[-1, 0] == 1 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1] == 1 and inp.grad[0, -1] == 0 and \ |
| inp.grad[-1, 0] == 1 |
| inp = np.ones((2, INT_OVERFLOW)) |
| inp[-1, -1] = 1 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.tril(inp, k=INT_OVERFLOW-2) |
| out.backward() |
| assert out.shape == inp.shape |
| assert out[0, -1] == 0 and out[-1, -1] == 1 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[0, -1] == 0 and inp.grad[-1, -1] == 1 |
| |
| |
| @use_np |
| def test_triu(): |
| N = 2**16 |
| inp = np.ones((N, N)) |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.triu(inp) |
| out.backward() |
| assert out.shape == (N, N) |
| assert out[-1, -1] == 1 and out[0, -1] == 1 and out[-1, 0] == 0 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1] == 1 and inp.grad[0, -1] == 1 and \ |
| inp.grad[-1, 0] == 0 |
| inp = np.ones((2, INT_OVERFLOW)) |
| inp[-1, -1] = 1 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.triu(inp, k=INT_OVERFLOW-1) |
| out.backward() |
| assert out.shape == inp.shape |
| assert out[0, -1] == 1 and out[-1, -1] == 0 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[0, -1] == 1 and inp.grad[-1, -1] == 0 |
| |
| |
| @use_np |
| def test_transpose(): |
| inp = np.zeros((1, 2, INT_OVERFLOW)) |
| inp[0, 0, -1] = 1 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.transpose(inp, (2, 0, 1)) |
| out.backward() |
| assert out.shape == (INT_OVERFLOW, 1, 2) |
| assert out[-1, 0, 0] == 1 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1, -1] == 1 |
| |
| |
| @use_np |
| def test_trunc(): |
| inp = np.zeros((2, INT_OVERFLOW)) |
| inp[0, -1], inp[1, -1] = 1.9, -1.9 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.trunc(inp) |
| out.backward() |
| assert out.shape == inp.shape |
| assert out[0, -1] == 1 and out[1, -1] == -1 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1] == 0 |
| |
| |
| @use_np |
| def test_stack(): |
| inp1 = np.zeros((INT_OVERFLOW)) |
| inp2 = np.ones((INT_OVERFLOW)) |
| inp1.attach_grad() |
| inp2.attach_grad() |
| with mx.autograd.record(): |
| out1 = np.stack([inp1, inp2]) |
| out1.backward() |
| assert out1.shape == (2, INT_OVERFLOW) |
| assert out1[0, -1] == 0 and out1[1, -1] == 1 |
| assert inp1.grad.shape == inp1.shape |
| assert inp1.grad[-1] == 1 |
| with mx.autograd.record(): |
| out2 = np.stack([inp1, inp2], axis=1) |
| out2.backward() |
| assert out2.shape == (INT_OVERFLOW, 2) |
| assert out2[-1, 0] == 0 and out2[-1, 1] == 1 |
| assert inp2.grad.shape == inp2.shape |
| assert inp2.grad[-1] == 1 |
| |
| |
| @use_np |
| def test_dstack(): |
| inp1 = np.zeros((INT_OVERFLOW, 1)) |
| inp2 = np.ones((INT_OVERFLOW, 1)) |
| inp1.attach_grad() |
| inp2.attach_grad() |
| with mx.autograd.record(): |
| out = np.dstack((inp1, inp2)) |
| out.backward() |
| assert out.shape == (INT_OVERFLOW, 1, 2) |
| assert out[0, -1, 0] == 0 and out[0, -1, 1] == 1 |
| assert inp1.grad.shape == inp1.shape |
| assert inp1.grad[-1, -1] == 1 |
| |
| |
| @use_np |
| def test_hstack(): |
| inp1 = np.zeros((INT_OVERFLOW, 1)) |
| inp2 = np.ones((INT_OVERFLOW, 1)) |
| inp1.attach_grad() |
| inp2.attach_grad() |
| with mx.autograd.record(): |
| out1 = np.hstack((inp1, inp2)) |
| out1.backward() |
| assert out1.shape == (INT_OVERFLOW, 2) |
| assert out1[-1, 0] == 0 and out1[-1, 1] == 1 |
| assert inp1.grad.shape == inp1.shape |
| assert inp1.grad[-1, -1] == 1 |
| with mx.autograd.record(): |
| out2 = np.hstack((inp1.flatten(), inp2.flatten())) |
| out2.backward() |
| assert out2.shape == (DOUBLE_INT_OVERFLOW, ) |
| assert out2[INT_OVERFLOW-1] == 0 and out2[-1] == 1 |
| assert inp2.grad.shape == inp2.shape |
| assert inp2.grad[-1, -1] == 1 |
| |
| |
| ''' |
| _ _ |
| _ _ _ _ _ __ _ __ _ _ _____ _| |_ ___ _ _ __(_)___ _ _ |
| | ' \ || | ' \| '_ \ || | / -_) \ / _/ -_) ' \(_-< / _ \ ' \ |
| |_||_\_,_|_|_|_| .__/\_, | \___/_\_\\__\___|_||_/__/_\___/_||_| |
| |_| |__/ |
| ''' |
| |
| |
| @use_np |
| def test_activation(): |
| A = np.zeros((INT_OVERFLOW, 2)) |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = npx.activation(A, act_type='sigmoid') |
| assert B.shape == (INT_OVERFLOW, 2) |
| assert B[0][0] == 0.5 |
| B.backward() |
| assert A.grad.shape == (INT_OVERFLOW, 2) |
| assert_almost_equal(A.grad[0][0], np.array([0.25]), \ |
| rtol=1e-3, atol=1e-5) |
| |
| |
| @use_np |
| def test_arange_like(): |
| A = np.zeros((INT_OVERFLOW, 2)) |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = npx.arange_like(A) |
| assert B.shape == (INT_OVERFLOW, 2) |
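    # values are filled in row-major order over 2 columns, so element
    # (100, 0) holds flat index 100 * 2 = 200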
| assert B[100][0] == 200 |
| B.backward() |
| assert A.grad.shape == (INT_OVERFLOW, 2) |
| assert A.grad[0][0] == 0 |
| |
| @use_np |
| def test_batch_dot(): |
| inp1 = np.zeros((2, 1, INT_OVERFLOW)) |
| inp2 = np.zeros((2, INT_OVERFLOW, 1)) |
| inp1[-1, -1, -1] = 2 |
| inp2[-1, -1, -1] = 3 |
| inp1.attach_grad() |
| inp2.attach_grad() |
| with mx.autograd.record(): |
| out = npx.batch_dot(inp1, inp2) |
| out.backward() |
| assert out.shape == (2, 1, 1) |
| assert out[1, 0, 0] == 6 |
| assert inp1.grad.shape == inp1.shape |
| assert inp1.grad[-1, -1, -1] == 3 |
| assert inp2.grad.shape == inp2.shape |
| assert inp2.grad[-1, -1, -1] == 2 |
| |
| @use_np |
| def test_cast(): |
| A = np.ones((INT_OVERFLOW, 2)) |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = npx.cast(A, dtype='float16') |
| assert B.shape == (INT_OVERFLOW, 2) |
| assert B[0][0] == 1 |
| B.backward() |
| assert A.grad.shape == (INT_OVERFLOW, 2) |
| assert A.grad[0][0] == 1 |
| |
| |
| @use_np |
| def test_broadcast_like(): |
| A = np.ones((1, 2)) |
| B = np.zeros((INT_OVERFLOW, 2)) |
| A.attach_grad() |
| with mx.autograd.record(): |
| C = npx.broadcast_like(A, B) |
| assert C.shape == (INT_OVERFLOW, 2) |
| assert C[0][0] == 1 |
| C.backward() |
| assert A.grad.shape == (1, 2) |
| with mx.autograd.record(): |
| C = npx.broadcast_like(A.reshape(2, 1), B.T) |
| assert C.shape == (2, INT_OVERFLOW) |
| assert C[0][0] == 1 |
| C.backward() |
| assert A.grad.shape == (1, 2) |
| assert_almost_equal(A.grad[0][0], np.array([INT_OVERFLOW]), \ |
| rtol=1e-3, atol=1e-5) |
| |
| |
| @use_np |
| def test_constraint_check(): |
| A = np.ones((2, INT_OVERFLOW)) |
| constraint = (A > 0) |
| B = npx.constraint_check(constraint) |
| assert B == True |
| |
| # broken |
| @use_np |
| def test_batch_flatten(): |
| A = np.ones((2, 1, INT_OVERFLOW)) |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = npx.batch_flatten(A) |
| assert B.shape == (2, INT_OVERFLOW) |
| assert B[0][0] == 1 |
| B.backward() |
| assert A.grad.shape == (2, 1, INT_OVERFLOW) |
| assert A.grad[0][0][0] == 1 |
| |
| |
| @use_np |
| def test_batch_norm(): |
| inp = np.zeros((2, INT_OVERFLOW)) |
| gamma = np.array([1.5, 2.5]) |
| beta = np.array([0.3, 0.6]) |
| mov_mean = np.array([0.4, 0.8]) |
| mov_var = np.array([0.6, 1.2]) |
| eps = 1e-5 |
| inp[0, -1], inp[1, -1] = 3, 6 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = npx.batch_norm(inp, gamma=gamma, beta=beta, moving_mean=mov_mean,\ |
| moving_var=mov_var, axis=0, eps=eps, use_global_stats=True) |
| out.backward() |
| assert out.shape == inp.shape |
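    # with use_global_stats=True the expected output is
    # gamma * (x - moving_mean) / sqrt(moving_var + eps) + beta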
| ref0 = (inp[0, -1] - mov_mean[0]) / (mov_var[0] + eps)**0.5 * gamma[0] + beta[0] |
| ref1 = (inp[1, -1] - mov_mean[1]) / (mov_var[1] + eps)**0.5 * gamma[1] + beta[1] |
| assert_almost_equal(out[0, -1], ref0, rtol=1e-3, atol=1e-5) |
| assert_almost_equal(out[1, -1], ref1, rtol=1e-3, atol=1e-5) |
| assert inp.grad.shape == inp.shape |
| grad_ref0 = gamma[0] / (mov_var[0] + eps)**0.5 |
| grad_ref1 = gamma[1] / (mov_var[1] + eps)**0.5 |
| assert_almost_equal(inp.grad[0, -1], grad_ref0, rtol=1e-3, atol=1e-5) |
| assert_almost_equal(inp.grad[1, -1], grad_ref1, rtol=1e-3, atol=1e-5) |
| |
| |
| @use_np |
| def test_batch_norm_mean_var(): |
| N = 2**20 |
| inp = np.zeros((2, INT_OVERFLOW), dtype='float64') |
| gamma = np.array([1, 1], dtype='float64') |
| beta = np.array([0, 0], dtype='float64') |
| mov_mean = np.array([0, 0], dtype='float64') |
| mov_var = np.array([1, 1], dtype='float64') |
| eps = 0 |
| inp[1, -1] = N |
| with mx.autograd.record(): |
| out, mean, var = npx.batch_norm(inp, gamma=gamma, beta=beta, moving_mean=mov_mean,\ |
| moving_var=mov_var, axis=0, eps=eps, output_mean_var=True) |
| assert out.shape == inp.shape |
| mean_ref = float(N) / INT_OVERFLOW |
    var_ref = ((INT_OVERFLOW-1) * (mean_ref-0)**2 + (mean_ref-N)**2) / INT_OVERFLOW
    out_ref = (N - mean_ref) / (var_ref**0.5)
    assert_almost_equal(mean[1], mean_ref, rtol=1e-3, atol=1e-5)
    assert_almost_equal(var[1], 1 / var_ref**0.5, rtol=1e-3, atol=1e-5)
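    # the test expects the second aux output to hold the inverse standard
    # deviation of the batch, hence the comparison against 1 / sqrt(var_ref)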
| assert_almost_equal(out[1, -1], out_ref, rtol=1e-3, atol=1e-5) |
| |
| |
| @use_np |
| def test_nonzero(): |
| A = np.zeros((2, INT_OVERFLOW)) |
| A[0, 1] = 1 |
| A[0, -2] = 1 |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = npx.nonzero(A) |
| assert B.shape == (2, 2) |
| assert B[0, 0] == 0 and B[0, 1] == 1 |
| assert B[1, 0] == 0 and B[1, 1] == int(INT_OVERFLOW - 2) |
| B.backward() |
| assert A.grad.shape == (2, INT_OVERFLOW) |
| assert A.grad[0][0] == 0 |
| |
| |
| @use_np |
| def test_one_hot(): |
| A = np.zeros((INT_OVERFLOW)) |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = npx.one_hot(A, 2) |
| assert B.shape == (INT_OVERFLOW, 2) |
| assert B[0][0] == 1 |
| B.backward() |
| assert A.grad.shape == (INT_OVERFLOW, ) |
| assert A.grad[0] == 0 |
| |
| |
| @use_np |
| def test_pick(): |
| A = np.zeros((INT_OVERFLOW, 2)) |
| B = np.zeros((INT_OVERFLOW)) |
| A[-1, 0] = 3 |
| A.attach_grad() |
| B.attach_grad() |
| with mx.autograd.record(): |
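        # pick gathers A[i, B[i]] along the trailing axis (the default),
        # so C[-1] = A[-1, B[-1]] = A[-1, 0] = 3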
| C = npx.pick(A, B) |
| assert C.shape == (INT_OVERFLOW, ) |
| assert C[0] == 0 |
| assert C[-1] == 3 |
| C.backward() |
| assert A.grad.shape == (INT_OVERFLOW, 2) |
| assert B.grad.shape == (INT_OVERFLOW, ) |
| assert A.grad[0][0] == 1 |
| |
| |
| @use_np |
| def test_scalar_poisson(): |
| A = npx.scalar_poisson(lam=4, shape=(2, INT_OVERFLOW)) |
| assert A.shape == (2, INT_OVERFLOW) |
| |
| |
| @use_np |
| def test_tensor_poisson(): |
| lam = np.array([2.0, 4.0]) |
| A = npx.tensor_poisson(lam, shape=(INT_OVERFLOW)) |
| assert A.shape == (2, INT_OVERFLOW) |
| |
| |
| @use_np |
| def test_reshape(): |
| A = np.ones((INT_OVERFLOW, 2)) |
| A.attach_grad() |
| with mx.autograd.record(): |
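        # the special -5 code asks npx.reshape to fuse input dimensions into
        # a single axis, here (INT_OVERFLOW, 2) -> (DOUBLE_INT_OVERFLOW, )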
| B = npx.reshape(A, (-5)) |
| assert B.shape == (DOUBLE_INT_OVERFLOW, ) |
| assert B[0] == 1 |
| B.backward() |
| assert A.grad.shape == (INT_OVERFLOW, 2) |
| assert A.grad[0][0] == 1 |
| |
| |
| @use_np |
| def test_reshape_like(): |
| A = np.ones((INT_OVERFLOW, 2)) |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = npx.reshape_like(A, np.zeros((2, INT_OVERFLOW))) |
| assert B.shape == (2, INT_OVERFLOW) |
| assert B[0][0] == 1 |
| B.backward() |
| assert A.grad.shape == (INT_OVERFLOW, 2) |
| assert A.grad[0][0] == 1 |
| |
| |
| @use_np |
| def test_sigmoid(): |
| A = np.zeros((INT_OVERFLOW, 2)) |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = npx.sigmoid(A) |
| assert B.shape == (INT_OVERFLOW, 2) |
| assert B[0][0] == 0.5 |
| B.backward() |
| assert A.grad.shape == (INT_OVERFLOW, 2) |
| assert_almost_equal(A.grad[0][0], np.array([0.25]), \ |
| rtol=1e-3, atol=1e-5) |
| |
| |
| @use_np |
| def test_shape_array(): |
| A = np.zeros((INT_OVERFLOW, 2)) |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = npx.shape_array(A) |
| assert B[0] == INT_OVERFLOW and B[1] == 2 |
| B.backward() |
| assert A.grad.shape == (INT_OVERFLOW, 2) |
| assert A.grad[0][0] == 0 |
| |
| |
| @use_np |
| def test_stop_gradient(): |
| A = np.ones((INT_OVERFLOW, 2)) |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = npx.stop_gradient(A * 3) |
| assert B.shape == (INT_OVERFLOW, 2) |
| assert B[0][0] == 3 |
| B.backward() |
| # should be 3 if not for stop_gradient() |
| assert A.grad[0][0] == 0 |
| |
| |
| @use_np |
| def test_sequence_mask(): |
| A = np.ones((2, 2, INT_OVERFLOW)) |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = npx.sequence_mask(A, sequence_length=np.array([1,1]), \ |
| use_sequence_length=True) |
| assert B.shape == (2, 2, INT_OVERFLOW) |
| assert B[0][0][0] == 1 |
| assert B[1][0][0] == 0 |
| B.backward() |
| assert A.grad.shape == (2, 2, INT_OVERFLOW) |
| assert A.grad[0][0][0] == 1 |
| |
| |
| @use_np |
| def test_topk(): |
| A = np.ones((2, INT_OVERFLOW)) |
| A[0][100] = 2 |
| A[1][200] = 2 |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = npx.topk(A, k=2) |
| assert B.shape == (2, 2) |
| assert B[0][0] == 100 and B[1][0] == 200 |
| B.backward() |
| assert A.grad.shape == (2, INT_OVERFLOW) |
| assert A.grad[0][0] == 0 |
| |
| |
| @use_np |
| def test_slice(): |
| A = np.ones((INT_OVERFLOW, 3)) |
| A[100][1] = 2 |
| B = npx.slice(A, begin=(100,1), end=(200,3)) |
| assert B.shape == (100, 2) |
| assert B[0][0] == 2 |
| |
| |
| @use_np |
| def test_smooth_l1(): |
| A = np.arange((INT_OVERFLOW)) |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = npx.smooth_l1(A) |
| assert B.shape == (INT_OVERFLOW, ) |
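    # with the default sigma, smooth_l1(x) = 0.5*x**2 for |x| < 1
    # and |x| - 0.5 otherwise, so x = 1 maps to 0.5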
| assert B[1] == 0.5 |
| B.backward() |
| assert A.grad.shape == (INT_OVERFLOW, ) |
| assert A.grad[0] == 0 |
| |
| |
| @use_np |
| def test_gamma(): |
| A = np.ones((2, INT_OVERFLOW)) |
| A[0][0] = 5 |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = npx.gamma(A) |
| assert B.shape == (2, INT_OVERFLOW) |
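    # gamma(5) = 4! = 24; the gradient is gamma(x) * digamma(x)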
| assert B[0][0] == 24 |
| B.backward() |
| assert A.grad.shape == (2, INT_OVERFLOW) |
| assert_almost_equal(A.grad[0][0], np.array([36.1428]), \ |
| rtol=1e-3, atol=1e-5) |
| |
| |
| @use_np |
| def test_gammaln(): |
| A = np.ones((2, INT_OVERFLOW)) |
| A[0][0] = 5 |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = npx.gammaln(A) |
| assert B.shape == (2, INT_OVERFLOW) |
| assert_almost_equal(B[0][0], np.array([np.log(24)]), \ |
| rtol=1e-3, atol=1e-5) |
| B.backward() |
| assert A.grad.shape == (2, INT_OVERFLOW) |
| assert_almost_equal(A.grad[0][0], np.array([1.5061178]), \ |
| rtol=1e-3, atol=1e-5) |
| |
| |
| @use_np |
| def test_digamma(): |
| A = np.ones((2, INT_OVERFLOW)) |
| A[0][0] = 5 |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = npx.digamma(A) |
| assert B.shape == (2, INT_OVERFLOW) |
| assert_almost_equal(B[0][0], np.array([1.5061178]), \ |
| rtol=1e-3, atol=1e-5) |
| B.backward() |
| assert A.grad.shape == (2, INT_OVERFLOW) |
| assert_almost_equal(A.grad[0][0], np.array([0.22132295]), \ |
| rtol=1e-3, atol=1e-5) |
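

# Reference sketch (hypothetical helpers) for the three gamma-family tests
# above: gamma(5) == 4! == 24, the gradient of gamma is gamma(x) * digamma(x)
# (~= 36.15 at x == 5, consistent with the asserted value under the given
# rtol), the gradient of gammaln is digamma (digamma(5) ~= 1.5061178), and the
# gradient of digamma is trigamma (trigamma(5) ~= 0.22132295). For integer n
# both reduce to short series:
def _digamma_int(n):
    # digamma(n) = -euler_gamma + sum_{k=1}^{n-1} 1/k
    return -0.5772156649015329 + sum(1.0 / k for k in range(1, n))


def _trigamma_int(n):
    # trigamma(n) = pi**2 / 6 - sum_{k=1}^{n-1} 1/k**2
    return math.pi ** 2 / 6 - sum(1.0 / k ** 2 for k in range(1, n))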
| |
| |
| @use_np |
@pytest.mark.skip(reason='to be moved to a new file and run separately as it takes a lot of memory')
| def test_rnn_dim_check(): |
| L_SEQ, BAT, L_INP, L_STA = 2**31, 4, 2**10, 2 |
| data = np.random.uniform(-1, 1, (L_SEQ, BAT, L_INP)) |
| state = np.random.normal(0, 1, (1, BAT, L_STA)) |
| params = np.random.normal(0, 1, (2056,)) |
| assertRaises(ValueError, npx.rnn, data=data, parameters=params, \ |
| mode='rnn_relu', state=state, state_size=L_STA, num_layers=1) |
| |
| |
| @use_np |
| @pytest.mark.skip(reason='runs without oneDNN, which is not default behavior') |
| def test_rnn_vanilla(): |
| L_SEQ, BAT, L_INP, L_STA = 2**20, 4, 2**10, 2 |
| def batch_check(x, modes, params): |
| state = np.random.normal(0, 1, (1, BAT, L_STA)) |
| for m, p in zip(modes, params): |
| x.attach_grad() |
| with mx.autograd.record(): |
| y = npx.rnn(data=x, parameters=p, mode=m, \ |
| state=state, state_size=L_STA, num_layers=1) |
| assert y.shape == (L_SEQ, BAT, L_STA) |
| y.backward() |
| npx.waitall() |
| data = np.random.uniform(-1, 1, (L_SEQ, BAT, L_INP)) |
| modes = ['rnn_tanh', 'rnn_relu'] |
| params = [np.random.normal(0, 1, (2056,)), \ |
| np.random.normal(0, 1, (2056,))] |
| batch_check(data, modes, params) |
| |
| |
| @use_np |
| def test_rnn_gru(): |
| L_SEQ, BAT, L_INP, L_STA = 2**20, 4, 2**10, 2 |
| data = np.random.uniform(-1, 1, (L_SEQ, BAT, L_INP)) |
| state = np.random.normal(0, 1, (1, BAT, L_STA)) |
| params = np.random.normal(0, 1, (6168,)) |
| data.attach_grad() |
| with mx.autograd.record(): |
| out = npx.rnn(data=data, parameters=params, mode='gru', \ |
| state=state, state_size=L_STA, num_layers=1) |
| assert out.shape == (L_SEQ, BAT, L_STA) |
| out.backward() |
| npx.waitall() |
| |
| |
| @use_np |
| def test_rnn_lstm(): |
    L_SEQ, BAT, L_INP, L_STA = 2**20, 4, 2**10, 2
| data = np.random.uniform(-1, 1, (L_SEQ, BAT, L_INP)) |
| state = np.random.normal(0, 1, (1, BAT, L_STA)) |
| state_cell = np.random.normal(0, 1, (1, BAT, L_STA)) |
| params = np.random.normal(0, 1, (8224,)) |
| data.attach_grad() |
| with mx.autograd.record(): |
| out = npx.rnn(data=data, parameters=params, mode='lstm', \ |
| state=state, state_size=L_STA, state_cell=state_cell, num_layers=1) |
| assert out.shape == (L_SEQ, BAT, L_STA) |
| out.backward() |
| npx.waitall() |
| |
| |
| |
| @use_np |
| def test_ctc_loss(): |
| def test_ctc_loss_size_check(A, label): |
| assertRaises(ValueError, npx.ctc_loss, A, label) |
| |
| L_SEQ, L_ALP, L_LAB, BAT = 2**10, 2**20, 2**6, 2 |
| A = np.zeros((L_SEQ, BAT, L_ALP)) |
| label = np.random.randint(0, L_ALP, (BAT, L_LAB)) |
| # test for expected exception |
| test_ctc_loss_size_check(A, label) |
| # now we shrink the size a little bit and test for an allowed case |
| L_ALP = 2**20 - 1 |
| A = np.zeros((L_SEQ, BAT, L_ALP)) |
| label = np.random.randint(0, L_ALP, (BAT, L_LAB)) |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = npx.ctc_loss(A, label) |
| assert B.shape == (BAT, ) |
| assert type(B[0]).__name__ == 'ndarray' |
| B.backward() |
| assert A.grad.shape == (L_SEQ, BAT, L_ALP) |
    assert type(A.grad[0]).__name__ == 'ndarray'
| |
| |
| @use_np |
| def test_erf(): |
| A = np.ones((2, INT_OVERFLOW)) |
| A[0][0] = 10 |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = npx.erf(A) |
| assert B.shape == (2, INT_OVERFLOW) |
| assert B[0][0] == 1 |
| B.backward() |
| assert A.grad.shape == (2, INT_OVERFLOW) |
| assert_almost_equal(A.grad[0][0], np.array([4.2e-44]), \ |
| rtol=1e-3, atol=1e-5) |
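

# Reference sketch (hypothetical helper): d/dx erf(x) = 2/sqrt(pi) * exp(-x**2),
# which at x == 10 is roughly 4.2e-44, the gradient asserted above.
def _erf_grad_reference(x):
    return 2.0 / math.sqrt(math.pi) * math.exp(-x * x)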
| |
| |
| @use_np |
| def test_erfinv(): |
| A = np.ones((2, INT_OVERFLOW)) |
| A[0][0] = 0.5 |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = npx.erfinv(A) |
| assert B.shape == (2, INT_OVERFLOW) |
| assert_almost_equal(B[0][0], np.array([0.47693628]), \ |
| rtol=1e-3, atol=1e-5) |
| B.backward() |
| assert A.grad.shape == (2, INT_OVERFLOW) |
| assert_almost_equal(A.grad[0][0], np.array([1.112585]), \ |
| rtol=1e-3, atol=1e-5) |
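

# Reference sketch (hypothetical helper): d/dx erfinv(x) = sqrt(pi)/2 *
# exp(erfinv(x)**2). Plugging in erfinv(0.5) ~= 0.47693628 from the assert
# above yields ~= 1.112585, the expected gradient.
def _erfinv_grad_reference(erfinv_x):
    return math.sqrt(math.pi) / 2.0 * math.exp(erfinv_x ** 2)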
| |
| |
| @use_np |
| def test_index_add(): |
| A = np.zeros((2, INT_OVERFLOW)) |
| ind = np.array([[0, 0], [0, 1]], dtype='int32') |
| val = np.array([100, 200]) |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = npx.index_add(A, ind, val) |
| assert B.shape == (2, INT_OVERFLOW) |
| assert B[0][0] == 100 and B[0][1] == 200 |
| B.backward() |
| assert A.grad.shape == (2, INT_OVERFLOW) |
| assert A.grad[0][0] == 1 |
| |
| |
| @use_np |
| def test_index_update(): |
| A = np.zeros((2, INT_OVERFLOW)) |
| ind = np.array([[0, 0], [0, 1]], dtype='int32') |
| val = np.array([100, 200]) |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = npx.index_update(A, ind, val) |
| assert B.shape == (2, INT_OVERFLOW) |
| assert B[0][0] == 100 and B[0][1] == 200 |
| B.backward() |
| assert A.grad.shape == (2, INT_OVERFLOW) |
| assert A.grad[0][0] == 0 |
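

# Note the contrast with test_index_add above: index_update overwrites the
# selected positions, so the gradient w.r.t. A is 0 there, whereas index_add
# keeps A's additive contribution and its gradient stays 1.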
| |
| |
| @use_np |
| def test_layer_norm(): |
| A = np.ones((2, INT_OVERFLOW)) |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = npx.layer_norm(A, gamma=np.ones((2)), beta=np.zeros((2)), axis=0) |
| assert B.shape == (2, INT_OVERFLOW) |
| assert B[0][0] == 0 |
| B.backward() |
| assert A.grad.shape == (2, INT_OVERFLOW) |
| assert A.grad[0][0] == 0 |
| |
| |
| @use_np |
| def test_dlpack(): |
| A = np.ones((2, INT_OVERFLOW)) |
| A[0][100] = 100 |
| B = npx.to_dlpack_for_read(A) |
| assert type(B).__name__ == 'PyCapsule' |
| C = npx.from_dlpack(B) |
| assert type(C).__name__ == 'ndarray' |
| assert C.shape == (2, INT_OVERFLOW) |
| assert C[0][100] == 100 |
| B = npx.to_dlpack_for_write(A) |
| assert type(B).__name__ == 'PyCapsule' |
| C = npx.from_dlpack(B) |
| C += 1 |
| assert type(C).__name__ == 'ndarray' |
| assert C.shape == (2, INT_OVERFLOW) |
| assert C[0][100] == 101 |
| |
| |
| @use_np |
| def test_pooling(): |
| def test_pooling_large_dim(): |
| A = np.ones((1, 1, INT_OVERFLOW)) |
| assertRaises(MXNetError, npx.pooling, data=A, kernel=(2), stride=(2), \ |
| pool_type='max') |
| |
| test_pooling_large_dim() |
| D, H, W = 2**12, 2**10, 2**10 |
    A = np.ones((1, 1, D, H, W))
| A[0, 0, 0, 0, 2] = 100 |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = npx.pooling(data=A, kernel=(2, 2, 2), stride=(2, 2, 2), \ |
| pool_type='max') |
| assert B.shape == (1, 1, int(D/2), int(H/2), int(W/2)) |
| assert B[0, 0, 0, 0, 1] == 100 |
| B.backward() |
| assert A.grad.shape == (1, 1, D, H, W) |
| assert A.grad[0, 0, 0, 0, 0] == 1 |
| |
| |
| @use_np |
| def test_roi_pooling(): |
| def test_roi_pooling_large_dim(): |
| A = np.ones((1, 1, INT_OVERFLOW, 5)) |
| roi = np.array([[0, 0, 0, 5, 5]]) |
| assertRaises(MXNetError, npx.roi_pooling, A, roi, pooled_size=(3, 3), \ |
| spatial_scale=1) |
| |
| test_roi_pooling_large_dim() |
| H, W = 2**16, 2**16 |
| A = np.ones((1, 1, H, W)) |
| A[0, 0, 0, 2] = 100 |
| roi = np.array([[0, 0, 0, 5, 5]]) |
| A.attach_grad() |
| with mx.autograd.record(): |
| B = npx.roi_pooling(A, roi, pooled_size=(3, 3), spatial_scale=1) |
| assert B.shape == (1, 1, 3, 3) |
| assert B[0][0][0][1] == 100 |
| B.backward() |
| assert A.grad.shape == (1, 1, H, W) |
| assert A.grad[0][0][0][0] == 1 |
| |
| |
| @use_np |
| @pytest.mark.skip(reason='times out on (generally speaking) large tensors') |
| def test_save_load(): |
| A = np.ones((2, INT_OVERFLOW), dtype='int8') |
| A[0][100] = 100 |
| npx.save('my_tensor', A) |
| B = np.array(npx.load('my_tensor')) |
| assert B[0].shape == (2, INT_OVERFLOW) |
| assert B[0][0][100] == 100 |
| |
| |
| @use_np |
| def test_gather_nd(): |
| A = np.ones((1, 2, INT_OVERFLOW)) |
    A[0, 1, 100] = 100
| A.attach_grad() |
| with mx.autograd.record(): |
| B = npx.gather_nd(data=A, \ |
                          indices=np.array([[0, 0], [0, 1], [INT_OVERFLOW-1, 100]], \
| dtype='int64')) |
| assert B.shape == (2, ) |
| assert B[0] == 1 and B[1] == 100 |
| B.backward() |
| assert A.grad.shape == (1, 2, INT_OVERFLOW) |
| assert A.grad[0, 0, 0] == 0 |
| assert A.grad[0, 0, INT_OVERFLOW-1] == 1 |
| |
| |
| @use_np |
| def test_random_bernoulli(): |
| prob = np.zeros((INT_OVERFLOW)) |
| prob[0] = 1 |
| A = npx.random.bernoulli(prob=prob, size=(INT_OVERFLOW)) |
| assert A.shape == (INT_OVERFLOW, ) |
| assert A[0] == 1 |
| assert A[1] == 0 |
| |
| |
| @use_np |
| def test_cumsum(): |
| input = np.ones((INT_OVERFLOW, 3), dtype='int64') |
| input.attach_grad() |
| with mx.autograd.record(): |
| output = np.cumsum(input, axis=0, dtype='int64') |
| output.backward() |
| assert output.shape == input.shape |
| assert output[-1, -1] == INT_OVERFLOW |
| assert input.grad.shape == input.shape |
| assert input.grad[0, 0] == INT_OVERFLOW |
| assert input.grad[-1, -1] == 1 |
| |
| |
| @use_np |
| def test_round(): |
| input = np.ones((INT_OVERFLOW, 2)) |
| input[INT_OVERFLOW-1][0] = 1.6 |
| output = np.round(input) |
| assert output.shape == (INT_OVERFLOW, 2) |
| assert output[-1][0] == 2 |
| |
| |
| @use_np |
| def test_cross(): |
| inp = np.ones((INT_OVERFLOW, 3)) |
| inp2 = np.ones((INT_OVERFLOW, 2)) |
| inp[-1] = np.array([1, 2, 3]) |
| inp2[-1] = np.array([4, 5]) |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.cross(inp, inp2) |
| out.backward() |
| assert out.shape == (INT_OVERFLOW, 3) |
| assert out[0, 0] == -1 and out[0, 1] == 1 and out[0, 2] == 0 |
| assert out[-1, 0] == -15 and out[-1, 1] == 12 and out[-1, 2] == -3 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[0, 0] == 1 and inp.grad[0, 1] == -1 and inp.grad[0, 2] == 0 |
| assert inp.grad[-1, 0] == 5 and inp.grad[-1, 1] == -4 and inp.grad[-1, 2] == -1 |
| |
| |
| @use_np |
| def test_array_split(): |
| inp = np.ones((INT_OVERFLOW, 2)) |
| inp[0][0] = 0 |
| inp[-1][-1] = 2 |
| out = np.array_split(inp, 2) |
    assert out[0].shape == (HALF_INT_OVERFLOW, 2)
    assert out[1].shape == (HALF_INT_OVERFLOW, 2)
| assert out[0][0][0] == 0 |
| assert out[1][-1][-1] == 2 |
| |
| |
| @use_np |
| def test_take(): |
| inp = np.zeros((INT_OVERFLOW, 2)) |
| inp[0], inp[-1] = 1, 2 |
| indices = np.array([[0],[INT_OVERFLOW-1]], dtype='int64') |
| inp.attach_grad() |
| indices.attach_grad() |
| with mx.autograd.record(): |
| out = np.take(inp, indices, axis=0) |
| out.backward() |
| assert out.shape == (2, 1, 2) |
| assert out[0, 0, 0] == 1 and out[1, 0, 0] == 2 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[0, 0] == 1 and inp.grad[-1, 0] == 1 |
| assert indices.grad.shape == indices.shape |
    assert indices.grad[0, 0] == 0  # integer indices receive zero gradient
| |
| |
| @use_np |
| def test_std(): |
    N = 2**20
| inp = np.zeros((2, INT_OVERFLOW)) |
| inp[-1, -1] = N |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.std(inp, axis=1) |
| out.backward() |
| assert out.shape == (2, ) |
| ref = ((float(N)/INT_OVERFLOW)**2 * (INT_OVERFLOW-1))**0.5 |
| assert_almost_equal(out[1], ref, rtol=1e-5, atol=1e-5) |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1] == 0 |
| |
| |
| @use_np |
| def test_var(): |
    N = 2**20
| inp = np.zeros((2, INT_OVERFLOW)) |
| inp[-1, -1] = N |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.var(inp, axis=1) |
| out.backward() |
| assert out.shape == (2, ) |
| ref = (float(N)/INT_OVERFLOW)**2 * (INT_OVERFLOW-1) |
    assert_almost_equal(out[1], ref, rtol=1e-5, atol=1e-5)
    assert inp.grad.shape == inp.shape
| assert inp.grad[-1, -1] == 0 |
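

# Reference sketch (hypothetical helper) for the ref formulas in test_std and
# test_var: with a single entry equal to N among L otherwise-zero elements,
# the mean is N/L and the variance simplifies to (N/L)**2 * (L - 1).
def _var_of_single_spike(N, L):
    m = float(N) / L
    return ((N - m) ** 2 + (L - 1) * m ** 2) / L  # == (N/L)**2 * (L - 1)
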
| |
| @use_np |
| def test_rollaxis(): |
| inp = np.zeros((1, 1, 2, INT_OVERFLOW, 1)) |
| inp[-1, -1, -1, -1, -1] = 1 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.rollaxis(inp, 3) |
| out.backward() |
| assert out.shape == (INT_OVERFLOW, 1, 1, 2, 1) |
| assert out[-1, -1, -1, -1, -1] == 1 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1, -1, -1, -1] == 1 |
| |
| |
| @use_np |
| def test_vstack(): |
| inp1 = np.zeros((INT_OVERFLOW, 1)) |
| inp2 = np.ones((INT_OVERFLOW, 1)) |
| inp1.attach_grad() |
| inp2.attach_grad() |
| with mx.autograd.record(): |
| out1 = np.vstack((inp1, inp2)) |
| out1.backward() |
| assert out1.shape == (DOUBLE_INT_OVERFLOW, 1) |
| assert out1[INT_OVERFLOW-1, 0] == 0 and out1[-1, 0] == 1 |
| assert inp1.grad.shape == inp1.shape |
| assert inp1.grad[-1, -1] == 1 |
| with mx.autograd.record(): |
| out2 = np.vstack((inp1.flatten(), inp2.flatten())) |
| out2.backward() |
| assert out2.shape == (2, INT_OVERFLOW) |
| assert out2[0, -1] == 0 and out2[1, -1] == 1 |
| assert inp2.grad.shape == inp2.shape |
| assert inp2.grad[-1, -1] == 1 |
| |
| |
| @use_np |
| def test_ediff1d(): |
| inp = np.zeros((2, INT_OVERFLOW)) |
| inp[0, -1], inp[1, 0] = 1, 3 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.ediff1d(inp, to_begin=-99, to_end=np.array([88, 99])) |
| out.backward() |
| assert out.shape == (2 * INT_OVERFLOW - 1 + 1 + 2, ) |
| assert out[INT_OVERFLOW-1] == 1 and out[INT_OVERFLOW] == 2 and\ |
| out[INT_OVERFLOW+1] == -3 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[0, 0] == -1 and inp.grad[-1, -1] == 1 |
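

# Shape sketch using plain numpy (hypothetical, small-scale): ediff1d flattens
# its input, so the output length is (input.size - 1) + len(to_begin) +
# len(to_end) -- here 2 * INT_OVERFLOW - 1 + 1 + 2, as asserted above.
def _ediff1d_small_demo():
    out = _np.ediff1d(_np.zeros((2, 5)), to_begin=-99, to_end=[88, 99])
    assert out.shape == (2 * 5 - 1 + 1 + 2, )
    return out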
| |
| |
| @use_np |
| def test_split(): |
| inp = np.ones((INT_OVERFLOW, 2)) |
| inp[INT_OVERFLOW // 2] = 2 |
| inp.attach_grad() |
| with mx.autograd.record(): |
        out = np.split(inp, 2, axis=0)
| out[1].backward() |
| assert out[0].shape == (INT_OVERFLOW // 2, 2) |
| assert out[1].shape == (INT_OVERFLOW // 2, 2) |
| assert out[0][0, 0] == 1 |
| assert out[1][0, 0] == 2 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[0][0] == 0 and inp.grad[-1][-1] == 1 |
| |
| |
| @use_np |
| def test_hsplit(): |
| inp = create_2d_np_tensor(rows=INT_OVERFLOW, columns=4) |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.hsplit(inp, 2) |
| out[1].backward() |
| assert out[1].shape == (INT_OVERFLOW, 2) |
| assert out[0].shape == (INT_OVERFLOW, 2) |
| assert out[1][-1][0] == INT_OVERFLOW-1 |
| assert out[0][-1][1] == INT_OVERFLOW-1 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[0][0] == 0 and inp.grad[-1][-1] == 1 |
| |
| |
| @use_np |
| def test_vsplit(): |
| inp = create_2d_np_tensor(rows=INT_OVERFLOW, columns=4) |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.vsplit(inp, 2) |
| out[1].backward() |
| assert out[1].shape == (INT_OVERFLOW//2, 4) |
| assert out[0].shape == (INT_OVERFLOW//2, 4) |
    assert out[0][-1][0] == INT_OVERFLOW // 2 - 1
| assert out[1][-1][1] == INT_OVERFLOW - 1 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[INT_OVERFLOW//2 - 1][-1] == 0 and inp.grad[-1][-1] == 1 |
| |
| |
| @use_np |
| def test_dsplit(): |
| inp = np.arange(INT_OVERFLOW, dtype=np.int64).reshape(INT_OVERFLOW, 1, 1) |
| inp = np.broadcast_to(inp, shape=(inp.shape[0], 2, 2)) |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.dsplit(inp, 2) |
| out[1].backward() |
| assert out[1].shape == (INT_OVERFLOW, 2, 1) |
| assert out[0].shape == (INT_OVERFLOW, 2, 1) |
| assert out[0][-1][0][0] == INT_OVERFLOW - 1 |
| assert out[1][0][1][0] == 0 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1][-1][0] == 0 and inp.grad[0][1][1] == 1 |
| |
| |
| @use_np |
| def test_unique(): |
| inp = np.zeros((2, HALF_INT_OVERFLOW)) |
| assertRaises(ValueError, np.unique, inp, axis=1) |
| |
| |
| @use_np |
| def test_repeat(): |
| inp = np.ones((2, HALF_INT_OVERFLOW)) |
| assertRaises(ValueError, np.repeat, inp, repeats=2, axis=1) |
| |
| |
| @use_np |
| def test_indices(): |
| assertRaises(ValueError, np.indices, (2, HALF_INT_OVERFLOW)) |
| |
| |
| @use_np |
| def test_tril_indices(): |
| N = 2**16 |
| data = np.tril_indices(N, -1) |
    assert data[0].shape == (N * (N-1) // 2, )  # 1 + 2 + ... + (N-1)
| assert data[0][-1] == N - 1 and data[1][-1] == N - 2 |
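

# Count sketch (hypothetical helper): with k == -1 there are
# 1 + 2 + ... + (N-1) == N * (N-1) / 2 strictly-lower-triangular entries, the
# shape asserted above; the same count applies to test_triu_indices and
# test_triu_indices_from further down.
def _strict_triangle_count(N):
    return sum(range(N))  # == N * (N - 1) // 2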
| |
| |
| @use_np |
| def test_tril_indices_extreme(): |
| data = np.tril_indices(n=2, m=INT_OVERFLOW+2, k=INT_OVERFLOW) |
| assert data[0].shape == (INT_OVERFLOW + 1 + INT_OVERFLOW + 2, ) |
| assert data[0][-1] == 1 and data[1][-1] == INT_OVERFLOW + 1 |
| assert data[0][INT_OVERFLOW] == 0 and data[1][INT_OVERFLOW] == INT_OVERFLOW |
| |
| |
| @use_np |
| def test_diff(): |
| inp = np.zeros((2, INT_OVERFLOW+1)) |
| inp[-1, -1] = 100 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out1 = np.diff(inp) |
| out1.backward() |
| assert out1.shape == (2, INT_OVERFLOW) |
| assert out1[-1, -1] == 100 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1] == 1 |
| with mx.autograd.record(): |
| out2 = np.diff(inp, axis=0) |
| out2.backward() |
| assert out2.shape == (1, INT_OVERFLOW+1) |
| assert out2[-1, -1] == 100 |
| assert inp.grad.shape == inp.shape |
    # out2 = inp[1] - inp[0], so the upstream ones give gradients +1 and -1
    assert inp.grad[1, -1] == 1 and inp.grad[0, -1] == -1
| |
| |
| @use_np |
| def test_kron(): |
    # tensor-tensor case
    inp1 = np.array([5, 10], dtype='float64')
    inp2 = np.ones((INT_OVERFLOW), dtype='float64')
| inp2[-1] = 3 |
| inp1.attach_grad() |
| inp2.attach_grad() |
| with mx.autograd.record(): |
| out1 = np.kron(inp1, inp2) |
| out1.backward() |
| assert out1.shape == (DOUBLE_INT_OVERFLOW, ) |
| assert out1[INT_OVERFLOW-1] == 15 and out1[-1] == 30 |
| assert inp1.grad.shape == inp1.shape and inp2.grad.shape == inp2.shape |
| assert inp1.grad[0] == INT_OVERFLOW + 2 |
| assert inp2.grad[-1] == 15 |
    # scalar-tensor case
| inp3 = np.array([3], dtype='float64') |
| inp3.attach_grad() |
| with mx.autograd.record(): |
| out2 = np.kron(inp3, inp2) |
| out2.backward() |
| assert out2.shape == (INT_OVERFLOW, ) |
| assert out2[INT_OVERFLOW-1] == 9 |
| assert inp3.grad.shape == inp3.shape and inp2.grad.shape == inp2.shape |
| assert inp3.grad[0] == INT_OVERFLOW + 2 |
| assert inp2.grad[-1] == 3 |
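

# Gradient sketch using plain numpy (hypothetical, small-scale): for
# out = kron(a, b) with an upstream gradient of ones, every a[i] receives
# sum(b) and every b[j] receives sum(a), which is why inp1.grad[0] ==
# (INT_OVERFLOW - 1) + 3 == INT_OVERFLOW + 2 and inp2.grad[-1] == 5 + 10 == 15
# in the tensor-tensor case above.
def _kron_grad_small_demo():
    a = _np.array([5.0, 10.0])
    b = _np.array([1.0, 1.0, 3.0])
    # sum(kron(a, b)) factors as a.sum() * b.sum(), hence the sum rule above
    assert _np.kron(a, b).sum() == a.sum() * b.sum()
    return b.sum(), a.sum()  # per-element grads of a and b respectively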
| |
| |
| @use_np |
| def test_logspace(): |
| data = np.logspace(1.0, 10.0, INT_OVERFLOW) |
| assert data.shape == (INT_OVERFLOW, ) |
| assert data[0] == 10 and data[-1] == 10000000000 |
| assert_almost_equal(data[HALF_INT_OVERFLOW], np.array(10**5.5), \ |
| rtol=1e-3, atol=1e-5) |
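

# Midpoint sketch using plain numpy (hypothetical, small-scale): logspace
# interpolates the exponents linearly, so the middle element is
# 10 ** ((1.0 + 10.0) / 2) == 10 ** 5.5, as asserted above; linspace below is
# linear in the values themselves, so its midpoint is (0 + 1000) / 2 == 500.
def _logspace_midpoint_demo():
    data = _np.logspace(1.0, 10.0, 9)
    assert math.isclose(data[4], 10 ** 5.5, rel_tol=1e-6)
    return data[4]
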
| |
| @use_np |
| def test_linspace(): |
| data = np.linspace(0, 1000, INT_OVERFLOW) |
| assert data.shape == (INT_OVERFLOW, ) |
| assert data[0] == 0 and data[-1] == 1000 |
| assert data[HALF_INT_OVERFLOW] == 500 |
| |
| |
| @use_np |
| def test_histogram(): |
| inp = np.ones((INT_OVERFLOW, 2)) |
| inp[-1, -1] = 2 |
| hist, _ = np.histogram(inp, np.array([0.5, 1.5, 2.5, 3.5])) |
| assert hist.shape == (3, ) |
| assert hist[0] == int(2 * INT_OVERFLOW - 1) and hist[1] == 1 |
| |
| |
| @use_np |
| def test_nan_to_num(): |
| inp = np.zeros((3, INT_OVERFLOW)) |
| inp[:, -1] = np.array([np.nan, np.inf, -np.inf]) |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.nan_to_num(inp, nan=0, posinf=1, neginf=-1) |
| out.backward() |
| assert out.shape == inp.shape |
| assert out[0, -1] == 0 and out[1, -1] == 1 and out[2, -1] == -1 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[0, -1] == 0 and inp.grad[1, -1] == 0 |
| assert inp.grad[0, 0] == 1 and inp.grad[2, -1] == 0 |
| |
| |
| @use_np |
| def test_interp(): |
| xp = np.array([1, 2, 3]) |
| fp = np.array([3, 2, 1]) |
| inp = np.ones((2, INT_OVERFLOW)) |
| inp[-1, -1] = 2.5 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| B = np.interp(inp, xp, fp) |
| B.backward() |
| assert B.shape == inp.shape |
| assert B[-1, -1] == 1.5 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1] == 0 |
| |
| |
| @use_np |
| @pytest.mark.skip(reason='times out (20 mins)') |
| def test_edge_padding(): |
| inp = create_2d_np_tensor(rows=INT_OVERFLOW, columns=4, dtype=np.int64) |
| out = np.pad(inp, ((1, 1), (1, 1)), "edge") |
| assert out[0][0] == 0 |
| assert out[-1][-1] == INT_OVERFLOW - 1 |
| assert out.shape == (INT_OVERFLOW + 2, 4 + 2) |
| |
| |
| @use_np |
| def test_constant_padding(): |
| inp = create_2d_np_tensor(rows=INT_OVERFLOW, columns=4, dtype=np.int64) |
| out = np.pad(inp, ((1, 1), (1, 1)), "constant") |
| assert out[0][0] == 0 |
| assert out[-1][-1] == 0 |
| assert out.shape == (INT_OVERFLOW + 2, 4 + 2) |
| |
| |
| @use_np |
| @pytest.mark.skip(reason='times out (20 mins)') |
| def test_minimum_padding(): |
| inp = create_2d_np_tensor(rows=INT_OVERFLOW, columns=4, dtype=np.int64) |
| out = np.pad(inp, ((1, 1), (1, 1)), "minimum") |
| assert out[0][-1] == 0 |
| assert out[-1][-1] == 0 |
| assert out.shape == (INT_OVERFLOW + 2, 4 + 2) |
| |
| |
| @use_np |
| @pytest.mark.skip(reason='times out (20 mins)') |
| def test_reflection_padding(): |
| inp = create_2d_np_tensor(rows=INT_OVERFLOW, columns=4, dtype=np.int64) |
| out = np.pad(inp, ((1, 1), (1, 1)), "reflect") |
| assert out[0][-1] == 0 + 1 |
| assert out[-1][0] == INT_OVERFLOW - 1 - 1 |
| assert out.shape == (INT_OVERFLOW + 2, 4 + 2) |
| |
| |
| @use_np |
| @pytest.mark.skip(reason='times out (20 mins)') |
| def test_symmetric_padding(): |
| inp = create_2d_np_tensor(rows=INT_OVERFLOW, columns=4, dtype=np.int64) |
| out = np.pad(inp, ((1, 1), (1, 1)), "symmetric") |
| assert out[0][0] == 0 |
| assert out[-1][-1] == INT_OVERFLOW - 1 |
| assert out.shape == (INT_OVERFLOW + 2, 4 + 2) |
| |
| |
| @use_np |
| def test_fill_diagonal(): |
| # test 2d square matrix case |
| N = 2**16 |
| data1 = np.zeros((N, N)) |
| np.fill_diagonal(data1, [1, 2, 3, 4]) |
| assert data1[0, 0] == 1 and data1[-1, -1] == 4 |
| # test 2d long matrix case with wrap |
| data2 = np.zeros((INT_OVERFLOW, 2)) |
| np.fill_diagonal(data2, [1, 2], wrap=True) |
| assert data2[0, 0] == 1 and data2[-1, -1] == 2 |
| |
| |
| @use_np |
| def test_insert(): |
| inp = np.zeros((INT_OVERFLOW, 2)) |
| inp2 = np.ones((INT_OVERFLOW)) |
| inp2[-1] = 2 |
| inp3 = inp.flatten() |
| out = np.insert(inp, 1, inp2, axis=1) |
| out2 = np.insert(inp3, slice(1, 2), np.array([5, 6])) |
| assert out.shape == (INT_OVERFLOW, 3) |
| assert out2.shape == (INT_OVERFLOW * 2 + 2,) |
| assert out[0, 1] == 1 and out[-1, 1] == 2 |
| assert out2[1] == 5 and out2[2] == 6 |
| assertRaises(MXNetError, np.insert, arr=inp3, obj=np.array([2, 2], dtype=np.int64), values=np.array([5, 6])) |
| |
| |
| @use_np |
| def test_moveaxis(): |
| inp = np.zeros((2, 1, INT_OVERFLOW)) |
| inp[0, 0, -1], inp[1, 0, -1] = 1, 2 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = np.moveaxis(inp, 2, 0) |
| out.backward() |
| assert out.shape == (INT_OVERFLOW, 2, 1) |
| assert out[-1, 0, 0] == 1 and out[-1, 1, 0] == 2 |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[-1, -1, -1] == 1 |
| |
| |
| @use_np |
| def test_newaxis(): |
| inp = np.zeros((2, INT_OVERFLOW)) |
| inp[-1, -1] = 1 |
| out1 = inp[np.newaxis, :, :] |
| assert out1.shape == (1, 2, INT_OVERFLOW) |
| assert out1[0, -1, -1] == 1 |
| out1 = out1[:, :, :, np.newaxis] |
| assert out1.shape == (1, 2, INT_OVERFLOW, 1) |
| assert out1[0, -1, -1, 0] == 1 |
| |
| |
| @use_np |
| def test_triu_indices(): |
| N = 2**16 |
| data = np.triu_indices(N, 1) |
    assert data[0].shape == (N * (N-1) // 2, )  # 1 + 2 + ... + (N-1)
| assert data[0][-1] == N - 2 and data[1][-1] == N - 1 |
| |
| |
| @use_np |
| def test_triu_indices_from(): |
| N = 2**16 |
| arr = np.zeros((N, N)) |
| data = np.triu_indices_from(arr, 1) |
    assert data[0].shape == (N * (N-1) // 2, )  # 1 + 2 + ... + (N-1)
| assert data[0][-1] == N - 2 and data[1][-1] == N - 1 |
| |
| |
| @use_np |
| def test_empty(): |
| data = np.empty((2, INT_OVERFLOW), dtype='float64') |
| data = data + 1 |
| assert data.shape == (2, INT_OVERFLOW) |
| assert data[-1, -1] == 1 |
| |
| |
| @use_np |
| def test_shape_reshape(): |
| inp = np.zeros((2, INT_OVERFLOW)) |
| inp[0, -1] = 1 |
| assert np.shape(inp) == (2, INT_OVERFLOW) |
| out = np.reshape(inp, (INT_OVERFLOW, 2)) |
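    # reshape returns a new array; the source keeps its original shape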
| assert np.shape(inp) == (2, INT_OVERFLOW) |
| assert np.shape(out) == (INT_OVERFLOW, 2) |
| assert out[HALF_INT_OVERFLOW-1, 1] == 1 |
| |
| |
| @use_np |
| def test_copy(): |
| inp = np.zeros((2, INT_OVERFLOW)) |
| inp[1, -1] = 2 |
| out = np.copy(inp) |
| out[0, -1] = 3 |
| assert out.shape == inp.shape |
| assert inp[0, -1] == 0 and inp[1, -1] == 2 |
| assert out[0, -1] == 3 and inp[1, -1] == 2 |
| |
| |
| @use_np |
| def test_broadcast_arrays(): |
| inp1 = np.ones((INT_OVERFLOW)) |
| inp1[-1] = 2 |
| inp2 = np.array([[3], [4]]) |
| inp1.attach_grad() |
| inp2.attach_grad() |
| with mx.autograd.record(): |
| out = np.broadcast_arrays(inp1, inp2) |
| out[0].backward() |
| out[1].backward() |
| assert out[0].shape == (2, INT_OVERFLOW) |
| assert out[0][-1, -1] == 2 |
| assert out[1].shape == (2, INT_OVERFLOW) |
| assert out[1][0, -1] == 3 and out[1][1, -1] == 4 |
| assert inp1.grad.shape == inp1.shape |
| assert inp1.grad[-1] == 2 |
| assert inp2.grad.shape == inp2.shape |
| assert inp2.grad[-1, -1] == INT_OVERFLOW |
| |
| |
| @use_np |
| def test_inner(): |
| inp1 = np.ones((INT_OVERFLOW)) |
| inp2 = np.zeros((INT_OVERFLOW)) |
| inp2[-1] = 3 |
| inp1.attach_grad() |
| inp2.attach_grad() |
| with mx.autograd.record(): |
| out = np.inner(inp1, inp2) |
| out.backward() |
| assert out.shape == () |
| assert out == 3 |
| assert inp1.grad.shape == inp1.shape |
| assert inp1.grad[-1] == 3 |
| assert inp2.grad.shape == inp2.shape |
| assert inp2.grad[-1] == 1 |
| |
| |
| @use_np |
| def test_matmul(): |
| inp1 = np.ones((1, 2, INT_OVERFLOW), dtype='float64') |
| inp2 = np.ones((INT_OVERFLOW, 1), dtype='float64') |
| inp1.attach_grad() |
| inp2.attach_grad() |
| with mx.autograd.record(): |
| out = np.matmul(inp1, inp2) |
| out.backward() |
| assert out.shape == (1, 2, 1) |
| assert out[0, 0, 0] == INT_OVERFLOW |
| assert inp1.grad.shape == inp1.shape |
| assert inp1.grad[-1, -1, -1] == 1 |
| assert inp2.grad.shape == inp2.shape |
| assert inp2.grad[-1, -1] == 2 |
| |
| |
| @use_np |
| def test_outer(): |
| inp1 = np.ones((INT_OVERFLOW), dtype='float64') |
| inp1[-1] = 2 |
| inp2 = np.ones((2), dtype='float64') |
| inp2[-1] = 3 |
| inp1.attach_grad() |
| inp2.attach_grad() |
| with mx.autograd.record(): |
| out = np.outer(inp1, inp2) |
| out.backward() |
| assert out.shape == (INT_OVERFLOW, 2) |
| assert out[-1, 0] == 2 and out[0, -1] == 3 and out[-1, -1] == 6 |
| assert inp1.grad.shape == inp1.shape |
| assert inp1.grad[0] == 2 + 3 - 1 |
| assert inp2.grad.shape == inp2.shape |
| assert inp2.grad[0] == INT_OVERFLOW + 2 - 1 |
| |
| |
| @use_np |
| def test_tensordot(): |
| inp1 = np.ones((1, INT_OVERFLOW, 2), dtype='float64') |
| inp1[0, -1, 1] = 2 |
| inp2 = np.ones((INT_OVERFLOW, 1, 1), dtype='float64') |
| inp2[-1, 0, 0] = 3 |
| inp1.attach_grad() |
| inp2.attach_grad() |
| with mx.autograd.record(): |
| out = np.tensordot(inp1, inp2, axes=[[0, 1], [1, 0]]) |
| out.backward() |
| assert out.shape == (2, 1) |
| assert out[0] == INT_OVERFLOW + 3 - 1 and out[1] == INT_OVERFLOW + 6 - 1 |
| assert inp1.grad.shape == inp1.shape |
| assert inp1.grad[-1, -1, -1] == 3 and inp1.grad[0, 0, 0] == 1 |
| assert inp2.grad.shape == inp2.shape |
| assert inp2.grad[-1, -1, -1] == 3 and inp2.grad[0, 0, 0] == 2 |
| |
| |
| @use_np |
| def test_vdot(): |
| inp1 = np.ones((2, INT_OVERFLOW)) |
| inp2 = np.zeros((INT_OVERFLOW, 2)) |
| inp1[0, -1] = 2 |
| inp2[HALF_INT_OVERFLOW-1, 1] = 3 |
| inp1.attach_grad() |
| inp2.attach_grad() |
| with mx.autograd.record(): |
| out = np.vdot(inp1, inp2) |
| out.backward() |
| assert out.shape == () |
| assert out == 6 |
| assert inp1.grad.shape == inp1.shape |
| assert inp1.grad[0, -1] == 3 and inp1.grad[-1, -1] == 0 |
| assert inp2.grad.shape == inp2.shape |
| assert inp2.grad[HALF_INT_OVERFLOW-1, 1] == 2 and inp2.grad[-1, -1] == 1 |
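

# Index sketch (hypothetical helper): vdot flattens both arguments, and the
# two nonzero entries above land on the same flat position, so the result is
# 2 * 3 == 6. A row-major (r, c) entry in an array with ncols columns sits at
# flat index r * ncols + c; both (0, INT_OVERFLOW - 1) with INT_OVERFLOW
# columns and (HALF_INT_OVERFLOW - 1, 1) with 2 columns map to INT_OVERFLOW - 1.
def _flat_index(r, c, ncols):
    return r * ncols + c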
| |
| |
| @use_np |
| def test_dot(): |
| inp1 = np.zeros((1, INT_OVERFLOW)) |
| inp2 = np.zeros((INT_OVERFLOW, 1)) |
| inp1[-1, -1] = 2 |
| inp2[-1, -1] = 3 |
| inp1.attach_grad() |
| inp2.attach_grad() |
| with mx.autograd.record(): |
| out = np.dot(inp1, inp2) |
| out.backward() |
| assert out.shape == (1, 1) |
| assert out[0, 0] == 6 |
| assert inp1.grad.shape == inp1.shape |
| assert inp1.grad[-1, -1] == 3 |
| assert inp2.grad.shape == inp2.shape |
| assert inp2.grad[-1, -1] == 2 |
| |
| |
| @use_np |
| def test_convolution(): |
| dim = 2 |
| batch_size = 1 |
| channel = 3 |
| height = SMALL_Y |
| width = LARGE_X // 3 |
| num_filter = 4 |
    kernel = (3,) * dim  # => shape = (3, 3)
| |
    inp = mx.np.ones(shape=(batch_size, channel, height, width))
| weight = mx.np.ones(shape=(num_filter, channel, kernel[0], kernel[1])) |
    bias = mx.np.array(num_filter,)  # unused since no_bias=True below
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = mx.npx.convolution(data=inp, weight=weight, num_filter=num_filter, \ |
| kernel=kernel, stride=(SMALL_Y, SMALL_Y), no_bias=True) |
    assert out.shape == (batch_size, num_filter, 1, (width + SMALL_Y - 1) // SMALL_Y)
| assert out[0][0][0][0] == channel * kernel[0] * kernel[1] |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[0][0][0][0] == 0 |
| |
| |
| @use_np |
| def test_deconvolution(): |
| dim = 2 |
| batch_size = 1 |
| channel = 3 |
| height = SMALL_Y |
| width = LARGE_X // 5 |
| num_filter = 4 |
    kernel = (4,) * dim  # => shape = (4, 4)
| |
    inp = mx.np.ones(shape=(batch_size, channel, 1, width))
| weight = mx.np.ones(shape=(channel, num_filter, kernel[0], kernel[1])) |
    bias = mx.np.array(num_filter,)  # unused since no_bias=True below
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = mx.npx.deconvolution(data=inp, weight=weight, num_filter=num_filter, \ |
| kernel=kernel, no_bias=True) |
    assert out.shape == (batch_size, num_filter, kernel[0], width + kernel[1] - 1)
| assert out[0][0][kernel[0]//2][kernel[1]] == channel * num_filter |
| assert inp.grad.shape == inp.shape |
| assert inp.grad[0][0][0][0] == 0 |
| |
| |
| @use_np |
| def test_dropout(): |
| shape = (LARGE_X, SMALL_Y) |
| inp = mx.np.ones(shape=shape) |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = npx.dropout(inp, p=0.5, cudnn_off=True) |
| out.backward() |
| assert out.shape == shape |
| assert _np.count_nonzero(out[0] == 2) != 0 |
| assert inp.grad.shape == shape |
| assert _np.count_nonzero(inp.grad[0] == 2) != 0 |
| |
| |
| @use_np |
| def test_log_softmax(): |
    LOG_SOFTMAX_VAL = -18.420681  # == -log(LARGE_X) for a uniform input along axis 0
    shape = (LARGE_X, SMALL_Y)
    axis = 0
| inp = np.ones(shape=shape) |
| inp.attach_grad() |
| with mx.autograd.record(): |
        out = npx.log_softmax(inp, axis=axis)
| out.backward() |
| assert out.shape == shape |
| assert_almost_equal(out[-1][-1], LOG_SOFTMAX_VAL, atol=1e-3, rtol=1e-3) |
| assert inp.grad.shape == shape |
| assert_almost_equal(inp.grad[0][0], 0.0, atol=1e-3, rtol=1e-3) |
| |
| |
| @use_np |
| def test_relu(): |
| shape = (LARGE_X, SMALL_Y) |
| inp = np.ones(shape) |
| inp[:, shape[1] // 2:shape[1]] = -1 |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = npx.relu(inp) |
| out.backward() |
| assert out.shape == shape |
| assert out[0][0] == 1 |
| assert out[-1][-1] == 0 |
| assert inp.grad.shape == shape |
| assert inp.grad[0][0] == 1 |
| |
| |
| @use_np |
| def test_leaky_relu(): |
| inp = -1 * mx.np.ones(shape=(LARGE_X, SMALL_Y)) |
| inp.attach_grad() |
| |
| def check_leaky(): |
| with mx.autograd.record(): |
| res = mx.npx.leaky_relu(inp, act_type="leaky", slope=0.3) |
| res.backward() |
| assert_almost_equal(res[-1][-1], 0.3 * inp[-1][-1], atol=1e-3, rtol=1e-3) |
| assert_almost_equal(inp.grad[0][-1], -0.3 * inp[-1][-1], atol=1e-3, rtol=1e-3) |
| |
| def check_elu(): |
| with mx.autograd.record(): |
| res = mx.npx.leaky_relu(inp, act_type="elu", slope=0.3) |
| res.backward() |
| assert_almost_equal(res[-1][-1], 0.3*(_np.exp(inp[-1][-1])-1), atol=1e-3, rtol=1e-3) |
| assert_almost_equal(inp.grad[-1][0], 0.3*_np.exp(inp[-1][-1]), atol=1e-3, rtol=1e-3) |
| |
| def check_selu(): |
| lam = 1.0507009873554804934193349852946 |
| alpha = 1.6732632423543772848170429916717 |
| with mx.autograd.record(): |
| res = mx.npx.leaky_relu(inp, act_type="selu") |
| res.backward() |
| assert_almost_equal(res[-1][-1], (lam * alpha * (_np.exp(inp[-1][-1])-1)), atol=1e-3, rtol=1e-3) |
| assert_almost_equal(inp.grad[0][0], 0.590423, atol=1e-3, rtol=1e-3) |
| |
| check_leaky() |
| check_elu() |
| check_selu() |
| |
| |
| @use_np |
| def test_norm(): |
| shape = (LARGE_X * 2, SMALL_Y // 2) |
| inp = np.ones(shape) |
| inp.attach_grad() |
| with mx.autograd.record(): |
| out = npx.norm(inp, ord=2, axis=1) |
| out.backward() |
| assert out.shape == (shape[0],) |
| assert out[shape[0] - 1] == 5 |
| assert inp.grad.shape == shape |
| assert_almost_equal(inp.grad[0][0], 1/5, atol=1e-3, rtol=1e-3) |
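

# Reference sketch (hypothetical helper): each row holds SMALL_Y // 2 == 25
# ones, so its L2 norm is sqrt(25) == 5, and d||x|| / dx_i == x_i / ||x||,
# i.e. 1/5 per entry, matching the asserts above.
def _l2_norm_of_ones(n):
    return math.sqrt(n)  # gradient of every entry is 1 / math.sqrt(n)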
| |
| |
| @use_np |
| def test_embedding(): |
| inp = np.arange(0, 4, dtype=np.int32).reshape(2, 2) |
| vec = np.ones((4, INT_OVERFLOW)) |
| vec[0][INT_OVERFLOW-1] = 3 |
| vec[1][INT_OVERFLOW//2] = 2 |
| vec[2][1] = 2 |
| vec[3][0] = 0 |
| out = npx.embedding(inp, vec, 4, INT_OVERFLOW) |
| assert out[0][0][INT_OVERFLOW-1] == 3 |
| assert out[0][0][0] == 1 |
| assert out[0][1][INT_OVERFLOW//2] == 2 |
| assert out[0][1][1] == 1 |
| assert out[1][0][1] == 2 |
| assert out[1][0][INT_OVERFLOW//2] == 1 |
| assert out[1][1][0] == 0 |
| assert out[1][1][INT_OVERFLOW-1] == 1 |