# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: skip-file
from __future__ import absolute_import
import sys
import unittest
import itertools
import numpy as _np
import platform
import mxnet as mx
import scipy.stats as ss
from nose.tools import assert_raises
from mxnet import np, npx
from mxnet.gluon import HybridBlock
from mxnet.base import MXNetError
from mxnet.test_utils import same, assert_almost_equal, rand_shape_nd, rand_ndarray
from mxnet.test_utils import check_numeric_gradient, use_np, collapse_sum_like
from common import assertRaises, with_seed
import random
from mxnet.test_utils import verify_generator, gen_buckets_probs_with_ppf
from mxnet.numpy_op_signature import _get_builtin_op
from mxnet.test_utils import is_op_runnable, has_tvm_ops
from mxnet.operator import get_all_registered_operators
@with_seed()
@use_np
def test_np_tensordot():
class TestTensordot(HybridBlock):
def __init__(self, axes):
super(TestTensordot, self).__init__()
self._axes = axes
def hybrid_forward(self, F, a, b):
return F.np.tensordot(a, b, self._axes)
def tensordot_backward(a, b, axes=2):
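"""Reference gradient of np.tensordot w.r.t. both inputs.
Normalizes `axes` into the lists of summed axes of `a` and `b`, then permutes and
reshapes each operand into a 2-D matrix (remaining axes x summed axes), so the
tensordot reduces to a plain matrix product whose gradients are
dot(out_grad, new_b.T) and dot(new_a.T, out_grad); the results are reshaped and
transposed back with the inverse permutations. An all-ones out_grad is assumed,
matching the plain backward() call on the tensordot output below.
"""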
if (a.ndim < 1) or (b.ndim < 1):
raise ValueError('An input is zero-dim')
if _np.isscalar(axes):
a_axes_summed = [i + a.ndim - axes for i in range(axes)]
b_axes_summed = [i for i in range(axes)]
else:
if len(axes) != 2:
raise ValueError('Axes must consist of two arrays.')
a_axes_summed, b_axes_summed = axes
if _np.isscalar(a_axes_summed):
a_axes_summed = [a_axes_summed]  # use a list so the in-place normalization below works
if _np.isscalar(b_axes_summed):
b_axes_summed = [b_axes_summed]
for i in range(len(a_axes_summed)):
a_axes_summed[i] = (a_axes_summed[i] + a.ndim) % a.ndim
for i in range(len(b_axes_summed)):
b_axes_summed[i] = (b_axes_summed[i] + b.ndim) % b.ndim
if len(a_axes_summed) != len(b_axes_summed):
raise ValueError('Axes length mismatch')
a_axes_remained = []
for i in range(a.ndim):
if not (i in a_axes_summed):
a_axes_remained.append(i)
a_axes = a_axes_remained[:] + a_axes_summed[:]
b_axes_remained = []
for i in range(b.ndim):
if not (i in b_axes_summed):
b_axes_remained.append(i)
b_axes = b_axes_summed[:] + b_axes_remained[:]
ad1 = _np.prod([a.shape[i] for i in a_axes_remained]) if len(a_axes_remained) > 0 else 1
ad2 = _np.prod([a.shape[i] for i in a_axes_summed]) if len(a_axes_summed) > 0 else 1
bd1 = _np.prod([b.shape[i] for i in b_axes_summed]) if len(b_axes_summed) > 0 else 1
bd2 = _np.prod([b.shape[i] for i in b_axes_remained]) if len(b_axes_remained) > 0 else 1
out_grad = _np.ones((ad1, bd2))
new_a = _np.transpose(a, a_axes)
new_a_shape = new_a.shape[:]
new_a = new_a.reshape((ad1, ad2))
new_b = _np.transpose(b, b_axes)
new_b_shape = new_b.shape[:]
new_b = new_b.reshape((bd1, bd2))
reverse_a_axes = [0 for i in a_axes]
for i in range(len(a_axes)):
reverse_a_axes[a_axes[i]] = i
reverse_b_axes = [0 for i in b_axes]
for i in range(len(b_axes)):
reverse_b_axes[b_axes[i]] = i
grad_b = _np.dot(new_a.T, out_grad).reshape(new_b_shape)
grad_b = _np.transpose(grad_b, reverse_b_axes)
grad_a = _np.dot(out_grad, new_b.T).reshape(new_a_shape)
grad_a = _np.transpose(grad_a, reverse_a_axes)
return [grad_a, grad_b]
# test various input shapes, including zero-size dimensions
tensor_shapes = [
((3, 5), (5, 4), 1), # (a_shape, b_shape, axes)
((3,), (3,), 1),
((3, 4, 5, 3, 2), (5, 3, 2, 1, 2), 3),
((3, 5, 4, 3, 2), (2, 3, 5, 1, 2), [[1, 3, 4], [2, 1, 0]]),
((3, 5, 4), (5, 4, 3), [[1, 0, 2], [0, 2, 1]]),
((3, 5, 4), (5, 3, 4), [[2, 0], [-1, -2]]),
((2, 2), (2, 2), 2),
((3, 5, 4), (5, ), [[-2], [0]]),
((3, 5, 4), (5, ), [[1], [0]]),
((2,), (2, 3), 1),
((3,), (3,), 0),
((2,), (2, 3), 0),
((3, 5, 4), (5, ), 0),
((2, 3, 4), (4, 3, 2), [[], []]),
((3, 0), (0, 5), 1),
((3, 0), (0, 4), [[1], [0]]),
((0, 3), (3, 5), 1),
((0, 3), (5, 0), [[0], [1]])
]
for hybridize in [True, False]:
for a_shape, b_shape, axes in tensor_shapes:
for dtype in [_np.float32, _np.float64]:
test_tensordot = TestTensordot(axes)
if hybridize:
test_tensordot.hybridize()
a = rand_ndarray(shape=a_shape, dtype=dtype).as_np_ndarray()
b = rand_ndarray(shape=b_shape, dtype=dtype).as_np_ndarray()
a.attach_grad()
b.attach_grad()
np_out = _np.tensordot(a.asnumpy(), b.asnumpy(), axes)
with mx.autograd.record():
mx_out = test_tensordot(a, b)
assert mx_out.shape == np_out.shape
assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5)
mx_out.backward()
np_backward = tensordot_backward(a.asnumpy(), b.asnumpy(), axes)
assert_almost_equal(a.grad.asnumpy(), np_backward[0], rtol=1e-3, atol=1e-5)
assert_almost_equal(b.grad.asnumpy(), np_backward[1], rtol=1e-3, atol=1e-5)
# Test imperative once again
mx_out = np.tensordot(a, b, axes)
np_out = _np.tensordot(a.asnumpy(), b.asnumpy(), axes)
assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5)
# test numeric gradient
if (_np.prod(a_shape) > 0 and _np.prod(b_shape) > 0):
a_sym = mx.sym.Variable("a").as_np_ndarray()
b_sym = mx.sym.Variable("b").as_np_ndarray()
mx_sym = mx.sym.np.tensordot(a_sym, b_sym, axes).as_nd_ndarray()
check_numeric_gradient(mx_sym, [a.as_nd_ndarray(), b.as_nd_ndarray()],
rtol=1e-1, atol=1e-1, dtype=dtype)
@with_seed()
@use_np
def test_np_dot():
shapes = [
((3, 0), (0, 4)),
((3,), (3,)), # Case 1
((3, 4), (4, 5)), # Case 2
((), ()), # Case 3
((3, 4, 5), ()), # Case 3.5.1
((), (3, 4, 5)), # Case 3.5.2
((3, 4, 5), (5, )), # Case 4
((3, 4, 5), (5, 2)), # Case 5
((5,), (5, 2)),
((3, 5, 4), (5, 4, 3)),
((3, 4), (5, 4, 3)),
((4,), (5, 4, 3))
]
eps = 1e-3
for shape_a, shape_b in shapes:
np_a = _np.random.uniform(-1.0, 1.0, shape_a)
np_a[abs(np_a) < eps] = 2 * eps
np_b = _np.random.uniform(-1.0, 1.0, shape_b)
np_b[abs(np_b) < eps] = 2 * eps
a = mx.nd.array(np_a)
b = mx.nd.array(np_b)
np_res = _np.dot(np_a, np_b)
mx_res = np.dot(a.as_np_ndarray(), b.as_np_ndarray())
assert mx_res.shape == np_res.shape
assert_almost_equal(np_res, mx_res.asnumpy(), rtol=1e-5, atol=1e-5)
mx_a = mx.sym.Variable("a")
mx_b = mx.sym.Variable("b")
mx_sym = mx.sym.np.dot(mx_a.as_np_ndarray(), mx_b.as_np_ndarray()).as_nd_ndarray()
if (len(shape_a) > 0 and len(shape_b) > 0 and _np.prod(shape_a) > 0 and _np.prod(shape_b) > 0):
check_numeric_gradient(mx_sym, {"a": a, "b": b}, numeric_eps=eps, rtol=1e-2, atol=1e-3)
bad_shapes = [((4, 5), (2, 3)), ((3, 4, 5), (6, ))]
for shape_a, shape_b in bad_shapes:
a = mx.nd.array(random.random()) if len(shape_a) == 0 else rand_ndarray(shape_a)
b = mx.nd.array(random.random()) if len(shape_b) == 0 else rand_ndarray(shape_b)
try:
mx_res = np.dot(a.as_np_ndarray(), b.as_np_ndarray())
except mx.base.MXNetError:
continue
assert False
@with_seed()
@use_np
def test_np_vdot():
class TestVdot(HybridBlock):
def __init__(self):
super(TestVdot, self).__init__()
def hybrid_forward(self, F, a, b):
return F.np.vdot(a, b)
def vdot_backward(a, b):
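"""np.vdot flattens both inputs and returns their dot product, so the gradient
w.r.t. each operand is simply the other operand (with an all-ones upstream
gradient, as produced by backward() on the scalar output)."""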
return [b, a]
# test different size inputs
tensor_shapes = [(), (5,), (3, 3)]
for hybridize in [True, False]:
for shape in tensor_shapes:
for dtype in [_np.float32, _np.float64]:
test_vdot = TestVdot()
if hybridize:
test_vdot.hybridize()
a = rand_ndarray(shape=shape, dtype=dtype).as_np_ndarray()
b = rand_ndarray(shape=shape, dtype=dtype).as_np_ndarray()
a.attach_grad()
b.attach_grad()
np_out = _np.vdot(a.asnumpy(), b.asnumpy())
with mx.autograd.record():
mx_out = test_vdot(a, b)
assert mx_out.shape == np_out.shape
assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5)
mx_out.backward()
np_backward = vdot_backward(a.asnumpy(), b.asnumpy())
assert_almost_equal(a.grad.asnumpy(), np_backward[0], rtol=1e-2, atol=1e-2)
assert_almost_equal(b.grad.asnumpy(), np_backward[1], rtol=1e-2, atol=1e-2)
# Test imperative once again
mx_out = np.vdot(a, b)
np_out = _np.vdot(a.asnumpy(), b.asnumpy())
assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5)
# test numeric gradient
if len(shape) > 0 and _np.prod(shape) > 0:
a_sym = mx.sym.Variable("a").as_np_ndarray()
b_sym = mx.sym.Variable("b").as_np_ndarray()
mx_sym = mx.sym.np.vdot(a_sym, b_sym).as_nd_ndarray()
check_numeric_gradient(mx_sym, [a.as_nd_ndarray(), b.as_nd_ndarray()],
rtol=1e-1, atol=1e-1, dtype=dtype)
@with_seed()
@use_np
def test_np_inner():
class TestInner(HybridBlock):
def __init__(self):
super(TestInner, self).__init__()
def hybrid_forward(self, F, a, b):
return F.np.inner(a, b)
def inner_backward(a, b):
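"""Reference gradient of np.inner, which contracts over the last axis of both
inputs; this mirrors tensordot_backward above with the summed axes fixed to
[a.ndim - 1] and [b.ndim - 1]."""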
a_axes_summed = [a.ndim - 1]
b_axes_summed = [b.ndim - 1]
a_axes_remained = []
for i in range(a.ndim):
if not (i in a_axes_summed):
a_axes_remained.append(i)
a_axes = a_axes_remained[:] + a_axes_summed[:]
b_axes_remained = []
for i in range(b.ndim):
if not (i in b_axes_summed):
b_axes_remained.append(i)
b_axes = b_axes_summed[:] + b_axes_remained[:]
ad1 = _np.prod([a.shape[i] for i in a_axes_remained]) if len(a_axes_remained) > 0 else 1
ad2 = _np.prod([a.shape[i] for i in a_axes_summed]) if len(a_axes_summed) > 0 else 1
bd1 = _np.prod([b.shape[i] for i in b_axes_summed]) if len(b_axes_summed) > 0 else 1
bd2 = _np.prod([b.shape[i] for i in b_axes_remained]) if len(b_axes_remained) > 0 else 1
out_grad = _np.ones((ad1, bd2))
new_a = _np.transpose(a, a_axes)
new_a_shape = new_a.shape[:]
new_a = new_a.reshape((ad1, ad2))
new_b = _np.transpose(b, b_axes)
new_b_shape = new_b.shape[:]
new_b = new_b.reshape((bd1, bd2))
reverse_a_axes = [0 for i in a_axes]
for i in range(len(a_axes)):
reverse_a_axes[a_axes[i]] = i
reverse_b_axes = [0 for i in b_axes]
for i in range(len(b_axes)):
reverse_b_axes[b_axes[i]] = i
grad_b = _np.dot(new_a.T, out_grad).reshape(new_b_shape)
grad_b = _np.transpose(grad_b, reverse_b_axes)
grad_a = _np.dot(out_grad, new_b.T).reshape(new_a_shape)
grad_a = _np.transpose(grad_a, reverse_a_axes)
return [grad_a, grad_b]
# test non zero size input
tensor_shapes = [
((3,), (3,)),
((2, 3), (3,)),
((3,), (2, 3))
]
for hybridize in [True, False]:
for a_shape, b_shape in tensor_shapes:
for dtype in [_np.float32, _np.float64]:
test_inner = TestInner()
if hybridize:
test_inner.hybridize()
a = rand_ndarray(shape=a_shape, dtype=dtype).as_np_ndarray()
b = rand_ndarray(shape=b_shape, dtype=dtype).as_np_ndarray()
a.attach_grad()
b.attach_grad()
np_out = _np.inner(a.asnumpy(), b.asnumpy())
with mx.autograd.record():
mx_out = test_inner(a, b)
assert mx_out.shape == np_out.shape
assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5)
mx_out.backward()
np_backward = inner_backward(a.asnumpy(), b.asnumpy())
assert_almost_equal(a.grad.asnumpy(), np_backward[0], rtol=1e-2, atol=1e-2)
assert_almost_equal(b.grad.asnumpy(), np_backward[1], rtol=1e-2, atol=1e-2)
# Test imperative once again
mx_out = np.inner(a, b)
np_out = _np.inner(a.asnumpy(), b.asnumpy())
assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5)
# test numeric gradient
a_sym = mx.sym.Variable("a").as_np_ndarray()
b_sym = mx.sym.Variable("b").as_np_ndarray()
mx_sym = mx.sym.np.inner(a_sym, b_sym).as_nd_ndarray()
check_numeric_gradient(mx_sym, [a.as_nd_ndarray(), b.as_nd_ndarray()],
rtol=1e-1, atol=1e-1, dtype=dtype)
@with_seed()
@use_np
def test_np_outer():
class TestOuter(HybridBlock):
def __init__(self):
super(TestOuter, self).__init__()
def hybrid_forward(self, F, a, b):
return F.np.outer(a, b)
# test non zero size input
tensor_shapes = [
((3,), (3,)),
((2, 3), (6,)),
((6,), (2, 3))
]
for hybridize in [True, False]:
for a_shape, b_shape in tensor_shapes:
for dtype in [_np.float32, _np.float64]:
test_outer = TestOuter()
if hybridize:
test_outer.hybridize()
a = rand_ndarray(shape=a_shape, dtype=dtype).as_np_ndarray()
b = rand_ndarray(shape=b_shape, dtype=dtype).as_np_ndarray()
a.attach_grad()
b.attach_grad()
np_out = _np.outer(a.asnumpy(), b.asnumpy())
with mx.autograd.record():
mx_out = test_outer(a, b)
assert mx_out.shape == np_out.shape
assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5)
mx_out.backward()
# Test imperative once again
mx_out = np.outer(a, b)
np_out = _np.outer(a.asnumpy(), b.asnumpy())
assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5)
# test numeric gradient
a_sym = mx.sym.Variable("a").as_np_ndarray()
b_sym = mx.sym.Variable("b").as_np_ndarray()
mx_sym = mx.sym.np.outer(a_sym, b_sym).as_nd_ndarray()
check_numeric_gradient(mx_sym, [a.as_nd_ndarray(), b.as_nd_ndarray()],
rtol=1e-1, atol=1e-1, dtype=dtype)
@with_seed()
@use_np
def test_np_sum():
class TestSum(HybridBlock):
def __init__(self, axis=None, dtype=None, keepdims=False):
super(TestSum, self).__init__()
self._axis = axis
self._dtype = dtype
self._keepdims = keepdims
def hybrid_forward(self, F, a, *args, **kwargs):
return F.np.sum(a, axis=self._axis, dtype=self._dtype, keepdims=self._keepdims)
def is_int(dtype):
return 'int' in dtype
in_data_dim = random.choice([2, 3, 4])
shape = rand_shape_nd(in_data_dim, dim=3)
acc_type = {'float16': 'float32', 'float32': 'float64', 'float64': 'float64',
'int8': 'int32', 'int32': 'int64', 'int64': 'int64', 'bool': 'int64'}
is_windows = sys.platform.startswith('win')
for hybridize in [False, True]:
for keepdims in [True, False]:
for axis in ([i for i in range(in_data_dim)] + [(), None]):
for itype in ['float16', 'float32', 'float64', 'int8', 'int32', 'int64', 'bool']:
for dtype in ['float16', 'float32', 'float64', 'int8', 'int32', 'int64']:
if (is_int(dtype) and not is_int(itype)) or (is_windows and is_int(itype))\
or (itype == 'bool' and\
(dtype not in ('float32', 'float64', 'int32', 'int64') or is_windows)):
continue
# test gluon
test_sum = TestSum(axis=axis, dtype=dtype, keepdims=keepdims)
if hybridize:
test_sum.hybridize()
if is_int(itype):
x = _np.random.randint(-128, 128, shape, dtype=itype)
x = np.array(x)
elif itype == 'bool':
x = _np.random.randint(0, 2, shape) < 1
x = np.array(x, dtype='bool')
else:
x = np.random.uniform(-1.0, 1.0, size=shape, dtype=itype)
expected_ret = _np.sum(x.asnumpy(), axis=axis, dtype=acc_type[itype], keepdims=keepdims)
expected_ret = expected_ret.astype(dtype)
if itype == 'bool':
if is_op_runnable() and (not is_windows): # special handling of boolean ndarray
y = test_sum(x)
assert y.dtype == expected_ret.dtype
assert_almost_equal(y.asnumpy(), expected_ret, rtol=1e-4, atol=1e-5,
use_broadcast=False)
continue
x.attach_grad()
with mx.autograd.record():
y = test_sum(x)
assert y.shape == expected_ret.shape
assert_almost_equal(y.asnumpy(), expected_ret, rtol=1e-3, atol=1e-5, use_broadcast=False)
y.backward()
assert same(x.grad.asnumpy(), _np.ones(shape=x.shape, dtype=x.dtype))
# test numeric
if itype == 'float32' and dtype == 'float32':
x_sym = mx.sym.Variable("x").as_np_ndarray()
mx_sym = mx.sym.np.sum(x_sym, axis=axis, dtype=dtype, keepdims=keepdims).as_nd_ndarray()
check_numeric_gradient(mx_sym, [x.as_nd_ndarray()],
numeric_eps=1e-3, rtol=1e-2, atol=1e-3, dtype=_np.float32)
# test imperative
mx_out = np.sum(x, axis=axis, dtype=dtype, keepdims=keepdims)
np_out = _np.sum(x.asnumpy(), axis=axis, dtype=acc_type[itype], keepdims=keepdims).astype(dtype)
assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False)
@with_seed()
@use_np
def test_np_max_min():
class TestMax(HybridBlock):
def __init__(self, axis=None, keepdims=False):
super(TestMax, self).__init__()
self._axis = axis
self._keepdims = keepdims
def hybrid_forward(self, F, a, *args, **kwargs):
return a.max(axis=self._axis, keepdims=self._keepdims)
class TestMin(HybridBlock):
def __init__(self, axis=None, keepdims=False):
super(TestMin, self).__init__()
self._axis = axis
self._keepdims = keepdims
def hybrid_forward(self, F, a, *args, **kwargs):
return a.min(axis=self._axis, keepdims=self._keepdims)
def is_int(dtype):
return 'int' == dtype
def get_grad(axis, func_name):
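"""Expected gradient of max/min for the hardcoded np.arange(120).reshape((2, 3, 4, 5))
input: since the values increase along every axis, the extreme element along `axis`
is the last one for 'max' and the first one for 'min', so the gradient is a one-hot
mask at that position (all ones when axis is the empty tuple)."""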
index = -1 if func_name == 'max' else 0
if axis == ():
return _np.ones((2,3,4,5))
else:
temp = _np.zeros((2,3,4,5))
if axis == 0:
temp[index,:,:,:] = 1
return temp
elif axis == 1:
temp[:,index,:,:] = 1
return temp
elif axis == 2:
temp[:,:,index,:] = 1
return temp
elif axis == 3:
temp[:,:,:,index] = 1
return temp
elif not axis:
temp[index,index,index,index] = 1
return temp
raise ValueError('axis should be int or None or ()')
def _test_np_exception(func, shape, dim):
x = np.random.uniform(-1.0, 1.0, shape)
out = getattr(x, func)()
assert out.ndim == dim, 'dimension mismatch, out.ndim={}, dim={}'.format(out.ndim, dim)
in_data_dim = random.choice([2, 3, 4])
shape = rand_shape_nd(in_data_dim, dim=3)
for func in ['max', 'min']:
for hybridize in [False, True]:
for keepdims in [True, False]:
for axis in ([i for i in range(in_data_dim)] + [(), None]):
for itype in ['float16', 'float32', 'float64', 'int']:
# test gluon
if func == 'max':
test_gluon = TestMax(axis=axis, keepdims=keepdims)
else:
test_gluon = TestMin(axis=axis, keepdims=keepdims)
if hybridize:
test_gluon.hybridize()
if is_int(itype):
x = np.arange(120).reshape((2, 3, 4, 5))
else:
x = np.random.uniform(-1.0, 1.0, size=shape, dtype=itype)
x.attach_grad()
if func == 'max':
expected_ret = _np.amax(x.asnumpy(), axis=axis, keepdims=keepdims)
else:
expected_ret = _np.amin(x.asnumpy(), axis=axis, keepdims=keepdims)
with mx.autograd.record():
y = test_gluon(x)
assert y.shape == expected_ret.shape
assert_almost_equal(y.asnumpy(), expected_ret, rtol=1e-3, atol=1e-5)
y.backward()
# only check the gradient with hardcoded input
if is_int(itype):
assert same(x.grad.asnumpy(), get_grad(axis, func)), \
'x={}\ny={}\nx.grad={}\nnumpy={}'.format(x.asnumpy(), y.asnumpy(), x.grad.asnumpy(), get_grad(axis, func))
# test imperative
if func == 'max':
mx_out = np.max(x, axis=axis, keepdims=keepdims)
np_out = _np.amax(x.asnumpy(), axis=axis, keepdims=keepdims)
else:
mx_out = np.min(x, axis=axis, keepdims=keepdims)
np_out = _np.amin(x.asnumpy(), axis=axis, keepdims=keepdims)
assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5)
# test zero and zero dim
shapes = [(), (0,), (2, 0), (0, 2, 1)]
exceptions = [False, True, True, True]
dims = [0] * len(shapes)
for func in ['max', 'min']:
for shape, exception, dim in zip(shapes, exceptions, dims):
if exception:
assertRaises(MXNetError, _test_np_exception, func, shape, dim)
else:
_test_np_exception(func, shape, dim)
@with_seed()
@use_np
def test_np_mean():
class TestMean(HybridBlock):
def __init__(self, axis=None, dtype=None, keepdims=False):
super(TestMean, self).__init__()
self._axis = axis
self._dtype = dtype
self._keepdims = keepdims
def hybrid_forward(self, F, a, *args, **kwargs):
return a.mean(axis=self._axis, dtype=self._dtype, keepdims=self._keepdims)
def is_int(dtype):
return 'int' in dtype
is_windows = sys.platform.startswith('win')
in_data_dim = random.choice([2, 3, 4])
shape = rand_shape_nd(in_data_dim, dim=3)
acc_type = {'float16': 'float32', 'float32': 'float64', 'float64': 'float64',
'bool': 'int64', 'int8': 'int32', 'int32': 'int64', 'int64': 'int64'}
ft_types = ['float16', 'float32', 'float64']
it_types = ['bool', 'int8', 'int32', 'int64']
for hybridize in [False, True]:
for keepdims in [True, False]:
for axis in ([i for i in range(in_data_dim)] + [(), None]):
for itype, dtype in itertools.product(ft_types, [None] + ft_types + it_types):
if dtype == 'bool':
continue
# test gluon
test_mean = TestMean(axis=axis, dtype=dtype, keepdims=keepdims)
if hybridize:
test_mean.hybridize()
x = np.random.uniform(-1.0, 1.0, size=shape).astype(itype)
x = x.as_np_ndarray()
x.attach_grad()
expected_ret = _np.mean(x.asnumpy(), axis=axis, dtype=acc_type[itype], keepdims=keepdims)
expected_ret = expected_ret.astype(dtype)
with mx.autograd.record():
y = test_mean(x)
assert y.shape == expected_ret.shape
assert_almost_equal(y.asnumpy(), expected_ret, rtol=1e-3, atol=1e-5)
y.backward()
N = x.size / y.size
assert same(x.grad.asnumpy(), _np.ones(shape=x.shape, dtype=x.dtype) / N)
# test numeric
if itype == 'float32' and dtype == 'float32':
x_sym = mx.sym.Variable("x").as_np_ndarray()
mx_sym = mx.sym.np.mean(x_sym, axis=axis, dtype=dtype, keepdims=keepdims).as_nd_ndarray()
check_numeric_gradient(mx_sym, [x.as_nd_ndarray()],
numeric_eps=1e-3, rtol=1e-3, atol=1e-4, dtype=_np.float32)
# test imperative
mx_out = np.mean(x, axis=axis, dtype=dtype, keepdims=keepdims)
np_out = _np.mean(x.asnumpy(), axis=axis, dtype=acc_type[itype], keepdims=keepdims).astype(dtype)
assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5)
for itype, dtype in itertools.product(it_types, [None] + ft_types + it_types):
if dtype == 'bool':
continue
# test gluon
test_mean = TestMean(axis=axis, dtype=dtype, keepdims=keepdims)
if hybridize:
test_mean.hybridize()
if itype == 'bool':
x = np.array(_np.random.uniform(size=shape) > 0.5)
else:
x = np.random.uniform(-128, 127, size=shape).astype(itype)
expected_ret = _np.mean(x.asnumpy(), axis=axis, dtype=dtype, keepdims=keepdims)
if itype == 'bool':
if is_op_runnable() and (not is_windows) and dtype not in ['float16', 'int8']: # special handling of boolean ndarray
y = test_mean(x)
assert y.shape == expected_ret.shape
assert_almost_equal(y.asnumpy(), expected_ret, rtol=1e-3, atol=1e-5)
continue
y = test_mean(x)
assert y.shape == expected_ret.shape
assert_almost_equal(y.asnumpy(), expected_ret, rtol=1e-3, atol=1e-5)
# test imperative
mx_out = np.mean(x, axis=axis, dtype=dtype, keepdims=keepdims)
np_out = _np.mean(x.asnumpy(), axis=axis, dtype=dtype, keepdims=keepdims).astype(dtype)
assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5)
@with_seed()
@use_np
def test_np_moment():
class TestMoment(HybridBlock):
def __init__(self, name, axis=None, dtype=None, keepdims=False, ddof=0):
super(TestMoment, self).__init__()
self._name = name
self._axis = axis
self._dtype = dtype
self._keepdims = keepdims
self._ddof = ddof
def hybrid_forward(self, F, a, *args, **kwargs):
return getattr(a, self._name)(axis=self._axis, dtype=self._dtype, keepdims=self._keepdims, ddof=self._ddof)
def is_int(dtype):
return 'int' in dtype
def legalize_shape(shape):
shape_ = list(shape)
for i in range(len(shape_)):
shape_[i] += 1
return tuple(shape_)
in_data_dim = random.choice([2, 3, 4])
shape = rand_shape_nd(in_data_dim, dim=3)
shape = legalize_shape(shape)
acc_type = {'float16': 'float32', 'float32': 'float64', 'float64': 'float64',
'int8': 'float64', 'int32': 'float64', 'int64': 'float64'}
for name in ['var', 'std']:
for hybridize in [False, True]:
for ddof in [0, 1]:
for keepdims in [True, False]:
for axis in ([i for i in range(in_data_dim)] + [(), None]):
for itype in ['float16', 'float32', 'float64', 'int8', 'int32', 'int64']:
for dtype in ['float16', 'float32', 'float64']:
if (is_int(dtype) and not is_int(itype)) or (is_int(itype) and is_int(dtype)):
continue
atol = 3e-4 if itype == 'float16' or dtype == 'float16' else 1e-5
rtol = 1e-2 if itype == 'float16' or dtype == 'float16' else 1e-3
# test gluon
test_moment = TestMoment(name, axis=axis, dtype=dtype, keepdims=keepdims, ddof=ddof)
if hybridize:
test_moment.hybridize()
if is_int(itype):
x = _np.random.randint(-16, 16, shape, dtype=itype)
x = mx.nd.array(x)
else:
x = mx.nd.random.uniform(-1.0, 1.0, shape=shape, dtype=itype)
x = x.as_np_ndarray()
x.attach_grad()
expected_ret = getattr(_np, name)(x.asnumpy(), axis=axis, dtype=acc_type[itype], keepdims=keepdims, ddof=ddof)
expected_ret = expected_ret.astype(dtype)
y = test_moment(x)
assert y.shape == expected_ret.shape
assert_almost_equal(y.asnumpy(), expected_ret, rtol=rtol, atol=atol, use_broadcast=False, equal_nan=True)
# test imperative
mx_out = getattr(np, name)(x, axis=axis, dtype=dtype, keepdims=keepdims, ddof=ddof)
np_out = getattr(_np, name)(x.asnumpy(), axis=axis, dtype=acc_type[itype], keepdims=keepdims, ddof=ddof).astype(dtype)
assert_almost_equal(mx_out.asnumpy(), np_out, rtol=rtol, atol=atol, use_broadcast=False, equal_nan=True)
@with_seed()
@use_np
def test_np_shape():
shapes = [
(),
(0, 1),
(2, 3),
(2, 3, 4),
]
for shape in shapes:
mx_a = np.random.uniform(size=shape)
np_a = _np.random.uniform(size=shape)
mx_shape = np.shape(mx_a)
np_shape = _np.shape(np_a)
assert mx_shape == np_shape
@with_seed()
@use_np
def test_np_linspace():
configs = [
(0.0, 1.0, 10),
(-2, 4, 30),
(5.234324, 8.98324, 324),
(2, 10, 100)
]
exception_configs = [
(0, 10, -1),
(0, 1, 2.5)
]
dtypes = ['int32', 'float16', 'float32', 'float64', None]
for config in configs:
for dtype in dtypes:
for endpoint in [False, True]:
for retstep in [False, True]:
if isinstance(config, tuple):
mx_ret = np.linspace(*config, endpoint=endpoint, retstep=retstep, dtype=dtype)
np_ret = _np.linspace(*config, endpoint=endpoint, retstep=retstep, dtype=dtype)
else:
mx_ret = np.linspace(config, endpoint=endpoint, retstep=retstep, dtype=dtype)
np_ret = _np.linspace(config, endpoint=endpoint, retstep=retstep, dtype=dtype)
if retstep:
assert_almost_equal(mx_ret[0].asnumpy(), np_ret[0], atol=1e-3, rtol=1e-5)
assert same(mx_ret[1], np_ret[1])
else:
assert_almost_equal(mx_ret.asnumpy(), np_ret, atol=1e-3, rtol=1e-5)
# check for exception input
for config in exception_configs:
assertRaises(MXNetError, np.linspace, *config)
# check linspace equivalent to arange
for test_index in range(1000):
assert_almost_equal(mx.np.linspace(0, test_index, test_index + 1).asnumpy(), _np.arange(test_index + 1))
class TestLinspace(HybridBlock):
def __init__(self, start, stop, num=50, endpoint=None, retstep=False, dtype=None, axis=0):
super(TestLinspace, self).__init__()
self._start = start
self._stop = stop
self._num = num
self._endpoint = endpoint
self._retstep = retstep
self._dtype = dtype
def hybrid_forward(self, F, x):
if self._retstep:
raise ValueError("linspace didn't support retstep = True inside HybridBlock")
else:
return x + F.np.linspace(self._start, self._stop, self._num, \
self._endpoint, self._retstep, self._dtype)
for dtype in dtypes:
x = np.zeros(shape=(), dtype=dtype)
for config in configs:
for hybridize in [False, True]:
for endpoint in [False, True]:
if isinstance(config, tuple):
net = TestLinspace(*config, endpoint=endpoint, dtype=dtype)
np_out = _np.linspace(*config, endpoint=endpoint, dtype=dtype)
else:
net = TestLinspace(config, endpoint=endpoint, dtype=dtype)
np_out = _np.linspace(config, endpoint=endpoint, dtype=dtype)
if hybridize:
net.hybridize()
mx_out = net(x)
assert_almost_equal(mx_out.asnumpy(), np_out, atol=1e-3, rtol=1e-5)
@with_seed()
@use_np
def test_np_logspace():
class TestLogspace(HybridBlock):
def __init__(self, start, stop, num=50, endpoint=None, base=50.0, dtype=None, axis=0):
super(TestLogspace, self).__init__()
self._start = start
self._stop = stop
self._num = num
self._endpoint = endpoint
self._base = base
self._dtype = dtype
self.axis = axis
def hybrid_forward(self, F, x):
return x + F.np.logspace(self._start, self._stop, self._num, self._endpoint, self._base, self._dtype, self.axis)
configs = [
(0.0, 1.0, 20),
(2, 8, 0),
(22, 11, 1),
(2.22, 9.99, 11),
(4.99999, 12.11111111, 111)
]
base_configs = [0, 1, 5, 8, 10, 33]
dtypes = ['float32', 'float64', None]
for config in configs:
for dtype in dtypes:
for endpoint in [False, True]:
for hybridize in [False, True]:
for base in base_configs:
x = np.zeros(shape=(), dtype=dtype)
net = TestLogspace(*config, endpoint=endpoint, base=base, dtype=dtype)
np_out = _np.logspace(*config, endpoint=endpoint, base=base, dtype=dtype)
if hybridize:
net.hybridize()
mx_out = net(x)
assert_almost_equal(mx_out.asnumpy(), np_out, atol=1e-3, rtol=1e-5)
if dtype is not None:
assert mx_out.dtype == np_out.dtype
# Test imperative once again
mx_ret = np.logspace(*config, endpoint=endpoint, base=base, dtype=dtype)
np_ret = _np.logspace(*config, endpoint=endpoint, base=base, dtype=dtype)
assert_almost_equal(mx_ret.asnumpy(), np_ret, atol=1e-3, rtol=1e-5)
if dtype is not None:
assert mx_ret.dtype == np_ret.dtype
@with_seed()
@use_np
def test_npx_slice():
class TestSlice(HybridBlock):
def __init__(self, begin, end, step):
super(TestSlice, self).__init__()
self._begin = begin
self._end = end
self._step = step
def hybrid_forward(self, F, a):
return F.npx.slice(a, begin=self._begin, end=self._end, step=self._step)
shape = (8, 16, 9, 9)
np_array = _np.arange(_np.prod(shape), dtype='int32').reshape(shape)
configs = [
([], [], None),
([], [], []),
([1], [4], None),
([1], [10], [3]),
([10], [0], [-2]),
([None], [None], [None]),
([None], [None], [-1]),
([10], [None], [-1]),
([1, 0, 3], [-2, 10, -4], [None, 2, 3]),
([-2, -3, -5, -6], [1, 3, 4, 5], None),
([-2, -3, -5, -6], [1, 3, 4, 5], [-1, -2, -3, -4]),
([2, -3, -5, -6], [2, 3, 4, 5], None),
([2, -3, -5, 5], [3, 3, 4, 5], None),
]
for hybridize in [True, False]:
for config in configs:
start, end, step = config[0], config[1], config[2]
test_slice = TestSlice(begin=start, end=end, step=step)
if hybridize:
test_slice.hybridize()
a = np.array(np_array, dtype=np_array.dtype)
a.attach_grad()
basic_index = tuple([
slice(start[i], end[i], step[i]) if step is not None else slice(start[i], end[i])
for i in range(len(start))
])
expected_ret = np_array[basic_index]
with mx.autograd.record():
y = test_slice(a)
assert same(y.asnumpy(), expected_ret)
# test backward
mx.autograd.backward(y)
expected_grad = _np.zeros(shape)
expected_grad[basic_index] = 1
assert same(a.grad.asnumpy(), expected_grad)
@with_seed()
@use_np
def test_npx_batch_dot():
ctx = mx.context.current_context()
dtypes = ['float32', 'float64']
if ctx.device_type == 'gpu':
dtypes += ['float16']
eps_dict = {'float32': 1E-4, 'float64': 1E-4, 'float16': 1E-3}
class TestBatchDot(HybridBlock):
def __init__(self, transpose_a, transpose_b):
super(TestBatchDot, self).__init__()
self._transpose_a = transpose_a
self._transpose_b = transpose_b
def hybrid_forward(self, F, lhs, rhs):
return F.npx.batch_dot(lhs, rhs,
transpose_a=self._transpose_a,
transpose_b=self._transpose_b)
def batch_dot_numpy(lhs, rhs, transpose_a, transpose_b):
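"""Reference batch matrix multiply: optionally swap the last two axes of each
operand, then let np.matmul broadcast over the leading batch dimensions."""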
assert lhs.ndim == rhs.ndim >= 3
if transpose_a:
lhs = lhs.swapaxes(-1, -2)
if transpose_b:
rhs = rhs.swapaxes(-1, -2)
return _np.matmul(lhs, rhs)
def gt_grad_batch_dot_numpy(lhs, rhs, ograd, transpose_a, transpose_b, lhs_req, rhs_req,
init_lhs_grad, init_rhs_grad):
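"""Ground-truth gradients of batch_dot, derived case by case from the matmul
gradient rules noted in the comments below; when a grad_req is 'add', the
provided initial gradient buffer is accumulated into the result."""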
if transpose_a and transpose_b:
# Gradient of z = dot(x.T, y.T)
# dx = dot(dz, y).T = dot(y.T, dz.T)
# dy = dot(x, dz).T = dot(dz.T, x.T)
lhs_grad = batch_dot_numpy(rhs, ograd, transpose_a=True, transpose_b=True)
rhs_grad = batch_dot_numpy(ograd, lhs, transpose_a=True, transpose_b=True)
elif not transpose_a and transpose_b:
# Gradient of z = dot(x, y.T)
# dx = dot(dz, y)
# dy = dot(x.T, dz).T = dot(dz.T, x)
lhs_grad = batch_dot_numpy(ograd, rhs, transpose_a=False, transpose_b=False)
rhs_grad = batch_dot_numpy(ograd, lhs, transpose_a=True, transpose_b=False)
elif transpose_a and not transpose_b:
# Gradient of z = dot(x.T, y)
# dx = dot(dz, y.T).T = dot(y, dz.T)
# dy = dot(x, dz)
lhs_grad = batch_dot_numpy(rhs, ograd, transpose_a=False, transpose_b=True)
rhs_grad = batch_dot_numpy(lhs, ograd, transpose_a=False, transpose_b=False)
else:
# Gradient of z = dot(x, y)
# dx = dot(dz, y.T)
# dy = dot(x.T, dz)
lhs_grad = batch_dot_numpy(ograd, rhs, transpose_a=False, transpose_b=True)
rhs_grad = batch_dot_numpy(lhs, ograd, transpose_a=True, transpose_b=False)
if lhs_req == 'add':
lhs_grad += init_lhs_grad
if rhs_req == 'add':
rhs_grad += init_rhs_grad
return lhs_grad, rhs_grad
configs = [
((2, 3, 0), (2, 4, 0), False, True),
((2, 4, 3), (2, 4, 3), True, False),
((0, 3, 0), (0, 0, 2), False, False),
((3, 2, 3, 2), (3, 2, 2, 3), True, True),
((3, 1, 5, 2), (3, 1, 2, 1), False, False)
]
bad_configs = [
((5, 3, 2), (5, 1, 3), False, False),
((2, 5, 3, 1), (2, 4, 3, 1), True, False)
]
for hybridize in [True, False]:
for lhs_shape, rhs_shape, transpose_a, transpose_b in configs:
for dtype in dtypes:
eps = eps_dict[dtype]
for lhs_grad_req in ['write', 'add']:
for rhs_grad_req in ['write', 'add']:
f_batch_dot = TestBatchDot(transpose_a=transpose_a,
transpose_b=transpose_b)
if hybridize:
f_batch_dot.hybridize()
lhs_val = mx.np.array(_np.random.uniform(-1.0, 1.0, lhs_shape), dtype=dtype)
rhs_val = mx.np.array(_np.random.uniform(-1.0, 1.0, rhs_shape), dtype=dtype)
lhs_val.attach_grad(grad_req=lhs_grad_req)
rhs_val.attach_grad(grad_req=rhs_grad_req)
gt_out = batch_dot_numpy(lhs_val.asnumpy(), rhs_val.asnumpy(),
transpose_a, transpose_b)
init_lhs_grad = mx.np.random.uniform(-1.0, 1.0, lhs_shape, dtype=dtype)
init_rhs_grad = mx.np.random.uniform(-1.0, 1.0, rhs_shape, dtype=dtype)
o_grad = mx.np.random.uniform(-1.0, 1.0, gt_out.shape, dtype=dtype)
if lhs_grad_req == 'add':
lhs_val.grad[:] = init_lhs_grad
if rhs_grad_req == 'add':
rhs_val.grad[:] = init_rhs_grad
with mx.autograd.record():
out = f_batch_dot(lhs_val, rhs_val)
out.backward(o_grad)
assert_almost_equal(out.asnumpy(), gt_out, rtol=eps, atol=eps)
gt_lhs_grad, gt_rhs_grad = gt_grad_batch_dot_numpy(lhs_val.asnumpy(),
rhs_val.asnumpy(),
o_grad.asnumpy(),
transpose_a=transpose_a,
transpose_b=transpose_b,
lhs_req=lhs_grad_req,
rhs_req=rhs_grad_req,
init_lhs_grad=init_lhs_grad.asnumpy(),
init_rhs_grad=init_rhs_grad.asnumpy())
assert_almost_equal(lhs_val.grad.asnumpy(), gt_lhs_grad, rtol=eps, atol=eps)
assert_almost_equal(rhs_val.grad.asnumpy(), gt_rhs_grad, rtol=eps, atol=eps)
for lhs_shape, rhs_shape, transpose_a, transpose_b in bad_configs:
for dtype in dtypes:
lhs_val = mx.np.array(_np.random.uniform(-1.0, 1.0, lhs_shape), dtype=dtype)
rhs_val = mx.np.array(_np.random.uniform(-1.0, 1.0, rhs_shape), dtype=dtype)
assert_raises(MXNetError, lambda: mx.npx.batch_dot(lhs_val, rhs_val,
transpose_a=transpose_a,
transpose_b=transpose_b))
@with_seed()
@use_np
def test_npx_batch_norm():
momentum = 0.9
epsilon = 1e-5
class TestBatchNorm(HybridBlock):
def __init__(self, eps=1e-5, fix_gamma=False, momentum=0.9, **kwargs):
super(TestBatchNorm, self).__init__()
self.eps = eps
self.fix_gamma = fix_gamma
self.momentum = momentum
self.kwargs = kwargs
def hybrid_forward(self, F, data, bn_gamma, bn_beta,
bn_running_mean, bn_running_var):
op = F.npx.batch_norm
output = op(data, bn_gamma, bn_beta,
bn_running_mean, bn_running_var,
momentum=self.momentum, eps=self.eps,
fix_gamma=self.fix_gamma, **self.kwargs)
return output
def _test_batchnorm_impl(shape, fix_gamma, cudnn_off, output_mean_var,
axis,
data_grad_req, gamma_grad_req, beta_grad_req):
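"""Runs npx.batch_norm over several iterations and checks the normalized output
against a NumPy-style reference computed inline below; when any grad_req is not
'null' it also runs backward and verifies the running mean/var updates and the
data/gamma/beta gradients, including accumulation for grad_req='add'."""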
kwargs = dict(output_mean_var=output_mean_var)
kwargs.update(dict(axis=axis, cudnn_off=cudnn_off))
op = TestBatchNorm(eps=epsilon, fix_gamma=fix_gamma, momentum=momentum, **kwargs)
nch = shape[axis]
if not fix_gamma:
bn_gamma = np.random.uniform(size=(nch,))
bn_gamma.attach_grad(grad_req=gamma_grad_req)
else:
bn_gamma = np.ones((nch,))
bn_beta = np.random.uniform(size=(nch,))
bn_beta.attach_grad(grad_req=beta_grad_req)
bn_running_mean = np.zeros(nch)
bn_running_var = np.ones(nch)
running_mean = np.zeros(nch)
running_var = np.ones(nch)
num_iters = 10
expand_shape = [1] * len(shape)
expand_shape[axis] = shape[axis]
expand_shape = tuple(expand_shape)
data = np.random.uniform(size=shape)
data.attach_grad(grad_req=data_grad_req)
adX, adW, adb = 0, 0, 0
is_train = data_grad_req != 'null' or \
(not fix_gamma and gamma_grad_req != 'null') or \
beta_grad_req != 'null'
for _ in range(num_iters):
if data_grad_req != 'add':
data = np.random.uniform(size=shape)
data.attach_grad(grad_req=data_grad_req)
ograd = np.random.uniform(size=shape)
with mx.autograd.record():
output = op(data, bn_gamma, bn_beta,
bn_running_mean, bn_running_var)
if output_mean_var:
output, output_mean, output_std = output
if is_train:
output.backward(ograd)
mx.nd.waitall()
assert 0 <= axis < data.ndim
reduce_axis = tuple(i for i in range(data.ndim) if i != axis)
assert len(reduce_axis) == data.ndim - 1
data_mean = data.mean(
axis=reduce_axis, keepdims=True)
data_var = ((data - data_mean) ** 2).mean(axis=reduce_axis,
keepdims=True)
target_output = (data - data_mean) / \
np.sqrt(data_var + epsilon) * \
bn_gamma.reshape(expand_shape) + \
bn_beta.reshape(expand_shape)
# squeeze data_mean and data_var
data_mean_flat = data_mean.squeeze()
data_var_flat = data_var.squeeze()
running_mean = running_mean * momentum + \
data_mean_flat * (1 - momentum)
running_var = running_var * momentum + \
data_var_flat * (1 - momentum)
W = bn_gamma.reshape(expand_shape)
dnx = ograd * W
xsm = data - data_mean
nd = 1.0 / np.sqrt(data_var + epsilon)
nx = xsm * nd
m = _np.prod(shape) / shape[axis]
dvar = np.sum(dnx * xsm, axis=reduce_axis, keepdims=True,
) * (-0.5) * np.power(nd, 3)
dmean = -nd * np.sum(dnx, axis=reduce_axis, keepdims=True) - \
dvar * xsm.mean(axis=reduce_axis, keepdims=True,
) * 2.0
dX = dnx * nd + dvar * xsm * (2.0 / m) + dmean * (1.0 / m)
dW = np.sum(ograd * nx, axis=reduce_axis)
db = np.sum(ograd, axis=reduce_axis)
adX = dX if data_grad_req != 'add' else adX + dX
adW = dW if gamma_grad_req != 'add' else adW + dW
adb = db if beta_grad_req != 'add' else adb + db
atol, rtol = 5e-2, 5e-2
if output_mean_var:
assert_almost_equal(output_mean.asnumpy(),
data_mean_flat.asnumpy(),
atol=atol, rtol=rtol)
assert_almost_equal(output_std.asnumpy(),
(1.0 / np.sqrt(data_var_flat +
epsilon)).asnumpy(),
atol=atol, rtol=rtol)
assert_almost_equal(output.asnumpy(), target_output.asnumpy(),
atol=atol, rtol=rtol)
if is_train:
assert_almost_equal(bn_running_mean.asnumpy(
), running_mean.asnumpy(), atol=atol, rtol=rtol)
assert_almost_equal(bn_running_var.asnumpy(
), running_var.asnumpy(), atol=atol, rtol=rtol)
if data_grad_req != 'null':
assert_almost_equal(data.grad.asnumpy(),
adX.asnumpy(), atol=atol, rtol=rtol)
if not fix_gamma:
if gamma_grad_req != 'null':
assert_almost_equal(
bn_gamma.grad.asnumpy(), adW.asnumpy(),
atol=atol, rtol=rtol)
else:
assert((bn_gamma.asnumpy() == 1).all())
if beta_grad_req != 'null':
assert_almost_equal(
bn_beta.grad.asnumpy(), adb.asnumpy(), atol=atol, rtol=rtol)
shapes = [(24, 2), (24, 3, 4), (24, 8, 4, 5), (24, 5, 6, 4, 5)]
bools = [False, True]
for shape, fix_gamma, cudnn_off, output_mean_var in itertools.product(
shapes, bools, bools, bools):
grad_reqs = ['write'] if len(shape) != 4 else ['null', 'write', 'add']
for data_grad_req in grad_reqs:
for gamma_grad_req in grad_reqs:
if fix_gamma and gamma_grad_req != 'null':
continue
for beta_grad_req in grad_reqs:
for axis in range(len(shape)):
_test_batchnorm_impl(
shape, fix_gamma, cudnn_off, output_mean_var,
axis,
data_grad_req,
gamma_grad_req, beta_grad_req)
@with_seed()
@use_np
def test_npi_boolean_assign():
class TestBooleanAssignScalar(HybridBlock):
def __init__(self, val):
super(TestBooleanAssignScalar, self).__init__()
self._val = val
def hybrid_forward(self, F, a, mask):
return F.np._internal.boolean_mask_assign_scalar(a, mask, self._val, out=a)
class TestBooleanAssignTensor(HybridBlock):
def __init__(self):
super(TestBooleanAssignTensor, self).__init__()
def hybrid_forward(self, F, a, mask, value):
return F.np._internal.boolean_mask_assign_tensor(a, mask, value, out=a)
shapes = [(3, 4), (3, 0), ()]
for hybridize in [False]:
for shape in shapes:
test_data = np.random.uniform(size=shape)
mx_mask = np.around(np.random.uniform(size=shape))
valid_num = int(mx_mask.sum())
np_mask = mx_mask.asnumpy().astype(_np.bool)
for val in [42., np.array(42.), np.array([42.]), np.random.uniform(size=(valid_num,))]:
test_block = TestBooleanAssignScalar(val) if isinstance(val, float) else TestBooleanAssignTensor()
if hybridize:
test_block.hybridize()
np_data = test_data.asnumpy()
mx_data = test_data.copy()
np_data[np_mask] = val
mx_data = test_block(mx_data, mx_mask) if isinstance(val, float) else test_block(mx_data, mx_mask, val)
assert_almost_equal(mx_data.asnumpy(), np_data, rtol=1e-3, atol=1e-5, use_broadcast=False)
@with_seed()
@use_np
def test_np_reshape():
class TestReshape(HybridBlock):
def __init__(self, newshape):
super(TestReshape, self).__init__()
self._newshape = newshape
def hybrid_forward(self, F, a):
return F.np.reshape(a, self._newshape)
shape_pairs = [((2, 6), (6, 2)), ((2, 6), (3, 4)), ((1, 0), (0,)), ((0, 0), (0,)), ((), (1, 1, 1))]
for hybridize in [True, False]:
for shape_pair in shape_pairs:
shape1, shape2 = shape_pair
test_reshape = TestReshape(shape2)
if hybridize:
test_reshape.hybridize()
x = rand_ndarray(shape1).as_np_ndarray()
x.attach_grad()
np_out = _np.reshape(x.asnumpy(), shape2)
with mx.autograd.record():
mx_out = test_reshape(x)
assert mx_out.shape == np_out.shape
assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False)
mx_out.backward()
np_backward = _np.ones(shape1)
assert_almost_equal(x.grad.asnumpy(), np_backward, rtol=1e-3, atol=1e-5, use_broadcast=False)
mx_out = np.reshape(x, shape2)
np_out = _np.reshape(x.asnumpy(), shape2)
assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False)
@with_seed()
@use_np
def test_np_squeeze():
config = [((), None),
((), -1),
((), 0),
((4, 1, 2), None),
((1, 1, 1), None),
((1, 0, 1, 5), 2),
((1, 0, 1, 1), (-1, -4))]
class TestSqueeze(HybridBlock):
def __init__(self, axis):
super(TestSqueeze, self).__init__()
self._axis = axis
def hybrid_forward(self, F, x):
return F.np.squeeze(x, self._axis)
for shape, axis in config:
data_np = _np.random.uniform(size=shape)
data_mx = np.array(data_np, dtype=data_np.dtype)
ret_np = _np.squeeze(data_np, axis)
ret_mx = np.squeeze(data_mx, axis)
assert_almost_equal(ret_mx.asnumpy(), ret_np, rtol=1e-5, atol=1e-6, use_broadcast=False)
net = TestSqueeze(axis)
for hybrid in [False, True]:
if hybrid:
net.hybridize()
data_mx.attach_grad()
with mx.autograd.record():
ret_mx = net(data_mx)
assert_almost_equal(ret_mx.asnumpy(), ret_np, rtol=1e-5, atol=1e-6, use_broadcast=False)
ret_mx.backward()
assert_almost_equal(data_mx.grad.asnumpy(), _np.ones_like(data_np),
rtol=1e-5, atol=1e-6, use_broadcast=False)
@with_seed()
@use_np
def test_np_prod():
class TestProd(HybridBlock):
def __init__(self, axis=None, dtype=None, keepdims=False):
super(TestProd, self).__init__()
self._axis = axis
self._dtype = dtype
self._keepdims = keepdims
def hybrid_forward(self, F, a, *args, **kwargs):
return F.np.prod(a, axis=self._axis, dtype=self._dtype, keepdims=self._keepdims)
in_data_dim = random.choice([3, 4])
shape = rand_shape_nd(in_data_dim, dim=3)
for hybridize in [False, True]:
for keepdims in [True, False]:
for axis in ([i for i in range(in_data_dim)] + [(), None]):
for itype in ['float32', 'float64']:
for dtype in ['float32', 'float64']:
# test gluon
test_prod = TestProd(axis=axis, dtype=dtype, keepdims=keepdims)
if hybridize:
test_prod.hybridize()
x = np.array(_np.random.uniform(-2.0, 2.0, size=shape), dtype=itype)
x.attach_grad()
expected_ret = _np.prod(x.asnumpy(), axis=axis, keepdims=keepdims)
expected_ret = expected_ret.astype(dtype)
with mx.autograd.record():
y = test_prod(x)
assert y.shape == expected_ret.shape
assert_almost_equal(y.asnumpy(), expected_ret, rtol=1e-3, atol=1e-5, use_broadcast=False)
y.backward()
# use keepdims=True so that broadcast divide can be used to calculate
# grad of input
expected_ret = _np.prod(x.asnumpy(), axis=axis, keepdims=True)
assert_almost_equal(x.grad.asnumpy(), expected_ret / x.asnumpy(), rtol=1e-3, atol=1e-3,
use_broadcast=False)
# test numeric
if itype == 'float32' and dtype == 'float32':
x_sym = mx.sym.Variable("x").as_np_ndarray()
mx_sym = mx.sym.np.prod(x_sym, axis=axis, dtype=dtype, keepdims=keepdims).as_nd_ndarray()
check_numeric_gradient(mx_sym, [x.as_nd_ndarray()],
numeric_eps=1e-3, rtol=1e-3, atol=1e-4, dtype=_np.float32)
# test imperative
mx_out = np.prod(x, axis=axis, dtype=dtype, keepdims=keepdims)
np_out = _np.prod(x.asnumpy(), axis=axis, keepdims=keepdims).astype(dtype)
assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False)
@with_seed()
@use_np
def test_np_flatten():
class TestFlatten(HybridBlock):
def hybrid_forward(self, F, x):
return x.flatten()
shapes = [(), (2, 0, 1), (3, 4, 5), 6, (0,), (0, 0, 0)]
for shape in shapes:
for hybridize in [True, False]:
test_flatten = TestFlatten()
if hybridize:
test_flatten.hybridize()
a_np = _np.random.uniform(size=shape).astype('float32')
a_mx = np.array(a_np, dtype=a_np.dtype)
a_mx.attach_grad()
with mx.autograd.record():
ret = test_flatten(a_mx)
expected_ret = a_np.flatten()
assert_almost_equal(expected_ret, ret.asnumpy(), rtol=1e-5, atol=1e-6, use_broadcast=False)
# check gradient
ret.backward()
assert_almost_equal(a_mx.grad.asnumpy(), _np.ones_like(a_np), rtol=1e-5, atol=1e-6, use_broadcast=False)
@with_seed()
@use_np
def test_np_broadcast_to():
class TestBroadcastTo(HybridBlock):
def __init__(self, dst_shape):
super(TestBroadcastTo, self).__init__()
self._dst_shape = dst_shape
def hybrid_forward(self, F, x):
return F.np.broadcast_to(x, self._dst_shape)
shapes = [
((), (1, 2, 4, 5)),
((1,), (4, 5, 6)),
((1, 0), (2, 4, 0)),
((1, 1), (2, 4, 0)),
((4, 1), (1, 2, 3, 4, 5)),
((4, 1), (1, 0, 3, 4, 5))
]
for src_shape, dst_shape in shapes:
for hybridize in [True, False]:
test_broadcast_to = TestBroadcastTo(dst_shape)
if hybridize:
test_broadcast_to.hybridize()
a = _np.random.uniform(size=src_shape).astype(np.float32)
expected_ret = _np.broadcast_to(a, dst_shape)
a_mx = np.array(a, dtype=a.dtype)
a_mx.attach_grad()
with mx.autograd.record():
ret = test_broadcast_to(a_mx)
assert_almost_equal(ret.asnumpy(), expected_ret, rtol=1e-5, atol=1e-6, use_broadcast=False)
ret.backward()
expected_grad = collapse_sum_like(_np.ones_like(expected_ret), src_shape)
assert_almost_equal(a_mx.grad.asnumpy(), expected_grad, rtol=1e-5, atol=1e-6, use_broadcast=False)
@with_seed()
@use_np
def test_np_transpose():
def np_transpose_grad(out_shape, dtype, axes=None):
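"""Gradient of np.transpose under an all-ones output gradient: transpose the ones
back with the inverse permutation of `axes`, built by scattering arange(len(axes))
into the positions given by `axes` (None and () fall back to a plain reversed
transpose, which is its own inverse)."""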
ograd = _np.ones(out_shape, dtype=dtype)
if axes is None or axes == ():
return _np.transpose(ograd, axes)
np_axes = _np.array(list(axes))
transpose_axes = _np.zeros_like(np_axes)
transpose_axes[np_axes] = _np.arange(len(np_axes))
return _np.transpose(ograd, tuple(list(transpose_axes)))
class TestTranspose(HybridBlock):
def __init__(self, axes=None):
super(TestTranspose, self).__init__()
self.axes = axes
def hybrid_forward(self, F, a):
return F.np.transpose(a, self.axes)
test_workloads = [[(), [(), None]],
[(2,), [(0,), None]],
[(0, 2), [(0, 1), (1, 0)]],
[(5, 10), [(0, 1), (1, 0), None]],
[(8, 2, 3), [(2, 0, 1), (0, 2, 1), (0, 1, 2), (2, 1, 0), (-1, 1, 0), None]],
[(8, 2, 16), [(0, 2, 1), (2, 0, 1), (0, 1, 2), (2, 1, 0), (-1, -2, -3)]],
[(8, 3, 4, 8), [(0, 2, 3, 1), (1, 2, 3, 0), (0, 3, 2, 1)]],
[(8, 3, 2, 3, 8), [(0, 1, 3, 2, 4), (0, 1, 2, 3, 4), (4, 0, 1, 2, 3)]],
[(3, 4, 3, 4, 3, 2), [(0, 1, 3, 2, 4, 5), (2, 3, 4, 1, 0, 5), None]]]
for hybridize in [True, False]:
for dtype in [_np.float32, _np.float16, _np.int32]:
for data_shape, axes_workload in test_workloads:
for axes in axes_workload:
for grad_req in ['write', 'add']:
test_trans = TestTranspose(axes)
if hybridize:
test_trans.hybridize()
x = np.random.normal(0, 1, data_shape).astype(dtype)
x = x.astype(dtype)
x.attach_grad(grad_req=grad_req)
if grad_req == 'add':
x.grad[()] = np.random.normal(0, 1, x.grad.shape).astype(x.grad.dtype)
x_grad_np = x.grad.asnumpy()
np_out = _np.transpose(x.asnumpy(), axes)
with mx.autograd.record():
mx_out = test_trans(x)
assert mx_out.shape == np_out.shape
assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False)
mx_out.backward()
np_backward = np_transpose_grad(np_out.shape, dtype, axes)
if grad_req == 'add':
assert_almost_equal(x.grad.asnumpy(), np_backward + x_grad_np,
rtol=1e-3, atol=1e-5, use_broadcast=False)
else:
assert_almost_equal(x.grad.asnumpy(), np_backward, rtol=1e-3, atol=1e-5, use_broadcast=False)
mx_out = x.transpose(axes)
np_out = x.asnumpy().transpose(axes)
assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False)
if isinstance(axes, (list, tuple)):
mx_out = x.transpose(*axes)
np_out = x.asnumpy().transpose(*axes)
assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False)
# Test for error raising
dat = np.random.normal(0, 1, (3, 4, 5), dtype=np.float32)
assert_raises(MXNetError, lambda: dat.transpose((0, 0, 1)))
assert_raises(MXNetError, lambda: dat.transpose((0, 1, 3)))
@with_seed()
@use_np
def test_np_meshgrid():
nx, ny = (4, 5)
x = np.array(_np.linspace(0, 1, nx), dtype=np.float32)
y = np.array(_np.linspace(0, 1, ny), dtype=np.float32)
z = np.ones(())
xv, yv, zv = np.meshgrid(x, y, z)
xv_expected, yv_expected, zv_expected = _np.meshgrid(x.asnumpy(), y.asnumpy(), z.asnumpy())
assert same(xv.asnumpy(), xv_expected)
assert same(yv.asnumpy(), yv_expected)
assert same(zv.asnumpy(), zv_expected)
@with_seed()
@use_np
def test_np_broadcast_arrays():
shape_config = [
[(), (2, 1), (1, 3), (4, 1, 1), (5, 4, 2, 3)],
[(0,), (), (2, 1), (1, 0), (3, 2, 1)]
]
for shapes in shape_config:
arrays_np = [_np.random.randint(low=0, high=1000, size=shape, dtype=_np.int32) for shape in shapes]
arrays_mx = [np.array(arr, dtype=arr.dtype) for arr in arrays_np]
expected_rets = _np.broadcast_arrays(*arrays_np)
rets = np.broadcast_arrays(*arrays_mx)
for expected_ret, ret in zip(expected_rets, rets):
assert same(expected_ret, ret.asnumpy())
@with_seed()
@use_np
def test_np_tile():
config = [
((), ()),
((), 0),
((), (2, 0)),
((), (2, 3)),
((4, 2), (2,)),
((4, 2), (2, 3)),
((4, 2), (2, 1, 4)),
((4, 2), (2, 3, 4)),
((4, 2), (2, 0)),
((4, 2), (2, 0, 3)),
((4, 2), (2, 0, 3)),
((4, 0), (2, 0, 3)),
]
class TestTile(HybridBlock):
def __init__(self, reps):
super(TestTile, self).__init__()
self._reps = reps
def hybrid_forward(self, F, x):
return F.np.tile(x, reps=self._reps)
for shape, reps in config:
data_np = _np.random.randint(low=0, high=1000, size=shape)
data_mx = np.array(data_np, dtype=data_np.dtype)
ret_np = _np.tile(data_np, reps=reps)
ret_mx = np.tile(data_mx, reps=reps)
assert same(ret_mx.asnumpy(), ret_np)
net = TestTile(reps)
for hybrid in [False, True]:
if hybrid:
net.hybridize()
ret_mx = net(data_mx)
assert same(ret_mx.asnumpy(), ret_np)
@with_seed()
@use_np
def test_np_tril():
# numpy tril does not support scalar array (zero-dim)
config = [
((4, 2), 3),
((4, 2), 9),
((4, 2), 0),
((4, 2), -1),
((4, 5, 6), 0),
((4, 5, 6), 5),
((4, 5, 6), 2),
((4, 5, 6), -2),
((4, 5, 6), -5),
((4, 0), 0),
((4, 0), 2),
((4, 0), 4),
((4, 0), -3),
((4, 0, 5), 0),
((4, 0, 5), 1),
((4, 0, 5), 5),
((4, 0, 5), -3),
((3, ), 0),
((3, ), 2),
((3, ), 5)
]
class TestTril(HybridBlock):
def __init__(self, k):
super(TestTril, self).__init__()
self._k = k
def hybrid_forward(self, F, x):
return F.np.tril(x, k=self._k)
for prefix in [1, -1]:
for shape, k in config:
data_np = _np.random.uniform(size=shape)
data_mx = np.array(data_np, dtype=data_np.dtype)
data_mx.attach_grad()
ret_np = _np.tril(data_np, k*prefix)
with mx.autograd.record():
ret_mx = np.tril(data_mx, k*prefix)
assert same(ret_mx.asnumpy(), ret_np)
ret_mx.backward()
if len(shape) == 2:
grad_np = _np.tri(*shape, k=k*prefix)
assert same(data_mx.grad.asnumpy(), grad_np)
if len(shape) == 1:
grad_np = _np.tri(*shape, k=k*prefix)
grad_np = grad_np.sum(axis=0, keepdims=False)
assert same(data_mx.grad.asnumpy(), grad_np)
net = TestTril(k*prefix)
for hybrid in [False, True]:
if hybrid:
net.hybridize()
ret_mx = net(data_mx)
assert same(ret_mx.asnumpy(), ret_np)
@with_seed()
@use_np
def test_np_unary_funcs():
def check_unary_func(func, ref_grad, shape, low, high):
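"""Compares the mxnet.np unary op named `func` against its official NumPy
counterpart, both through Gluon (hybridized and non-hybridized) and imperatively;
`ref_grad`, when given, is the analytical derivative used to verify backward, and
the unsupported ufunc keyword arguments (where, subok, dtype, casting, order) are
expected to raise."""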
class TestUnary(HybridBlock):
def __init__(self, func):
super(TestUnary, self).__init__()
self._func = func
def hybrid_forward(self, F, a, *args, **kwargs):
return getattr(F.np, self._func)(a)
np_func = getattr(_np, func)
mx_func = TestUnary(func)
np_test_data = _np.random.uniform(low, high, shape).astype(_np.float32)
mx_test_data = mx.numpy.array(np_test_data)
for hybridize in [True, False]:
if hybridize:
mx_func.hybridize()
if ref_grad:
mx_test_data.attach_grad()
np_out = np_func(np_test_data)
with mx.autograd.record():
y = mx_func(mx_test_data)
assert y.shape == np_out.shape
assert_almost_equal(y.asnumpy(), np_out, rtol=1e-3, atol=1e-5)
if np_out.dtype == np.bool_:
assert y.dtype == np.bool_
if ref_grad:
y.backward()
assert_almost_equal(mx_test_data.grad.asnumpy(), ref_grad(np_test_data), rtol=1e-1, atol=1e-2, equal_nan=True)
np_out = getattr(_np, func)(np_test_data)
mx_out = getattr(mx.np, func)(mx_test_data)
assert mx_out.shape == np_out.shape
assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5)
assertRaises(NotImplementedError, getattr(np, func), mx_test_data, where=False)
assertRaises(NotImplementedError, getattr(np, func), mx_test_data, subok=False)
assertRaises(NotImplementedError, getattr(np, func), mx_test_data, dtype=_np.int8)
assertRaises(TypeError, getattr(np, func), mx_test_data, dtype="abcdefg")
assertRaises(NotImplementedError, getattr(np, func), mx_test_data, casting='safe')
assertRaises(TypeError, getattr(np, func), mx_test_data, casting='mxnet')
assertRaises(NotImplementedError, getattr(np, func), mx_test_data, order='C')
assertRaises(NotImplementedError, getattr(np, func), mx_test_data, order='mxnet')
funcs = {
'absolute' : (lambda x: -1. * (x < 0) + (x > 0), -1.0, 1.0),
'cbrt' : (lambda x: 1. / (3. * _np.cbrt(x) ** 2), -1.0, 1.0),
'ceil' : (None, -10.0, 10.0),
'exp' : (lambda x: _np.exp(x), -1.0, 1.0),
'expm1' : (lambda x: _np.exp(x), -1.0, 1.0),
'fix' : (None, -10.0, 10.0),
'floor' : (None, -10.0, 10.0),
'log' : (lambda x: 1.0 / x, 0.1, 5.0),
'log10' : (lambda x: 1.0 / (x * _np.log(10)), 0.1, 10.0),
'log1p' : (lambda x: 1.0 / (1.0 + x), -0.9, 5.0),
'log2' : (lambda x: 1.0 / (x * _np.log(2)), 0.1, 2.0),
'logical_not' : (None, -1.0, 1.0),
'negative' : (lambda x: -1. * _np.ones(x.shape), -1.0, 1.0),
'reciprocal' : (lambda x: -1. / (x ** 2), 0.01, 1.0),
'rint' : (None, -5.0, 5.0),
'sign' : (None, -1.0, 1.0),
'sqrt' : (lambda x: 0.5 / _np.sqrt(x), 0.001, 10.0),
'square' : (lambda x: 2.0 * x, -1.0, 1.0),
'trunc' : (None, -5.0, 5.0),
'sin' : (lambda x: _np.cos(x), -1.0, 1.0),
'cos' : (lambda x: -_np.sin(x), -1.0, 1.0),
'tan' : (lambda x: _np.tan(x) ** 2 + 1.0, -1.0, 1.0),
'arcsin' : (lambda x: 1. / (1. - x ** 2) ** (1. / 2.), -1.0, 1.0),
'arccos' : (lambda x: -1. / (1. - x ** 2.) ** (1. / 2.), -1.0, 1.0),
'arctan' : (lambda x: 1. / (x ** 2. + 1.), -1.0, 1.0),
'degrees' : (lambda x: 180. / _np.pi * _np.ones(x.shape), -1.0, 1.0),
'radians' : (lambda x: _np.pi / 180. * _np.ones(x.shape), -1.0, 1.0),
'sinh' : (lambda x: _np.cosh(x), -1.0, 1.0),
'cosh' : (lambda x: _np.sinh(x), -1.0, 1.0),
'tanh' : (lambda x: 1. - _np.tanh(x) ** 2, -1.0, 1.0),
'arcsinh' : (lambda x: 1./(x**2 + 1.)**(1./2.), -1.0, 1.0),
'arccosh' : (lambda x: 1./(x**2 - 1.)**(1./2.), 2.0, 5.0),
'arctanh' : (lambda x: -1./(x**2 - 1.), -0.99, 0.99)
}
if has_tvm_ops():
funcs['rad2deg'] = (lambda x: 180. / _np.pi * _np.ones(x.shape), -1.0, 1.0)
funcs['deg2rad'] = (lambda x: _np.pi / 180. * _np.ones(x.shape), -1.0, 1.0)
ndim = random.choice([2, 3, 4])
for shape in [rand_shape_nd(ndim, dim=3), (1, 0, 2)]:
for func, func_data in funcs.items():
ref_grad, low, high = func_data
check_unary_func(func, ref_grad, shape, low, high)
@with_seed()
@use_np
def test_np_binary_funcs():
def check_binary_func(func, lshape, rshape, low, high, lgrads, rgrads=None, alltypes=None):
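"""Compares the broadcasting mxnet.np binary op named `func` against NumPy over
the given lhs/rhs shapes and dtype groups; `lgrads`/`rgrads` are analytical
gradients, checked via collapse_sum_like to undo broadcasting, and the unsupported
ufunc keyword arguments are expected to raise."""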
class TestBinary(HybridBlock):
def __init__(self, func):
super(TestBinary, self).__init__()
self._func = func
def hybrid_forward(self, F, a, b, *args, **kwargs):
return getattr(F.np, self._func)(a, b)
np_func = getattr(_np, func)
mx_func = TestBinary(func)
alltypes = alltypes if alltypes else [[_np.float16, _np.float32, _np.float64]]
for dtypes, lgrad, rgrad in zip(alltypes, lgrads, rgrads if rgrads else lgrads):
for dtype in dtypes:
ldtype = rdtype = dtype
if isinstance(dtype, tuple):
assert len(dtype) == 2
ldtype, rdtype = dtype
np_test_x1 = _np.random.uniform(low, high, lshape).astype(ldtype)
np_test_x2 = _np.random.uniform(low, high, rshape).astype(rdtype)
mx_test_x1 = mx.numpy.array(np_test_x1, dtype=ldtype)
mx_test_x2 = mx.numpy.array(np_test_x2, dtype=rdtype)
for hybridize in [True, False]:
if hybridize:
mx_func.hybridize()
if lgrad:
mx_test_x1.attach_grad()
mx_test_x2.attach_grad()
np_out = np_func(np_test_x1, np_test_x2)
with mx.autograd.record():
y = mx_func(mx_test_x1, mx_test_x2)
assert y.shape == np_out.shape
assert_almost_equal(y.asnumpy(), np_out.astype(y.dtype), rtol=1e-3, atol=1e-5,
use_broadcast=False, equal_nan=True)
if lgrad:
y.backward()
assert_almost_equal(mx_test_x1.grad.asnumpy(),
collapse_sum_like(lgrad(y.asnumpy(), np_test_x1, np_test_x2), mx_test_x1.shape),
rtol=1e-1, atol=1e-2, equal_nan=True, use_broadcast=False)
if rgrads is None:
assert_almost_equal(mx_test_x2.grad.asnumpy(),
collapse_sum_like(rgrad(y.asnumpy(), np_test_x2, np_test_x1), mx_test_x2.shape),
rtol=1e-1, atol=1e-2, equal_nan=True, use_broadcast=False)
else:
assert_almost_equal(mx_test_x2.grad.asnumpy(),
collapse_sum_like(rgrad(y.asnumpy(), np_test_x1, np_test_x2), mx_test_x2.shape),
rtol=1e-1, atol=1e-2, equal_nan=True, use_broadcast=False)
np_out = getattr(_np, func)(np_test_x1, np_test_x2)
mx_out = getattr(mx.np, func)(mx_test_x1, mx_test_x2)
assert mx_out.shape == np_out.shape
assert_almost_equal(mx_out.asnumpy(), np_out.astype(mx_out.dtype), rtol=1e-3, atol=1e-5,
use_broadcast=False, equal_nan=True)
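# Unsupported numpy ufunc keyword arguments must be rejected rather than silently
# ignored: unimplemented options raise NotImplementedError, invalid values raise TypeError.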
assertRaises(NotImplementedError, getattr(np, func), mx_test_x1, mx_test_x2, where=False)
assertRaises(NotImplementedError, getattr(np, func), mx_test_x1, mx_test_x2, subok=False)
assertRaises(NotImplementedError, getattr(np, func), mx_test_x1, mx_test_x2, dtype=_np.int8)
assertRaises(TypeError, getattr(np, func), mx_test_x1, mx_test_x2, dtype="abcdefg")
assertRaises(NotImplementedError, getattr(np, func), mx_test_x1, mx_test_x2, casting='safe')
assertRaises(TypeError, getattr(np, func), mx_test_x1, mx_test_x2, casting='mxnet')
assertRaises(NotImplementedError, getattr(np, func), mx_test_x1, mx_test_x2, order='C')
assertRaises(NotImplementedError, getattr(np, func), mx_test_x1, mx_test_x2, order='mxnet')
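# Each entry maps a binary op name to (low, high, lgrads, rgrads[, dtypes]):
# low/high bound the input sampling range, and lgrads/rgrads hold one reference
# gradient per dtype group in `dtypes` (default: a single float group). An rgrads of
# None means the op is symmetric, so lgrads is reused with the operands swapped.
# For example, mod/remainder use d/dx1 = 1 and d/dx2 = -floor(x1 / x2) for the float
# group and an all-zero gradient for the int32 group.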
funcs = {
'add': (-1.0, 1.0, [lambda y, x1, x2: _np.ones(y.shape)], None),
'subtract':
(-1.0, 1.0, [lambda y, x1, x2: _np.ones(y.shape)],
[lambda y, x1, x2: -_np.ones(y.shape)]),
'multiply': (-1.0, 1.0, [lambda y, x1, x2: _np.broadcast_to(x2, y.shape)],
[lambda y, x1, x2: _np.broadcast_to(x1, y.shape)]),
'divide': (0.1, 1.0, [lambda y, x1, x2: _np.ones(y.shape) / x2],
[lambda y, x1, x2: -x1 / (x2 * x2)]),
'mod': (1.0, 10.0,
[lambda y, x1, x2: _np.ones(y.shape),
lambda y, x1, x2: _np.zeros(y.shape)],
[lambda y, x1, x2: -_np.floor(x1 / x2),
lambda y, x1, x2: _np.zeros(y.shape)],
[[_np.float16, _np.float32, _np.float64], [_np.int32]]),
'remainder': (1.0, 10.0,
[lambda y, x1, x2: _np.ones(y.shape),
lambda y, x1, x2: _np.zeros(y.shape)],
[lambda y, x1, x2: -_np.floor(x1 / x2),
lambda y, x1, x2: _np.zeros(y.shape)],
[[_np.float16, _np.float32, _np.float64], [_np.int32]]),
'power': (1.0, 2.0, [lambda y, x1, x2: _np.power(x1, x2 - 1.0) * x2],
[lambda y, x1, x2: _np.power(x1, x2) * _np.log(x1)]),
'lcm': (-100, 100, [None], None, [[_np.int32]]),
'bitwise_xor': (-100, 100, [None], None, [[_np.int32]]),
'maximum': (-1, 1, [lambda y, x1, x2: _np.ones(y.shape) * (x1 >= x2)],
[lambda y, x1, x2: _np.ones(y.shape) * (x1 < x2)]),
'minimum': (-1, 1, [lambda y, x1, x2: _np.ones(y.shape) * (x1 <= x2)],
[lambda y, x1, x2: _np.ones(y.shape) * (x1 > x2)]),
'copysign': (-1, 1,
[lambda y, x1, x2: _np.ones(y.shape) * (((x1 * x2) >= 0).astype(_np.float32) - ((x1 * x2) < 0).astype(_np.float32))],
[lambda y, x1, x2: _np.zeros(y.shape)]),
'arctan2': (-1, 1, [lambda y, x1, x2: x2 / (_np.square(x1) + _np.square(x2))],
[lambda y, x1, x2: -x1 / (_np.square(x1) + _np.square(x2))]),
'hypot': (-1, 1, [lambda y, x1, x2: x1 / y],
[lambda y, x1, x2: x2 / y]),
'ldexp': (-3, 3, [None], None, [[_np.int32]]),
}
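# Shape pairs cover same-shape, broadcast, zero-size-dimension, and scalar () operands.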
shape_pairs = [((3, 2), (3, 2)),
((3, 2), (3, 1)),
((3, 1), (3, 0)),
((0, 2), (1, 2)),
((2, 3, 4), (3, 1)),
((2, 3), ()),
((), (2, 3))]
for lshape, rshape in shape_pairs:
for func, func_data in funcs.items():
dtypes = None
assert (len(func_data) == 4 or len(func_data) == 5)
if len(func_data) == 4:
low, high, lgrads, rgrads = func_data
else:
low, high, lgrads, rgrads, dtypes = func_data
check_binary_func(func, lshape, rshape, low, high, lgrads, rgrads, dtypes)
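# Mixed-precision test: verifies that binary ops accept mixed-dtype operands
# (bool/int with float, and different float widths) and produce values matching
# numpy. Only the forward output and shape are compared against numpy here.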
@with_seed()
@use_np
def test_np_mixed_precision_binary_funcs():
itypes = [np.bool, np.int8, np.int32, np.int64]
ftypes = [np.float16, np.float32, np.float64]
def check_mixed_precision_binary_func(func, low, high, lshape, rshape, lgrad, rgrad, ltype, rtype):
class TestMixedBinary(HybridBlock):
def __init__(self, func):
super(TestMixedBinary, self).__init__()
self._func = func
def hybrid_forward(self, F, a, b, *args, **kwargs):
return getattr(F.np, self._func)(a, b)
np_func = getattr(_np, func)
mx_func = TestMixedBinary(func)
np_test_x1 = _np.random.uniform(low, high, lshape).astype(ltype)
np_test_x2 = _np.random.uniform(low, high, rshape).astype(rtype)
mx_test_x1 = mx.numpy.array(np_test_x1, dtype=ltype)
mx_test_x2 = mx.numpy.array(np_test_x2, dtype=rtype)
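# Loosen tolerances when either operand is float16, since the result is computed
# in reduced precision.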
rtol = 1e-2 if ltype is np.float16 or rtype is np.float16 else 1e-3
atol = 1e-3 if ltype is np.float16 or rtype is np.float16 else 1e-5
for hybridize in [True, False]:
if hybridize:
mx_func.hybridize()
np_out = np_func(np_test_x1, np_test_x2)
with mx.autograd.record():
y = mx_func(mx_test_x1, mx_test_x2)
assert y.shape == np_out.shape
assert_almost_equal(y.asnumpy(), np_out.astype(y.dtype), rtol=rtol, atol=atol,
use_broadcast=False, equal_nan=True)
np_out = getattr(_np, func)(np_test_x1, np_test_x2)
mx_out = getattr(mx.np, func)(mx_test_x1, mx_test_x2)
assert mx_out.shape == np_out.shape
assert_almost_equal(mx_out.asnumpy(), np_out.astype(mx_out.dtype), rtol=rtol, atol=atol,
use_broadcast=False, equal_nan=True)
funcs = {
'add': (-1.0, 1.0, None, None),
'subtract': (-1.0, 1.0, None, None),
'multiply': (-1.0, 1.0, lambda y, x1, x2: _np.broadcast_to(x2, y.shape),
lambda y, x1, x2: _np.broadcast_to(x1, y.shape))
}
shape_pairs = [((3, 2), (3, 2)),
((3, 2), (3, 1)),
((3, 0), (3, 0)),
((3, 1), (3, 0)),
((0, 2), (1, 2)),
((2, 3, 4), (3, 1)),
((2, 3), ()),
((), (2, 3))]
for func, func_data in funcs.items():
low, high, lgrad, rgrad = func_data
for lshape, rshape in shape_pairs:
for type1, type2 in itertools.product(itypes, ftypes):
check_mixed_precision_binary_func(func, low, high, lshape, rshape, lgrad, rgrad, type1, type2)
check_mixed_precision_binary_func(func, low, high, lshape, rshape, lgrad, rgrad, type2, type1)
for type1, type2 in itertools.product(ftypes, ftypes):
if type1 == type2:
continue
check_mixed_precision_binary_func(func, low, high, lshape, rshape, lgrad, rgrad, type1, type2)
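# Boolean inputs: add, multiply and true_divide should accept boolean ndarrays and
# match numpy's results (forward output only).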
@with_seed()
@use_np
def test_np_boolean_binary_funcs():
def check_boolean_binary_func(func, mx_x1, mx_x2):
class TestBooleanBinary(HybridBlock):
def __init__(self, func):
super(TestBooleanBinary, self).__init__()
self._func = func
def hybrid_forward(self, F, a, b, *args, **kwargs):
return getattr(F.np, self._func)(a, b)
np_x1 = mx_x1.asnumpy()
np_x2 = mx_x2.asnumpy()
np_func = getattr(_np, func)
mx_func = TestBooleanBinary(func)
for hybridize in [True, False]:
if hybridize:
mx_func.hybridize()
np_out = np_func(np_x1, np_x2)
with mx.autograd.record():
y = mx_func(mx_x1, mx_x2)
assert y.shape == np_out.shape
assert_almost_equal(y.asnumpy(), np_out.astype(y.dtype), rtol=1e-3, atol=1e-20,
use_broadcast=False, equal_nan=True)
np_out = getattr(_np, func)(np_x1, np_x2)
mx_out = getattr(mx.np, func)(mx_x1, mx_x2)
assert mx_out.shape == np_out.shape
assert_almost_equal(mx_out.asnumpy(), np_out.astype(mx_out.dtype), rtol=1e-3, atol=1e-20,
use_broadcast=False, equal_nan=True)
funcs = [
'add',
'multiply',
'true_divide',
]
shape_pairs = [((3, 2), (3, 2)),
((3, 2), (3, 1)),
((3, 1), (3, 0)),
((0, 2), (1, 2)),
((2, 3, 4), (3, 1)),
((2, 3), ()),
((), (2, 3))]
for lshape, rshape in shape_pairs:
for func in funcs:
x1 = np.array(_np.random.uniform(size=lshape) > 0.5)
x2 = np.array(_np.random.uniform(size=rshape) > 0.5)
check_boolean_binary_func(func, x1, x2)
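# npx.relu: checked against a numpy reference for both the forward output and the
# gradient (1 where x > 0, else 0), in hybridized and imperative modes, including
# scalar and zero-size input shapes.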
@with_seed()
@use_np
def test_npx_relu():
def np_relu(x):
return _np.maximum(x, 0.0)
def np_relu_grad(x):
return 1.0 * (x > 0.0)
class TestReLU(HybridBlock):
def __init__(self):
super(TestReLU, self).__init__()
def hybrid_forward(self, F, a):
return F.npx.relu(a)
shapes = [(), (2, 3, 4), (2, 0, 3), (1, 0, 0)]
for hybridize in [True, False]:
for shape in shapes:
test_relu = TestReLU()
if hybridize:
test_relu.hybridize()
x = rand_ndarray(shape).as_np_ndarray()
x.attach_grad()
np_out = np_relu(x.asnumpy())
with mx.autograd.record():
mx_out = test_relu(x)
assert mx_out.shape == np_out.shape
assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5)
mx_out.backward()
np_backward = np_relu_grad(x.asnumpy())
assert_almost_equal(x.grad.asnumpy(), np_backward, rtol=1e-3, atol=1e-5)
mx_out = npx.relu(x)
np_out = np_relu(x.asnumpy())
assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5)
@with_seed()
@use_np
def test_npx_sigmoid():