# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from mxnet.test_utils import *
from mxnet.base import MXNetError
import pytest
from common import assertRaises
import random
import warnings
def is_scalar(var):
return False if hasattr(var, "__len__") else True
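# The get_result_type* helpers probe a numpy reference op with a zero input to
# infer the expected output storage type: if zeros map to zeros, the sparse
# layout (dflt_stype) can be preserved, otherwise the result must densify to
# 'default'. For example, np.sign(0) == 0, so a row_sparse input keeps
# 'row_sparse', while np.cos(0) == 1 forces a 'default' result.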
def get_result_type(call, dflt_stype):
"""Try to infer result storage type for a sparse matrix and a given unary operation"""
if call is not None and dflt_stype != 'default':
zero = np.zeros([1])
result = do_normalize(call(zero))
if not almost_equal(result, zero, equal_nan=True):
expected_result_type = 'default'
else:
if dflt_stype is not None:
expected_result_type = dflt_stype
else:
expected_result_type = 'default'
else:
expected_result_type = 'default'
return expected_result_type
def get_result_type_with_scalar(call, dflt_stype):
"""Try to infer result storage type when operating a sparse matrices and a scalar"""
if call is not None and dflt_stype != 'default':
zero = np.zeros([1])
result = call(zero, 5)
if not almost_equal(result, zero, equal_nan=True):
expected_result_type = 'default'
else:
if dflt_stype is not None:
expected_result_type = dflt_stype
else:
expected_result_type = 'default'
else:
expected_result_type = 'default'
return expected_result_type
def get_result_type_2(call, dflt_stype):
"""Try to infer result storage type when operating on two sparse matrices"""
if call is not None and dflt_stype != 'default':
zero = np.zeros([1])
need_default = False
for outer in [zero, np.ones(zero.shape)]:
for inner in [zero, np.ones(zero.shape)]:
result = do_normalize(call(outer, inner))
if not almost_equal(result, zero, equal_nan=True):
need_default = True
break
if need_default is True:
break
if not need_default and dflt_stype is not None:
expected_result_type = dflt_stype
else:
expected_result_type = 'default'
else:
expected_result_type = 'default'
return expected_result_type
def get_result_type_3(call, dflt_stype):
"""Try to infer result storage type when operating on three sparse matrices"""
if call is not None and dflt_stype != 'default':
zero = np.zeros([1])
need_default = False
for moon in [zero]:
for outer in [zero]:
for inner in [zero]:
res_1, res_2 = call(moon, outer, inner)
result = do_normalize(res_1)
if not almost_equal(result, zero, equal_nan=True):
need_default = True
break
result = do_normalize(res_2)
if not almost_equal(result, zero, equal_nan=True):
need_default = True
break
if need_default is True:
break
if need_default is True:
break
if not need_default and dflt_stype is not None:
expected_result_type = dflt_stype
else:
expected_result_type = 'default'
else:
expected_result_type = 'default'
return expected_result_type
def get_fw_bw_result_types(forward_numpy_call, fwd_res_dflt,
backward_numpy_call, bwd_res_dflt):
return (get_result_type(forward_numpy_call, fwd_res_dflt),
get_result_type(backward_numpy_call, bwd_res_dflt))
def get_fw_bw_result_types_2(forward_numpy_call, fwd_res_dflt,
backward_numpy_call, bwd_res_dflt):
return (get_result_type(forward_numpy_call, fwd_res_dflt),
get_result_type_2(backward_numpy_call, bwd_res_dflt))
def get_fw_bw_result_types_with_scalar(forward_numpy_call, fwd_res_dflt,
backward_numpy_call, bwd_res_dflt):
return (get_result_type_with_scalar(forward_numpy_call, fwd_res_dflt),
get_result_type_with_scalar(backward_numpy_call, bwd_res_dflt))
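# Builds the list of non-zero row indices for a row_sparse array: every row is
# kept with probability `density`, and any `force_indices` are always included,
# which the tests below use to force overlapping non-zero rows between operands.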
def gen_rsp_random_indices(shape, density=.5, force_indices=None):
assert density >= 0 and density <= 1
indices = set()
if force_indices is not None:
for val in force_indices:
indices.add(int(val))
if not np.isclose(density, .0, rtol=1.e-3, atol=1.e-3, equal_nan=True) and len(shape) > 0:
row_count = shape[0]
for i in range(row_count):
r = random.uniform(0, 1)
if r <= density and len(indices) < shape[0]:
indices.add(i)
assert len(indices) <= shape[0]
return list(indices)
def all_zero(var):
return 0
@pytest.mark.skip(reason="https://github.com/apache/incubator-mxnet/issues/18740")
def test_elemwise_binary_ops():
# skip testing on GPU because only CPU ops are implemented
if default_device().device_type == 'gpu':
return
def test_elemwise_binary_op(name, lhs_stype, rhs_stype, shape,
forward_mxnet_call, forward_numpy_call, backward_numpy_call,
lhs_grad_stype,
rhs_grad_stype,
expected_result_storage_type=None,
modifier_func=None,
lhs_density=.5,
rhs_density=.5,
force_lr_overlap=False,
force_grad_overlap=False,
ograd_density=0.0,
skip_gradient_check=False,
shuffle_csr_indices=True,
verbose=False):
if lhs_grad_stype is None:
lhs_grad_stype = lhs_stype
if rhs_grad_stype is None:
rhs_grad_stype = rhs_stype
lhs_grad_stype = get_result_type_3(backward_numpy_call, lhs_grad_stype)
rhs_grad_stype = get_result_type_3(backward_numpy_call, rhs_grad_stype)
if verbose is True:
print("testing: {} lhs={}, rhs={}, lhs_grad_stype={}, rhs_grad_stype={}"
.format(name, lhs_stype, rhs_stype, lhs_grad_stype, rhs_grad_stype))
# Output type should be same as lvalue type, unless otherwise specified
if expected_result_storage_type is None:
if lhs_stype == 'default' or rhs_stype == 'default':
expected_result_storage_type = 'default'
else:
expected_result_storage_type = lhs_stype
lhs = mx.symbol.Variable('lhs', stype=lhs_stype)
rhs = mx.symbol.Variable('rhs', stype=rhs_stype)
grad_stypes = dict()
grad_stypes['lhs'] = lhs_grad_stype
grad_stypes['rhs'] = rhs_grad_stype
if lhs_stype == 'default':
lhs_nd = rand_ndarray(shape, 'default')
if abs(lhs_density) < 1e-4:
func = all_zero
else:
func = modifier_func
lhs_nd = mx.nd.array(assign_each(lhs_nd.asnumpy(), func))
else:
lhs_nd = create_sparse_array_zd(
shape, lhs_stype, density=lhs_density,
modifier_func=modifier_func,
shuffle_csr_indices=shuffle_csr_indices,
rsp_indices=gen_rsp_random_indices(
shape,
density=lhs_density,
force_indices=[(shape[0]/2)] if force_lr_overlap is True else None
))
if rhs_stype == 'default':
rhs_nd = rand_ndarray(shape, 'default')
if abs(rhs_density) < 1e-4:
func = all_zero
else:
func = modifier_func
rhs_nd = mx.nd.array(assign_each(rhs_nd.asnumpy(), func))
else:
rhs_nd = create_sparse_array_zd(
shape, rhs_stype, density=rhs_density,
modifier_func=modifier_func,
shuffle_csr_indices=shuffle_csr_indices,
rsp_indices=gen_rsp_random_indices(
shape,
density=rhs_density,
force_indices=[(shape[0]/2)] if force_lr_overlap is True else None
))
lhs_np = lhs_nd.asnumpy()
rhs_np = rhs_nd.asnumpy()
if verbose is True:
print("lhs input: {}".format(lhs_np))
print("rhs input: {}".format(rhs_np))
out_np = forward_numpy_call(lhs_np, rhs_np)
if verbose is True:
print("out_np: {}".format(out_np))
test = forward_mxnet_call(lhs, rhs)
location = {'lhs': lhs_nd, 'rhs': rhs_nd}
outputs = check_symbolic_forward(test, location, [out_np], equal_nan=True)
assert len(outputs) == 1
assert outputs[0].stype == expected_result_storage_type
if verbose is True:
print ("mx forward output: ", outputs[0].asnumpy())
print ("lhs_nd: ", lhs_nd.stype)
print ("rhs_nd: ", rhs_nd.stype)
print ("forward output: ", outputs[0].stype)
if outputs[0].stype != 'default':
out_grad = create_sparse_array_zd(
shape, outputs[0].stype, density=ograd_density,
data_init=1,
modifier_func=lambda x: 2,
shuffle_csr_indices=shuffle_csr_indices,
rsp_indices=gen_rsp_random_indices(
shape,
density=ograd_density,
force_indices=[(shape[0]/2)] if force_grad_overlap is True else None
))
else:
if abs(ograd_density) < 1e-4:
out_grad = mx.nd.array(np.zeros(shape))
else:
out_grad = mx.nd.array(np.ones(shape))
out_grad_np = out_grad.asnumpy()
if verbose is True:
print("out_grad_np", out_grad_np)
ingrad_lhs_np, ingrad_rhs_np = backward_numpy_call(out_grad_np, lhs_np, rhs_np)
if verbose is True:
print("out_grad", out_grad.asnumpy())
print("ingrad_lhs_np", ingrad_lhs_np)
print("ingrad_rhs_np", ingrad_rhs_np)
igrads_result = check_symbolic_backward(test, location, [out_grad],
[ingrad_lhs_np, ingrad_rhs_np],
grad_stypes=grad_stypes,
equal_nan=True)
if verbose is True:
print("ingrad_lhs", igrads_result['lhs'].asnumpy())
print("ingrad_rhs", igrads_result['rhs'].asnumpy())
assert len(igrads_result) == 2
if lhs_grad_stype is not None:
assert igrads_result['lhs'].stype == lhs_grad_stype
if rhs_grad_stype is not None:
assert igrads_result['rhs'].stype == rhs_grad_stype
if not skip_gradient_check:
check_numeric_gradient(test, location,
grad_stype_dict=grad_stypes)
def check_all(l, r, check_function):
assert l.shape == r.shape
return check_function(l, r)
def gt(l, r):
return check_all(l, r, lambda a, b: a > b)
def ge(l, r):
return check_all(l, r, lambda a, b: a >= b)
def lt(l, r):
return check_all(l, r, lambda a, b: a < b)
def le(l, r):
return check_all(l, r, lambda a, b: a <= b)
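# Storage type expected from elemwise_mul: matching stypes are preserved,
# row_sparse times default stays row_sparse (only non-zero rows contribute),
# and anything else (e.g. csr with default) falls back to 'default'.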
def elemwise_mul_stype(lstype, rstype):
if lstype == rstype:
return lstype
elif lstype == 'default' and rstype == 'row_sparse':
return 'row_sparse'
elif lstype == 'row_sparse' and rstype == 'default':
return 'row_sparse'
else:
return 'default'
def elemwise_mul_lhs_grad_stype(lstype, rstype):
return elemwise_mul_stype(elemwise_mul_stype(lstype, rstype), rstype)
def elemwise_mul_rhs_grad_stype(lstype, rstype):
return elemwise_mul_stype(elemwise_mul_stype(lstype, rstype), lstype)
def check_elemwise_binary_ops(lhs_stype, rhs_stype, shape,
lhs_grad_stype=None, rhs_grad_stype=None,
lhs_density=.5, rhs_density=.5,
force_lr_overlap=False,
force_grad_overlap=False,
ograd_density=0.0):
test_elemwise_binary_op("elemwise_add", lhs_stype, rhs_stype, shape,
lambda l, r: mx.sym.sparse.elemwise_add(l, r),
lambda l, r: l + r,
lambda outg, l, r: (outg, outg),
lhs_grad_stype, rhs_grad_stype,
ograd_density=ograd_density,
force_lr_overlap=force_lr_overlap,
force_grad_overlap=force_grad_overlap,
lhs_density=lhs_density, rhs_density=rhs_density,
verbose=False)
if ((lhs_stype == 'default' and rhs_stype == 'row_sparse') or
(lhs_stype == 'row_sparse' and rhs_stype == 'row_sparse' and rhs_density == 0.0)):
test_elemwise_binary_op("elemwise_add", lhs_stype, rhs_stype, shape,
lambda l, r: mx.sym.sparse.elemwise_add(l, r, out=l),
lambda l, r: l + r,
lambda outg, l, r: (outg, outg),
lhs_grad_stype, rhs_grad_stype,
ograd_density=ograd_density,
force_lr_overlap=force_lr_overlap,
force_grad_overlap=force_grad_overlap,
lhs_density=lhs_density, rhs_density=rhs_density,
verbose=False)
test_elemwise_binary_op("elemwise_sub", lhs_stype, rhs_stype, shape,
lambda l, r: mx.sym.sparse.elemwise_sub(l, r, out=l),
lambda l, r: l - r,
lambda outg, l, r: (outg, -outg),
lhs_grad_stype, rhs_grad_stype,
ograd_density=ograd_density,
force_lr_overlap=force_lr_overlap,
force_grad_overlap=force_grad_overlap,
lhs_density=lhs_density, rhs_density=rhs_density,
verbose=False)
if ((lhs_stype == 'row_sparse' and rhs_stype == 'row_sparse') and (lhs_density == 0.0)):
test_elemwise_binary_op("elemwise_add", lhs_stype, rhs_stype, shape,
lambda l, r: mx.sym.sparse.elemwise_add(l, r, out=r),
lambda l, r: l + r,
lambda outg, l, r: (outg, outg),
lhs_grad_stype, rhs_grad_stype,
ograd_density=ograd_density,
force_lr_overlap=force_lr_overlap,
force_grad_overlap=force_grad_overlap,
lhs_density=lhs_density, rhs_density=rhs_density,
verbose=False)
test_elemwise_binary_op("elemwise_sub", lhs_stype, rhs_stype, shape,
lambda l, r: mx.sym.sparse.elemwise_sub(l, r, out=l),
lambda l, r: l - r,
lambda outg, l, r: (outg, -outg),
lhs_grad_stype, rhs_grad_stype,
ograd_density=ograd_density,
force_lr_overlap=force_lr_overlap,
force_grad_overlap=force_grad_overlap,
lhs_density=lhs_density, rhs_density=rhs_density,
verbose=False)
test_elemwise_binary_op("elemwise_sub", lhs_stype, rhs_stype, shape,
lambda l, r: mx.sym.sparse.elemwise_sub(l, r),
lambda l, r: l - r,
lambda outg, l, r: (outg, -outg),
lhs_grad_stype, rhs_grad_stype,
ograd_density=ograd_density,
force_lr_overlap=force_lr_overlap,
force_grad_overlap=force_grad_overlap,
lhs_density=lhs_density,
rhs_density=rhs_density,
verbose=False)
test_elemwise_binary_op("elemwise_mul", lhs_stype, rhs_stype, shape,
lambda l, r: mx.sym.sparse.elemwise_mul(l, r),
lambda l, r: l * r,
lambda outg, l, r: (outg * r, outg * l),
elemwise_mul_lhs_grad_stype(lhs_stype, rhs_stype),
elemwise_mul_rhs_grad_stype(lhs_stype, rhs_stype),
expected_result_storage_type=elemwise_mul_stype(lhs_stype, rhs_stype),
ograd_density=ograd_density,
force_lr_overlap=force_lr_overlap,
force_grad_overlap=force_grad_overlap,
lhs_density=lhs_density, rhs_density=rhs_density,
verbose=False)
test_elemwise_binary_op("elemwise_div", lhs_stype, rhs_stype, shape,
lambda l, r: mx.sym.sparse.elemwise_div(l, r),
lambda l, r: l / r,
lambda outg, l, r: (outg * (1/r), outg * (-l/(r*r))),
lhs_grad_stype, rhs_grad_stype,
modifier_func=lambda a: a if abs(a) > 0.25 else abs(a) + 1,
force_lr_overlap=force_lr_overlap,
force_grad_overlap=force_grad_overlap,
lhs_density=lhs_density, rhs_density=rhs_density,
ograd_density=ograd_density,
expected_result_storage_type='default',
skip_gradient_check=True,
verbose=False)
test_elemwise_binary_op("maximum", lhs_stype, rhs_stype, shape,
lambda l, r: mx.sym._internal._maximum(l, r),
lambda l, r: np.maximum(l, r),
lambda outg, l, r: (outg * ge(l, r), outg * lt(l, r)),
lhs_grad_stype, rhs_grad_stype,
modifier_func=lambda a: a if abs(a) > 0.25 else abs(a) + 1,
force_lr_overlap=force_lr_overlap,
force_grad_overlap=force_grad_overlap,
lhs_density=lhs_density, rhs_density=rhs_density,
skip_gradient_check=True,
ograd_density=ograd_density,
verbose=False)
test_elemwise_binary_op("minimum", lhs_stype, rhs_stype, shape,
lambda l, r: mx.sym._internal._minimum(l, r),
lambda l, r: np.minimum(l, r),
lambda outg, l, r: (outg * le(l, r), outg * gt(l, r)),
lhs_grad_stype, rhs_grad_stype,
modifier_func=lambda a: a if abs(a) > 0.25 else abs(a) + 1,
force_lr_overlap=force_lr_overlap,
force_grad_overlap=force_grad_overlap,
lhs_density=lhs_density, rhs_density=rhs_density,
ograd_density=ograd_density,
skip_gradient_check=True,
verbose=False)
test_elemwise_binary_op("hypot", lhs_stype, rhs_stype, shape,
lambda l, r: mx.sym._internal._hypot(l, r),
lambda l, r: np.hypot(l, r),
lambda outg, l, r: (
outg * assign_each2(
l, r, lambda a, b: a/np.sqrt(a * a + b * b)),
outg * assign_each2(
l, r, lambda a, b: b/np.sqrt(a * a + b * b))
),
lhs_grad_stype, rhs_grad_stype,
force_lr_overlap=force_lr_overlap,
force_grad_overlap=force_grad_overlap,
lhs_density=lhs_density, rhs_density=rhs_density,
ograd_density=ograd_density,
skip_gradient_check=True,
verbose=False)
# Run basic tests
with warnings.catch_warnings():
warnings.simplefilter("ignore")
for _ in range(1):
# Run defaults
check_elemwise_binary_ops('default', 'default', rand_shape_2d(5, 5))
# Try different densities
shape = rand_shape_2d(5, 5)
for lhs_density in [0.0, random.uniform(0, 1), 1.0]:
for rhs_density in [0.0, random.uniform(0, 1), 1.0]:
for ograd_density in [0.0, random.uniform(0, 1), 1.0]:
print("lhs_density={}, rhs_density={}, ograd_density={}, shape: {}".format(
lhs_density, rhs_density, ograd_density, shape))
# Try row_sparse overlaps
for force_lr_overlap in [False, True]:
for force_grad_overlap in [False, True]:
print(" force_lr_overlap={}, force_grad_overlap={}, shape={}".
format(force_lr_overlap, force_grad_overlap, shape))
# Back to left-right overlap possibilities
check_elemwise_binary_ops('row_sparse', 'row_sparse', shape,
lhs_grad_stype='row_sparse',
rhs_grad_stype='row_sparse',
lhs_density=lhs_density,
rhs_density=rhs_density,
force_lr_overlap=force_lr_overlap,
force_grad_overlap=force_grad_overlap,
ograd_density=ograd_density)
def test_elemwise_csr_same_zeros():
# Zeroes
a = mx.nd.sparse.zeros('csr', (1,1))
b = mx.nd.elemwise_add(a,a)
res = a.asnumpy() + a.asnumpy()
assert_almost_equal(b.asnumpy(), res)
def as_dense(arr):
if arr.stype != 'default':
return mx.nd.cast_storage(arr, stype='default')
else:
return arr
# Make sure that 0's look like 0's when we do a comparison
def do_normalize(arr):
ret = arr.copy()
idx = np.isclose(arr, -0, rtol=1.e-3, atol=1.e-3, equal_nan=True)
ret[idx] = 0
return ret
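# check_sparse_mathematical_core: builds a symbolic op from forward_mxnet_call,
# binds it with a dense or sparse input of the requested density, compares the
# forward output (value and stype) against forward_numpy_call, and, when
# backward_numpy_call is given, checks that the input gradient equals
# ograd * backward_numpy_call(data) with the expected gradient stype.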
def check_sparse_mathematical_core(name, stype,
forward_mxnet_call, forward_numpy_call, backward_numpy_call=None,
rhs_arg=None, data_init=9., grad_init=2., output_grad_stype=None,
input_grad_stype=None, force_overlap=False, density=.5,
ograd_density=.5, verbose=False, shuffle_csr_indices=True):
if verbose is True:
print("TESTING: " + name)
data = mx.symbol.Variable('data', stype=stype)
temp_input_grad_stype = input_grad_stype
if temp_input_grad_stype is None:
temp_input_grad_stype = stype
if rhs_arg is not None:
if is_scalar(rhs_arg):
expected_result_type, expected_grad_result_type = \
get_fw_bw_result_types_with_scalar(forward_numpy_call, stype,
backward_numpy_call, temp_input_grad_stype)
else:
expected_result_type, expected_grad_result_type = \
get_fw_bw_result_types_2(forward_numpy_call, stype,
backward_numpy_call, temp_input_grad_stype)
else:
expected_result_type, expected_grad_result_type = \
get_fw_bw_result_types(forward_numpy_call, stype,
backward_numpy_call, temp_input_grad_stype)
if input_grad_stype is not None and input_grad_stype != expected_grad_result_type:
print("{}: explicit override of deduced input grad type '{}' with '{}'".format(
name, expected_grad_result_type, input_grad_stype))
expected_grad_result_type = input_grad_stype
shape = rand_shape_2d()
if verbose is True:
print("Shape: ", shape, "density: ", density, "force_overlap", force_overlap)
if stype == 'default':
data_tmp = np.zeros(shape)
if abs(density) >= 1e-4:
data_tmp[:] = data_init
arr_data = mx.nd.array(data_tmp)
else:
arr_data = create_sparse_array_zd(
shape, stype, density=density,
data_init=data_init,
shuffle_csr_indices=shuffle_csr_indices,
rsp_indices=gen_rsp_random_indices(
shape,
density=density,
force_indices=[(shape[0]/2)] if force_overlap is True else None
)
)
data_tmp = arr_data.asnumpy()
if verbose is True:
print("arr_data indices", arr_data.indices.asnumpy())
if verbose is True:
print("input", data_tmp)
if backward_numpy_call is None:
arr_grad = None
elif expected_grad_result_type == 'default':
if abs(density) < 1e-4:
arr_grad = mx.nd.zeros(shape)
else:
arr_grad = mx.nd.ones(shape)
else:
arr_grad = create_sparse_array_zd(
shape,
expected_grad_result_type,
density=density,
data_init=1,
shuffle_csr_indices=shuffle_csr_indices,
rsp_indices=gen_rsp_random_indices(
shape,
density=density,
force_indices=[(shape[0]/2)] if force_overlap is True else None
)
)
if rhs_arg is not None:
test = forward_mxnet_call(data, rhs_arg)
else:
test = forward_mxnet_call(data)
args = list()
args.append(arr_data)
if arr_grad is not None:
exe_test = test._bind(default_device(), args=args, args_grad=[arr_grad])
else:
exe_test = test._bind(default_device(), args=args)
exe_test.forward(is_train=True)
assert exe_test.outputs[0].stype == expected_result_type
out = exe_test.outputs[0].asnumpy()
if rhs_arg is not None:
npout = forward_numpy_call(data_tmp, rhs_arg)
else:
npout = forward_numpy_call(data_tmp)
if verbose is True:
print("out", out)
print("npout", npout)
assert_almost_equal(out, npout, equal_nan=True)
if backward_numpy_call is not None:
if output_grad_stype == 'default' or output_grad_stype is None:
out_grad = mx.nd.empty(shape)
out_grad[:] = grad_init
else:
out_grad = create_sparse_array_zd(
shape, output_grad_stype,
density=density,
data_init=grad_init,
shuffle_csr_indices=shuffle_csr_indices,
rsp_indices=gen_rsp_random_indices(
shape,
density=ograd_density,
force_indices=[(shape[0]/2)] if force_overlap is True else None))
npout_grad = out_grad.asnumpy()
if verbose is True:
print("npout_grad", npout_grad)
if rhs_arg is not None:
temp = backward_numpy_call(data_tmp, rhs_arg)
else:
temp = backward_numpy_call(data_tmp)
input_grad = npout_grad * temp
if verbose is True:
print(arr_grad.asnumpy())
exe_test.backward(out_grad)
if verbose is True:
print(arr_grad.asnumpy())
assert arr_grad.stype == expected_grad_result_type
if verbose is True:
print(name)
print("arr_grad", arr_grad.asnumpy())
print("input_grad", input_grad)
assert_almost_equal(arr_grad, input_grad, equal_nan=True)
@pytest.mark.serial
@pytest.mark.skip(reason='https://github.com/apache/incubator-mxnet/issues/18829')
def test_sparse_mathematical_core():
def util_sign(a):
if np.isclose(a, -0, rtol=1.e-3, atol=1.e-3, equal_nan=True):
return 0
elif np.isclose(a, 0, rtol=1.e-3, atol=1.e-3, equal_nan=True):
return 0
elif a < 0.0:
return -1
else: # a > 0.0:
return 1
# Check scalar binary operators
def check_binary_op_with_scalar(stype,
output_grad_stype=None,
input_grad_stype=None,
density=.5, ograd_density=.5,
force_overlap=False):
# mul_scalar
check_sparse_mathematical_core("mul_scalar", stype,
lambda x, y: x * y,
lambda x, y: x * y,
lambda input, rhs: rhs,
rhs_arg=5.0,
data_init=2, grad_init=3,
output_grad_stype=output_grad_stype,
input_grad_stype=input_grad_stype,
density=density, ograd_density=ograd_density,
force_overlap=force_overlap,
verbose=False)
# plus_scalar
check_sparse_mathematical_core("plus_scalar", stype,
lambda x, y: x + y,
lambda x, y: x + y,
lambda input, rhs: 1,
rhs_arg=5.0,
data_init=2, grad_init=3,
output_grad_stype=output_grad_stype,
input_grad_stype=input_grad_stype,
density=density, ograd_density=ograd_density,
force_overlap=force_overlap,
verbose=False)
# minus_scalar
check_sparse_mathematical_core("minus_scalar", stype,
lambda x, y: x - y,
lambda x, y: x - y,
lambda input, rhs: 1,
rhs_arg=5.0,
data_init=2, grad_init=3,
output_grad_stype=output_grad_stype,
input_grad_stype=input_grad_stype,
density=density, ograd_density=ograd_density,
force_overlap=force_overlap,
verbose=False)
# Check many basic unary operators
def check_mathematical_core(stype, output_grad_stype=None,
input_grad_stype=None, force_overlap=False,
density=.5, ograd_density=.5):
# negative
check_sparse_mathematical_core("negative", stype,
lambda x: mx.sym.sparse.negative(x),
lambda x: np.negative(x),
force_overlap=force_overlap,
density=density,
input_grad_stype=input_grad_stype,
ograd_density=ograd_density)
# square
check_sparse_mathematical_core("square", stype,
lambda x: mx.sym.sparse.square(x),
lambda x: np.square(x),
lambda x: 2 * x,
output_grad_stype=output_grad_stype,
input_grad_stype=input_grad_stype,
force_overlap=force_overlap,
density=density, ograd_density=ograd_density,
verbose=False)
# sqrt
check_sparse_mathematical_core("sqrt", stype,
lambda x: mx.sym.sparse.sqrt(x),
lambda x: np.sqrt(x),
lambda x: 1.0/(2.0 * np.sqrt(x)),
output_grad_stype=output_grad_stype,
input_grad_stype=input_grad_stype,
force_overlap=force_overlap,
density=density, ograd_density=ograd_density,
verbose=False)
# cbrt
check_sparse_mathematical_core("cbrt", stype,
lambda x: mx.sym.sparse.cbrt(x),
lambda x: np.cbrt(x),
lambda x: 1.0/(3.0 * np.cbrt(x) * np.cbrt(x)),
output_grad_stype=output_grad_stype,
input_grad_stype=input_grad_stype,
force_overlap=force_overlap,
density=density, ograd_density=ograd_density,
verbose=False)
# rint
check_sparse_mathematical_core("rint", stype,
lambda x: mx.sym.sparse.rint(x),
lambda x: np.rint(x),
force_overlap=force_overlap, density=density,
input_grad_stype=input_grad_stype,
ograd_density=ograd_density)
# fix
check_sparse_mathematical_core("fix", stype,
lambda x: mx.sym.sparse.fix(x),
lambda x: np.fix(x),
force_overlap=force_overlap, density=density,
input_grad_stype=input_grad_stype,
ograd_density=ograd_density)
# floor
check_sparse_mathematical_core("floor", stype, lambda x: mx.sym.sparse.floor(x),
lambda x: np.floor(x),
force_overlap=force_overlap,
input_grad_stype=input_grad_stype,
density=density, ograd_density=ograd_density)
# ceil
check_sparse_mathematical_core("ceil", stype,
lambda x: mx.sym.sparse.ceil(x),
lambda x: np.ceil(x),
force_overlap=force_overlap,
input_grad_stype=input_grad_stype,
density=density, ograd_density=ograd_density)
# round
check_sparse_mathematical_core("round", stype,
lambda x: mx.sym.sparse.round(x),
lambda x: np.round(x),
force_overlap=force_overlap,
input_grad_stype=input_grad_stype,
density=density, ograd_density=ograd_density)
# trunc
check_sparse_mathematical_core("trunc", stype,
lambda x: mx.sym.sparse.trunc(x),
lambda x: np.trunc(x),
force_overlap=force_overlap,
input_grad_stype=input_grad_stype,
density=density, ograd_density=ograd_density)
# sign
check_sparse_mathematical_core("sign", stype,
lambda x: mx.sym.sparse.sign(x),
lambda x: np.sign(x),
lambda x: np.zeros(x.shape),
output_grad_stype=output_grad_stype,
force_overlap=force_overlap,
density=density, ograd_density=ograd_density)
# log1p
check_sparse_mathematical_core("log1p", stype,
lambda x: mx.sym.sparse.log1p(x),
lambda x: np.log1p(x),
lambda x: 1. / (1.0 + x),
data_init=0.5, grad_init=0.5,
output_grad_stype=output_grad_stype,
input_grad_stype=input_grad_stype,
force_overlap=force_overlap, density=density,
ograd_density=ograd_density)
# expm1
check_sparse_mathematical_core("expm1", stype,
lambda x: mx.sym.sparse.expm1(x),
lambda x: np.expm1(x),
lambda x: np.exp(x),
data_init=0.5, grad_init=0.5,
output_grad_stype=output_grad_stype,
input_grad_stype=input_grad_stype,
force_overlap=force_overlap, density=density,
ograd_density=ograd_density)
# sin
check_sparse_mathematical_core("sin", stype,
lambda x: mx.sym.sparse.sin(x),
lambda x: np.sin(x),
lambda x: np.cos(x),
output_grad_stype=output_grad_stype,
input_grad_stype=input_grad_stype,
force_overlap=force_overlap,
density=density, ograd_density=ograd_density)
# tan
check_sparse_mathematical_core("tan", stype,
lambda x: mx.sym.sparse.tan(x),
lambda x: np.tan(x),
lambda x: np.tan(x) ** 2 + 1,
output_grad_stype=output_grad_stype,
input_grad_stype=input_grad_stype,
density=density,
ograd_density=ograd_density)
# arcsin
check_sparse_mathematical_core("arcsin", stype,
lambda x: mx.sym.sparse.arcsin(x),
lambda x: np.arcsin(x),
lambda x: 1. / (1. - x ** 2) ** (1. / 2.),
data_init=0.5, grad_init=0.5,
output_grad_stype=output_grad_stype,
input_grad_stype=input_grad_stype,
force_overlap=force_overlap,
density=density, ograd_density=ograd_density)
# arctan
check_sparse_mathematical_core("arctan", stype,
lambda x: mx.sym.sparse.arctan(x),
lambda x: np.arctan(x),
lambda x: 1. / (x ** 2. + 1.),
data_init=0.5, grad_init=0.5,
output_grad_stype=output_grad_stype,
input_grad_stype=input_grad_stype,
force_overlap=force_overlap,
density=density, ograd_density=ograd_density)
# degrees
check_sparse_mathematical_core("degrees", stype,
lambda x: mx.sym.sparse.degrees(x),
lambda x: np.degrees(x),
lambda x: assign_each(x, lambda a: 180./np.pi),
data_init=0.5, grad_init=0.5,
output_grad_stype=output_grad_stype,
input_grad_stype=input_grad_stype,
force_overlap=force_overlap,
density=density, ograd_density=ograd_density)
# radians
check_sparse_mathematical_core("radians", stype,
lambda x: mx.sym.sparse.radians(x),
lambda x: np.radians(x),
lambda x: assign_each(x, lambda a: np.pi / 180.),
data_init=0.6, grad_init=1,
output_grad_stype=output_grad_stype,
input_grad_stype=input_grad_stype,
force_overlap=force_overlap,
density=density, ograd_density=ograd_density)
# sinh
check_sparse_mathematical_core("sinh", stype,
lambda x: mx.sym.sparse.sinh(x),
lambda x: np.sinh(x),
lambda x: np.cosh(x),
output_grad_stype=output_grad_stype,
input_grad_stype=input_grad_stype,
force_overlap=force_overlap,
density=density, ograd_density=ograd_density)
# tanh
check_sparse_mathematical_core("tanh", stype,
lambda x: mx.sym.sparse.tanh(x),
lambda x: np.tanh(x),
lambda x: 1. - np.tanh(x) ** 2,
data_init=0.5, grad_init=1,
output_grad_stype=output_grad_stype,
input_grad_stype=input_grad_stype,
force_overlap=force_overlap, density=density,
ograd_density=ograd_density)
# arcsinh
check_sparse_mathematical_core("arcsinh", stype,
lambda x: mx.sym.sparse.arcsinh(x),
lambda x: np.arcsinh(x),
lambda x: 1./(x**2 + 1.)**(1./2.),
output_grad_stype=output_grad_stype,
input_grad_stype=input_grad_stype,
force_overlap=force_overlap, density=density,
ograd_density=ograd_density)
# arctanh
check_sparse_mathematical_core("arctanh", stype,
lambda x: mx.sym.sparse.arctanh(x),
lambda x: np.arctanh(x),
lambda x: -1./(x**2 - 1.),
data_init=0.5,
output_grad_stype=output_grad_stype,
input_grad_stype=input_grad_stype,
force_overlap=force_overlap, density=density,
ograd_density=ograd_density)
# abs
check_sparse_mathematical_core("abs", stype,
lambda x: mx.sym.sparse.abs(x),
lambda x: np.abs(x),
lambda x: assign_each(x, function=util_sign),
output_grad_stype=output_grad_stype,
input_grad_stype=input_grad_stype,
force_overlap=force_overlap,
density=density, ograd_density=ograd_density)
if stype != "csr":
# rsqrt
check_sparse_mathematical_core("rsqrt", stype,
lambda x: mx.sym.sparse.rsqrt(x),
lambda x: 1 / np.sqrt(x),
lambda x: -(1.0 / (2.0 * x * np.sqrt(x))),
output_grad_stype=output_grad_stype,
input_grad_stype=input_grad_stype,
force_overlap=force_overlap,
density=density, ograd_density=ograd_density)
# cos
check_sparse_mathematical_core("cos", stype,
lambda x: mx.sym.sparse.cos(x),
lambda x: np.cos(x),
lambda x: -np.sin(x),
output_grad_stype=output_grad_stype,
input_grad_stype=input_grad_stype,
force_overlap=force_overlap,
density=density, ograd_density=ograd_density)
# arccos
check_sparse_mathematical_core("arccos", stype,
lambda x: mx.sym.sparse.arccos(x),
lambda x: np.arccos(x),
lambda x: -1. / (1. - x ** 2.) ** (1. / 2.),
data_init=0.5, grad_init=0.5,
output_grad_stype=output_grad_stype,
input_grad_stype=input_grad_stype,
force_overlap=force_overlap, density=density,
ograd_density=ograd_density)
# cosh
check_sparse_mathematical_core("cosh", stype,
lambda x: mx.sym.sparse.cosh(x),
lambda x: np.cosh(x),
lambda x: np.sinh(x),
data_init=5, grad_init=5,
output_grad_stype=output_grad_stype,
input_grad_stype=input_grad_stype,
force_overlap=force_overlap,
density=density, ograd_density=ograd_density)
# arccosh
check_sparse_mathematical_core("arccosh", stype,
lambda x: mx.sym.sparse.arccosh(x),
lambda x: np.arccosh(x),
lambda x: 1./(x**2 - 1.)**(1./2.),
output_grad_stype=output_grad_stype,
input_grad_stype=input_grad_stype,
force_overlap=force_overlap, density=density,
ograd_density=ograd_density)
# log10
check_sparse_mathematical_core("log10", stype,
lambda x: mx.sym.sparse.log10(x),
lambda x: np.log10(x),
lambda x: 1. / (x * np.log(10.)),
output_grad_stype=output_grad_stype,
input_grad_stype=input_grad_stype,
force_overlap=force_overlap, density=density,
ograd_density=ograd_density)
# log2
check_sparse_mathematical_core("log2", stype,
lambda x: mx.sym.sparse.log2(x),
lambda x: np.log2(x),
lambda x: 1. / (x * np.log(2.)),
output_grad_stype=output_grad_stype,
input_grad_stype=input_grad_stype,
force_overlap=force_overlap, density=density,
ograd_density=ograd_density)
try:
from scipy import special as scipy_special
# On scipy v1.0, psi([0, -1, -2, -3, ...]) = [ inf, inf, inf, inf, ...]
# On scipy v1.1, psi([0, -1, -2, -3, ...]) = [-inf, nan, nan, nan, ...]
# Map the behavior of v1.1 psi() to that of v1.0 for ints <= 0 for consistency
scipy_psi = np.vectorize(lambda x: np.inf if float(x).is_integer() and x <= 0 else
scipy_special.psi(x))
# gamma
check_sparse_mathematical_core("gamma", stype,
lambda x: mx.sym.sparse.gamma(x),
lambda x: scipy_special.gamma(x),
lambda x: scipy_special.gamma(x) * scipy_psi(x),
output_grad_stype=output_grad_stype,
input_grad_stype=input_grad_stype,
force_overlap=force_overlap,
density=density, ograd_density=ograd_density)
# gammaln
check_sparse_mathematical_core("gammaln", stype,
lambda x: mx.sym.sparse.gammaln(x),
lambda x: scipy_special.gammaln(x),
lambda x: scipy_psi(x),
output_grad_stype=output_grad_stype,
input_grad_stype=input_grad_stype,
force_overlap=force_overlap,
density=density, ograd_density=ograd_density)
except ImportError:
print("Could not import scipy. Skipping unit tests for special functions")
for i in range(1):
print("pass", i)
for density in [0.0, random.uniform(0, 1), 1.0]:
for ograd_density in [0.0, random.uniform(0, 1), 1.0]:
for force_overlap in [False, True]:
print("{}, {}, {}".format(density, ograd_density, force_overlap))
with warnings.catch_warnings():
warnings.simplefilter("ignore")
# Check unary ops (unary fwd, binary bwd)
check_mathematical_core('default', force_overlap=force_overlap,
density=density, ograd_density=ograd_density)
check_mathematical_core('row_sparse', force_overlap=force_overlap,
density=density, ograd_density=ograd_density)
check_mathematical_core('row_sparse', output_grad_stype='default',
force_overlap=force_overlap,
density=density, ograd_density=ograd_density)
check_mathematical_core('row_sparse', output_grad_stype='row_sparse',
force_overlap=force_overlap,
density=density, ograd_density=ograd_density)
check_mathematical_core('csr', output_grad_stype='default',
force_overlap=force_overlap,
density=density, ograd_density=ograd_density)
check_mathematical_core('csr', output_grad_stype='csr',
force_overlap=force_overlap,
density=density, ograd_density=ograd_density)
# Check binary with scalar ops
check_binary_op_with_scalar('default',
density=density,
ograd_density=ograd_density,
force_overlap=force_overlap)
check_binary_op_with_scalar('row_sparse',
density=density,
ograd_density=ograd_density,
force_overlap=force_overlap)
check_binary_op_with_scalar('row_sparse', output_grad_stype='default',
density=density,
ograd_density=ograd_density,
force_overlap=force_overlap)
check_binary_op_with_scalar('row_sparse',
output_grad_stype='row_sparse',
density=density, ograd_density=ograd_density,
force_overlap=force_overlap)
check_binary_op_with_scalar('csr',
output_grad_stype='csr',
input_grad_stype='default',
density=density,
ograd_density=ograd_density,
force_overlap=force_overlap)
check_binary_op_with_scalar('csr',
output_grad_stype='csr',
input_grad_stype='csr',
density=density,
ograd_density=ograd_density,
force_overlap=force_overlap)
check_binary_op_with_scalar('csr',
output_grad_stype='default',
density=density,
ograd_density=ograd_density,
force_overlap=force_overlap)
@pytest.mark.serial
def test_elemwise_add_ex():
def check_elemwise_add_ex(lhs_stype, rhs_stype, shape, lhs_grad_stype=None, rhs_grad_stype=None):
lhs = mx.symbol.Variable('lhs', stype=lhs_stype)
rhs = mx.symbol.Variable('rhs', stype=rhs_stype)
lhs_nd = rand_ndarray(shape, lhs_stype)
rhs_nd = rand_ndarray(shape, rhs_stype)
lhs_np = lhs_nd.asnumpy()
rhs_np = rhs_nd.asnumpy()
out_np = lhs_np + rhs_np
test = mx.symbol.sparse.elemwise_add(lhs, rhs)
location = {'lhs': lhs_nd, 'rhs': rhs_nd}
check_symbolic_forward(test, location, [out_np])
check_numeric_gradient(test, location)
grad_stypes = {}
if lhs_grad_stype is not None and lhs_grad_stype != 'default':
grad_stypes['lhs'] = lhs_grad_stype
if rhs_grad_stype is not None and rhs_grad_stype != 'default':
grad_stypes['rhs'] = rhs_grad_stype
check_symbolic_backward(test, location, [out_np], [out_np, out_np],
grad_stypes=grad_stypes)
shapes = [rand_shape_2d(), rand_shape_3d()]
for shape in shapes:
check_elemwise_add_ex('default', 'default', shape)
check_elemwise_add_ex('row_sparse', 'row_sparse', shape,
lhs_grad_stype='row_sparse', rhs_grad_stype='row_sparse')
@pytest.mark.serial
def test_cast_storage_ex():
def check_cast_storage(shape, density, from_stype, to_stype, check_numeric_grad=True):
x = mx.symbol.Variable('x', stype=from_stype)
x_nd = rand_ndarray(shape, from_stype, density=density)
x_np = x_nd.asnumpy()
out_np = x_np
test = mx.symbol.cast_storage(x, stype=to_stype)
location = {'x': x_nd}
check_symbolic_forward(test, location, [out_np])
# consider disabling the numeric grad check for the gpu block kernel since the input is large
if check_numeric_grad:
check_numeric_gradient(test, location)
grad_stypes = {'x': to_stype}
check_symbolic_backward(test, location, [out_np], [out_np], grad_stypes=grad_stypes)
density = [1.00, 0.50, 0.01]
for d in density:
shape_2d = rand_shape_2d()
shape_3d = rand_shape_3d()
check_cast_storage(shape_2d, d, 'csr', 'default')
check_cast_storage(shape_2d, d, 'default', 'csr')
check_cast_storage(shape_2d, d, 'csr', 'csr')
check_cast_storage(shape_2d, d, 'row_sparse', 'default')
check_cast_storage(shape_2d, d, 'default', 'row_sparse')
check_cast_storage(shape_2d, d, 'row_sparse', 'row_sparse')
check_cast_storage(shape_3d, d, 'row_sparse', 'default')
check_cast_storage(shape_3d, d, 'default', 'row_sparse')
check_cast_storage(shape_3d, d, 'row_sparse', 'row_sparse')
for i in range(4, 6):
shape = rand_shape_nd(i, 5)
check_cast_storage(shape, d, 'default', 'row_sparse')
check_cast_storage(shape, d, 'row_sparse', 'default')
# Test specific gpu kernels
if default_device().device_type == 'gpu':
dim0 = rnd.randint(1, 10)
# test gpu thread kernel
check_cast_storage((dim0, rnd.randint( 1, 32)), d, 'default', 'csr')
# test gpu warp kernel
check_cast_storage((dim0, rnd.randint( 32, 512)), d, 'default', 'csr')
# test gpu block kernel
check_cast_storage((dim0, rnd.randint(512, 1024)), d, 'default', 'csr',
check_numeric_grad=False)
# check race condition in block kernel
check_cast_storage((200, 128 * 2 + 1), d, 'default', 'csr',
check_numeric_grad=False)
# test gpu thread kernel
check_cast_storage((dim0, rnd.randint( 1, 32)), d, 'default', 'row_sparse')
# test gpu warp kernel
check_cast_storage((dim0, rnd.randint( 32, 512)), d, 'default', 'row_sparse')
# test gpu block kernel
check_cast_storage((dim0, rnd.randint(512, 1024)), d, 'default', 'row_sparse',
check_numeric_grad=False)
@pytest.mark.serial
def test_sparse_dot():
def test_infer_forward_stype(lhs_shape, rhs_shape, lhs_density, rhs_density, trans_a, trans_b):
all_stypes = ["default", "csr", "row_sparse"]
lhs_nd = rand_ndarray(lhs_shape, 'default', density=lhs_density)
rhs_nd = rand_ndarray(rhs_shape, 'default', density=rhs_density)
out_nd = mx.nd.dot(lhs_nd, rhs_nd, transpose_a=trans_a, transpose_b=trans_b)
out_np = out_nd.asnumpy()
for lhs_stype in all_stypes:
for rhs_stype in all_stypes:
for forward_stype in all_stypes:
lhs = lhs_nd.tostype(lhs_stype)
rhs = rhs_nd.tostype(rhs_stype)
out = mx.nd.dot(lhs, rhs, forward_stype=forward_stype,
transpose_a=trans_a, transpose_b=trans_b)
assert_almost_equal(out.tostype('default').asnumpy(), out_np, rtol=1e-3, atol=1e-4)
lhs_var = mx.symbol.Variable('lhs', stype=lhs_stype)
rhs_var = mx.symbol.Variable('rhs', stype=rhs_stype)
out = mx.symbol.sparse.dot(lhs_var, rhs_var,
forward_stype=forward_stype,
transpose_a=trans_a, transpose_b=trans_b)
location = {'lhs': lhs, 'rhs': rhs}
check_symbolic_forward(out, location, [out_np], rtol=1e-3, atol=1e-4)
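# test_dot_csr: csr lhs dot dense/row_sparse rhs, checked against the dense
# dot product in both imperative and symbolic mode; the symbolic backward is
# verified only for the rhs gradient (grad_req for lhs is 'null').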
def test_dot_csr(lhs_shape, rhs_shape, rhs_stype, trans_lhs, lhs_density, rhs_density):
lhs_nd = rand_ndarray(lhs_shape, 'csr', density=lhs_density, shuffle_csr_indices=False)
lhs_dns = lhs_nd.tostype('default')
rhs_nd = rand_ndarray(rhs_shape, rhs_stype, density=rhs_density)
rhs_dns = rhs_nd if rhs_stype == 'default' else rhs_nd.tostype('default')
out = mx.nd.dot(lhs_nd, rhs_nd, transpose_a=trans_lhs)
out_dns = mx.nd.dot(lhs_dns, rhs_dns, transpose_a=trans_lhs)
out_np = out_dns.asnumpy()
assert_almost_equal(out.asnumpy(), out_np, rtol=1e-3, atol=1e-5)
# test symbolic forward
lhs = mx.symbol.Variable('lhs', stype='csr')
rhs = mx.symbol.Variable('rhs', stype=rhs_stype)
out = mx.symbol.sparse.dot(lhs, rhs, transpose_a=trans_lhs)
location = {'lhs': lhs_nd, 'rhs': rhs_nd}
check_symbolic_forward(out, location, [out_np], rtol=1e-3, atol=1e-4)
# test symbolic backward
backward_trans = not trans_lhs
rhs_backward_grad = mx.nd.dot(lhs_dns, out_dns, transpose_a=backward_trans).asnumpy()
expected = {'rhs': rhs_backward_grad}
check_symbolic_backward(out, location, [out_np], expected,
grad_req={'lhs': 'null', 'rhs': 'write'},
rtol=1e-3, atol=1e-4)
def test_dot_dns_csr(lhs_shape, rhs_shape, lhs_density, rhs_density, trans_lhs=False, trans_rhs=False):
lhs_nd = rand_ndarray(lhs_shape, stype='default', density=lhs_density)
rhs_nd = rand_ndarray(rhs_shape, stype='csr', density=rhs_density)
rhs_dns = rhs_nd.tostype('default')
if default_device() == mx.cpu():
forward_stype = 'csr'
else:
forward_stype = 'default'
out = mx.nd.sparse.dot(lhs_nd, rhs_nd, transpose_a=trans_lhs, transpose_b=trans_rhs, forward_stype=forward_stype)
out_dns = mx.nd.dot(lhs_nd, rhs_dns, transpose_a=trans_lhs, transpose_b=trans_rhs, forward_stype=forward_stype)
out_np = out_dns.asnumpy()
assert_almost_equal(out.asnumpy(), out_np, rtol=1e-3, atol=1e-5)
# test symbolic forward
lhs = mx.symbol.Variable('lhs', stype='default')
rhs = mx.symbol.Variable('rhs', stype='csr')
out = mx.symbol.sparse.dot(lhs, rhs, transpose_a=trans_lhs, transpose_b=trans_rhs, forward_stype=forward_stype)
location = {'lhs': lhs_nd, 'rhs': rhs_nd}
check_symbolic_forward(out, location, [out_np], rtol=1e-3, atol=1e-4)
if default_device() == mx.cpu():
# test symbolic backward
backward_trans = not trans_lhs
rhs_backward_grad = mx.nd.dot(lhs_nd, out_dns, transpose_a=backward_trans).asnumpy()
if trans_rhs is True:
rhs_backward_grad = rhs_backward_grad.T
expected = {'rhs': rhs_backward_grad}
check_symbolic_backward(out, location, [out_np], expected,
grad_req={'lhs': 'null', 'rhs': 'write'},
rtol=1e-3, atol=1e-4)
else:
transpose_b = not trans_rhs
lhs_backward_grad = mx.nd.dot(out_dns, rhs_dns, transpose_b=transpose_b)
expected = {'lhs': lhs_backward_grad.asnumpy()}
check_symbolic_backward(out, location, [out_np], expected,
grad_req={'lhs': 'write', 'rhs': 'null'},
rtol=1e-3, atol=1e-4)
def test_sparse_dot_zero_output(lhs_shape, trans_lhs, rhs_num_cols):
"""Test for nnr_out = 0. Before the fix, the test would fail."""
lhs = mx.nd.zeros(lhs_shape)
irow = np.random.randint(0, lhs_shape[0])
icol = np.random.randint(0, lhs_shape[1])
lhs[irow, icol] = 1.0
if trans_lhs:
rhs = rand_ndarray(shape=(lhs_shape[0], rhs_num_cols), stype='default')
rhs[irow, :] = 0
else:
rhs = rand_ndarray(shape=(lhs_shape[1], rhs_num_cols), stype='default')
rhs[icol, :] = 0
dns_out = mx.nd.dot(lhs, rhs, transpose_a=trans_lhs)
assert mx.nd.sum(mx.nd.abs(dns_out)).asscalar() == 0
sps_out = mx.nd.sparse.dot(lhs.tostype('csr'), rhs.tostype('row_sparse'), transpose_a=trans_lhs)
assert same(dns_out.asnumpy(), sps_out.asnumpy())
density = [1.00, 0.5, 0.01]
for lhs_d in density:
lhs_shape = rand_shape_2d(50, 200)
rhs_d = 1
test_dot_csr(lhs_shape, (lhs_shape[1], 1), 'default', False, lhs_d, rhs_d) # test gpu SpMV
test_dot_csr(lhs_shape, (lhs_shape[0], 1), 'default', True, lhs_d, rhs_d) # (vector kernel)
test_dot_csr(lhs_shape, (lhs_shape[1], rnd.randint(5, 10)), 'default', False, lhs_d, rhs_d) # test gpu SpMM
test_dot_csr(lhs_shape, (lhs_shape[0], rnd.randint(5, 10)), 'default', True, lhs_d, rhs_d) # (scalar kernel)
test_dot_dns_csr(lhs_shape, (lhs_shape[1], rnd.randint(50, 200)), lhs_d, lhs_d)
test_dot_dns_csr(lhs_shape, (rnd.randint(50, 200), lhs_shape[1]), lhs_d, lhs_d, trans_rhs=True)
for rhs_d in density:
test_dot_csr(lhs_shape, (lhs_shape[1], rnd.randint(1, 10)), 'row_sparse', False, lhs_d, rhs_d)
test_dot_csr(lhs_shape, (lhs_shape[0], rnd.randint(1, 10)), 'row_sparse', True, lhs_d, rhs_d)
test_infer_forward_stype(lhs_shape, (lhs_shape[1], rnd.randint(10, 20)),
lhs_d, rhs_d, False, False)
test_infer_forward_stype(lhs_shape, (rnd.randint(10, 20), lhs_shape[1]),
lhs_d, rhs_d, False, True)
test_infer_forward_stype(lhs_shape, (lhs_shape[0], rnd.randint(10, 20)),
lhs_d, rhs_d, True, False)
test_infer_forward_stype(lhs_shape, (rnd.randint(10, 20), lhs_shape[0]),
lhs_d, rhs_d, True, True)
test_sparse_dot_zero_output(rand_shape_2d(50, 200), False, 40)
test_sparse_dot_zero_output(rand_shape_2d(50, 200), True, 40)
@pytest.mark.serial
def test_sparse_dot_determinism():
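# Runs the same sparse dot twice with identical inputs and requires bitwise
# identical outputs (rtol=0, atol=0) to catch non-deterministic kernels.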
def check_dot_determinism(lhs_stype, rhs_stype, lhs_density, rhs_density, transpose_a, transpose_b, forward_stype):
lhs_row = rnd.randint(50, 100)
lhs_col = rnd.randint(50, 100)
if transpose_a:
if transpose_b:
rhs_shape = (rnd.randint(50, 100), lhs_row)
else:
rhs_shape = (lhs_row, rnd.randint(50, 100))
else:
if transpose_b:
rhs_shape = (rnd.randint(50, 100), lhs_col)
else:
rhs_shape = (lhs_col, rnd.randint(50, 100))
lhs_shape = (lhs_row, lhs_col)
lhs = rand_ndarray(lhs_shape, lhs_stype, density=lhs_density)
rhs = rand_ndarray(rhs_shape, rhs_stype, density=rhs_density)
res1 = mx.nd.sparse.dot(lhs, rhs, transpose_a=transpose_a, transpose_b=transpose_b, forward_stype=forward_stype)
res2 = mx.nd.sparse.dot(lhs, rhs, transpose_a=transpose_a, transpose_b=transpose_b, forward_stype=forward_stype)
assert_almost_equal(res1.asnumpy(), res2.asnumpy(), rtol=0.0, atol=0.0)
check_dot_determinism('csr', 'default', 0.1, 1.0, True, False, 'row_sparse')
forward_stype = 'csr' if default_device() == mx.cpu() else 'default'
check_dot_determinism('default', 'csr', 1.0, 0.1, False, False, forward_stype)
check_dot_determinism('default', 'csr', 1.0, 0.1, False, True, forward_stype)
check_dot_determinism('csr', 'default', 0.1, 1.0, True, False, 'default')
def test_sparse_slice():
def check_csr_slice(shape, slice_input):
storage_type = 'csr'
B, _ = rand_sparse_ndarray(shape, storage_type)
b_np = B.asnumpy()
begin = rnd.randint(0, B.shape[0] - 1)
end = rnd.randint(begin + 1, B.shape[0])
nd_slice = mx.nd.crop(B, begin=begin, end=end)
assert same(nd_slice.asnumpy(), b_np[begin:end]), (nd_slice.asnumpy(), b_np[begin:end])
shape = (rnd.randint(7, 15), rnd.randint(1, 10))
check_csr_slice(shape, True)
check_csr_slice(shape, False)
@pytest.mark.serial
def test_sparse_retain():
def check_sparse_retain(shape, density, index_type=np.int64):
num_rows = shape[0]
rsp, _ = rand_sparse_ndarray(shape=shape, stype='row_sparse', density=density)
length = np.random.randint(1, num_rows + 1)
idx = random_sample(list(range(0, num_rows)), length)
idx.sort()
dns = rsp.asnumpy()
tensor_retained_expected = np.zeros(shape)
for i in idx:
tensor_retained_expected[i][:] = dns[i]
indices = mx.nd.array(idx, dtype=index_type)
rsp_retained = mx.nd.sparse.retain(rsp, indices=indices)
assert same(tensor_retained_expected, rsp_retained.asnumpy())
# check numeric gradient
data = mx.symbol.Variable('data')
idx = mx.symbol.Variable('indices')
sym = mx.sym.sparse.retain(data=data, indices=idx)
check_numeric_gradient(sym, [rsp, indices], grad_nodes=['data'],
grad_stype_dict={'data': 'row_sparse'})
shape = rand_shape_2d()
shape_3d = rand_shape_3d()
densities = [0.01, 0.5, 1.0]
index_types = [np.float32, np.int32, np.int64]
for density in densities:
for itype in index_types:
check_sparse_retain(shape, density, itype)
check_sparse_retain(shape_3d, density, itype)
def test_sparse_unary_with_numerics():
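# check_sparse_simple: forward/backward check for a unary op; when
# backward_is_use_output is True the gradient is computed from the op's output
# (as for relu and sigmoid below), so the expected gradient stype follows the
# forward output stype.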
def check_sparse_simple(name, stype, mxnet_func, forward_numpy_call,
backward_numpy_call, output_grad_stype=None,
backward_is_use_output=False):
if output_grad_stype is None:
output_grad_stype = stype
expected_result_type, expected_grad_result_type = \
get_fw_bw_result_types_2(forward_numpy_call, stype, backward_numpy_call, output_grad_stype)
if backward_is_use_output is True:
expected_grad_result_type = expected_result_type
shape = (3, 4)
data = mx.symbol.Variable("data")
grad_stypes = {'data' : expected_grad_result_type}
y = mxnet_func(data)
if stype == 'default':
xa = np.random.uniform(low=-1.0, high=1.0, size=shape)
xa_np = xa
else:
xa = create_sparse_array(shape, stype, data_init=None, rsp_indices=[1],
modifier_func=lambda a: a - 0.5,
shuffle_csr_indices=True)
xa_np = xa.asnumpy()
if output_grad_stype != 'default':
out_grad = create_sparse_array(shape, output_grad_stype, data_init=None,
rsp_indices=[1, 2],
modifier_func=lambda a: a - 0.5,
shuffle_csr_indices=True)
out_grad_np = out_grad.asnumpy()
else:
out_grad_np = np.ones(xa.shape)
out_grad = mx.nd.array(out_grad_np)
output_np = forward_numpy_call(xa_np)
input_grad_np = backward_numpy_call(output_np, out_grad_np)
outputs = check_symbolic_forward(y, [xa], [output_np])
output = outputs[0]
assert output.stype == expected_result_type
input_grad_dict = check_symbolic_backward(y, location=[xa], out_grads=[out_grad],
expected=[input_grad_np],
grad_stypes=grad_stypes)
inp_grad = input_grad_dict["data"]
assert inp_grad.stype == expected_grad_result_type
def check_sparse_function(name, mxnet_func, forward_numpy_call, backward_numpy_call,
backward_is_use_output=False):
check_sparse_simple(name, 'default', mxnet_func, forward_numpy_call, backward_numpy_call)
for output_grad_stype in [None, "row_sparse", "default"]:
check_sparse_simple(name, 'row_sparse', mxnet_func, forward_numpy_call, backward_numpy_call,
output_grad_stype=output_grad_stype,
backward_is_use_output=backward_is_use_output)
for output_grad_stype in [None, "csr", "default"]:
check_sparse_simple(name, 'csr', mxnet_func, forward_numpy_call, backward_numpy_call,
output_grad_stype=output_grad_stype,
backward_is_use_output=backward_is_use_output)
check_sparse_function('relu',
lambda x: mx.sym.relu(x),
lambda x: np.maximum(x, 0.0),
lambda output, outg: outg * assign_each(output, lambda x: x > 0.0), backward_is_use_output=True)
check_sparse_function('sigmoid',
lambda x: mx.sym.sigmoid(x),
lambda x: np.divide(1.0, (1.0 + np.exp(-x))),
lambda output, outg: outg * assign_each(output, lambda x: x * (1.0 - x)),
backward_is_use_output=True)
@pytest.mark.serial
def test_sparse_nd_zeros():
def check_sparse_nd_zeros(stype, shape):
zero = mx.nd.zeros(shape)
sparse_zero = mx.nd.zeros(shape=shape, stype=stype)
assert_almost_equal(sparse_zero.asnumpy(), zero.asnumpy())
shape = rand_shape_2d()
check_sparse_nd_zeros('row_sparse', shape)
check_sparse_nd_zeros('csr', shape)
check_sparse_nd_zeros('default', shape)
@pytest.mark.serial
def test_sparse_nd_zeros_like():
def check_sparse_nd_zeros_like(stype, shape):
zero = mx.nd.zeros(shape, stype=stype)
zero_like = mx.nd.sparse.zeros_like(zero)
assert_almost_equal(zero.asnumpy(), zero_like.asnumpy())
shape = rand_shape_2d()
check_sparse_nd_zeros_like('row_sparse', shape)
check_sparse_nd_zeros_like('csr', shape)
@pytest.mark.serial
def test_sparse_axis_operations():
def test_variations(func_name):
dim0 = 30
dim1 = 100
axes = [0, 1]
densities = [0, 0.5, 1]
for density in densities:
shape = rand_shape_2d(dim0, dim1)
csr_array = rand_ndarray(shape=shape, stype='csr', density=density)
dns = csr_array.tostype('default')
for axis in axes:
ret = func_name(csr_array, axis=axis)
assert ret.stype == 'default'
ret_expected = func_name(dns, axis=axis)
assert_almost_equal(ret.asnumpy(), ret_expected.asnumpy())
def test_fallback(func_name, axis=0, keepdims=True, exclude=True):
dim0 = 30
dim1 = 100
shape = rand_shape_2d(dim0, dim1)
csr_array = rand_ndarray(shape=shape, stype='csr', density=0.01)
ret = func_name(csr_array, axis=axis, keepdims=keepdims,
exclude=exclude)
test_variations(mx.nd.sum)
test_fallback(mx.nd.sum, axis=0, keepdims=True, exclude=True)
test_variations(mx.nd.mean)
test_fallback(mx.nd.mean, axis=0, keepdims=True, exclude=True)
@pytest.mark.serial
def test_sparse_square_sum():
dim0 = 30
dim1 = 30
axes = [0, 1]
keepdims = [False, True]
densities = [0, 0.01, 0.2, 0.5, 1.0]
for density in densities:
shape = rand_shape_2d(dim0, dim1)
rsp = rand_ndarray(shape, 'row_sparse', density)
dns = rsp.tostype('default')
for axis in axes:
for keepdim in keepdims:
ret = mx.nd._internal._square_sum(rsp, axis=axis, keepdims=keepdim)
if axis == 1 and keepdim:
assert ret.stype == 'row_sparse'
else:
assert ret.stype == 'default'
ret_expected = mx.nd.sum(dns*dns, axis=axis, keepdims=keepdim)
# check forward result
assert_almost_equal(ret.asnumpy(), ret_expected.asnumpy())
rsp_data = mx.sym.Variable('data', stype='row_sparse')
test = mx.symbol._internal._square_sum(rsp_data, axis=axis, keepdims=keepdim)
# check symbolic backward since ograd can be an rsp
# and cannot be checked through check_numeric_gradient
# because it will add a loss layer as the output layer
# which makes ograd of the square_sum dense
if axis == 1 and keepdim:
dns_data = mx.sym.Variable('data')
baseline = mx.sym.sum(mx.sym.square(dns_data), axis=axis, keepdims=keepdim)
igrad_expected = mx.nd.empty(dns.shape)
baseline_exec = baseline._bind(default_device(), args=[dns],
args_grad=[igrad_expected])
baseline_exec.forward(is_train=True)
baseline_exec.backward([ret_expected])
# check backward when ograd is row sparse
check_symbolic_backward(test, [rsp], [ret_expected.tostype('row_sparse')],
[igrad_expected.asnumpy()], grad_stypes={'data': 'row_sparse'})
# check backward when ograd is dense
# the stype of the output of square_sum is determined at symbol binding stage.
# The ograd stype of the last layer is the same as the output stype of the last layer.
# Need to add one more layer after square_sum to trigger the kernel for ograd
# with default stype in square_sum op.
baseline1 = baseline + 1
baseline_exec1 = baseline1._bind(default_device(), args=[dns],
args_grad=[igrad_expected])
baseline_exec1.forward(is_train=True)
baseline_exec1.backward([ret_expected])
test1 = test + 1
check_symbolic_backward(test1, [rsp], [ret_expected], [igrad_expected.asnumpy()],
grad_stypes={'data': 'row_sparse'})
# check numeric gradient
check_numeric_gradient(test, [rsp], grad_stype_dict={'data': 'row_sparse'},
atol=1e-2, rtol=0.1)
@pytest.mark.serial
@pytest.mark.flaky
def test_sparse_storage_fallback():
""" test operators which don't implement FComputeEx or FStatefulComputeEx """
def check_broadcast_add(shape, lhs_stype, rhs_stype):
lhs = mx.symbol.Variable('lhs', stype=lhs_stype)
rhs = mx.symbol.Variable('rhs', stype=rhs_stype)
lhs_nd = rand_ndarray(shape, lhs_stype)
rhs_nd = rand_ndarray(shape, rhs_stype)
lhs_dns = mx.nd.cast_storage(lhs_nd, stype='default')
rhs_dns = mx.nd.cast_storage(rhs_nd, stype='default')
out_dns = (lhs_dns + rhs_dns).asnumpy()
test = mx.symbol.broadcast_add(lhs, rhs)
location = {'lhs': lhs_nd, 'rhs': rhs_nd}
check_symbolic_forward(test, location, [out_dns])
check_numeric_gradient(test, location)
check_symbolic_backward(test, location, [out_dns], [out_dns, out_dns])
def np_softmax(x, axis=-1):
# fix for old numpy on Travis not supporting keepdims
x = x - np.max(x, axis=axis, keepdims=True)
x = np.exp(x)
x /= np.sum(x, axis=axis, keepdims=True)
return x
def check_concat(shape, lhs_stype, rhs_stype):
x = mx.symbol.Variable('x', stype=lhs_stype)
w = mx.symbol.Variable('w', stype=rhs_stype)
test = mx.sym.Concat(x, w)
x_nd = rand_ndarray(shape, lhs_stype)
w_nd = rand_ndarray(shape, rhs_stype)
location = {'x': x_nd, 'w': w_nd}
check_numeric_gradient(test, location)
def check_operator_with_temp_resource(shape, stype):
x = mx.symbol.Variable('x', stype=stype)
test = mx.sym.sum(x)
x_nd = rand_ndarray(shape, stype)
location = {'x': x_nd}
check_numeric_gradient(test, location)
shape = rand_shape_2d()
stypes = ['default', 'csr', 'row_sparse']
for lhs in stypes:
check_operator_with_temp_resource(shape, lhs)
for rhs in stypes:
check_broadcast_add(shape, lhs, rhs)
check_concat(shape, lhs, rhs)
@pytest.mark.serial
def test_sparse_elementwise_sum():
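# add_n over a mix of storage types: the forward result is checked against the
# numpy sum of the inputs, and every input gradient should equal the output grad.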
def check_sparse_elementwise_sum_with_shape(stypes, shape, n):
# forward
inputs = [mx.symbol.Variable(f'arg{i}') for i in range(n)]
out = mx.symbol.sparse.add_n(*inputs, name='esum')
arr = []
arr_grad = [mx.nd.empty(shape, stype=stype) for stype in stypes]
densities = [0, 0.01, 0.5, 1.0]
for stype in stypes:
arr.append(rand_ndarray(shape, stype, densities[np.random.randint(0, len(densities))]))
exec1 = out._bind(default_device(),
args=arr,
args_grad=arr_grad)
exec1.forward(is_train=True)
out1 = exec1.outputs[0].asnumpy()
out = sum(a.asnumpy() for a in arr)
assert_almost_equal(out, out1, atol=1e-5)
out_grad = mx.nd.empty(shape)
out_grad[:] = np.random.uniform(-10, 10, shape)
# backward
exec1.backward([out_grad])
for a in arr_grad:
assert_almost_equal(a.asnumpy(), out_grad.asnumpy(), atol=1e-5)
all_stypes = ['default', 'csr', 'row_sparse']
for dim in range(2, 4):
shape = tuple(np.random.randint(5, 10, size=dim))
rsp_test_cnt = np.random.randint(1, 9)
check_sparse_elementwise_sum_with_shape(['row_sparse' for i in range(rsp_test_cnt)], shape, rsp_test_cnt)
if dim == 2:
check_sparse_elementwise_sum_with_shape(['default', 'csr', 'default'], shape, 3)
test_len = np.random.randint(5, 10)
# at least one default type
stypes = ['default']
for _ in range(test_len):
pick_side = np.random.randint(2)
pick_type = np.random.randint(3)
stypes = ([all_stypes[pick_type]] if pick_side == 0 else []) + stypes + ([all_stypes[pick_type]] if pick_side == 1 else [])
check_sparse_elementwise_sum_with_shape(stypes, shape, test_len+1)
@pytest.mark.serial
def test_sparse_embedding():
''' test sparse embedding operator '''
def check_sparse_embedding(in_dim, out_dim, batch, densities, sparse_grad):
target_stype = 'row_sparse' if sparse_grad else 'default'
# init executor
data = mx.sym.Variable("data")
weight = mx.sym.Variable("embed_weight")
embed = mx.sym.sparse.Embedding(data=data, weight=weight, input_dim=in_dim,
sparse_grad=sparse_grad, output_dim=out_dim, name='embed')
grad_req = {'data': 'null', 'embed_weight': 'write'}
args = {'embed_weight': mx.nd.zeros((in_dim, out_dim)), 'data': mx.nd.ones((batch,))}
weight_grad = mx.nd.zeros((in_dim, out_dim))
if sparse_grad:
weight_grad = weight_grad.tostype('row_sparse')
args_grad = {'embed_weight': weight_grad}
exe_test = embed._bind(default_device(), args=args, args_grad=args_grad, grad_req=grad_req)
arg_map = dict(zip(embed.list_arguments(), exe_test.arg_arrays))
grad_map = dict(zip(embed.list_arguments(), exe_test.grad_arrays))
# init data
np_data = np.random.randint(low=0, high=in_dim, size=batch)
np_onehot = np.zeros((batch, in_dim)).astype(np.float32)
np_onehot[np.arange(batch), np_data] = 1.0
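# dense one-hot encoding of the indices: the embedding forward should equal
# np.dot(np_onehot, weight) and grad(weight) should equal np.dot(np_onehot.T, grad)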
arg_map["data"][:] = np_data
# weight
weight = arg_map["embed_weight"]
for density in densities:
# update weight based on density
weight[:] = rand_ndarray(weight.shape, 'default', density=density)
# check forward
exe_test.forward(is_train=True)
# init grad
np_grad = np.random.uniform(-1, 1, exe_test.outputs[0].shape)
grad = mx.nd.zeros(np_grad.shape)
grad[:] = np_grad
assert_almost_equal(exe_test.outputs[0].asnumpy(), np.dot(np_onehot, weight.asnumpy()), atol=1e-4)
# check backward
exe_test.backward([grad])
assert_almost_equal(grad_map["embed_weight"].asnumpy(), np.dot(np_onehot.T, grad.asnumpy()), atol=1e-4)
# check grad stype
assert(grad_map["embed_weight"].stype == target_stype)
densities = [0, 0.5, 1]
in_dim = 50
out_dim = 3
batch = 8
sparse_grads = [True, False]
for sparse_grad in sparse_grads:
check_sparse_embedding(in_dim, out_dim, batch, densities, sparse_grad)
def test_sparse_broadcast_add_sub():
def check_broadcast_add(mx_lhs, mx_rhs, np_lhs, np_rhs, dtype):
assert_almost_equal(mx.nd.sparse.add(mx_lhs, mx_rhs).asnumpy(), np.add(np_lhs, np_rhs), atol=1e-4)
def check_broadcast_sub(mx_lhs, mx_rhs, np_lhs, np_rhs, dtype):
assert_almost_equal(mx.nd.sparse.subtract(mx_lhs, mx_rhs).asnumpy(), np.subtract(np_lhs, np_rhs), atol=1e-4)
stype = 'csr'
shape = rand_shape_2d()
num_rows = shape[0]
num_cols = shape[1]
for density in [0.1 * i for i in range(10)]:
mx_lhs = rand_ndarray(shape, stype, density)
np_lhs = mx_lhs.asnumpy()
mx_rhs_row_2D = rand_ndarray((1, num_cols), 'default')
mx_rhs_row_1D = mx_rhs_row_2D.reshape((num_cols,))
mx_rhs_col = rand_ndarray((num_rows, 1), 'default')
mx_rhs_scalar_2D = rand_ndarray((1, 1), 'default')
mx_rhs_scalar_1D = mx_rhs_scalar_2D.reshape((1, ))
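# broadcast the csr lhs against dense row, column and scalar operands, in both operand orders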
for mx_rhs in [mx_rhs_row_2D, mx_rhs_row_1D, mx_rhs_col, mx_rhs_scalar_2D, mx_rhs_scalar_1D]:
np_rhs = mx_rhs.asnumpy()
check_broadcast_add(mx_lhs, mx_rhs, np_lhs, np_rhs, np.float32)
check_broadcast_sub(mx_lhs, mx_rhs, np_lhs, np_rhs, np.float32)
check_broadcast_add(mx_rhs, mx_lhs, np_rhs, np_lhs, np.float32)
check_broadcast_sub(mx_rhs, mx_lhs, np_rhs, np_lhs, np.float32)
def test_sparse_broadcast_mul_div():
def check_broadcast_mul(mx_lhs, mx_rhs, np_lhs, np_rhs, dtype):
assert_almost_equal(mx.nd.sparse.multiply(mx_lhs, mx_rhs).asnumpy(), np.multiply(np_lhs, np_rhs), atol=1e-4)
def check_broadcast_div(mx_lhs, mx_rhs, np_lhs, np_rhs, dtype):
assert_almost_equal(mx.nd.sparse.divide(mx_lhs, mx_rhs).asnumpy(), np.divide(np_lhs, np_rhs), atol=1e-4)
stype = 'csr'
shape = rand_shape_2d()
num_rows = shape[0]
num_cols = shape[1]
for density in [0.1 * i for i in range(10)]:
mx_lhs = rand_ndarray(shape, stype, density)
np_lhs = mx_lhs.asnumpy()
mx_rhs_row_2D = rand_ndarray((1, num_cols), 'default')
mx_rhs_row_1D = mx_rhs_row_2D.reshape((num_cols,))
mx_rhs_col = rand_ndarray((num_rows, 1), 'default')
mx_rhs_scalar_2D = rand_ndarray((1, 1), 'default')
mx_rhs_scalar_1D = mx_rhs_scalar_2D.reshape((1, ))
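# broadcast the csr lhs against dense row, column and scalar operands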
for mx_rhs in [mx_rhs_row_2D, mx_rhs_row_1D, mx_rhs_col, mx_rhs_scalar_2D, mx_rhs_scalar_1D]:
np_rhs = mx_rhs.asnumpy()
check_broadcast_mul(mx_lhs, mx_rhs, np_lhs, np_rhs, np.float32)
check_broadcast_div(mx_lhs, mx_rhs, np_lhs, np_rhs, np.float32)
def test_batchnorm_fallback():
# same test as test_operator.test_batchnorm_training, but tests fallback logic of batchnorm
stype = 'row_sparse'
for shape in [(2, 3), (2, 3, 2, 2)]:
data_tmp = np.random.normal(-0.1, 0.1, size=shape)
s = (shape[1],)
gamma = np.ones(s)
beta = np.ones(s)
gamma[1] = 3
beta[0] = 3
rolling_mean = np.random.uniform(size=s)
rolling_std = np.random.uniform(size=s)
data = mx.symbol.Variable('data', stype=stype)
in_location = [mx.nd.array(data_tmp).tostype(stype), mx.nd.array(gamma).tostype(stype),
mx.nd.array(beta).tostype(stype)]
mean_std = [mx.nd.array(rolling_mean).tostype(stype), mx.nd.array(rolling_std).tostype(stype)]
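# with fix_gamma=True the row_sparse fallback is expected to raise MXNetError;
# fix_gamma=False should pass the numeric gradient check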
test = mx.symbol.BatchNorm(data, fix_gamma=True)
assertRaises(MXNetError, check_numeric_gradient, test, in_location, mean_std, numeric_eps=1e-3, rtol=0.16, atol=1e-2)
test = mx.symbol.BatchNorm(data, fix_gamma=True, use_global_stats=True)
assertRaises(MXNetError, check_numeric_gradient, test, in_location, mean_std, numeric_eps=1e-3, rtol=0.16, atol=1e-2)
test = mx.symbol.BatchNorm(data, fix_gamma=False)
check_numeric_gradient(test, in_location, mean_std, numeric_eps=1e-3, rtol=0.16, atol=1e-2)
test = mx.symbol.BatchNorm(data, fix_gamma=False, use_global_stats=True)
check_numeric_gradient(test, in_location, mean_std, numeric_eps=1e-3, rtol=0.16, atol=1e-2)
# Test varying channel axis
dim = len(shape)
for chaxis in range(-dim, dim):
chaxis_true = chaxis
if chaxis < 0:
chaxis_true = dim + chaxis
shapex = shape
channel_count = shapex[chaxis_true]
data_tmp = np.random.normal(-0.1, 0.1, size=shapex)
gamma = np.ones(channel_count)
beta = np.ones(channel_count)
if channel_count > 1:
gamma[1] = 3
beta[0] = 3
in_location = [mx.nd.array(data_tmp).tostype(stype), mx.nd.array(gamma).tostype(stype),
mx.nd.array(beta).tostype(stype)]
xrolling_mean = np.random.uniform(size=channel_count)
xrolling_std = np.random.uniform(size=channel_count)
xmean_std = [mx.nd.array(xrolling_mean).tostype(stype),
mx.nd.array(xrolling_std).tostype(stype)]
test = mx.symbol.BatchNorm(data, fix_gamma=True, axis=chaxis)
assertRaises(MXNetError, check_numeric_gradient, test, in_location, xmean_std, numeric_eps=1e-3, rtol=0.2, atol=0.01)
test = mx.symbol.BatchNorm(data, fix_gamma=True, use_global_stats=True, axis=chaxis)
assertRaises(MXNetError, check_numeric_gradient, test, in_location, xmean_std, numeric_eps=1e-3, rtol=0.2, atol=0.01)
test = mx.symbol.BatchNorm(data, fix_gamma=False, axis=chaxis)
check_numeric_gradient(test, in_location, xmean_std, numeric_eps=1e-3, rtol=0.2, atol=0.01)
test = mx.symbol.BatchNorm(data, fix_gamma=False, use_global_stats=True, axis=chaxis)
check_numeric_gradient(test, in_location, xmean_std, numeric_eps=1e-3, rtol=0.2, atol=0.01)
@pytest.mark.serial
def test_dnnl_sparse():
# This test is trying to create a race condition described in
# https://github.com/apache/incubator-mxnet/issues/10189
arr = mx.nd.random.uniform(shape=(10, 10, 32, 32))
weight1 = mx.nd.random.uniform(shape=(10, 10, 3, 3))
arr = mx.nd.Convolution(data=arr, weight=weight1, no_bias=True, kernel=(3, 3), num_filter=10)
rs_arr = mx.nd.sparse.row_sparse_array((mx.nd.zeros_like(arr), np.arange(arr.shape[0])))
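# rs_arr is an all-zero row_sparse array covering every row, so subtracting it leaves arr unchanged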
weight2 = mx.nd.random.uniform(shape=(10, np.prod(arr.shape[1:4])))
fc_res = mx.nd.FullyConnected(data=arr, weight=weight2, no_bias=True, num_hidden=10)
sub_res = mx.nd.elemwise_sub(arr, rs_arr)
res1 = np.dot(mx.nd.flatten(sub_res).asnumpy(), weight2.asnumpy().T)
assert_almost_equal(res1, fc_res.asnumpy())
@pytest.mark.serial
def test_sparse_nd_where():
def get_forward_expected_output(condition, x, y):
original_shape = x.shape
out = np.zeros(original_shape)
if condition.shape == x.shape:
for index, c in np.ndenumerate(condition):
if c != 0:
out[index] = x[index]
else:
out[index] = y[index]
else:
raise RuntimeError("Invalid condition shape for where op")
out = out.reshape(original_shape)
return out
def get_forward_inputs_same_shape(shape):
condition_np = np.random.randint(0, 2, np.prod(shape)).reshape(shape)
x_np = np.random.randint(1, 6, np.prod(shape)).reshape(shape)
y_np = np.random.randint(7, 11, np.prod(shape)).reshape(shape)
return condition_np, x_np, y_np
def get_backward_input(shape):
return np.random.randint(20, 30, np.prod(shape)).reshape(shape)
def get_backward_expected_outputs(grad_in, condition):
shape = grad_in.shape
grad_cond = np.zeros(condition.shape)
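# the condition input receives no gradient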
grad_x = np.empty(shape)
grad_y = np.empty(shape)
for index, c in np.ndenumerate(condition):
if c != 0:
grad_x[index] = grad_in[index]
grad_y[index] = 0
else:
grad_x[index] = 0
grad_y[index] = grad_in[index]
return grad_cond, grad_x, grad_y
def test_where_helper(shape):
condition_np, x_np, y_np = get_forward_inputs_same_shape(shape)
out_expected = get_forward_expected_output(condition_np, x_np, y_np)
grad_in_np = get_backward_input(shape)
grad_expected_cond, grad_expected_x, grad_expected_y \
= get_backward_expected_outputs(grad_in_np, condition_np)
condition = mx.sym.Variable('condition', stype='csr')
x = mx.sym.Variable('x')
y = mx.sym.Variable('y')
grad_in_mx = mx.nd.array(grad_in_np, dtype=np.int32)
where_sym = mx.sym.where(condition, x, y)
cond_nd = mx.nd.array(condition_np)
args = {'condition': cond_nd.tostype('csr'), 'x': mx.nd.array(x_np),
'y' : mx.nd.array(y_np)}
args_grad = {'condition': mx.nd.zeros_like(cond_nd),
'x': mx.nd.array(x_np).tostype('csr'), 'y' : mx.nd.array(y_np)}
# test req='write'
where_exe_write = where_sym._bind(ctx=default_device(), args=args,
args_grad=args_grad, grad_req='write')
# test forward req='write'
outputs = where_exe_write.forward(is_train=True)
assert same(outputs[0].asnumpy(), out_expected)
# test backward req='write'
where_exe_write.backward(grad_in_mx.astype('float32'))
assert same(where_exe_write.grad_dict['x'].asnumpy(), grad_expected_x)
assert same(where_exe_write.grad_dict['y'].asnumpy(), grad_expected_y)
assert same(where_exe_write.grad_dict['condition'].asnumpy(), grad_expected_cond)
# test req='add'
x_grad_init = np.random.randint(30, 40, np.prod(shape)).reshape(shape)
y_grad_init = np.random.randint(40, 50, np.prod(shape)).reshape(shape)
where_exe_add = where_sym._bind(ctx=default_device(), args=args,
args_grad=args_grad, grad_req='add')
where_exe_add.grad_dict['x'][:] = x_grad_init
where_exe_add.grad_dict['y'][:] = y_grad_init
# test forward req='add'
outputs = where_exe_add.forward(is_train=True)
assert same(outputs[0].asnumpy(), out_expected)
def test_where_numeric_gradient(shape):
condition = mx.sym.Variable('condition', stype='csr')
x = mx.sym.Variable('x')
y = mx.sym.Variable('y')
where_sym = mx.sym.where(condition, x, y)
condition_np, x_np, y_np = get_forward_inputs_same_shape(shape)
check_numeric_gradient(where_sym, [condition_np, x_np, y_np], grad_nodes=['x', 'y'])
test_where_helper((5, 9))
test_where_numeric_gradient((5, 9))
@pytest.mark.serial
def test_sparse_quadratic_function():
def f(x, a, b, c):
return a * x**2 + b * x + c
def check_sparse_quadratic_function(a, b, c, expected_stype):
# check forward and compare the result with dense op
ndim = 2
shape = rand_shape_nd(ndim, 5)
data = rand_ndarray(shape=shape, stype='csr')
data_np = data.asnumpy()
expected = f(data_np, a, b, c)
output = mx.nd.contrib.quadratic(data, a=a, b=b, c=c)
assert(output.stype == expected_stype)
assert_almost_equal(output.asnumpy(), expected)
a = np.random.random_sample()
b = np.random.random_sample()
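# a zero constant term maps zeros to zeros, so csr storage is preserved; a nonzero c densifies the output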
check_sparse_quadratic_function(a, b, 0.0, 'csr')
check_sparse_quadratic_function(a, b, 1.0, 'default')
def test_reshape_backward_fallback():
"""
  out
  |  \
 w_x  x
  /
 w
in which x is a sparse tensor and w_x = reshape(w).
Due to the sparse gradient optimization in sym.dot, grad(w_x) is sparse.
Although sym.reshape itself has no sparse implementation, grad(w) can also
be made sparse, e.g.,
- by passing a sparse args_grad entry for w to symbol.bind, or
- by adding out_y = sym.dot(sparse_y, w), so that grad(w) is inferred as sparse.
In either case, reshape's backward pass (from w_x back to w) needs to handle sparse inputs.
"""
ctx = default_device()
w_shape = (12, 4)
w_x_shape = (1, 48)
x_nd = rand_ndarray((4, 1), 'csr')
w_nd = rand_ndarray(w_shape)
w_x_nd = w_nd.reshape(w_x_shape)
out_x_nd = mx.nd.dot(x_nd, w_x_nd)
w_x_backward_grad = mx.nd.dot(x_nd, out_x_nd, transpose_a=True).asnumpy()
expected_grad_nd = w_x_backward_grad.reshape(w_shape)
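# reference gradient: grad(w_x) = dot(x.T, out), reshaped back to w's shape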
x = mx.sym.Variable('x', stype='csr')
w = mx.sym.Variable('w')
w_x = mx.sym.reshape(w, w_x_shape, name="w_x")
out = mx.sym.sparse.dot(x, w_x, name='out_x')
grad_w_nd = rand_ndarray(w_shape, 'row_sparse')
executor = out._bind(ctx=ctx, args={"x": x_nd, "w": w_nd},
args_grad={"w": grad_w_nd})
executor.forward(is_train=True)
executor.backward(out_x_nd)
assert_almost_equal(grad_w_nd.asnumpy(), expected_grad_nd)