blob: 750505c1eb4590cd3ab79473f7c25c3a94688f25 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import pickle as pkl
from mxnet.ndarray import NDArray
import mxnet as mx
from mxnet.test_utils import *
from common import random_seed
from mxnet.base import mx_real_t
from numpy.testing import assert_allclose
import numpy.random as rnd
import numpy as np
import scipy.sparse as spsp
from common import assertRaises, xfail_when_nonstandard_decimal_separator
from mxnet.ndarray.sparse import RowSparseNDArray, CSRNDArray
import pytest
# Executed once at import time, before any test in this module runs.
# NOTE(review): presumably this switches MXNet's NumPy-semantics mode back off so
# the legacy ``mx.nd`` sparse API is exercised -- confirm against the mx.npx docs.
mx.npx.reset_np()
def sparse_nd_ones(shape, stype):
    """Return an all-ones array of ``shape`` converted to storage type ``stype``."""
    dense_ones = mx.nd.ones(shape)
    return dense_ones.tostype(stype)
def test_sparse_nd_elemwise_add():
    """Compare ``mx.nd.elemwise_add`` with NumPy addition for dense and row_sparse inputs."""

    def make_operand(shape, stype):
        """Create one random operand; only 'row_sparse' and 'default' are handled."""
        if stype == 'row_sparse':
            operand, _ = rand_sparse_ndarray(shape, stype)
            return operand
        if stype == 'default':
            return mx.nd.array(random_arrays(shape), dtype = np.float32)
        assert(False)

    def check_sparse_nd_elemwise_binary(shapes, stypes, f, g):
        """Apply ``f`` to generated NDArrays and ``g`` to their NumPy copies, then compare."""
        # generate inputs
        operands = [make_operand(shapes[pos], stype) for pos, stype in enumerate(stypes)]
        # check result
        first, second = operands[0], operands[1]
        actual = f(first, second)
        assert_almost_equal(actual.asnumpy(), g(first.asnumpy(), second.asnumpy()))

    def numpy_add(x, y):
        return x + y

    num_repeats = 3
    op = mx.nd.elemwise_add
    for _ in range(num_repeats):
        # Both operands share a single random 2-D shape.
        shape = [rand_shape_2d()] * 2
        for stype_pair in (['default'] * 2, ['row_sparse', 'row_sparse']):
            check_sparse_nd_elemwise_binary(shape, stype_pair, op, numpy_add)
def test_sparse_nd_copy():
    """Check ``copyto`` between storage types and onto the default device."""

    def check_sparse_nd_copy(from_stype, to_stype, shape):
        """Copy a random ``from_stype`` array to a device and into a ``to_stype`` array."""
        source = rand_ndarray(shape, from_stype)
        # copy to ctx
        on_device = source.copyto(default_device())
        # copy to stype
        target = rand_ndarray(shape, to_stype)
        target = source.copyto(target)
        # Count element-wise mismatches; none are allowed.
        device_mismatch = np.abs(source.asnumpy() != on_device.asnumpy())
        assert np.sum(device_mismatch) == 0.0
        stype_mismatch = np.abs(source.asnumpy() != target.asnumpy())
        assert np.sum(stype_mismatch) == 0.0

    shape = rand_shape_2d()
    shape_3d = rand_shape_3d()
    for stype in ['row_sparse', 'csr']:
        check_sparse_nd_copy(stype, 'default', shape)
        check_sparse_nd_copy('default', stype, shape)
    # 3-D shapes are exercised with row_sparse only.
    three_d_cases = [
        ('row_sparse', 'row_sparse'),
        ('row_sparse', 'default'),
        ('default', 'row_sparse'),
    ]
    for src_stype, dst_stype in three_d_cases:
        check_sparse_nd_copy(src_stype, dst_stype, shape_3d)
def test_sparse_nd_basic():
    """Check basic attributes of a randomly generated row_sparse array."""

    def check_sparse_nd_basic_rsp():
        """Verify aux-array count, index dtype and stype of a row_sparse array."""
        expected_stype = 'row_sparse'
        rsp, (_values, _row_idx) = rand_sparse_ndarray(rand_shape_2d(), expected_stype)
        assert rsp._num_aux == 1
        assert rsp.indices.dtype == np.int64
        assert rsp.stype == 'row_sparse'

    check_sparse_nd_basic_rsp()
def test_sparse_nd_setitem():
    """Assign NDArray, NumPy and scalar values into zero-initialised sparse arrays."""

    def check_sparse_nd_setitem(stype, shape, dst):
        """Fill a zero ``stype`` array via ``x[:] = dst`` and verify its contents.

        ``dst`` may be an NDArray (dense or sparse), a NumPy array/scalar, or a
        plain Python scalar.
        """
        x = mx.nd.zeros(shape=shape, stype=stype)
        x[:] = dst
        if isinstance(dst, (np.ndarray, np.generic)):
            # Route NumPy input through mx.nd.array so the expected values go
            # through the same conversion as the assigned ones.
            expected = mx.nd.array(dst).asnumpy()
        elif isinstance(dst, NDArray):
            expected = dst.asnumpy()
        else:
            # Plain Python scalar: NumPy broadcasts it in the comparison below.
            expected = dst
        # The previous form ``np.all(a == b if cond else dst)`` reduced to
        # ``np.all(dst)`` for scalars and therefore never checked ``x``.
        assert np.all(x.asnumpy() == expected)

    shape = rand_shape_2d()
    for stype in ['row_sparse', 'csr']:
        # ndarray assignment
        check_sparse_nd_setitem(stype, shape, rand_ndarray(shape, 'default'))
        check_sparse_nd_setitem(stype, shape, rand_ndarray(shape, stype))
        # numpy assignment
        check_sparse_nd_setitem(stype, shape, np.ones(shape))
    # scalar assigned to row_sparse NDArray
    check_sparse_nd_setitem('row_sparse', shape, 2)
def test_sparse_nd_slice():
    """Check row indexing/slicing and ``mx.nd.slice`` on CSR arrays against dense results."""

    def slice_both(sparse_arr, dense_np, begin, end):
        """Run ``mx.nd.slice`` on the sparse array and on its dense copy."""
        sparse_out = mx.nd.slice(sparse_arr, begin=begin, end=end)
        dense_out = mx.nd.slice(mx.nd.array(dense_np), begin=begin, end=end)
        return sparse_out, dense_out

    shape = (rnd.randint(2, 10), rnd.randint(2, 10))
    num_rows, num_cols = shape
    stype = 'csr'
    A, _ = rand_sparse_ndarray(shape, stype)
    A2 = A.asnumpy()

    # Random non-empty row range.
    start = rnd.randint(0, num_rows - 1)
    end = rnd.randint(start + 1, num_rows)
    assert same(A[start:end].asnumpy(), A2[start:end])
    # The same range expressed with a negative start index.
    assert same(A[start - num_rows:end].asnumpy(), A2[start:end])
    assert same(A[start:].asnumpy(), A2[start:])
    assert same(A[:end].asnumpy(), A2[:end])

    # Single-row indexing keeps a leading axis on the sparse side.
    ind = rnd.randint(-num_rows, num_rows - 1)
    assert same(A[ind].asnumpy(), A2[ind][np.newaxis, :])

    # Two-dimensional slice through the operator interface.
    start_col = rnd.randint(0, num_cols - 1)
    end_col = rnd.randint(start_col + 1, num_cols)
    begin_2d = (start, start_col)
    end_2d = (end, end_col)
    result, result_dense = slice_both(A, A2, begin_2d, end_2d)
    assert same(result_dense.asnumpy(), result.asnumpy())

    # Repeat the checks on an all-zero CSR array.
    A = mx.nd.sparse.zeros('csr', shape)
    A2 = A.asnumpy()
    assert same(A[start:end].asnumpy(), A2[start:end])
    result, result_dense = slice_both(A, A2, begin_2d, end_2d)
    assert same(result_dense.asnumpy(), result.asnumpy())

    def check_slice_nd_csr_fallback(shape):
        """Slice a CSR array with a step of 2 and compare with the dense slice."""
        stype = 'csr'
        csr, _ = rand_sparse_ndarray(shape, stype)
        csr_np = csr.asnumpy()
        lo = rnd.randint(0, shape[0] - 1)
        hi = rnd.randint(lo + 1, shape[0])
        # non-trivial step should fallback to dense slice op
        sliced = mx.nd.sparse.slice(csr, begin=(lo,), end=(hi + 1,), step=(2,))
        sliced_dense = mx.nd.slice(mx.nd.array(csr_np), begin=(lo,), end=(hi + 1,), step=(2,))
        assert same(sliced_dense.asnumpy(), sliced.asnumpy())

    shape = (rnd.randint(2, 10), rnd.randint(1, 10))
    check_slice_nd_csr_fallback(shape)
def test_sparse_nd_concat():
    """Concatenate CSR arrays along axis 0 and compare with ``np.concatenate``."""

    def check_concat(arrays):
        """Assert ``mx.nd.concat(dim=0)`` matches NumPy concatenation of the dense copies."""
        expected = np.concatenate([arr.asnumpy() for arr in arrays], axis=0)
        # The comparison result must be asserted; previously it was discarded,
        # so a mismatch could never fail the test.
        assert same(mx.nd.concat(*arrays, dim=0).asnumpy(), expected)

    nds = []
    zero_nds = []
    # All pieces share the column count so they can be stacked row-wise.
    ncols = rnd.randint(2, 10)
    for _ in range(3):
        shape = (rnd.randint(2, 10), ncols)
        A, _ = rand_sparse_ndarray(shape, 'csr')
        nds.append(A)
        zero_nds.append(mx.nd.zeros(shape).tostype('csr'))
    check_concat(nds)
    check_concat(zero_nds)
def test_sparse_nd_equal():
    """Check values and result storage type of ``==`` on sparse arrays."""
    for stype in ['row_sparse', 'csr']:
        shape = rand_shape_2d()
        all_zero = np.zeros(shape)
        all_one = np.ones(shape)
        zeros_sp = mx.nd.zeros(shape=shape, stype=stype)
        ones_sp = sparse_nd_ones(shape, stype)

        res = zeros_sp == ones_sp
        assert (res.asnumpy() == all_zero).all()

        res = 0 == ones_sp
        assert (res.asnumpy() == all_zero).all()
        assert res.stype == 'default'

        res = 1 == ones_sp
        assert (res.asnumpy() == all_one).all()
        assert res.stype == stype
def test_sparse_nd_not_equal():
    """Check values and result storage type of ``!=`` on sparse arrays."""
    for stype in ['row_sparse', 'csr']:
        shape = rand_shape_2d()
        all_zero = np.zeros(shape)
        all_one = np.ones(shape)
        zeros_sp = mx.nd.zeros(shape=shape, stype=stype)
        ones_sp = sparse_nd_ones(shape, stype)

        res = zeros_sp != ones_sp
        assert (res.asnumpy() == all_one).all()

        res = 0 != ones_sp
        assert (res.asnumpy() == all_one).all()
        assert res.stype == stype

        res = 1 != ones_sp
        assert (res.asnumpy() == all_zero).all()
        assert res.stype == 'default'
def test_sparse_nd_greater():
    """Check values and result storage type of ``>`` on sparse arrays."""
    for stype in ['row_sparse', 'csr']:
        shape = rand_shape_2d()
        all_zero = np.zeros(shape)
        all_one = np.ones(shape)
        zeros_sp = mx.nd.zeros(shape=shape, stype=stype)
        ones_sp = sparse_nd_ones(shape, stype)

        res = zeros_sp > ones_sp
        assert (res.asnumpy() == all_zero).all()

        res = ones_sp > 0
        assert (res.asnumpy() == all_one).all()
        assert res.stype == stype

        res = 0 > ones_sp
        assert (res.asnumpy() == all_zero).all()
        assert res.stype == stype

        res = ones_sp > 1
        assert (res.asnumpy() == all_zero).all()
        assert res.stype == stype
def test_sparse_nd_greater_equal():
    """Check values and result storage type of ``>=`` on sparse arrays."""
    for stype in ['row_sparse', 'csr']:
        shape = rand_shape_2d()
        all_zero = np.zeros(shape)
        all_one = np.ones(shape)
        zeros_sp = mx.nd.zeros(shape=shape, stype=stype)
        ones_sp = sparse_nd_ones(shape, stype)

        res = zeros_sp >= ones_sp
        assert (res.asnumpy() == all_zero).all()

        res = ones_sp >= 0
        assert (res.asnumpy() == all_one).all()
        assert res.stype == 'default'

        res = 0 >= ones_sp
        assert (res.asnumpy() == all_zero).all()
        assert res.stype == 'default'

        res = ones_sp >= 1
        assert (res.asnumpy() == all_one).all()
        assert res.stype == stype
def test_sparse_nd_lesser():
    """Check values and result storage type of ``<`` on sparse arrays."""
    for stype in ['row_sparse', 'csr']:
        shape = rand_shape_2d()
        all_zero = np.zeros(shape)
        all_one = np.ones(shape)
        zeros_sp = mx.nd.zeros(shape=shape, stype=stype)
        ones_sp = sparse_nd_ones(shape, stype)

        res = ones_sp < zeros_sp
        assert (res.asnumpy() == all_zero).all()

        res = 0 < ones_sp
        assert (res.asnumpy() == all_one).all()
        assert res.stype == stype

        res = ones_sp < 0
        assert (res.asnumpy() == all_zero).all()
        assert res.stype == stype

        res = ones_sp < 1
        assert (res.asnumpy() == all_zero).all()
        assert res.stype == 'default'
def test_sparse_nd_lesser_equal():
    """Check values and result storage type of ``<=`` on sparse arrays."""
    for stype in ['row_sparse', 'csr']:
        shape = rand_shape_2d()
        all_zero = np.zeros(shape)
        all_one = np.ones(shape)
        zeros_sp = mx.nd.zeros(shape=shape, stype=stype)
        ones_sp = sparse_nd_ones(shape, stype)

        res = ones_sp <= zeros_sp
        assert (res.asnumpy() == all_zero).all()

        res = 0 <= ones_sp
        assert (res.asnumpy() == all_one).all()
        assert res.stype == 'default'

        res = ones_sp <= 0
        assert (res.asnumpy() == all_zero).all()
        assert res.stype == 'default'

        res = 1 <= ones_sp
        assert (res.asnumpy() == all_one).all()
        assert res.stype == stype
def test_sparse_nd_binary():
    """Check binary operators on sparse arrays (with random broadcasting) against NumPy."""
    N = 3

    def check_binary(fn, stype):
        """Run ``fn`` on NumPy operands and on their ``stype`` NDArray copies, ``N`` times."""
        for _ in range(N):
            ndim = 2
            bdim = 2
            oshape = np.random.randint(1, 6, size=(ndim,))
            lshape = list(oshape)
            # one for broadcast op, another for elemwise op
            rshape = list(oshape[ndim-bdim:])
            # Randomly collapse trailing axes of either side to size 1.
            for i in range(bdim):
                sep = np.random.uniform(0, 1)
                if sep < 0.33:
                    lshape[ndim-i-1] = 1
                elif sep < 0.66:
                    rshape[bdim-i-1] = 1
            lhs = np.random.uniform(0, 1, size=lshape)
            rhs = np.random.uniform(0, 1, size=rshape)
            lhs_nd = mx.nd.array(lhs).tostype(stype)
            rhs_nd = mx.nd.array(rhs).tostype(stype)
            operand_pairs = [((lhs, rhs), (lhs_nd, rhs_nd)),
                             ((lhs, lhs), (lhs_nd, lhs_nd))]
            for np_args, nd_args in operand_pairs:
                assert_allclose(fn(*np_args), fn(*nd_args).asnumpy(), rtol=1e-4, atol=1e-4)

    operators = [
        lambda x, y: x + y,
        lambda x, y: x - y,
        lambda x, y: x * y,
        lambda x, y: x / y,
        lambda x, y: x ** y,
        lambda x, y: x > y,
        lambda x, y: x < y,
        lambda x, y: x >= y,
        lambda x, y: x <= y,
        lambda x, y: x == y,
    ]
    for stype in ['row_sparse', 'csr']:
        for operator_fn in operators:
            check_binary(operator_fn, stype)
@xfail_when_nonstandard_decimal_separator
def test_sparse_nd_binary_scalar_op():
    """Check scalar-vs-array operators on sparse arrays against NumPy."""
    N = 3

    def check(fn, stype, out_stype=None):
        """Apply ``fn`` to a random NumPy array and to its ``stype`` NDArray copy."""
        for _ in range(N):
            ndim = 2
            shape = np.random.randint(1, 6, size=(ndim,))
            npy = np.random.normal(0, 1, size=shape)
            nd = mx.nd.array(npy).tostype(stype)
            if out_stype is not None:
                # NOTE(review): this inspects the *input* array's stype, not
                # the stype of fn(nd) -- confirm whether that is intended.
                assert(nd.stype == out_stype)
            assert_allclose(fn(npy), fn(nd).asnumpy(), rtol=1e-4, atol=1e-4)

    # Operators checked for values only.
    value_only_ops = [
        lambda x: 1 + x,
        lambda x: 1 - x,
        lambda x: 1 * x,
        lambda x: 1 / x,
        lambda x: 2 ** x,
        lambda x: 1 > x,
        lambda x: 0.5 > x,
        lambda x: 0.5 < x,
        lambda x: 0.5 >= x,
        lambda x: 0.5 <= x,
        lambda x: 0.5 == x,
    ]
    # Operators additionally run with ``out_stype`` set.
    stype_checked_ops = [
        lambda x: x / 2,
        lambda x: x + 0,
        lambda x: x - 0,
    ]
    for stype in ['row_sparse', 'csr']:
        for scalar_fn in value_only_ops:
            check(scalar_fn, stype)
        for scalar_fn in stype_checked_ops:
            check(scalar_fn, stype, out_stype=stype)
def test_sparse_nd_binary_iop():
    """Check in-place ``+=`` and ``*=`` on sparse arrays against NumPy."""
    N = 3

    def inplace_add(x, y):
        x += y
        return x

    def inplace_mul(x, y):
        x *= y
        return x

    def check_binary(fn, stype):
        """Run ``fn`` on equally shaped NumPy operands and their ``stype`` NDArray copies."""
        for _ in range(N):
            ndim = 2
            oshape = np.random.randint(1, 6, size=(ndim,))
            lshape, rshape = list(oshape), list(oshape)
            lhs = np.random.uniform(0, 1, size=lshape)
            rhs = np.random.uniform(0, 1, size=rshape)
            # The NDArray copies are taken before ``fn`` mutates ``lhs`` in place.
            lhs_nd = mx.nd.array(lhs).tostype(stype)
            rhs_nd = mx.nd.array(rhs).tostype(stype)
            expected = fn(lhs, rhs)
            actual = fn(lhs_nd, rhs_nd).asnumpy()
            assert_allclose(expected, actual, rtol=1e-4, atol=1e-4)

    for stype in ['csr', 'row_sparse']:
        for fn in (inplace_add, inplace_mul):
            check_binary(fn, stype)
def test_sparse_nd_negate():
    """Check unary negation of sparse arrays and that it does not modify its operand."""

    def check_sparse_nd_negate(shape, stype):
        """Negate a random ``stype`` array of the given ``shape`` and compare with NumPy."""
        # Use the caller-supplied shape; it was previously ignored in favour
        # of a freshly drawn random shape.
        npy = np.random.uniform(-10, 10, shape)
        arr = mx.nd.array(npy).tostype(stype)
        assert_almost_equal(npy, arr.asnumpy())
        assert_almost_equal(-npy, (-arr).asnumpy())
        # a final check to make sure the negation (-) is not implemented
        # as inplace operation, so the contents of arr does not change after
        # we compute (-arr)
        assert_almost_equal(npy, arr.asnumpy())

    shape = rand_shape_2d()
    stypes = ['csr', 'row_sparse']
    for stype in stypes:
        check_sparse_nd_negate(shape, stype)
def test_sparse_nd_broadcast():
    """Check ``broadcast_to`` and ``broadcast_like`` on sparse arrays against NumPy broadcasting."""
    sample_num = 1000
    # TODO(haibin) test with more than 2 dimensions

    def collapse_random_axes(target_shape, ndim):
        """Copy ``target_shape`` and set randomly chosen axes to length 1."""
        shape = target_shape.copy()
        axis_flags = np.random.randint(0, 2, size=ndim)
        shape[axis_flags.astype(bool)] = 1
        return shape

    def verify(ndarray_ret, target_shape, numpy_ret):
        """Check the broadcast result's shape and its mean squared error vs. the source data."""
        if type(ndarray_ret) is mx.ndarray.NDArray:
            ndarray_ret = ndarray_ret.asnumpy()
        assert (ndarray_ret.shape == target_shape).all()
        # NumPy broadcasting of the source data supplies the reference values.
        err = np.square(ndarray_ret - numpy_ret).mean()
        assert err < 1E-8

    def test_broadcast_to(stype):
        for _ in range(sample_num):
            ndim = 2
            target_shape = np.random.randint(1, 11, size=ndim)
            shape = collapse_random_axes(target_shape, ndim)
            dat = np.random.rand(*shape) - 0.5
            ndarray = mx.nd.array(dat).tostype(stype)
            verify(ndarray.broadcast_to(shape=target_shape), target_shape, dat)

    def test_broadcast_like(stype):
        for _ in range(sample_num):
            ndim = 2
            target_shape = np.random.randint(1, 11, size=ndim)
            target = mx.nd.ones(shape=tuple(target_shape))
            shape = collapse_random_axes(target_shape, ndim)
            dat = np.random.rand(*shape) - 0.5
            ndarray = mx.nd.array(dat).tostype(stype)
            verify(ndarray.broadcast_like(target), target_shape, dat)

    for stype in ['csr', 'row_sparse']:
        test_broadcast_to(stype)
        test_broadcast_like(stype)
def test_sparse_nd_transpose():
    """Check ``.T`` on sparse arrays against the NumPy transpose."""
    npy = np.random.uniform(-10, 10, rand_shape_2d())
    expected = npy.T
    for stype in ('csr', 'row_sparse'):
        sparse_arr = mx.nd.array(npy).tostype(stype)
        transposed = sparse_arr.T
        assert_almost_equal(expected, transposed.asnumpy())
def test_sparse_nd_storage_fallback():
    """Check operators that are given sparse inputs/outputs still produce correct values."""

    def check_output_fallback(shape):
        """Write the result of a dense ``broadcast_add`` into a CSR output array."""
        ones = mx.nd.ones(shape)
        out = mx.nd.zeros(shape=shape, stype='csr')
        mx.nd.broadcast_add(ones, ones * 2, out=out)
        # Compare element-wise: summing the differences would let positive
        # and negative errors cancel each other out.
        assert np.all(out.asnumpy() == 3)

    def check_input_fallback(shape):
        """Add a CSR array and a row_sparse array with ``broadcast_add``."""
        ones = mx.nd.ones(shape)
        out = mx.nd.broadcast_add(ones.tostype('csr'), ones.tostype('row_sparse'))
        assert np.all(out.asnumpy() == 2)

    def check_fallback_with_temp_resource(shape):
        """Sum an all-ones array and compare with the element count."""
        ones = mx.nd.ones(shape)
        out = mx.nd.sum(ones)
        assert(out.asscalar() == np.prod(shape))

    shape = rand_shape_2d()
    check_output_fallback(shape)
    check_input_fallback(shape)
    check_fallback_with_temp_resource(shape)
def test_sparse_nd_random():
    """ test sparse random operator on cpu """
    # gpu random operator doesn't use fixed seed
    # Compare by value: ``is`` against a string literal tests object identity,
    # which is implementation-dependent and raises a SyntaxWarning on Python 3.8+.
    if default_device().device_type == 'gpu':
        return
    shape = (100, 100)
    fns = [mx.nd.random.uniform, mx.nd.random.normal, mx.nd.random.gamma]
    for fn in fns:
        rsp_out = mx.nd.zeros(shape=shape, stype='row_sparse')
        dns_out = mx.nd.zeros(shape=shape, stype='default')
        # Same seed for both draws, so dense and row_sparse outputs must agree.
        with random_seed(0):
            fn(shape=shape, out=dns_out)
        with random_seed(0):
            fn(shape=shape, out=rsp_out)
        assert_almost_equal(dns_out.asnumpy(), rsp_out.asnumpy())
def test_sparse_nd_astype():
    """Check that ``astype('int32')`` on float32 sparse zeros yields an int32 array."""
    for stype in ('row_sparse', 'csr'):
        source = mx.nd.zeros(shape=rand_shape_2d(), stype=stype, dtype='float32')
        converted = source.astype('int32')
        assert converted.dtype == np.int32, converted.dtype
def test_sparse_nd_astype_copy():
    """Check when ``astype`` returns a new object and when it returns its operand."""
    for stype in ('row_sparse', 'csr'):
        x = mx.nd.zeros(shape=rand_shape_2d(), stype=stype, dtype='int32')

        # Changing the dtype yields a different object, with or without copy=False.
        y = x.astype('float32')
        assert y.dtype == np.float32
        assert y is not x

        y = x.astype('float32', copy=False)
        assert y.dtype == np.float32
        assert y is not x

        # Same dtype with the default ``copy`` still yields a different object.
        y = x.astype('int32')
        assert y.dtype == np.int32
        assert y is not x

        # Same dtype with copy=False hands back the operand itself.
        y = x.astype('int32', copy=False)
        assert y is x

        # The np.int32 type object behaves like the string 'int32'.
        y = x.astype(np.int32, copy=False)
        assert y is x
def test_sparse_nd_pickle():
    """Round-trip sparse arrays through pickle and compare class and contents."""
    dim0 = 40
    dim1 = 40
    shape = rand_shape_2d(dim0, dim1)
    # Expected Python class for each storage type (insertion order drives the loop).
    expected_class = {'row_sparse': RowSparseNDArray, 'csr': CSRNDArray}
    for stype, cls in expected_class.items():
        for density in (0, 0.5):
            original, _ = rand_sparse_ndarray(shape, stype, density)
            assert isinstance(original, cls)
            restored = pkl.loads(pkl.dumps(original))
            assert isinstance(restored, cls)
            assert same(original.asnumpy(), restored.asnumpy())
@pytest.mark.parametrize('save_fn', [mx.nd.save, mx.npx.savez])
def test_sparse_nd_save_load(save_fn):
    """Save lists and dicts of dense/sparse arrays with ``save_fn`` and reload them.

    ``save_fn`` is either ``mx.nd.save`` (takes the container itself) or
    ``mx.npx.savez`` (takes the arrays as positional or keyword arguments).
    """
    # Imported locally so the cleanup below does not depend on a star import
    # happening to expose ``os``.
    import os

    stypes = ['default', 'row_sparse', 'csr']
    stype_dict = {'default': NDArray, 'row_sparse': RowSparseNDArray, 'csr': CSRNDArray}
    num_data = 20
    densities = [0, 0.5]
    fname = 'tmp_list.npz'
    data_list1 = []
    for _ in range(num_data):
        stype = stypes[np.random.randint(0, len(stypes))]
        shape = rand_shape_2d(dim0=40, dim1=40)
        density = densities[np.random.randint(0, len(densities))]
        data_list1.append(rand_ndarray(shape, stype, density))
        assert isinstance(data_list1[-1], stype_dict[stype])

    try:
        # Round-trip as a list.
        if save_fn is mx.nd.save:
            save_fn(fname, data_list1)
        else:
            save_fn(fname, *data_list1)
        data_list2 = mx.nd.load(fname)
        if save_fn is mx.npx.savez:
            # Positional arrays are read back under the keys 'arr_<i>'.
            data_list2 = [data_list2['arr_' + str(i)] for i in range(num_data)]
        assert len(data_list1) == len(data_list2)
        for x, y in zip(data_list1, data_list2):
            assert same(x.asnumpy(), y.asnumpy())

        # Round-trip as a name -> array mapping.
        data_map1 = {f'ndarray xx {i}': x for i, x in enumerate(data_list1)}
        if save_fn is mx.nd.save:
            save_fn(fname, data_map1)
        else:
            save_fn(fname, **data_map1)
        data_map2 = mx.nd.load(fname)
        assert len(data_map1) == len(data_map2)
        for k, x in data_map1.items():
            y = data_map2[k]
            assert same(x.asnumpy(), y.asnumpy())
    finally:
        # Remove the temporary file even when an assertion above fails.
        if os.path.exists(fname):
            os.remove(fname)
@pytest.mark.parametrize('save_fn', [mx.nd.save, mx.npx.savez])
def test_sparse_ndarray_load_csr_npz_scipy(tmp_path, save_fn):
    """Load a SciPy-written CSR ``.npz`` with MXNet, re-save it with ``save_fn`` and reload."""

    def assert_matches_scipy(csr_nd, csr_ref):
        """Require data, indices and indptr to equal the SciPy matrix's arrays."""
        assert np.sum(csr_nd.data.asnumpy() != csr_ref.data) == 0
        assert np.sum(csr_nd.indices.asnumpy() != csr_ref.indices) == 0
        assert np.sum(csr_nd.indptr.asnumpy() != csr_ref.indptr) == 0

    csr_sp = spsp.rand(50, 100, density=0.5, format="csr")
    spsp.save_npz(tmp_path / "csr.npz", csr_sp)

    # The loaded matrix is looked up under the empty-string key.
    csr_mx = mx.nd.load(str(tmp_path / "csr.npz"))['']
    assert_matches_scipy(csr_mx, csr_sp)

    # Do not rebind ``csr_mx`` to the return value of ``save_fn``; doing so
    # replaced the loaded array with whatever the save call returned.
    save_fn(str(tmp_path / "csr_mx.npz"), csr_mx)
    csr_mx_loaded = mx.nd.load(str(tmp_path / "csr_mx.npz"))
    csr_mx_loaded = csr_mx_loaded[0] if save_fn is mx.nd.save else csr_mx_loaded['arr_0']
    assert_matches_scipy(csr_mx_loaded, csr_sp)
def test_sparse_nd_unsupported():
    """Check that ``_slice``, ``_at`` and ``reshape`` raise on a row_sparse array."""
    nd = mx.nd.zeros((2,2), stype='row_sparse')
    fn_slice = lambda x: x._slice(None, None)
    fn_at = lambda x: x._at(None)
    fn_reshape = lambda x: x.reshape(None)
    fns = [fn_slice, fn_at, fn_reshape]
    for fn in fns:
        # The previous ``try: ...; assert(False)`` / bare ``except: pass``
        # also swallowed the AssertionError, so the test could never fail.
        # pytest.raises fails the test when the call does not raise.
        with pytest.raises(Exception):
            fn(nd)
def test_create_csr():
    """Create CSR arrays from NDArray triples, COO data and SciPy matrices."""

    def assert_copy_roundtrip(csr_created):
        """``mx.nd.array`` applied to a CSR array must reproduce its dense contents."""
        csr_copy = mx.nd.array(csr_created)
        assert(same(csr_copy.asnumpy(), csr_created.asnumpy()))

    def check_create_csr_from_nd(shape, density, dtype):
        """Build a CSR array from (data, indices, indptr) NDArrays."""
        matrix = rand_ndarray(shape, 'csr', density)
        # create data array with provided dtype and ctx
        data = mx.nd.array(matrix.data.asnumpy(), dtype=dtype)
        indptr, indices = matrix.indptr, matrix.indices
        csr_created = mx.nd.sparse.csr_matrix((data, indices, indptr), shape=shape)
        assert csr_created.stype == 'csr'
        component_pairs = [(csr_created.data, data),
                           (csr_created.indptr, indptr),
                           (csr_created.indices, indices)]
        for created_part, source_part in component_pairs:
            assert same(created_part.asnumpy(), source_part.asnumpy())
        # verify csr matrix dtype and ctx is consistent from the ones provided
        assert csr_created.dtype == dtype, (csr_created, dtype)
        assert csr_created.data.dtype == dtype, (csr_created.data.dtype, dtype)
        assert csr_created.context == mx.context.current_context(), (csr_created.context, mx.context.current_context())
        assert_copy_roundtrip(csr_created)

    def check_create_csr_from_coo(shape, density, dtype):
        """Build a CSR array from COO-style (data, (row, col)) input."""
        matrix = rand_ndarray(shape, 'csr', density)
        sp_csr = matrix.asscipy()
        sp_coo = sp_csr.tocoo()
        coo_arg = (sp_coo.data, (sp_coo.row, sp_coo.col))
        csr_created = mx.nd.sparse.csr_matrix(coo_arg, shape=shape, dtype=dtype)
        assert csr_created.stype == 'csr'
        assert same(csr_created.data.asnumpy(), sp_csr.data)
        assert same(csr_created.indptr.asnumpy(), sp_csr.indptr)
        assert same(csr_created.indices.asnumpy(), sp_csr.indices)
        assert_copy_roundtrip(csr_created)
        # verify csr matrix dtype and ctx is consistent
        assert csr_created.dtype == dtype, (csr_created.dtype, dtype)
        assert csr_created.data.dtype == dtype, (csr_created.data.dtype, dtype)
        assert csr_created.context == mx.context.current_context(), (csr_created.context, mx.context.current_context())

    def check_create_csr_from_scipy(shape, density, f):
        """Convert SciPy CSR matrices with ``f`` and compare the components."""

        def assert_csr_almost_equal(nd, sp):
            """Compare an MXNet CSR array with a SciPy one, directly and via ``asscipy``."""
            for field in ('data', 'indptr', 'indices'):
                assert_almost_equal(getattr(nd, field).asnumpy(), getattr(sp, field))
            sp_csr = nd.asscipy()
            for field in ('data', 'indptr', 'indices'):
                assert_almost_equal(getattr(sp_csr, field), getattr(sp, field))
            assert(sp.dtype == sp_csr.dtype), (sp.dtype, sp_csr.dtype)

        # random canonical csr
        csr_sp = spsp.rand(shape[0], shape[1], density, format="csr")
        csr_nd = f(csr_sp)
        assert_csr_almost_equal(csr_nd, csr_sp)
        # non-canonical csr which contains duplicates and unsorted indices
        indptr = np.array([0, 2, 3, 7])
        indices = np.array([0, 2, 2, 0, 1, 2, 1])
        data = np.array([1, 2, 3, 4, 5, 6, 1])
        non_canonical_csr = spsp.csr_matrix((data, indices, indptr), shape=(3, 3), dtype=csr_nd.dtype)
        canonical_csr_nd = f(non_canonical_csr, dtype=csr_nd.dtype)
        # Canonicalise a SciPy copy to obtain the reference components.
        canonical_csr_sp = non_canonical_csr.copy()
        canonical_csr_sp.sum_duplicates()
        canonical_csr_sp.sort_indices()
        assert_csr_almost_equal(canonical_csr_nd, canonical_csr_sp)

    dim0 = 20
    dim1 = 20
    dtype = np.float64
    for density in [0, 0.5]:
        shape = rand_shape_2d(dim0, dim1)
        check_create_csr_from_nd(shape, density, dtype)
        check_create_csr_from_coo(shape, density, dtype)
        for from_scipy in (mx.nd.sparse.array, mx.nd.array):
            check_create_csr_from_scipy(shape, density, from_scipy)
def test_create_row_sparse():
    """Create row_sparse arrays from (data, indices) and from NumPy-integer shapes."""
    dim0 = 50
    dim1 = 50
    for density in (0, 0.5, 1):
        shape = rand_shape_2d(dim0, dim1)
        matrix = rand_ndarray(shape, 'row_sparse', density)
        data, indices = matrix.data, matrix.indices
        rsp_created = mx.nd.sparse.row_sparse_array((data, indices), shape=shape)
        assert rsp_created.stype == 'row_sparse'
        assert same(rsp_created.data.asnumpy(), data.asnumpy())
        assert same(rsp_created.indices.asnumpy(), indices.asnumpy())
        rsp_copy = mx.nd.array(rsp_created)
        assert same(rsp_copy.asnumpy(), rsp_created.asnumpy())

        # add this test since we added np.int32 and np.int64 to integer_types
        if len(shape) != 2:
            continue
        for np_int_type in (np.int32, np.int64):
            shape = [np_int_type(x) for x in list(shape)]
            arg1 = tuple(shape)
            # A matching ``shape`` argument is accepted ...
            mx.nd.sparse.row_sparse_array(arg1, tuple(shape))
            # ... whereas a conflicting one must raise ValueError.
            shape[0] += 1
            assert_exception(mx.nd.sparse.row_sparse_array, ValueError, arg1, tuple(shape))
def test_create_sparse_nd_infer_shape():
    """Create sparse arrays without an explicit shape and check the inferred one."""

    def check_create_csr_infer_shape(shape, density, dtype):
        """Build a CSR array from components only; a ValueError is tolerated."""
        try:
            matrix = rand_ndarray(shape, 'csr', density=density)
            data, indptr, indices = matrix.data, matrix.indptr, matrix.indices
            nd = mx.nd.sparse.csr_matrix((data, indices, indptr), dtype=dtype)
            num_rows, num_cols = nd.shape
            assert num_rows == len(indptr) - 1
            assert indices.shape[0] > 0, indices
            # No column index may reach the inferred column count.
            out_of_range = (num_cols <= indices).asnumpy()
            assert np.sum(out_of_range) == 0
            assert nd.dtype == dtype, (nd.dtype, dtype)
        # cannot infer on invalid shape
        except ValueError:
            pass

    def check_create_rsp_infer_shape(shape, density, dtype):
        """Build a row_sparse array from components only; a ValueError is tolerated."""
        try:
            array = rand_ndarray(shape, 'row_sparse', density=density)
            data, indices = array.data, array.indices
            nd = mx.nd.sparse.row_sparse_array((data, indices), dtype=dtype)
            inferred_shape = nd.shape
            assert inferred_shape[1:] == data.shape[1:]
            assert indices.ndim > 0
            assert nd.dtype == dtype
            if indices.shape[0] > 0:
                # No row index may reach the inferred row count.
                out_of_range = (inferred_shape[0] <= indices).asnumpy()
                assert np.sum(out_of_range) == 0
        # cannot infer on invalid shape
        except ValueError:
            pass

    dtype = np.int32
    shape = rand_shape_2d()
    shape_3d = rand_shape_3d()
    for density in (0, 0.5, 1):
        check_create_csr_infer_shape(shape, density, dtype)
        check_create_rsp_infer_shape(shape, density, dtype)
        check_create_rsp_infer_shape(shape_3d, density, dtype)
def test_create_sparse_nd_from_dense():
    """Create sparse arrays from dense NDArray, NumPy array and nested-list inputs."""

    def check_create_from_dns(shape, f, dense_arr, dtype, default_dtype, ctx):
        """Call ``f`` with and without explicit dtype/ctx and verify the results."""
        explicit = f(dense_arr, dtype=dtype, ctx=ctx)
        assert same(explicit.asnumpy(), np.ones(shape))
        assert explicit.dtype == dtype
        assert explicit.context == ctx
        # verify the default dtype inferred from dense arr
        implicit = f(dense_arr)
        assert implicit.dtype == default_dtype
        assert implicit.context == mx.context.current_context()

    shape = rand_shape_2d()
    dtype = np.int32
    src_dtype = np.float64
    ctx = mx.cpu(1)
    dense_arrs = [
        mx.nd.ones(shape, dtype=src_dtype),
        np.ones(shape, dtype=src_dtype),
        np.ones(shape, dtype=src_dtype).tolist(),
    ]
    for f in [mx.nd.sparse.csr_matrix, mx.nd.sparse.row_sparse_array]:
        for dense_arr in dense_arrs:
            # Array-like inputs carry their own dtype; plain lists are
            # expected to come out as float32.
            if isinstance(dense_arr, (NDArray, np.ndarray)):
                default_dtype = dense_arr.dtype
            else:
                default_dtype = np.float32
            check_create_from_dns(shape, f, dense_arr, dtype, default_dtype, ctx)
def test_create_sparse_nd_from_sparse():
    """Create sparse arrays from existing MXNet sparse arrays and a SciPy CSR matrix."""

    def check_create_from_sp(shape, f, sp_arr, dtype, src_dtype, ctx):
        """Call ``f`` with and without explicit dtype/ctx and verify the results."""
        explicit = f(sp_arr, dtype=dtype, ctx=ctx)
        assert same(explicit.asnumpy(), np.ones(shape))
        assert explicit.dtype == dtype
        assert explicit.context == ctx
        # verify the default dtype inferred from the sparse input
        implicit = f(sp_arr)
        assert implicit.dtype == src_dtype
        assert implicit.context == mx.context.current_context()

    shape = rand_shape_2d()
    src_dtype = np.float64
    dtype = np.int32
    ctx = mx.cpu(1)
    ones = mx.nd.ones(shape, dtype=src_dtype)
    # Each constructor is paired with the inputs it is fed.
    csr_sources = [ones.tostype('csr')]
    rsp_sources = [ones.tostype('row_sparse')]
    csr_sources.append(spsp.csr_matrix(np.ones(shape, dtype=src_dtype)))
    constructor_inputs = [
        (mx.nd.sparse.csr_matrix, csr_sources),
        (mx.nd.sparse.row_sparse_array, rsp_sources),
    ]
    for constructor, sources in constructor_inputs:
        for sp_arr in sources:
            check_create_from_sp(shape, constructor, sp_arr, dtype, src_dtype, ctx)
def test_create_sparse_nd_empty():
    """Create sparse arrays from a shape alone and check they are all-zero."""

    def check_empty(shape, stype):
        """``mx.nd.empty`` with a sparse stype must read back as zeros."""
        arr = mx.nd.empty(shape, stype=stype)
        assert arr.stype == stype
        assert same(arr.asnumpy(), np.zeros(shape))

    def check_shape_only(factory, expected_stype, shape, dtype, ctx):
        """Shared checks for constructors that are handed only a shape."""
        arr = factory(shape, dtype=dtype, ctx=ctx)
        assert arr.stype == expected_stype
        assert arr.dtype == dtype
        assert arr.context == ctx
        assert same(arr.asnumpy(), np.zeros(shape))
        # check the default value for dtype and ctx
        arr = factory(shape)
        assert arr.dtype == np.float32
        assert arr.context == mx.context.current_context()

    def check_csr_empty(shape, dtype, ctx):
        check_shape_only(mx.nd.sparse.csr_matrix, 'csr', shape, dtype, ctx)

    def check_rsp_empty(shape, dtype, ctx):
        check_shape_only(mx.nd.sparse.row_sparse_array, 'row_sparse', shape, dtype, ctx)

    shape = rand_shape_2d()
    shape_3d = rand_shape_3d()
    dtype = np.int32
    ctx = mx.cpu(1)
    for stype in ('csr', 'row_sparse'):
        check_empty(shape, stype)
    check_csr_empty(shape, dtype, ctx)
    check_rsp_empty(shape, dtype, ctx)
    check_rsp_empty(shape_3d, dtype, ctx)
def test_synthetic_dataset_generator():
    """Check CSR arrays generated with the "powerlaw" distribution."""

    def test_powerlaw_generator(csr_arr, final_row=1):
        """Check the row-to-row doubling pattern for rows 1..``final_row``.

        For each such row, the last stored column index plus one is taken as
        that row's non-zero count, and the following row must have exactly
        twice as many.
        """
        indices = csr_arr.indices.asnumpy()
        indptr = csr_arr.indptr.asnumpy()

        def row_nnz(row):
            # indptr[row] - 1 is the position of the last entry stored before
            # offset indptr[row]; its column index + 1 is used as the count.
            return indices[indptr[row] - 1] + 1

        for row in range(1, final_row + 1):
            assert row_nnz(row + 1) == 2 * row_nnz(row)

    def powerlaw_csr(shape, density):
        arr, _ = rand_sparse_ndarray(shape=shape, stype="csr",
                                     density=density, distribution="powerlaw")
        return arr

    # Test if density is preserved
    csr_arr_cols = powerlaw_csr((32, 10000), 0.01)
    csr_arr_small = powerlaw_csr((5, 5), 0.5)
    csr_arr_big = powerlaw_csr((32, 1000000), 0.4)
    csr_arr_square = powerlaw_csr((1600, 1600), 0.5)
    assert len(csr_arr_cols.data) == 3200

    doubling_cases = [
        (csr_arr_cols, 9),
        (csr_arr_small, 1),
        (csr_arr_big, 4),
        (csr_arr_square, 6),
    ]
    for csr_arr, last_row in doubling_cases:
        test_powerlaw_generator(csr_arr, final_row=last_row)
def test_sparse_nd_fluent():
    """Check that method-style calls on sparse arrays match the ``mx.ndarray`` functions."""

    def check_fluent_regular(stype, func, kwargs, shape=(5, 17), equal_nan=False):
        """Compare ``mx.ndarray.<func>(data, ...)`` with ``data.<func>(...)``."""
        with mx.name.NameManager():
            data = mx.nd.random_uniform(shape=shape, ctx=default_device()).tostype(stype)
            regular = getattr(mx.ndarray, func)(data, **kwargs)
            fluent = getattr(data, func)(**kwargs)
            if not isinstance(regular, list):
                assert almost_equal(regular.asnumpy(), fluent.asnumpy(), equal_nan=equal_nan)
            else:
                # List-valued results are compared element by element.
                for r, f in zip(regular, fluent):
                    assert almost_equal(r.asnumpy(), f.asnumpy(), equal_nan=equal_nan)

    both_stypes = ('csr', 'row_sparse')

    # Functions compared without NaN tolerance.
    all_funcs = ['zeros_like', 'square', 'round', 'rint', 'fix', 'floor', 'ceil', 'trunc',
                 'abs', 'sign', 'sin', 'degrees', 'radians', 'expm1']
    for func in all_funcs:
        for stype in both_stypes:
            check_fluent_regular(stype, func, {})

    # Functions compared with NaNs treated as equal.
    all_funcs = ['arcsin', 'arctan', 'tan', 'sinh', 'tanh',
                 'arcsinh', 'arctanh', 'log1p', 'sqrt', 'relu']
    for func in all_funcs:
        for stype in both_stypes:
            check_fluent_regular(stype, func, {}, equal_nan=True)

    check_fluent_regular('csr', 'slice', {'begin': (2, 5), 'end': (4, 7)}, shape=(5, 17))
    clip_kwargs = {'a_min': -0.25, 'a_max': 0.75}
    check_fluent_regular('row_sparse', 'clip', clip_kwargs)
    check_fluent_regular('csr', 'clip', clip_kwargs)

    for func in ['sum', 'mean', 'norm']:
        check_fluent_regular('csr', func, {'axis': 0})
def test_sparse_nd_exception():
    """ test invalid sparse operator will throw a exception """
    a = mx.nd.ones((2,2))
    # An unknown keyword argument is expected to surface as MXNetError.
    assertRaises(mx.base.MXNetError, mx.nd.sparse.retain, a, invalid_arg="garbage_value")
    # Conflicting shapes and an unknown storage type are expected to raise ValueError.
    value_error_cases = [
        (mx.nd.sparse.csr_matrix, (a,), {'shape': (3, 2)}),
        (mx.nd.sparse.csr_matrix, ((2, 2),), {'shape': (3, 2)}),
        (mx.nd.sparse.row_sparse_array, ((2, 2),), {'shape': (3, 2)}),
        (mx.nd.sparse.zeros, ("invalid_stype", (2, 2)), {}),
    ]
    for func, args, kwargs in value_error_cases:
        assertRaises(ValueError, func, *args, **kwargs)
def test_sparse_nd_check_format():
    """ test check_format for sparse ndarray """
    shape = rand_shape_2d()
    for stype in ("csr", "row_sparse"):
        # Randomly generated and all-zero arrays must both pass.
        arr, _ = rand_sparse_ndarray(shape, stype)
        arr.check_format()
        arr = mx.nd.sparse.zeros(stype, shape)
        arr.check_format()

    csr_shape = (3, 4)
    csr_data = [7, 8, 9]

    def expect_invalid_csr(indices_list, indptr_list):
        """Build a CSR array from the given aux lists; ``check_format`` must raise."""
        a = mx.nd.sparse.csr_matrix((csr_data, indices_list, indptr_list), shape=csr_shape)
        assertRaises(mx.base.MXNetError, a.check_format)

    # indptr holds an entry (5) larger than the number of stored values
    expect_invalid_csr([0, 2, 1], [0, 5, 2, 3])
    # CSR format indices should be in ascending order per row
    expect_invalid_csr([2, 1, 1], [0, 2, 2, 3])
    # CSR format indptr should end with value equal with size of indices
    expect_invalid_csr([1, 2, 1], [0, 2, 2, 4])
    # indptr holds a negative entry
    expect_invalid_csr([0, 2, 1], [0, -2, 2, 3])

    # CSR format should be 2 Dimensional.
    for non_2d in ([1, 2, 3], [[[1, 2, 3]]]):
        a = mx.nd.array(non_2d)
        assertRaises(ValueError, a.tostype, 'csr')

    rsp_shape = (3, 2)
    rsp_data = [[1, 2], [3, 4]]

    def expect_invalid_rsp(indices_list):
        """Build a row_sparse array with the given row indices; ``check_format`` must raise."""
        a = mx.nd.sparse.row_sparse_array((rsp_data, indices_list), shape=rsp_shape)
        assertRaises(mx.base.MXNetError, a.check_format)

    # Row Sparse format indices should be less than the number of rows
    expect_invalid_rsp([1, 4])
    # Row Sparse format indices should be in ascending order
    expect_invalid_rsp([1, 0])
    # Row Sparse format indices should not be negative
    expect_invalid_rsp([1, -2])
def test_sparse_nd_norm():
    """Compare ``norm`` on sparse arrays with ``norm`` on their dense conversion."""

    def check_sparse_nd_norm(stype, shape, density, **kwargs):
        """Evaluate ``norm(**kwargs)`` on a sparse array and on its dense copy."""
        data, _ = rand_sparse_ndarray(shape, stype, density)
        actual = data.norm(**kwargs)
        expected = data.tostype('default').norm(**kwargs)
        assert_almost_equal(actual.asnumpy(), expected.asnumpy())

    shape = (5, 5)
    norm_configs = [
        dict(axis=None, keepdims=False, ord=2),
        # test fallback
        dict(axis=0, keepdims=False, ord=2),
        dict(axis=None, keepdims=True, ord=2),
    ]
    for stype in ('row_sparse', 'csr'):
        for density in (0, 0.5, 1):
            for norm_kwargs in norm_configs:
                check_sparse_nd_norm(stype, shape, density, **norm_kwargs)
def test_sparse_fc():
    """Compare ``mx.nd.sparse.FullyConnected`` with the dense ``mx.nd.FullyConnected``."""

    def check_sparse_fc(batch_size, dim_in, dim_out, stype):
        """Run FC with ``stype`` data and a row_sparse weight, then with dense copies."""
        data = rand_ndarray((batch_size, dim_in), stype, density=0.5)
        weight = rand_ndarray((dim_out, dim_in), 'row_sparse', density=1)
        bias = rand_ndarray((dim_out, 1), 'row_sparse', density=1)
        sparse_out = mx.nd.sparse.FullyConnected(data, weight, num_hidden=dim_out, bias=bias)
        # Reference: dense data and weight; the bias is passed through unchanged.
        dense_out = mx.nd.FullyConnected(data.tostype('default'), weight.tostype('default'),
                                         num_hidden=dim_out, bias=bias)
        assert_almost_equal(sparse_out.asnumpy(), dense_out.asnumpy())

    fc_dims = (5, 10, 8)
    # test FC with row_sparse weight w/ density=1, dense data
    check_sparse_fc(*fc_dims, 'default')
    # test FC with row_sparse weight w/ density=1, csr data (fallback)
    check_sparse_fc(*fc_dims, 'csr')
def test_sparse_take():
    """Compare ``mx.nd.take`` on CSR data with ``np.take`` along axis 0."""

    def check_sparse_take(density, mode):
        """Take random (possibly out-of-range) rows from a random int32 CSR array."""
        data_shape = rand_shape_2d()
        idx_shape = (np.random.randint(low=1, high=10),)
        data = rand_ndarray(data_shape, 'csr', density=density).astype('int32')
        idx = mx.nd.array(np.random.randint(low=-5, high=15, size=idx_shape))
        # NumPy reference computed on the dense copy with integer indices.
        expected_result = np.take(data.asnumpy(), idx.asnumpy().astype('int32'),
                                  mode=mode, axis=0)
        result = mx.nd.take(data, idx, mode=mode)
        assert_almost_equal(result.asnumpy(), expected_result)
        assert result.indptr[0].asscalar() == 0

    for density in (0, 0.5, 1):
        for mode in ('clip', 'wrap'):
            check_sparse_take(density, mode)
def test_sparse_getnnz():
    """Compare ``mx.nd.contrib.getnnz`` on CSR arrays with SciPy's ``getnnz``.

    The test returns early when the default device is a GPU.
    """
    # Compare by value: ``is`` against a string literal tests object identity,
    # which is implementation-dependent and raises a SyntaxWarning on Python 3.8+.
    if default_device().device_type == 'gpu':
        return

    def check_sparse_getnnz(density, axis):
        """Count stored values along ``axis`` with MXNet and with SciPy, then compare."""
        shape = rand_shape_2d()
        data = rand_ndarray(shape, 'csr', density=density)
        data_sp = data.asscipy()
        result = mx.nd.contrib.getnnz(data, axis=axis)
        expected_result = data_sp.getnnz(axis=axis)
        assert_almost_equal(result.asnumpy(), expected_result)

    densities = [0, 0.5, 1]
    axis = [1, None]
    for d in densities:
        for a in axis:
            check_sparse_getnnz(d, a)