| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| import pickle as pkl |
| |
| from mxnet.ndarray import NDArray |
| import mxnet as mx |
| from mxnet.test_utils import * |
| from common import random_seed |
| from mxnet.base import mx_real_t |
| from numpy.testing import assert_allclose |
| import numpy.random as rnd |
| import numpy as np |
| import scipy.sparse as spsp |
| |
| from common import assertRaises, xfail_when_nonstandard_decimal_separator |
| from mxnet.ndarray.sparse import RowSparseNDArray, CSRNDArray |
| |
| import pytest |
| |
# NOTE(review): presumably resets MXNet's NumPy-compatibility switches so the
# tests below run with the legacy (non-NumPy) array semantics — confirm against
# the mx.npx documentation.
mx.npx.reset_np()
| |
def sparse_nd_ones(shape, stype):
    """Return an all-ones NDArray of ``shape`` converted to storage type ``stype``."""
    dense_ones = mx.nd.ones(shape)
    return dense_ones.tostype(stype)
| |
| |
def test_sparse_nd_elemwise_add():
    """Compare ``mx.nd.elemwise_add`` with NumPy addition for dense and row_sparse inputs."""

    def check_sparse_nd_elemwise_binary(shapes, stypes, f, g):
        """Create one random input per (shape, stype) pair and compare ``f`` with reference ``g``."""
        inputs = []
        for in_shape, in_stype in zip(shapes, stypes):
            if in_stype == 'row_sparse':
                arr, _ = rand_sparse_ndarray(in_shape, in_stype)
            elif in_stype == 'default':
                arr = mx.nd.array(random_arrays(in_shape), dtype=np.float32)
            else:
                # only the two storage types above are expected here
                assert False
            inputs.append(arr)
        lhs, rhs = inputs[0], inputs[1]
        actual = f(lhs, rhs)
        assert_almost_equal(actual.asnumpy(), g(lhs.asnumpy(), rhs.asnumpy()))

    def np_add(x, y):
        return x + y

    repeats = 3
    add_op = mx.nd.elemwise_add
    for _ in range(repeats):
        # both operands share the same random 2-D shape
        shape_pair = [rand_shape_2d()] * 2
        for stype_pair in (['default'] * 2, ['row_sparse', 'row_sparse']):
            check_sparse_nd_elemwise_binary(shape_pair, stype_pair, add_op, np_add)
| |
| |
def test_sparse_nd_copy():
    """Check ``copyto`` to a device and to an existing array across storage types."""

    def check_sparse_nd_copy(from_stype, to_stype, shape):
        """Copy a random ``from_stype`` array and verify both copies match the source."""
        src = rand_ndarray(shape, from_stype)
        # copy onto the default device
        on_device = src.copyto(default_device())
        # copy into a pre-existing array of the destination storage type
        dst = rand_ndarray(shape, to_stype)
        dst = src.copyto(dst)
        device_mismatch = src.asnumpy() != on_device.asnumpy()
        assert np.sum(np.abs(device_mismatch)) == 0.0
        stype_mismatch = src.asnumpy() != dst.asnumpy()
        assert np.sum(np.abs(stype_mismatch)) == 0.0

    shape = rand_shape_2d()
    shape_3d = rand_shape_3d()
    for stype in ('row_sparse', 'csr'):
        check_sparse_nd_copy(stype, 'default', shape)
        check_sparse_nd_copy('default', stype, shape)
    # 3-D shapes are only exercised with row_sparse
    three_d_cases = [
        ('row_sparse', 'row_sparse'),
        ('row_sparse', 'default'),
        ('default', 'row_sparse'),
    ]
    for from_stype, to_stype in three_d_cases:
        check_sparse_nd_copy(from_stype, to_stype, shape_3d)
| |
def test_sparse_nd_basic():
    """Check basic attributes of a randomly generated row_sparse NDArray."""

    def check_sparse_nd_basic_rsp():
        """Verify aux-array count, index dtype and storage type of a row_sparse array."""
        rsp, (_values, _row_idx) = rand_sparse_ndarray(rand_shape_2d(), 'row_sparse')
        assert rsp._num_aux == 1
        assert rsp.indices.dtype == np.int64
        assert rsp.stype == 'row_sparse'

    check_sparse_nd_basic_rsp()
| |
| |
def test_sparse_nd_setitem():
    """Check ``x[:] = value`` on sparse arrays for NDArray, NumPy and scalar values."""

    def check_sparse_nd_setitem(stype, shape, dst):
        """Assign ``dst`` to a zero array of ``stype`` and verify the stored contents."""
        x = mx.nd.zeros(shape=shape, stype=stype)
        x[:] = dst
        dst_nd = mx.nd.array(dst) if isinstance(dst, (np.ndarray, np.generic)) else dst
        # Pick the reference value first. The previous one-liner parsed as
        # ``np.all(<comparison> if <isinstance> else dst)``, so for a scalar
        # ``dst`` it only checked ``np.all(dst)`` and never inspected ``x``.
        expected = dst_nd.asnumpy() if isinstance(dst_nd, NDArray) else dst
        assert np.all(x.asnumpy() == expected)

    shape = rand_shape_2d()
    for stype in ['row_sparse', 'csr']:
        # ndarray assignment
        check_sparse_nd_setitem(stype, shape, rand_ndarray(shape, 'default'))
        check_sparse_nd_setitem(stype, shape, rand_ndarray(shape, stype))
        # numpy assignment
        check_sparse_nd_setitem(stype, shape, np.ones(shape))
    # scalar assigned to row_sparse NDArray
    check_sparse_nd_setitem('row_sparse', shape, 2)
| |
def test_sparse_nd_slice():
    """Check row slicing, integer indexing and ``slice`` ops on CSR arrays against dense results."""
    shape = (rnd.randint(2, 10), rnd.randint(2, 10))
    csr, _ = rand_sparse_ndarray(shape, 'csr')
    dense = csr.asnumpy()
    num_rows = shape[0]
    start = rnd.randint(0, num_rows - 1)
    end = rnd.randint(start + 1, num_rows)

    # row-range slicing, including a negative start and open-ended ranges
    assert same(csr[start:end].asnumpy(), dense[start:end])
    assert same(csr[start - num_rows:end].asnumpy(), dense[start:end])
    assert same(csr[start:].asnumpy(), dense[start:])
    assert same(csr[:end].asnumpy(), dense[:end])

    # a single (possibly negative) row index yields a 1-row 2-D result
    ind = rnd.randint(-num_rows, num_rows - 1)
    assert same(csr[ind].asnumpy(), dense[ind][np.newaxis, :])

    # 2-D slice op compared with the same op on the dense copy
    start_col = rnd.randint(0, shape[1] - 1)
    end_col = rnd.randint(start_col + 1, shape[1])
    begin_2d = (start, start_col)
    end_2d = (end, end_col)
    sparse_out = mx.nd.slice(csr, begin=begin_2d, end=end_2d)
    dense_out = mx.nd.slice(mx.nd.array(dense), begin=begin_2d, end=end_2d)
    assert same(dense_out.asnumpy(), sparse_out.asnumpy())

    # repeat the row and 2-D slices on an all-zero CSR array
    csr = mx.nd.sparse.zeros('csr', shape)
    dense = csr.asnumpy()
    assert same(csr[start:end].asnumpy(), dense[start:end])
    sparse_out = mx.nd.slice(csr, begin=begin_2d, end=end_2d)
    dense_out = mx.nd.slice(mx.nd.array(dense), begin=begin_2d, end=end_2d)
    assert same(dense_out.asnumpy(), sparse_out.asnumpy())

    def check_slice_nd_csr_fallback(shape):
        """Slice a random CSR array with step 2 and compare with the dense slice op."""
        csr_in, _ = rand_sparse_ndarray(shape, 'csr')
        dense_in = csr_in.asnumpy()
        row_start = rnd.randint(0, shape[0] - 1)
        row_end = rnd.randint(row_start + 1, shape[0])

        # non-trivial step should fallback to dense slice op
        slice_args = dict(begin=(row_start,), end=(row_end + 1,), step=(2,))
        sparse_res = mx.nd.sparse.slice(csr_in, **slice_args)
        dense_res = mx.nd.slice(mx.nd.array(dense_in), **slice_args)
        assert same(dense_res.asnumpy(), sparse_res.asnumpy())

    shape = (rnd.randint(2, 10), rnd.randint(1, 10))
    check_slice_nd_csr_fallback(shape)
| |
| |
def test_sparse_nd_concat():
    """Check ``mx.nd.concat`` along dim 0 for random and all-zero CSR arrays."""

    def check_concat(arrays):
        """Concatenate ``arrays`` with MXNet and NumPy and require identical results."""
        expected = np.concatenate([arr.asnumpy() for arr in arrays], axis=0)
        # The comparison result used to be discarded, so the check could never fail.
        assert same(mx.nd.concat(*arrays, dim=0).asnumpy(), expected)

    nds = []
    zero_nds = []
    # all arrays share the column count so they can be stacked along dim 0
    ncols = rnd.randint(2, 10)
    for _ in range(3):
        shape = (rnd.randint(2, 10), ncols)
        A, _ = rand_sparse_ndarray(shape, 'csr')
        nds.append(A)
        zero_nds.append(mx.nd.zeros(shape).tostype('csr'))
    check_concat(nds)
    check_concat(zero_nds)
| |
| |
def test_sparse_nd_equal():
    """Check ``==`` on sparse arrays: values and, for scalar operands, the result stype."""
    for stype in ('row_sparse', 'csr'):
        shape = rand_shape_2d()
        x = mx.nd.zeros(shape=shape, stype=stype)
        y = sparse_nd_ones(shape, stype)
        all_zeros = np.zeros(shape)
        all_ones = np.ones(shape)
        # (operation, expected values, expected result stype or None to skip)
        cases = [
            (lambda: x == y, all_zeros, None),
            (lambda: 0 == y, all_zeros, 'default'),
            (lambda: 1 == y, all_ones, stype),
        ]
        for compute, expected, expected_stype in cases:
            z = compute()
            assert (z.asnumpy() == expected).all()
            if expected_stype is not None:
                assert z.stype == expected_stype
| |
| |
def test_sparse_nd_not_equal():
    """Check ``!=`` on sparse arrays: values and, for scalar operands, the result stype."""
    for stype in ('row_sparse', 'csr'):
        shape = rand_shape_2d()
        x = mx.nd.zeros(shape=shape, stype=stype)
        y = sparse_nd_ones(shape, stype)
        all_zeros = np.zeros(shape)
        all_ones = np.ones(shape)
        # (operation, expected values, expected result stype or None to skip)
        cases = [
            (lambda: x != y, all_ones, None),
            (lambda: 0 != y, all_ones, stype),
            (lambda: 1 != y, all_zeros, 'default'),
        ]
        for compute, expected, expected_stype in cases:
            z = compute()
            assert (z.asnumpy() == expected).all()
            if expected_stype is not None:
                assert z.stype == expected_stype
| |
| |
def test_sparse_nd_greater():
    """Check ``>`` on sparse arrays: values and, for scalar operands, the result stype."""
    for stype in ('row_sparse', 'csr'):
        shape = rand_shape_2d()
        x = mx.nd.zeros(shape=shape, stype=stype)
        y = sparse_nd_ones(shape, stype)
        all_zeros = np.zeros(shape)
        all_ones = np.ones(shape)
        # (operation, expected values, expected result stype or None to skip)
        cases = [
            (lambda: x > y, all_zeros, None),
            (lambda: y > 0, all_ones, stype),
            (lambda: 0 > y, all_zeros, stype),
            (lambda: y > 1, all_zeros, stype),
        ]
        for compute, expected, expected_stype in cases:
            z = compute()
            assert (z.asnumpy() == expected).all()
            if expected_stype is not None:
                assert z.stype == expected_stype
| |
| |
def test_sparse_nd_greater_equal():
    """Check ``>=`` on sparse arrays: values and, for scalar operands, the result stype."""
    for stype in ('row_sparse', 'csr'):
        shape = rand_shape_2d()
        x = mx.nd.zeros(shape=shape, stype=stype)
        y = sparse_nd_ones(shape, stype)
        all_zeros = np.zeros(shape)
        all_ones = np.ones(shape)
        # (operation, expected values, expected result stype or None to skip)
        cases = [
            (lambda: x >= y, all_zeros, None),
            (lambda: y >= 0, all_ones, 'default'),
            (lambda: 0 >= y, all_zeros, 'default'),
            (lambda: y >= 1, all_ones, stype),
        ]
        for compute, expected, expected_stype in cases:
            z = compute()
            assert (z.asnumpy() == expected).all()
            if expected_stype is not None:
                assert z.stype == expected_stype
| |
| |
def test_sparse_nd_lesser():
    """Check ``<`` on sparse arrays: values and, for scalar operands, the result stype."""
    for stype in ('row_sparse', 'csr'):
        shape = rand_shape_2d()
        x = mx.nd.zeros(shape=shape, stype=stype)
        y = sparse_nd_ones(shape, stype)
        all_zeros = np.zeros(shape)
        all_ones = np.ones(shape)
        # (operation, expected values, expected result stype or None to skip)
        cases = [
            (lambda: y < x, all_zeros, None),
            (lambda: 0 < y, all_ones, stype),
            (lambda: y < 0, all_zeros, stype),
            (lambda: y < 1, all_zeros, 'default'),
        ]
        for compute, expected, expected_stype in cases:
            z = compute()
            assert (z.asnumpy() == expected).all()
            if expected_stype is not None:
                assert z.stype == expected_stype
| |
| |
def test_sparse_nd_lesser_equal():
    """Check ``<=`` on sparse arrays: values and, for scalar operands, the result stype."""
    for stype in ('row_sparse', 'csr'):
        shape = rand_shape_2d()
        x = mx.nd.zeros(shape=shape, stype=stype)
        y = sparse_nd_ones(shape, stype)
        all_zeros = np.zeros(shape)
        all_ones = np.ones(shape)
        # (operation, expected values, expected result stype or None to skip)
        cases = [
            (lambda: y <= x, all_zeros, None),
            (lambda: 0 <= y, all_ones, 'default'),
            (lambda: y <= 0, all_zeros, 'default'),
            (lambda: 1 <= y, all_ones, stype),
        ]
        for compute, expected, expected_stype in cases:
            z = compute()
            assert (z.asnumpy() == expected).all()
            if expected_stype is not None:
                assert z.stype == expected_stype
| |
| |
def test_sparse_nd_binary():
    """Compare Python binary operators on sparse NDArrays with the same operators on NumPy arrays."""
    num_trials = 3

    def check_binary(fn, stype):
        """Run ``fn`` on random, possibly broadcastable 2-D operands of storage type ``stype``."""
        for _ in range(num_trials):
            out_shape = np.random.randint(1, 6, size=(2,))
            left_shape = list(out_shape)
            right_shape = list(out_shape)
            # Visit the last axis first; each axis is randomly collapsed to 1 on
            # the left operand, on the right operand, or on neither.
            for axis in (1, 0):
                draw = np.random.uniform(0, 1)
                if draw < 0.33:
                    left_shape[axis] = 1
                elif draw < 0.66:
                    right_shape[axis] = 1
            lhs = np.random.uniform(0, 1, size=left_shape)
            rhs = np.random.uniform(0, 1, size=right_shape)
            lhs_nd = mx.nd.array(lhs).tostype(stype)
            rhs_nd = mx.nd.array(rhs).tostype(stype)
            tolerances = dict(rtol=1e-4, atol=1e-4)
            # one for broadcast op, another for elemwise op
            assert_allclose(fn(lhs, rhs), fn(lhs_nd, rhs_nd).asnumpy(), **tolerances)
            assert_allclose(fn(lhs, lhs), fn(lhs_nd, lhs_nd).asnumpy(), **tolerances)

    binary_fns = [
        lambda x, y: x + y,
        lambda x, y: x - y,
        lambda x, y: x * y,
        lambda x, y: x / y,
        lambda x, y: x ** y,
        lambda x, y: x > y,
        lambda x, y: x < y,
        lambda x, y: x >= y,
        lambda x, y: x <= y,
        lambda x, y: x == y,
    ]
    for stype in ('row_sparse', 'csr'):
        for fn in binary_fns:
            check_binary(fn, stype)
| |
| |
@xfail_when_nonstandard_decimal_separator
def test_sparse_nd_binary_scalar_op():
    """Compare scalar/array operators on sparse NDArrays with the same operators on NumPy arrays."""
    num_trials = 3

    def check(fn, stype, out_stype=None):
        """Apply ``fn`` to a random normal 2-D array in NumPy and in ``stype`` storage."""
        for _ in range(num_trials):
            shape = np.random.randint(1, 6, size=(2,))
            npy = np.random.normal(0, 1, size=shape)
            nd = mx.nd.array(npy).tostype(stype)
            if out_stype is not None:
                # NOTE(review): this inspects the *input* array's stype, not the
                # stype of fn(nd); the parameter name suggests the result was
                # meant — confirm before tightening.
                assert nd.stype == out_stype
            assert_allclose(fn(npy), fn(nd).asnumpy(), rtol=1e-4, atol=1e-4)

    # operators checked for values only
    value_only_fns = [
        lambda x: 1 + x,
        lambda x: 1 - x,
        lambda x: 1 * x,
        lambda x: 1 / x,
        lambda x: 2 ** x,
        lambda x: 1 > x,
        lambda x: 0.5 > x,
        lambda x: 0.5 < x,
        lambda x: 0.5 >= x,
        lambda x: 0.5 <= x,
        lambda x: 0.5 == x,
    ]
    # operators additionally passed an expected storage type
    stype_checked_fns = [
        lambda x: x / 2,
        lambda x: x + 0,
        lambda x: x - 0,
    ]
    for stype in ('row_sparse', 'csr'):
        for fn in value_only_fns:
            check(fn, stype)
        for fn in stype_checked_fns:
            check(fn, stype, out_stype=stype)
| |
| |
def test_sparse_nd_binary_iop():
    """Compare in-place ``+=`` and ``*=`` on sparse NDArrays with the NumPy equivalents."""
    num_trials = 3

    def check_binary(fn, stype):
        """Run in-place ``fn`` on same-shaped random operands in NumPy and in ``stype`` storage."""
        for _ in range(num_trials):
            dims = list(np.random.randint(1, 6, size=(2,)))
            lhs = np.random.uniform(0, 1, size=list(dims))
            rhs = np.random.uniform(0, 1, size=list(dims))
            # Build the NDArrays before fn mutates the NumPy left operand.
            lhs_nd = mx.nd.array(lhs).tostype(stype)
            rhs_nd = mx.nd.array(rhs).tostype(stype)
            expected = fn(lhs, rhs)
            actual = fn(lhs_nd, rhs_nd).asnumpy()
            assert_allclose(expected, actual, rtol=1e-4, atol=1e-4)

    def inplace_add(x, y):
        x += y
        return x

    def inplace_mul(x, y):
        x *= y
        return x

    for stype in ('csr', 'row_sparse'):
        for fn in (inplace_add, inplace_mul):
            check_binary(fn, stype)
| |
| |
def test_sparse_nd_negate():
    """Check unary negation on sparse arrays and that it leaves the operand untouched."""

    def check_sparse_nd_negate(shape, stype):
        """Negate a random array of ``shape`` stored as ``stype`` and compare with NumPy."""
        # Use the caller-supplied shape; it was previously ignored in favour of
        # a fresh random shape.
        npy = np.random.uniform(-10, 10, shape)
        arr = mx.nd.array(npy).tostype(stype)
        assert_almost_equal(npy, arr.asnumpy())
        assert_almost_equal(-npy, (-arr).asnumpy())

        # a final check to make sure the negation (-) is not implemented
        # as inplace operation, so the contents of arr does not change after
        # we compute (-arr)
        assert_almost_equal(npy, arr.asnumpy())

    shape = rand_shape_2d()
    stypes = ['csr', 'row_sparse']
    for stype in stypes:
        check_sparse_nd_negate(shape, stype)
| |
| |
def test_sparse_nd_broadcast():
    """Check ``broadcast_to`` and ``broadcast_like`` on 2-D sparse arrays against NumPy broadcasting."""
    sample_num = 1000

    # TODO(haibin) test with more than 2 dimensions
    def rand_source_and_target():
        """Return (source data, target shape); random source axes are collapsed to 1."""
        ndim = 2
        target_shape = np.random.randint(1, 11, size=ndim)
        shape = target_shape.copy()
        axis_flags = np.random.randint(0, 2, size=ndim)
        for (axis, flag) in enumerate(axis_flags):
            if flag:
                shape[axis] = 1
        dat = np.random.rand(*shape) - 0.5
        return dat, target_shape

    def check_result(ndarray_ret, dat, target_shape):
        """Verify the broadcast result's shape and that its values match ``dat``."""
        # isinstance rather than an exact type check, so that a sparse NDArray
        # subclass result is converted to NumPy as well.
        if isinstance(ndarray_ret, mx.ndarray.NDArray):
            ndarray_ret = ndarray_ret.asnumpy()
        assert (ndarray_ret.shape == target_shape).all()
        # NumPy broadcasts ``dat`` against the result in the subtraction
        err = np.square(ndarray_ret - dat).mean()
        assert err < 1E-8

    def check_broadcast_to(stype):
        for _ in range(sample_num):
            dat, target_shape = rand_source_and_target()
            ndarray = mx.nd.array(dat).tostype(stype)
            check_result(ndarray.broadcast_to(shape=target_shape), dat, target_shape)

    def check_broadcast_like(stype):
        for _ in range(sample_num):
            dat, target_shape = rand_source_and_target()
            target = mx.nd.ones(shape=tuple(target_shape))
            ndarray = mx.nd.array(dat).tostype(stype)
            check_result(ndarray.broadcast_like(target), dat, target_shape)

    stypes = ['csr', 'row_sparse']
    for stype in stypes:
        check_broadcast_to(stype)
        check_broadcast_like(stype)
| |
| |
def test_sparse_nd_transpose():
    """Check that ``.T`` on a sparse array matches the NumPy transpose."""
    dense = np.random.uniform(-10, 10, rand_shape_2d())
    expected = dense.T
    for stype in ('csr', 'row_sparse'):
        sparse = mx.nd.array(dense).tostype(stype)
        transposed = sparse.T
        assert_almost_equal(expected, transposed.asnumpy())
| |
| |
def test_sparse_nd_storage_fallback():
    """Check ops that receive sparse inputs or a sparse output buffer still give correct values."""

    def check_output_fallback(shape):
        """Write a dense ``broadcast_add`` result into a CSR output array."""
        ones = mx.nd.ones(shape)
        out = mx.nd.zeros(shape=shape, stype='csr')
        mx.nd.broadcast_add(ones, ones * 2, out=out)
        # Element-wise check: summing signed differences would let offsetting
        # errors cancel out.
        assert (out.asnumpy() == 3).all()

    def check_input_fallback(shape):
        """Add a CSR operand and a row_sparse operand with ``broadcast_add``."""
        ones = mx.nd.ones(shape)
        out = mx.nd.broadcast_add(ones.tostype('csr'), ones.tostype('row_sparse'))
        assert (out.asnumpy() == 2).all()

    def check_fallback_with_temp_resource(shape):
        """Sum an all-ones array and compare with the element count."""
        ones = mx.nd.ones(shape)
        out = mx.nd.sum(ones)
        assert out.asscalar() == np.prod(shape)

    shape = rand_shape_2d()
    check_output_fallback(shape)
    check_input_fallback(shape)
    check_fallback_with_temp_resource(shape)
| |
| |
def test_sparse_nd_random():
    """Check that random ops fill a row_sparse output like a dense one under the same seed (CPU only)."""
    # gpu random operator doesn't use fixed seed
    # Compare by value: ``is 'gpu'`` tested object identity against a string
    # literal, which is not guaranteed to hold for equal strings.
    if default_device().device_type == 'gpu':
        return
    shape = (100, 100)
    fns = [mx.nd.random.uniform, mx.nd.random.normal, mx.nd.random.gamma]
    for fn in fns:
        rsp_out = mx.nd.zeros(shape=shape, stype='row_sparse')
        dns_out = mx.nd.zeros(shape=shape, stype='default')
        # same seed for both draws so the outputs are comparable
        with random_seed(0):
            fn(shape=shape, out=dns_out)
        with random_seed(0):
            fn(shape=shape, out=rsp_out)
        assert_almost_equal(dns_out.asnumpy(), rsp_out.asnumpy())
| |
| |
def test_sparse_nd_astype():
    """Check that ``astype('int32')`` on a float32 sparse array yields an int32 array."""
    for stype in ('row_sparse', 'csr'):
        source = mx.nd.zeros(shape=rand_shape_2d(), stype=stype, dtype='float32')
        converted = source.astype('int32')
        assert converted.dtype == np.int32, converted.dtype
| |
| |
def test_sparse_nd_astype_copy():
    """Check when ``astype`` returns a new object versus the same object, depending on ``copy``."""
    for stype in ('row_sparse', 'csr'):
        source = mx.nd.zeros(shape=rand_shape_2d(), stype=stype, dtype='int32')

        # different dtype, default copy: a new object
        as_float = source.astype('float32')
        assert as_float.dtype == np.float32
        assert as_float is not source

        # different dtype, copy=False: still a new object
        as_float = source.astype('float32', copy=False)
        assert as_float.dtype == np.float32
        assert as_float is not source

        # same dtype, default copy: a new object even though the dtype matches
        as_int = source.astype('int32')
        assert as_int.dtype == np.int32
        assert as_int is not source

        # same dtype, copy=False: the very same object comes back
        as_int = source.astype('int32', copy=False)
        assert as_int is source

        # np.int32 behaves like the string 'int32'
        as_int = source.astype(np.int32, copy=False)
        assert as_int is source
| |
| |
def test_sparse_nd_pickle():
    """Check that sparse arrays keep their class and contents through a pickle round trip."""
    expected_cls = {'row_sparse': RowSparseNDArray, 'csr': CSRNDArray}
    shape = rand_shape_2d(40, 40)
    for stype in ('row_sparse', 'csr'):
        for density in (0, 0.5):
            original, _ = rand_sparse_ndarray(shape, stype, density)
            assert isinstance(original, expected_cls[stype])
            payload = pkl.dumps(original)
            restored = pkl.loads(payload)
            assert isinstance(restored, expected_cls[stype])
            assert same(original.asnumpy(), restored.asnumpy())
| |
| |
@pytest.mark.parametrize('save_fn', [mx.nd.save, mx.npx.savez])
def test_sparse_nd_save_load(save_fn):
    """Round-trip a list and a dict of mixed-stype arrays through ``save_fn`` and ``mx.nd.load``.

    ``mx.nd.save`` is given the container itself; ``mx.npx.savez`` is given the
    arrays unpacked as positional or keyword arguments.
    """
    # Local import: no explicit module-level ``import os`` is visible in this file.
    import os

    stypes = ['default', 'row_sparse', 'csr']
    stype_dict = {'default': NDArray, 'row_sparse': RowSparseNDArray, 'csr': CSRNDArray}
    num_data = 20
    densities = [0, 0.5]
    fname = 'tmp_list.npz'
    data_list1 = []
    for _ in range(num_data):
        stype = stypes[np.random.randint(0, len(stypes))]
        shape = rand_shape_2d(dim0=40, dim1=40)
        density = densities[np.random.randint(0, len(densities))]
        data_list1.append(rand_ndarray(shape, stype, density))
        assert isinstance(data_list1[-1], stype_dict[stype])

    # try/finally so the temporary file is removed even when a check fails
    try:
        if save_fn is mx.nd.save:
            save_fn(fname, data_list1)
        else:
            save_fn(fname, *data_list1)

        data_list2 = mx.nd.load(fname)
        if save_fn is mx.npx.savez:
            # positional arrays are read back under the keys 'arr_0', 'arr_1', ...
            data_list2 = [data_list2['arr_' + str(i)] for i in range(num_data)]
        assert len(data_list1) == len(data_list2)
        for x, y in zip(data_list1, data_list2):
            assert same(x.asnumpy(), y.asnumpy())

        data_map1 = {f'ndarray xx {i}': x for i, x in enumerate(data_list1)}
        if save_fn is mx.nd.save:
            save_fn(fname, data_map1)
        else:
            save_fn(fname, **data_map1)
        data_map2 = mx.nd.load(fname)
        assert len(data_map1) == len(data_map2)
        for k, x in data_map1.items():
            y = data_map2[k]
            assert same(x.asnumpy(), y.asnumpy())
    finally:
        # The file may not exist if the first save failed.
        if os.path.exists(fname):
            os.remove(fname)
| |
| |
@pytest.mark.parametrize('save_fn', [mx.nd.save, mx.npx.savez])
def test_sparse_ndarray_load_csr_npz_scipy(tmp_path, save_fn):
    """Load a SciPy-written CSR ``.npz`` with MXNet, re-save it with ``save_fn`` and reload it."""

    def assert_same_csr(csr_nd, csr_ref):
        """Require data, indices and indptr of ``csr_nd`` to equal those of ``csr_ref``."""
        assert np.sum(csr_nd.data.asnumpy() != csr_ref.data) == 0
        assert np.sum(csr_nd.indices.asnumpy() != csr_ref.indices) == 0
        assert np.sum(csr_nd.indptr.asnumpy() != csr_ref.indptr) == 0

    csr_sp = spsp.rand(50, 100, density=0.5, format="csr")
    spsp.save_npz(tmp_path / "csr.npz", csr_sp)

    # the SciPy-saved matrix is read back under the empty-string key
    csr_mx = mx.nd.load(str(tmp_path / "csr.npz"))['']
    assert_same_csr(csr_mx, csr_sp)

    # The save call's return value is not needed; it used to be assigned back
    # to ``csr_mx``, clobbering the loaded array.
    save_fn(str(tmp_path / "csr_mx.npz"), csr_mx)
    csr_mx_loaded = mx.nd.load(str(tmp_path / "csr_mx.npz"))
    csr_mx_loaded = csr_mx_loaded[0] if save_fn is mx.nd.save else csr_mx_loaded['arr_0']
    assert_same_csr(csr_mx_loaded, csr_sp)
| |
| |
def test_sparse_nd_unsupported():
    """Check that ``_slice``, ``_at`` and ``reshape`` raise on a row_sparse array."""
    nd = mx.nd.zeros((2, 2), stype='row_sparse')
    fn_slice = lambda x: x._slice(None, None)
    fn_at = lambda x: x._at(None)
    fn_reshape = lambda x: x.reshape(None)
    fns = [fn_slice, fn_at, fn_reshape]
    for fn in fns:
        # The previous ``try: fn(nd); assert False / except: pass`` swallowed
        # its own AssertionError, so the test passed even when nothing raised.
        with pytest.raises(Exception):
            fn(nd)
| |
| |
def test_create_csr():
    """Check CSR construction from NDArray triples, COO data and SciPy matrices."""

    def check_create_csr_from_nd(shape, density, dtype):
        """Build a CSR matrix from (data, indices, indptr) NDArrays and verify it."""
        source = rand_ndarray(shape, 'csr', density)
        # data array recreated with the requested dtype
        values = mx.nd.array(source.data.asnumpy(), dtype=dtype)
        row_ptr = source.indptr
        col_idx = source.indices
        created = mx.nd.sparse.csr_matrix((values, col_idx, row_ptr), shape=shape)
        assert created.stype == 'csr'
        assert same(created.data.asnumpy(), values.asnumpy())
        assert same(created.indptr.asnumpy(), row_ptr.asnumpy())
        assert same(created.indices.asnumpy(), col_idx.asnumpy())
        # dtype and context must match what was provided
        assert created.dtype == dtype, (created, dtype)
        assert created.data.dtype == dtype, (created.data.dtype, dtype)
        assert created.context == mx.context.current_context(), \
            (created.context, mx.context.current_context())
        copied = mx.nd.array(created)
        assert same(copied.asnumpy(), created.asnumpy())

    def check_create_csr_from_coo(shape, density, dtype):
        """Build a CSR matrix from SciPy COO (data, (row, col)) and verify it."""
        source = rand_ndarray(shape, 'csr', density)
        ref_csr = source.asscipy()
        ref_coo = ref_csr.tocoo()
        coo_arg = (ref_coo.data, (ref_coo.row, ref_coo.col))
        created = mx.nd.sparse.csr_matrix(coo_arg, shape=shape, dtype=dtype)
        assert created.stype == 'csr'
        assert same(created.data.asnumpy(), ref_csr.data)
        assert same(created.indptr.asnumpy(), ref_csr.indptr)
        assert same(created.indices.asnumpy(), ref_csr.indices)
        copied = mx.nd.array(created)
        assert same(copied.asnumpy(), created.asnumpy())
        # dtype and context must be consistent
        assert created.dtype == dtype, (created.dtype, dtype)
        assert created.data.dtype == dtype, (created.data.dtype, dtype)
        assert created.context == mx.context.current_context(), \
            (created.context, mx.context.current_context())

    def check_create_csr_from_scipy(shape, density, f):
        """Convert SciPy CSR matrices with ``f`` and compare all three CSR arrays."""
        csr_fields = ('data', 'indptr', 'indices')

        def assert_csr_almost_equal(nd, sp):
            for field in csr_fields:
                assert_almost_equal(getattr(nd, field).asnumpy(), getattr(sp, field))
            # converting back to SciPy must give the same arrays and dtype
            round_trip = nd.asscipy()
            for field in csr_fields:
                assert_almost_equal(getattr(round_trip, field), getattr(sp, field))
            assert sp.dtype == round_trip.dtype, (sp.dtype, round_trip.dtype)

        # random canonical csr
        canonical_sp = spsp.rand(shape[0], shape[1], density, format="csr")
        canonical_nd = f(canonical_sp)
        assert_csr_almost_equal(canonical_nd, canonical_sp)

        # non-canonical csr which contains duplicates and unsorted indices
        row_ptr = np.array([0, 2, 3, 7])
        col_idx = np.array([0, 2, 2, 0, 1, 2, 1])
        values = np.array([1, 2, 3, 4, 5, 6, 1])
        messy_sp = spsp.csr_matrix((values, col_idx, row_ptr), shape=(3, 3),
                                   dtype=canonical_nd.dtype)
        converted_nd = f(messy_sp, dtype=canonical_nd.dtype)
        # canonicalise a SciPy copy to obtain the expected result
        expected_sp = messy_sp.copy()
        expected_sp.sum_duplicates()
        expected_sp.sort_indices()
        assert_csr_almost_equal(converted_nd, expected_sp)

    max_dim0 = 20
    max_dim1 = 20
    dtype = np.float64
    for density in (0, 0.5):
        shape = rand_shape_2d(max_dim0, max_dim1)
        check_create_csr_from_nd(shape, density, dtype)
        check_create_csr_from_coo(shape, density, dtype)
        for converter in (mx.nd.sparse.array, mx.nd.array):
            check_create_csr_from_scipy(shape, density, converter)
| |
| |
def test_create_row_sparse():
    """Check row_sparse construction from (data, indices) and from NumPy-integer shape tuples."""
    max_dim0 = 50
    max_dim1 = 50
    for density in (0, 0.5, 1):
        shape = rand_shape_2d(max_dim0, max_dim1)
        source = rand_ndarray(shape, 'row_sparse', density)
        values = source.data
        row_idx = source.indices
        created = mx.nd.sparse.row_sparse_array((values, row_idx), shape=shape)
        assert created.stype == 'row_sparse'
        assert same(created.data.asnumpy(), values.asnumpy())
        assert same(created.indices.asnumpy(), row_idx.asnumpy())
        copied = mx.nd.array(created)
        assert same(copied.asnumpy(), created.asnumpy())

        # add this test since we added np.int32 and np.int64 to integer_types
        if len(shape) == 2:
            for np_int_type in (np.int32, np.int64):
                # ``shape`` is rebound on purpose: the second integer type
                # starts from the shape already bumped by the first one.
                shape = [np_int_type(dim) for dim in shape]
                matching = tuple(shape)
                mx.nd.sparse.row_sparse_array(matching, tuple(shape))
                # a mismatching ``shape`` argument must be rejected
                shape[0] += 1
                assert_exception(mx.nd.sparse.row_sparse_array, ValueError,
                                 matching, tuple(shape))
| |
| |
| |
def test_create_sparse_nd_infer_shape():
    """Check shape inference when sparse arrays are built without an explicit shape."""

    def check_create_csr_infer_shape(shape, density, dtype):
        """Build a CSR matrix without ``shape`` and sanity-check the inferred one."""
        try:
            source = rand_ndarray(shape, 'csr', density=density)
            values = source.data
            row_ptr = source.indptr
            col_idx = source.indices
            built = mx.nd.sparse.csr_matrix((values, col_idx, row_ptr), dtype=dtype)
            num_rows, num_cols = built.shape
            assert num_rows == len(row_ptr) - 1
            assert col_idx.shape[0] > 0, col_idx
            # no stored column index may reach the inferred column count
            assert np.sum((col_idx >= num_cols).asnumpy()) == 0
            assert built.dtype == dtype, (built.dtype, dtype)
        except ValueError:
            # cannot infer on invalid shape
            pass

    def check_create_rsp_infer_shape(shape, density, dtype):
        """Build a row_sparse array without ``shape`` and sanity-check the inferred one."""
        try:
            source = rand_ndarray(shape, 'row_sparse', density=density)
            values = source.data
            row_idx = source.indices
            built = mx.nd.sparse.row_sparse_array((values, row_idx), dtype=dtype)
            inferred = built.shape
            assert inferred[1:] == values.shape[1:]
            assert row_idx.ndim > 0
            assert built.dtype == dtype
            if row_idx.shape[0] > 0:
                # no stored row index may reach the inferred row count
                assert np.sum((row_idx >= inferred[0]).asnumpy()) == 0
        except ValueError:
            # cannot infer on invalid shape
            pass

    dtype = np.int32
    shape = rand_shape_2d()
    shape_3d = rand_shape_3d()
    for density in (0, 0.5, 1):
        check_create_csr_infer_shape(shape, density, dtype)
        check_create_rsp_infer_shape(shape, density, dtype)
        check_create_rsp_infer_shape(shape_3d, density, dtype)
| |
| |
def test_create_sparse_nd_from_dense():
    """Check sparse constructors given dense input: an NDArray, a NumPy array and a nested list."""

    def check_create_from_dns(shape, f, dense_arr, dtype, default_dtype, ctx):
        """Call ``f`` with and without explicit dtype/ctx and verify dtype and context."""
        explicit = f(dense_arr, dtype=dtype, ctx=ctx)
        assert same(explicit.asnumpy(), np.ones(shape))
        assert explicit.dtype == dtype
        assert explicit.context == ctx
        # without arguments the dtype is taken from the input and the
        # context is the current one
        implicit = f(dense_arr)
        assert implicit.dtype == default_dtype
        assert implicit.context == mx.context.current_context()

    shape = rand_shape_2d()
    dtype = np.int32
    src_dtype = np.float64
    ctx = mx.cpu(1)
    dense_arrs = [
        mx.nd.ones(shape, dtype=src_dtype),
        np.ones(shape, dtype=src_dtype),
        np.ones(shape, dtype=src_dtype).tolist(),
    ]
    for f in (mx.nd.sparse.csr_matrix, mx.nd.sparse.row_sparse_array):
        for dense_arr in dense_arrs:
            # a plain list has no dtype; float32 is the expected default
            if isinstance(dense_arr, (NDArray, np.ndarray)):
                default_dtype = dense_arr.dtype
            else:
                default_dtype = np.float32
            check_create_from_dns(shape, f, dense_arr, dtype, default_dtype, ctx)
| |
def test_create_sparse_nd_from_sparse():
    """Check sparse constructors given sparse input: MXNet sparse arrays and a SciPy CSR matrix."""

    def check_create_from_sp(shape, f, sp_arr, dtype, src_dtype, ctx):
        """Call ``f`` with and without explicit dtype/ctx and verify dtype and context."""
        explicit = f(sp_arr, dtype=dtype, ctx=ctx)
        assert same(explicit.asnumpy(), np.ones(shape))
        assert explicit.dtype == dtype
        assert explicit.context == ctx
        # without arguments the dtype follows the sparse input and the
        # context is the current one
        implicit = f(sp_arr)
        assert implicit.dtype == src_dtype
        assert implicit.context == mx.context.current_context()

    shape = rand_shape_2d()
    src_dtype = np.float64
    dtype = np.int32
    ctx = mx.cpu(1)
    ones = mx.nd.ones(shape, dtype=src_dtype)
    csr_inputs = [ones.tostype('csr')]
    rsp_inputs = [ones.tostype('row_sparse')]
    csr_inputs.append(spsp.csr_matrix(np.ones(shape, dtype=src_dtype)))
    # pair every constructor with the inputs it is given
    plan = [
        (mx.nd.sparse.csr_matrix, csr_inputs),
        (mx.nd.sparse.row_sparse_array, rsp_inputs),
    ]
    for constructor, inputs in plan:
        for sp_arr in inputs:
            check_create_from_sp(shape, constructor, sp_arr, dtype, src_dtype, ctx)
| |
| |
def test_create_sparse_nd_empty():
    """Check that sparse arrays created from a shape alone are all-zero with the expected metadata."""

    def check_empty(shape, stype):
        """``mx.nd.empty`` with a sparse stype must yield zeros of that stype."""
        created = mx.nd.empty(shape, stype=stype)
        assert created.stype == stype
        assert same(created.asnumpy(), np.zeros(shape))

    def check_shape_only(constructor, expected_stype, shape, dtype, ctx):
        """Build from ``shape`` with explicit, then default, dtype and ctx."""
        created = constructor(shape, dtype=dtype, ctx=ctx)
        assert created.stype == expected_stype
        assert created.dtype == dtype
        assert created.context == ctx
        assert same(created.asnumpy(), np.zeros(shape))
        # check the default value for dtype and ctx
        created = constructor(shape)
        assert created.dtype == np.float32
        assert created.context == mx.context.current_context()

    def check_csr_empty(shape, dtype, ctx):
        check_shape_only(mx.nd.sparse.csr_matrix, 'csr', shape, dtype, ctx)

    def check_rsp_empty(shape, dtype, ctx):
        check_shape_only(mx.nd.sparse.row_sparse_array, 'row_sparse', shape, dtype, ctx)

    shape = rand_shape_2d()
    shape_3d = rand_shape_3d()
    dtype = np.int32
    ctx = mx.cpu(1)
    for stype in ('csr', 'row_sparse'):
        check_empty(shape, stype)
    check_csr_empty(shape, dtype, ctx)
    check_rsp_empty(shape, dtype, ctx)
    check_rsp_empty(shape_3d, dtype, ctx)
| |
| |
def test_synthetic_dataset_generator():
    """Check CSR arrays produced by ``rand_sparse_ndarray`` with ``distribution="powerlaw"``."""

    def check_powerlaw(csr_arr, final_row=1):
        """Assert the doubling pattern over rows ``1..final_row``.

        For each ``row`` the column index stored just before ``indptr[row]``
        and the one just before ``indptr[row + 1]`` are read; after adding one
        to each, the second must be exactly twice the first.
        """
        col_idx = csr_arr.indices.asnumpy()
        row_ptr = csr_arr.indptr.asnumpy()
        for row in range(1, final_row + 1):
            nnz_here = col_idx[row_ptr[row] - 1] + 1
            nnz_next = col_idx[row_ptr[row + 1] - 1] + 1
            assert nnz_next == 2 * nnz_here

    def make_powerlaw(shape, density):
        arr, _ = rand_sparse_ndarray(shape=shape, stype="csr",
                                     density=density, distribution="powerlaw")
        return arr

    csr_arr_cols = make_powerlaw((32, 10000), 0.01)
    csr_arr_small = make_powerlaw((5, 5), 0.5)
    csr_arr_big = make_powerlaw((32, 1000000), 0.4)
    csr_arr_square = make_powerlaw((1600, 1600), 0.5)

    # Test if density is preserved: 32 * 10000 * 0.01 == 3200 stored values
    assert len(csr_arr_cols.data) == 3200
    check_powerlaw(csr_arr_cols, final_row=9)
    check_powerlaw(csr_arr_small, final_row=1)
    check_powerlaw(csr_arr_big, final_row=4)
    check_powerlaw(csr_arr_square, final_row=6)
| |
| |
def test_sparse_nd_fluent():
    """Check that method-style calls on sparse arrays match the ``mx.ndarray`` functions."""

    def check_fluent_regular(stype, func, kwargs, shape=(5, 17), equal_nan=False):
        """Compare ``mx.ndarray.<func>(data, **kwargs)`` with ``data.<func>(**kwargs)``."""
        with mx.name.NameManager():
            data = mx.nd.random_uniform(shape=shape, ctx=default_device()).tostype(stype)
            regular = getattr(mx.ndarray, func)(data, **kwargs)
            fluent = getattr(data, func)(**kwargs)
            # normalise the single-output case to a pair list
            if isinstance(regular, list):
                pairs = zip(regular, fluent)
            else:
                pairs = [(regular, fluent)]
            for expected, actual in pairs:
                assert almost_equal(expected.asnumpy(), actual.asnumpy(), equal_nan=equal_nan)

    both_stypes = ('csr', 'row_sparse')

    strict_funcs = ['zeros_like', 'square', 'round', 'rint', 'fix', 'floor', 'ceil', 'trunc',
                    'abs', 'sign', 'sin', 'degrees', 'radians', 'expm1']
    for func in strict_funcs:
        for stype in both_stypes:
            check_fluent_regular(stype, func, {})

    # these are compared with equal_nan=True
    nan_tolerant_funcs = ['arcsin', 'arctan', 'tan', 'sinh', 'tanh',
                          'arcsinh', 'arctanh', 'log1p', 'sqrt', 'relu']
    for func in nan_tolerant_funcs:
        for stype in both_stypes:
            check_fluent_regular(stype, func, {}, equal_nan=True)

    check_fluent_regular('csr', 'slice', {'begin': (2, 5), 'end': (4, 7)}, shape=(5, 17))
    clip_kwargs = {'a_min': -0.25, 'a_max': 0.75}
    check_fluent_regular('row_sparse', 'clip', dict(clip_kwargs))
    check_fluent_regular('csr', 'clip', dict(clip_kwargs))

    # reductions along axis 0 are checked for csr only
    for func in ('sum', 'mean', 'norm'):
        check_fluent_regular('csr', func, {'axis': 0})
| |
| |
def test_sparse_nd_exception():
    """Test that invalid sparse operator calls throw an exception."""
    a = mx.nd.ones((2,2))
    # (expected exception, callable, positional args, keyword args)
    invalid_calls = [
        (mx.base.MXNetError, mx.nd.sparse.retain, (a,), {'invalid_arg': "garbage_value"}),
        (ValueError, mx.nd.sparse.csr_matrix, (a,), {'shape': (3,2)}),
        (ValueError, mx.nd.sparse.csr_matrix, ((2,2),), {'shape': (3,2)}),
        (ValueError, mx.nd.sparse.row_sparse_array, ((2,2),), {'shape': (3,2)}),
        (ValueError, mx.nd.sparse.zeros, ("invalid_stype", (2,2)), {}),
    ]
    for expected_exc, op, args, kwargs in invalid_calls:
        assertRaises(expected_exc, op, *args, **kwargs)
| |
def test_sparse_nd_check_format():
    """Test ``check_format`` on well-formed and malformed sparse ndarrays.

    Well-formed random and all-zero arrays of both storage types must pass.
    Hand-built CSR and row_sparse arrays with inconsistent index data must
    raise ``mx.base.MXNetError``, and converting a non-2D dense array to CSR
    must raise ``ValueError``.
    """
    shape = rand_shape_2d()
    for stype in ("csr", "row_sparse"):
        random_arr, _ = rand_sparse_ndarray(shape, stype)
        random_arr.check_format()
        zero_arr = mx.nd.sparse.zeros(stype, shape)
        zero_arr.check_format()

    # ---- malformed CSR inputs: (indices, indptr) for three stored values ----
    csr_shape = (3, 4)
    csr_values = [7, 8, 9]
    bad_csr_layouts = [
        # indptr holds 5, which exceeds the three stored values, and is not sorted
        ([0, 2, 1], [0, 5, 2, 3]),
        # column indices within the first row are not in ascending order
        ([2, 1, 1], [0, 2, 2, 3]),
        # last indptr entry (4) does not equal the number of indices (3)
        ([1, 2, 1], [0, 2, 2, 4]),
        # indptr contains a negative entry
        ([0, 2, 1], [0, -2, 2, 3]),
    ]
    for col_indices, row_ptr in bad_csr_layouts:
        a = mx.nd.sparse.csr_matrix((csr_values, col_indices, row_ptr), shape=csr_shape)
        assertRaises(mx.base.MXNetError, a.check_format)

    # ---- CSR must be 2-dimensional: 1-D and 3-D dense inputs are rejected ----
    for dense_values in ([1, 2, 3], [[[1, 2, 3]]]):
        a = mx.nd.array(dense_values)
        assertRaises(ValueError, a.tostype, 'csr')

    # ---- malformed row_sparse inputs: row indices for two stored rows ----
    rsp_shape = (3, 2)
    rsp_values = [[1, 2], [3, 4]]
    bad_row_indices = [
        [1, 4],   # row index 4 is out of range for 3 rows
        [1, 0],   # row indices are not in ascending order
        [1, -2],  # row index is negative
    ]
    for row_indices in bad_row_indices:
        a = mx.nd.sparse.row_sparse_array((rsp_values, row_indices), shape=rsp_shape)
        assertRaises(mx.base.MXNetError, a.check_format)
| |
def test_sparse_nd_norm():
    """Compare ``norm`` on sparse arrays with ``norm`` on their dense conversion."""

    def check_sparse_nd_norm(stype, shape, density, **kwargs):
        """Assert that the sparse norm equals the norm of the dense copy.

        ``kwargs`` are forwarded unchanged to both ``norm`` calls.
        """
        sparse_data, _ = rand_sparse_ndarray(shape, stype, density)
        actual = sparse_data.norm(**kwargs)
        expected = sparse_data.tostype('default').norm(**kwargs)
        assert_almost_equal(actual.asnumpy(), expected.asnumpy())

    shape = (5, 5)
    stypes = ['row_sparse', 'csr']
    densities = [0, 0.5, 1]
    for storage in stypes:
        for fill in densities:
            check_sparse_nd_norm(storage, shape, fill, axis=None, keepdims=False, ord=2)

    # test fallback
    # The fallback cases run once, after the sweep, with the last storage type
    # and density that the loops above visited.
    last_stype, last_density = stypes[-1], densities[-1]
    check_sparse_nd_norm(last_stype, shape, last_density, axis=0, keepdims=False, ord=2)
    check_sparse_nd_norm(last_stype, shape, last_density, axis=None, keepdims=True, ord=2)
| |
def test_sparse_fc():
    """Compare ``mx.nd.sparse.FullyConnected`` with dense ``mx.nd.FullyConnected``."""

    def check_sparse_fc(batch_size, dim_in, dim_out, stype):
        """Run FC with row_sparse weight/bias on ``stype`` data and compare with dense FC.

        The reference result uses dense copies of data and weight; the bias
        array is passed to both operators unchanged.
        """
        # Inputs are generated in the order data, weight, bias.
        data = rand_ndarray((batch_size, dim_in), stype, density=0.5)
        weight = rand_ndarray((dim_out, dim_in), 'row_sparse', density=1)
        bias = rand_ndarray((dim_out, 1), 'row_sparse', density=1)

        sparse_out = mx.nd.sparse.FullyConnected(data, weight, num_hidden=dim_out, bias=bias)
        dense_out = mx.nd.FullyConnected(data.tostype('default'), weight.tostype('default'),
                                         num_hidden=dim_out, bias=bias)
        assert_almost_equal(sparse_out.asnumpy(), dense_out.asnumpy())

    # FC with row_sparse weight w/ density=1:
    #   'default' -> dense data
    #   'csr'     -> csr data (fallback)
    for data_stype in ('default', 'csr'):
        check_sparse_fc(5, 10, 8, data_stype)
| |
def test_sparse_take():
    """Compare ``mx.nd.take`` on CSR input with ``np.take`` along axis 0."""

    def check_sparse_take(density, mode):
        """Take random rows (indices drawn from [-5, 15)) and compare with numpy.

        Also asserts that the result's first ``indptr`` entry is 0.
        """
        # Random draws happen in this order: data shape, index count, data, indices.
        data_shape = rand_shape_2d()
        num_indices = np.random.randint(low=1, high=10)
        csr_data = rand_ndarray(data_shape, 'csr', density=density).astype('int32')
        idx = mx.nd.array(np.random.randint(low=-5, high=15, size=(num_indices,)))

        # Reference result from numpy on the dense copy.
        dense_np = csr_data.asnumpy()
        idx_np = idx.asnumpy().astype('int32')
        expected = np.take(dense_np, idx_np, mode=mode, axis=0)

        taken = mx.nd.take(csr_data, idx, mode=mode)
        assert_almost_equal(taken.asnumpy(), expected)
        assert taken.indptr[0].asscalar() == 0

    for density in (0, 0.5, 1):
        for mode in ('clip', 'wrap'):
            check_sparse_take(density, mode)
| |
def test_sparse_getnnz():
    """Compare ``mx.nd.contrib.getnnz`` on CSR input with scipy's ``getnnz``.

    The test returns immediately when the default device is a GPU
    (presumably the operator is CPU-only -- TODO confirm).
    """
    # Compare by value: ``is`` on a string literal tests object identity, which
    # depends on interning and raises a SyntaxWarning on Python 3.8+.
    if default_device().device_type == 'gpu':
        return

    def check_sparse_getnnz(density, axis):
        """Assert that getnnz over ``axis`` matches scipy for a random CSR array."""
        shape = rand_shape_2d()
        data = rand_ndarray(shape, 'csr', density=density)
        data_sp = data.asscipy()
        result = mx.nd.contrib.getnnz(data, axis=axis)
        expected_result = data_sp.getnnz(axis=axis)
        assert_almost_equal(result.asnumpy(), expected_result)

    densities = [0, 0.5, 1]
    axis = [1, None]
    for d in densities:
        for a in axis:
            check_sparse_getnnz(d, a)
| |