# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import copy
import sys
import os
import logging
import re
import json
import mxnet as mx
import numpy as np
from common import assertRaises, models, TemporaryDirectory
from mxnet.base import NotImplementedForSymbol
from mxnet.test_utils import discard_stderr, rand_shape_nd, use_np, environment
from mxnet.util import np_shape
import pickle as pkl
def test_symbol_basic():
mlist = []
mlist.append(models.mlp2())
for m in mlist:
m.list_arguments()
m.list_outputs()
def test_symbol_bool():
x = mx.symbol.Variable('x')
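# A Symbol is a lazy graph node with no concrete value, so truth-value
# evaluation is deliberately unsupported and raises NotImplementedForSymbol.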
assertRaises(NotImplementedForSymbol, bool, x)
def test_symbol_compose():
data = mx.symbol.Variable('data')
net1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=10)
net1 = mx.symbol.FullyConnected(data=net1, name='fc2', num_hidden=100)
assert net1.list_arguments() == ['data', 'fc1_weight', 'fc1_bias', 'fc2_weight', 'fc2_bias']
net2 = mx.symbol.FullyConnected(name='fc3', num_hidden=10)
net2 = mx.symbol.Activation(data=net2, act_type='relu')
net2 = mx.symbol.FullyConnected(data=net2, name='fc4', num_hidden=20)
composed = net2(fc3_data=net1, name='composed')
multi_out = mx.symbol.Group([composed, net1])
assert len(multi_out.list_outputs()) == 2
assert len(multi_out) == 2
def test_symbol_copy():
data = mx.symbol.Variable('data')
data_2 = copy.deepcopy(data)
data_3 = copy.copy(data)
assert data.tojson() == data_2.tojson()
assert data.tojson() == data_3.tojson()
def test_symbol_internal():
data = mx.symbol.Variable('data')
oldfc = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=10)
net1 = mx.symbol.FullyConnected(data=oldfc, name='fc2', num_hidden=100)
assert net1.list_arguments() == ['data', 'fc1_weight', 'fc1_bias', 'fc2_weight', 'fc2_bias']
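# get_internals() returns a symbol grouping every internal output of the
# graph, so intermediate results such as 'fc1_output' can be selected by name.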
internal = net1.get_internals()
fc1 = internal['fc1_output']
assert fc1.list_arguments() == oldfc.list_arguments()
def test_symbol_children():
data = mx.symbol.Variable('data')
oldfc = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=10)
net1 = mx.symbol.FullyConnected(data=oldfc, name='fc2', num_hidden=100)
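# get_children() returns the direct inputs of the last node (fc2 here):
# the fc1 output plus fc2's own weight and bias.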
assert net1.get_children().list_outputs() == ['fc1_output', 'fc2_weight', 'fc2_bias']
assert len(net1.get_children()) == 3
assert net1.get_children().get_children().list_outputs() == ['data', 'fc1_weight', 'fc1_bias']
assert len(net1.get_children().get_children()) == 3
assert net1.get_children()['fc2_weight'].list_arguments() == ['fc2_weight']
assert net1.get_children()['fc2_weight'].get_children() is None
data = mx.sym.Variable('data')
sliced = mx.sym.SliceChannel(data, num_outputs=3, name='slice')
concat = mx.sym.Concat(*list(sliced))
assert concat.get_children().list_outputs() == \
['slice_output0', 'slice_output1', 'slice_output2']
assert sliced.get_children().list_outputs() == ['data']
def test_symbol_pickle():
mlist = [models.mlp2()]
data = pkl.dumps(mlist)
mlist2 = pkl.loads(data)
for x, y in zip(mlist, mlist2):
assert x.tojson() == y.tojson()
def test_symbol_saveload():
sym = models.mlp2()
fname = 'tmp_sym.json'
sym.save(fname)
data2 = mx.symbol.load(fname)
# loading the saved file should reproduce the original symbol's JSON
assert sym.tojson() == data2.tojson()
os.remove(fname)
def test_symbol_infer_shape():
num_hidden = 128
num_dim = 64
num_sample = 10
data = mx.symbol.Variable('data')
prev = mx.symbol.Variable('prevstate')
x2h = mx.symbol.FullyConnected(data=data, name='x2h', num_hidden=num_hidden)
h2h = mx.symbol.FullyConnected(data=prev, name='h2h', num_hidden=num_hidden)
out = mx.symbol.Activation(data=mx.sym.elemwise_add(x2h, h2h), name='out', act_type='relu')
# full shape inference will fail because no shape information is available for prevstate (the input of h2h)
ret = out.infer_shape(data=(num_sample, num_dim))
assert ret == (None, None, None)
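# infer_shape_partial() infers as much as it can and leaves undetermined
# arguments (here h2h_weight) as empty shapes instead of failing.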
arg, out_shapes, aux_shapes = out.infer_shape_partial(data=(num_sample, num_dim))
arg_shapes = dict(zip(out.list_arguments(), arg))
assert arg_shapes['data'] == (num_sample, num_dim)
assert arg_shapes['x2h_weight'] == (num_hidden, num_dim)
assert arg_shapes['h2h_weight'] == ()
# now we can do full shape inference
state_shape = out_shapes[0]
arg, out_shapes, aux_shapes = out.infer_shape(data=(num_sample, num_dim), prevstate=state_shape)
arg_shapes = dict(zip(out.list_arguments(), arg))
assert arg_shapes['data'] == (num_sample, num_dim)
assert arg_shapes['x2h_weight'] == (num_hidden, num_dim)
assert arg_shapes['h2h_weight'] == (num_hidden, num_hidden)
# Partial shape inference with some unknown dimensions
data_shape = (1, 0, 0, 0)
data = mx.sym.Variable('data', shape=data_shape)
weight = mx.sym.Variable('weight')
cdata = mx.sym.cast(data, dtype='float16')
cweight = mx.sym.cast(weight, dtype='float16')
test = mx.sym.Convolution(data=cdata, weight=cweight, pad=(3, 3), num_filter=64, stride=(2, 2), no_bias=True, kernel=(7, 7))
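# Under legacy shape semantics a 0 marks an unknown dimension; partial
# inference keeps it for the unspecified channel dims while still filling
# in the known parts of the weight shape.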
arg, _, _ = test.infer_shape_partial()
arg_shapes = dict(zip(test.list_arguments(), arg))
assert arg_shapes['data'] == data_shape
assert arg_shapes['weight'] == (64, 0, 7, 7)
def test_symbol_infer_shape_var():
"Test specifying shape information when constructing a variable"
shape = (2, 3)
a = mx.symbol.Variable('a', shape=shape)
b = mx.symbol.Variable('b')
c = mx.symbol.elemwise_add(a, b)
arg_shapes, out_shapes, aux_shapes = c.infer_shape()
assert arg_shapes[0] == shape
assert arg_shapes[1] == shape
assert out_shapes[0] == shape
overwrite_shape = (5, 6)
arg_shapes, out_shapes, aux_shapes = c.infer_shape(a=overwrite_shape)
assert arg_shapes[0] == overwrite_shape
assert arg_shapes[1] == overwrite_shape
assert out_shapes[0] == overwrite_shape
def test_symbol_magic_abs():
for dim in range(1, 7):
with mx.name.NameManager():
data = mx.symbol.Variable('data')
method = data.abs(name='abs0')
magic = abs(data)
regular = mx.symbol.abs(data, name='abs0')
ctx = {'ctx': mx.context.current_context(), 'data': rand_shape_nd(dim)}
mx.test_utils.check_consistency(
[method, magic], ctx_list=[ctx, ctx])
mx.test_utils.check_consistency(
[regular, magic], ctx_list=[ctx, ctx])
def test_symbol_fluent():
has_grad = set(['flatten', 'expand_dims', 'flip', 'tile', 'transpose', 'sum', 'nansum', 'prod',
'nanprod', 'mean', 'max', 'min', 'reshape', 'broadcast_to', 'split',
'broadcast_axes', 'broadcast_like', 'pad', 'swapaxes', 'slice', 'slice_axis', 'slice_like',
'take', 'one_hot', 'pick', 'sort', 'topk', 'argsort', 'argmax', 'argmin',
'clip', 'abs', 'sign', 'sin', 'cos', 'tan', 'arcsin', 'arccos', 'arctan',
'degrees', 'radians', 'sinh', 'cosh', 'tanh', 'arcsinh', 'arccosh', 'arctanh',
'exp', 'expm1', 'log', 'log10', 'log2', 'log1p', 'sqrt', 'rsqrt',
'square', 'reciprocal', 'reshape_like', 'cbrt', 'rcbrt', 'relu', 'sigmoid',
'softmax', 'log_softmax', 'softmin', 'rint', 'ceil', 'floor', 'trunc', 'fix'])
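# Each fluent method call (data.func(...)) should build the same graph as the
# corresponding operator call (mx.symbol.func(data, ...)); gradients are only
# compared for operators listed in has_grad.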
def check_fluent_regular(func, kwargs, shape=(5, 17, 1), equal_nan=False):
with mx.name.NameManager():
data = mx.symbol.Variable('data')
regular = getattr(mx.symbol, func)(data, name=func+'0', **kwargs)
fluent = getattr(data, func)(**kwargs)
check_symbol_consistency(regular, fluent, {'ctx': mx.context.current_context(),
'data': shape},
skip_grad=func not in has_grad,
equal_nan=equal_nan)
for func in ['flatten', 'norm', 'round', 'rint', 'fix', 'floor', 'ceil', 'trunc', 'zeros_like',
'ones_like', 'abs', 'sign', 'sin', 'cos', 'degrees', 'radians', 'exp', 'expm1',
'square', 'reciprocal', 'argmax_channel', 'shape_array', 'size_array']:
check_fluent_regular(func, {})
for func in ['arccosh', 'arcsin', 'arccos', 'arctan', 'tan', 'sinh', 'cosh', 'tanh',
'arcsinh', 'arctanh', 'log', 'log10', 'log2', 'log1p', 'sqrt', 'rsqrt',
'cbrt', 'rcbrt', 'relu', 'sigmoid', 'softmax', 'log_softmax', 'softmin']:
check_fluent_regular(func, {}, equal_nan=True)
for func in ['expand_dims', 'flip', 'sort', 'topk', 'argsort', 'argmax', 'argmin']:
check_fluent_regular(func, {'axis': 1})
check_fluent_regular('one_hot', {'depth': 15})
check_fluent_regular('tile', {'reps': (1,2)})
check_fluent_regular('repeat', {'repeats': 3})
check_fluent_regular('transpose', {'axes': (1,0,2)})
check_fluent_regular('split', {'axis': 2, 'num_outputs': 3}, shape=(5, 17, 6))
check_fluent_regular('slice', {'begin': (2, 5, 1), 'end': (4, 7, 6)}, shape=(5, 17, 6))
check_fluent_regular('slice_axis', {'axis': 1, 'begin': 5, 'end': 7})
check_fluent_regular('slice_like', {'axes': (0, -2), 'shape_like': mx.sym.zeros((3, 3))})
check_fluent_regular('clip', {'a_min': 0.25, 'a_max': 0.75})
check_fluent_regular('broadcast_axes', {'axis': (2,), 'size': (5,)})
check_fluent_regular('broadcast_like', {'rhs': mx.sym.ones((1, 5)), 'lhs_axes': (0,), 'rhs_axes': (1,)}, shape=(1,9))
check_fluent_regular('pad', {'mode': 'constant', 'pad_width': (0,0,0,0,3,0,0,4)}, shape=(5, 17, 2, 3))
check_fluent_regular('reshape_like', {'rhs': mx.sym.ones((30, 17))}, shape=(5, 17, 2, 3))
for func in ['sum', 'nansum', 'prod', 'nanprod', 'mean', 'max', 'min', 'norm']:
check_fluent_regular(func, {'axis': (1, 2)})
check_fluent_regular('reshape', {'shape': (17, 1, 5)})
check_fluent_regular('broadcast_to', {'shape': (5, 17, 47)})
check_fluent_regular('squeeze', {'axis': (1, 3)}, shape=(2, 1, 3, 1, 4))
check_fluent_regular('squeeze', {}, shape=(2, 1, 3, 1, 4))
def check_symbol_consistency(sym1, sym2, ctx, skip_grad=False, equal_nan=False):
assert sym1.list_arguments() == sym2.list_arguments()
assert sym1.list_auxiliary_states() == sym2.list_auxiliary_states()
assert sym1.list_outputs() == sym2.list_outputs()
mx.test_utils.check_consistency([sym1, sym2], ctx_list=[ctx, ctx],
grad_req='null' if skip_grad else 'write',
equal_nan=equal_nan)
def test_blockgrad():
a = mx.sym.Variable('a')
b = mx.sym.BlockGrad(2*a)
exe = b._simple_bind(ctx=mx.cpu(), a=(10,10))
def test_zero_prop2():
x = mx.sym.Variable('x')
idx = mx.sym.Variable('idx')
y = mx.sym.batch_take(x, idx)
z = mx.sym.stop_gradient(y)
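# stop_gradient blocks gradient flow through y; the backward pass below
# exercises graph pruning when no gradient needs to reach batch_take's inputs.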
exe = z._simple_bind(ctx=mx.cpu(), x=(10, 10), idx=(10,),
type_dict={'x': np.float32, 'idx': np.int32})
exe.forward(is_train=True)
exe.backward()
mx.nd.waitall()
def test_simple_bind_incomplete_shape_inference_in_one_forward_pass():
r"""This is a special case that results in shape inference
failure after moving _simple_bind logic from frontend to backend.
Added here for testing against a network similar to the following one.
Network diagram:
weight --> abs_op --> sum_op --\
                                |--> add_op
data   --> fc_op  --> sum_op --/
Given data's shape, if the shape inference starts from the weight node,
then the node entries of abs_op and sum_op are unknown in the
forward pass. Therefore, there are several unknown shapes after the
first forward pass is done. Now the backward inference pass starts with
the assumption that there are no unknown-shape node entries in the forward
pass, and consequently, leads to CHECK_EQ failure.
"""
data_shape = (5, 13)
data = mx.sym.Variable('data')
fc = mx.sym.FullyConnected(data=data, num_hidden=1, no_bias=True, name='fc')
modified_weight = mx.sym.abs(fc.get_internals()['fc_weight'])
net = mx.sym.sum(modified_weight) + mx.sym.sum(fc)
net._simple_bind(ctx=mx.cpu(), data=data_shape)
def test_simple_bind_gradient_graph_possible_with_cycle():
"""This is a special case that results in a cycle in the gradient graph
before this bug was fixed. With the following symbol, the node entries
passed into function AggregateGradient(std::vector<nnvm::NodeEntry>&& v)
are the outputs of the same node. Therefore, adding a node to the
control_deps of itself must be skipped.
See GitHub issue:
https://github.com/apache/mxnet/issues/8029
for more details."""
data = mx.symbol.Variable('data')
res = data + data + data + data + data + data + data + data
res._simple_bind(ctx=mx.cpu(), data=(1,))
def test_children_same_name():
a = mx.sym.Variable('data')
b = a + a
for _ in b.get_children():
pass
def test_transpose_nullop():
for dim in range(1, 7):
a = mx.sym.Variable('a')
b = mx.sym.transpose(a, axes=tuple(np.random.permutation(dim)))
c = mx.sym.zeros_like(b)
shape = rand_shape_nd(dim)
nd_a = mx.nd.random.normal(shape=shape)
c_out = c.eval(ctx=mx.cpu(), a=nd_a)
b_out = b.eval(ctx=mx.cpu(), a=nd_a)
assert mx.test_utils.same(c_out[0].asnumpy(),
np.zeros_like(b_out[0].asnumpy()))
def test_gen_atomic_symbol_multiple_outputs():
data = mx.sym.Variable('data')
p = mx.sym.Variable('param')
h0 = mx.sym.Variable('h0')
h1 = mx.sym.Variable('h1')
s = mx.sym.RNN(data, p, h0, h1, state_size=10, num_layers=2,
bidirectional=True, state_outputs=True, mode='lstm')
atomic_sym = s._gen_atomic_symbol()
def test_eliminate_common_expr():
# helper function to test a single model
def check_cse_on_symbol(sym, expected_savings, check_data, **kwargs):
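# Bind the symbol twice, once with common-expression elimination disabled and
# once enabled, then compare forward outputs, gradients, and the number of
# internal nodes saved by the optimization.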
inputs = sym.list_inputs()
shapes = {inp : kwargs[inp].shape for inp in inputs}
rtol = {'float16' : 1e-2,
'float32' : 1.5e-6,
'float64' : 1.5e-6,
}
atol = {'float16' : 1e-3,
'float32' : 1e-7,
'float64' : 1e-7,
}
for dtype in ['float16', 'float32', 'float64']:
data = {inp : kwargs[inp].astype(dtype) for inp in inputs}
for grad_req in ['write', 'add']:
type_dict = {inp : dtype for inp in inputs}
with environment({'MXNET_ELIMINATE_COMMON_EXPR': '0'}):
orig_exec = sym._simple_bind(ctx=mx.cpu(0), grad_req=grad_req,
type_dict=type_dict, **shapes)
with environment({'MXNET_ELIMINATE_COMMON_EXPR': '1'}):
cse_exec = sym._simple_bind(ctx=mx.cpu(0), grad_req=grad_req,
type_dict=type_dict, **shapes)
fwd_orig = orig_exec.forward(is_train=True, **data)
out_grads = [mx.nd.ones_like(arr) for arr in fwd_orig]
orig_exec.backward(out_grads=out_grads)
fwd_cse = cse_exec.forward(is_train=True, **data)
cse_exec.backward(out_grads=out_grads)
if check_data:
for orig, cse in zip(fwd_orig, fwd_cse):
np.testing.assert_allclose(orig.asnumpy(), cse.asnumpy(),
rtol=rtol[dtype], atol=atol[dtype])
for orig, cse in zip(orig_exec.grad_arrays, cse_exec.grad_arrays):
if orig is None and cse is None:
continue
assert orig is not None
assert cse is not None
np.testing.assert_allclose(orig.asnumpy(), cse.asnumpy(),
rtol=rtol[dtype], atol=atol[dtype])
orig_sym_internals = orig_exec.get_optimized_symbol().get_internals()
cse_sym_internals = cse_exec.get_optimized_symbol().get_internals()
# test that the graph has been simplified as expected
assert (len(cse_sym_internals) + expected_savings) == len(orig_sym_internals)
a = mx.sym.Variable('a')
b = mx.sym.Variable('b')
c = mx.sym.Variable('c')
shape = rand_shape_nd(2)
arr1 = mx.random.uniform(shape=shape)
arr2 = mx.random.uniform(shape=shape)
arr3 = mx.random.uniform(shape=shape)
check_cse_on_symbol((a+1) + (a+2), expected_savings=0, check_data=True, a=arr1, b=arr2)
check_cse_on_symbol((a+b) + (a+b), expected_savings=1, check_data=True, a=arr1, b=arr2)
check_cse_on_symbol(((a+b)+c) + ((a+b)+c), expected_savings=2, check_data=True,
a=arr1, b=arr2, c=arr3)
d = a + 1
# a*d node gets eliminated, but then a copy is inserted to isolate the outputs, so no net gain.
check_cse_on_symbol(mx.sym.Group([a*d, a*d]), expected_savings=0, check_data=True, a=arr1)
# the duplicate a*d node is eliminated, then the duplicated add of b, but a copy is inserted, for a net savings of 1.
check_cse_on_symbol(mx.sym.Group([a*d+b, a*d+b]), expected_savings=1, check_data=True,
a=arr1, b=arr2)
# dropout uses a resource that precludes any optimization
check_cse_on_symbol(mx.sym.Dropout(a) +
mx.sym.Dropout(a), expected_savings=0, check_data=False, a=arr1)
def test_load_save_symbol():
batch_size = 10
num_hidden = 128
num_features = 784
def get_net():
data = mx.sym.var('data')
weight = mx.sym.var('weight', shape=(num_hidden, 0))
return mx.sym.FullyConnected(data, weight, num_hidden=num_hidden)
for flag1 in [False, True]:
with np_shape(flag1):
net_json_str = get_net().tojson()
net_data = json.loads(net_json_str)
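# When saved under np_shape semantics the JSON records an 'is_np_shape'
# attribute; without np_shape the attribute is absent.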
assert "attrs" in net_data
if flag1:
assert "is_np_shape" in net_data["attrs"]
else:
assert "is_np_shape" not in net_data["attrs"]
with TemporaryDirectory() as work_dir:
fname = os.path.join(work_dir, 'test_sym.json')
with open(fname, 'w') as fp:
json.dump(net_data, fp)
# test loading a 1.5.0 symbol file in 1.6.0 and later,
# with or without np_shape semantics
for flag2 in [False, True]:
if flag1:  # skip: the file was saved under np_shape semantics, where 0 means a zero-size dim rather than unknown
continue
with np_shape(flag2):
net = mx.sym.load(fname)
arg_shapes, out_shapes, aux_shapes = net.infer_shape(data=(batch_size, num_features))
assert arg_shapes[0] == (batch_size, num_features) # data
assert arg_shapes[1] == (num_hidden, num_features) # weight
assert arg_shapes[2] == (num_hidden,) # bias
assert out_shapes[0] == (batch_size, num_hidden) # output
assert len(aux_shapes) == 0
def test_infershape_happens_for_all_ops_in_graph():
v = mx.sym.Variable('V')
s = mx.sym.transpose(v)
x = mx.sym.Variable('x')
s2 = x + v
s3 = s + s2
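# With x=(2,3), V is inferred as (2,3) through s2 = x + v, but s = transpose(v)
# then has shape (3,2), so building s3 = s + s2 must fail shape inference.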
with discard_stderr():
try:
# This should throw an exception as you cannot add arrays
# with shapes [2,3] and [3,2]
e = s3._simple_bind(ctx=mx.cpu(), x=(2,3), grad_req='null')
except:
return
assert False
def test_symbol_copy_set_attr():
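# a copied symbol (classic or numpy-flavored) carries independent attributes:
# renaming the copy must not change the original's name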
a = mx.sym.Variable('a')
b = copy.copy(a)
b._set_attr(name='b')
assert a.name == 'a' and b.name == 'b'
a = mx.sym.Variable('a').as_np_ndarray()
b = copy.copy(a)
b._set_attr(name='b')
assert a.name == 'a' and b.name == 'b'