# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import mxnet as mx
import mxnet.ndarray as nd
import numpy as np
from mxnet.contrib.autograd import *
from mxnet.test_utils import *
from common import setup_module, with_seed, teardown
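

# Helper: differentiate `func` via grad_and_loss and verify that both the
# returned output and the computed gradients match the analytic reference
# `grad_func`.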
def autograd_assert(*args, **kwargs):
    func = kwargs["func"]
    grad_f = kwargs["grad_func"]
    argnum = kwargs["argnum"] if 'argnum' in kwargs else None
    grad_func = grad_and_loss(func, argnum)
    grad_vals, output = grad_func(*args)
    res = func(*args)
    assert same(output.asnumpy(), res.asnumpy())
    grad_res = grad_f(*args)
    assert len(grad_vals) == len(grad_res)
    for a, b in zip(grad_vals, grad_res):
        assert same(a.asnumpy(), b.asnumpy())
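
# Unary ops (exp, x/2, x**2) checked against hand-written gradients.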
@with_seed()
def test_unary_func():
    x = nd.uniform(shape=(4, 5))
    f_exp = lambda x: nd.exp(x)
    f_exp_grad = lambda x: [nd.exp(x)]
    autograd_assert(x, func=f_exp, grad_func=f_exp_grad)
    f_half = lambda x: x/2
    f_half_grad = lambda x: [nd.ones(x.shape) * 0.5]
    autograd_assert(x, func=f_half, grad_func=f_half_grad)
    f_square = lambda x: x**2
    f_square_grad = lambda x: [2*x]
    autograd_assert(x, func=f_square, grad_func=f_square_grad)
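
# Binary ops (x+y, x*y, x+x*y) checked against hand-written gradients.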
@with_seed()
def test_binary_func():
    x = nd.uniform(shape=(4, 5))
    y = nd.uniform(shape=(4, 5))
    f_add = lambda x, y: x+y
    f_add_grad = lambda x, y: [nd.ones(x.shape), nd.ones(y.shape)]
    autograd_assert(x, y, func=f_add, grad_func=f_add_grad)
    f_mul = lambda x, y: x*y
    f_mul_grad = lambda x, y: [y, x]
    autograd_assert(x, y, func=f_mul, grad_func=f_mul_grad)
    f_compose = lambda x, y: x+x*y
    f_compose_grad = lambda x, y: [nd.ones(x.shape) + y, x]
    autograd_assert(x, y, func=f_compose, grad_func=f_compose_grad)
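
# Differentiating through a stateful backend operator (FullyConnected);
# currently this only checks that grad_and_loss runs without error (see TODO).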
@with_seed()
def test_operator_with_state():
    def f_fc(a, b, weight, bias):
        x = a*b
        fc = nd.FullyConnected(
            x, weight, bias, num_hidden=32)
        return fc

    a = nd.uniform(shape=(64, 50))
    b = nd.uniform(shape=(64, 50))
    weight = nd.uniform(shape=(32, 50))
    bias = nd.uniform(shape=(32, ))
    grad_func = grad_and_loss(f_fc)
    grad_vals, outputs = grad_func(a, b, weight, bias)
    # (TODO) assert
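
# argnum restricts differentiation to the listed positional arguments, so the
# non-differentiable `mode` flag can be passed through unchanged.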
@with_seed()
def test_argnum():
    def f_with_mode(a, b, mode):
        if mode:
            return a+b
        else:
            return a*b

    a = nd.uniform(shape=(3, 2))
    b = nd.uniform(shape=(3, 2))
    f_add_grad = lambda x, y, mode: [nd.ones(x.shape), nd.ones(y.shape)]
    f_mul_grad = lambda x, y, mode: [y, x]
    autograd_assert(a, b, True,
        argnum=[0, 1], func=f_with_mode, grad_func=f_add_grad)
    autograd_assert(a, b, False,
        argnum=[0, 1], func=f_with_mode, grad_func=f_mul_grad)
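
# train_section/test_section toggle training mode: Dropout perturbs the input
# in the former and acts as identity in the latter.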
@with_seed()
def test_training():
    x = nd.ones((10, 10))
    with train_section():
        y = nd.Dropout(x, p=0.5)
        assert not (y.asnumpy() == x.asnumpy()).all()
        with test_section():
            y = nd.Dropout(x, p=0.5)
            assert (y.asnumpy() == x.asnumpy()).all()
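
# backward() with explicit head gradients: a None entry defaults to ones, and
# the supplied out_grads propagate to the corresponding rows of dx.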
@with_seed()
def test_out_grads():
    x = nd.ones((3, 5))
    dx = nd.zeros_like(x)
    mark_variables([x], [dx])
    da = None
    db = nd.array([1,2,3,4,5])
    dc = nd.array([5,4,3,2,1])
    with train_section():
        a, b, c = nd.split(x, axis=0, num_outputs=3, squeeze_axis=True)
        backward([a, b, c], [da, db, dc])
    assert (dx.asnumpy() == np.array(
        [[1,1,1,1,1],
         [1,2,3,4,5],
         [5,4,3,2,1]])).all()
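
# detach() stops gradient flow: dx is updated in the first pass but stays zero
# once x2 is detached; _fresh_grad records whether a variable's grad was written.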
@with_seed()
def test_detach_updated_grad():
    x = nd.ones((2, 2))
    dx = nd.zeros_like(x)
    y = nd.ones_like(x)
    dy = nd.zeros_like(x)
    mark_variables([x, y], [dx, dy])
    assert x._fresh_grad == False
    assert y._fresh_grad == False

    with train_section():
        x2 = x + 2
        y2 = x2 + y
        y2.backward()
    assert (dx.asnumpy() == 1).all()
    assert x._fresh_grad == True
    assert y._fresh_grad == True

    dx[:] = 0
    x._fresh_grad = False
    y._fresh_grad = False
    assert x._fresh_grad == False
    assert y._fresh_grad == False
    with train_section():
        x2 = x + 2
        x2 = x2.detach()
        y2 = x2 + y
        y2.backward()
    assert (dx.asnumpy() == 0).all()
    assert y._fresh_grad == True
    assert x._fresh_grad == False
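
# With grad_reqs='add', gradients accumulate across backward passes;
# retain_graph=True permits a second backward, while re-running backward on an
# already-freed graph must raise.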
@with_seed()
def test_retain_grad():
    x = mx.nd.ones((2, 2))
    dx = mx.nd.zeros((2, 2))
    mark_variables([x], [dx], grad_reqs='add')
    with train_section():
        y = x + 1
        y.backward(retain_graph=False)
    assert (dx.asnumpy() == 1).all()

    dx[:] = 0
    with train_section():
        y = x + 1
        y.backward(retain_graph=True)
        y.backward(retain_graph=False)
    assert (dx.asnumpy() == 2).all()

    # The following sequence should throw an exception. We discard the expected
    # stderr stack trace output for this operation to keep the test logs clean.
    with discard_stderr():
        try:
            with train_section():
                y = x + 1
                y.backward()
                y.backward()
        except Exception:
            return

    raise AssertionError(
        "differentiating the same graph twice without retain_graph should fail")

if __name__ == "__main__":
    import nose
    nose.runmodule()