| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
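# Tests for the mxnet.contrib.autograd API: grad_and_loss, mark_variables, backward,
# train_section/test_section, detach, and retain_graph.
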
import numpy as np
import mxnet as mx
import mxnet.ndarray as nd
from mxnet.contrib.autograd import *
from mxnet.test_utils import *
from common import setup_module, with_seed, teardown
| |
def autograd_assert(*args, **kwargs):
    # Check that grad_and_loss reproduces both the forward output of `func`
    # and the analytical gradients supplied by `grad_func`.
    func = kwargs["func"]
    grad_f = kwargs["grad_func"]
    argnum = kwargs.get("argnum")

    grad_func = grad_and_loss(func, argnum)
    grad_vals, output = grad_func(*args)
    res = func(*args)
    assert same(output.asnumpy(), res.asnumpy())
    grad_res = grad_f(*args)
    assert len(grad_vals) == len(grad_res)
    for a, b in zip(grad_vals, grad_res):
        assert same(a.asnumpy(), b.asnumpy())
| |
| @with_seed() |
| def test_unary_func(): |
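    # Unary ops (exp, halving, squaring) should reproduce their closed-form gradients.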
| x = nd.uniform(shape=(4, 5)) |
| f_exp = lambda x: nd.exp(x) |
| f_exp_grad = lambda x: [nd.exp(x)] |
| autograd_assert(x, func=f_exp, grad_func=f_exp_grad) |
| f_half = lambda x: x/2 |
| f_half_grad = lambda x: [nd.ones(x.shape) * 0.5] |
| autograd_assert(x, func=f_half, grad_func=f_half_grad) |
| f_square = lambda x: x**2 |
| f_square_grad = lambda x: [2*x] |
| autograd_assert(x, func=f_square, grad_func=f_square_grad) |
| |
| @with_seed() |
| def test_binary_func(): |
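    # Binary ops (addition, multiplication, and the composition x + x*y) should
    # reproduce their closed-form gradients with respect to both inputs.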
| x = nd.uniform(shape=(4, 5)) |
| y = nd.uniform(shape=(4, 5)) |
| f_add = lambda x, y: x+y |
| f_add_grad = lambda x, y: [nd.ones(x.shape), nd.ones(y.shape)] |
| autograd_assert(x, y, func=f_add, grad_func=f_add_grad) |
| f_mul = lambda x, y: x*y |
| f_mul_grad = lambda x, y: [y, x] |
| autograd_assert(x, y, func=f_mul, grad_func=f_mul_grad) |
| f_compose = lambda x, y: x+x*y |
| f_compose_grad = lambda x, y: [nd.ones(x.shape) + y, x] |
| autograd_assert(x, y, func=f_compose, grad_func=f_compose_grad) |
| |
| @with_seed() |
| def test_operator_with_state(): |
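    # Differentiate an elementwise product fed into FullyConnected and check that
    # every input receives a gradient.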
| def f_fc(a, b, weight, bias): |
| x = a*b |
| fc = nd.FullyConnected( |
| x, weight, bias, num_hidden=32) |
| return fc |
| |
| a = nd.uniform(shape=(64, 50)) |
| b = nd.uniform(shape=(64, 50)) |
| weight = nd.uniform(shape=(32, 50)) |
| bias = nd.uniform(shape=(32, )) |
| |
| grad_func = grad_and_loss(f_fc) |
| grad_vals, outputs = grad_func(a, b, weight, bias) |
    # Minimal sanity checks (an exact numerical gradient comparison is still TODO):
    # the FC output has shape (64, 32) and each input gets a gradient of matching shape.
    assert outputs.shape == (64, 32)
    assert len(grad_vals) == 4
    for grad, arg in zip(grad_vals, [a, b, weight, bias]):
        assert grad.shape == arg.shape
| |
| @with_seed() |
| def test_argnum(): |
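    # argnum=[0, 1] restricts differentiation to the first two arguments, so the
    # non-NDArray `mode` flag can be passed through untouched.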
| def f_with_mode(a, b, mode): |
| if mode: |
| return a+b |
| else: |
| return a*b |
| |
| a = nd.uniform(shape=(3, 2)) |
| b = nd.uniform(shape=(3, 2)) |
| f_add_grad = lambda x, y, mode: [nd.ones(x.shape), nd.ones(y.shape)] |
| f_mul_grad = lambda x, y, mode: [y, x] |
| autograd_assert(a, b, True, |
| argnum=[0, 1], func=f_with_mode, grad_func=f_add_grad) |
| autograd_assert(a, b, False, |
| argnum=[0, 1], func=f_with_mode, grad_func=f_mul_grad) |
| |
| |
| @with_seed() |
| def test_training(): |
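    # Dropout should be active (mask and rescale) inside train_section and act as
    # the identity inside test_section.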
| x = nd.ones((10, 10)) |
| with train_section(): |
| y = nd.Dropout(x, p=0.5) |
| assert not (y.asnumpy() == x.asnumpy()).all() |
| with test_section(): |
| y = nd.Dropout(x, p=0.5) |
| assert (y.asnumpy() == x.asnumpy()).all() |
| |
| |
| @with_seed() |
| def test_out_grads(): |
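    # backward() with explicit head gradients: None falls back to ones for `a`, while
    # db and dc are propagated into the corresponding rows of dx.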
| x = nd.ones((3, 5)) |
| dx = nd.zeros_like(x) |
| mark_variables([x], [dx]) |
| da = None |
| db = nd.array([1,2,3,4,5]) |
| dc = nd.array([5,4,3,2,1]) |
| |
| with train_section(): |
| a, b, c = nd.split(x, axis=0, num_outputs=3, squeeze_axis=True) |
| backward([a, b, c], [da, db, dc]) |
| |
    expected_dx = np.array([[1, 1, 1, 1, 1],
                            [1, 2, 3, 4, 5],
                            [5, 4, 3, 2, 1]])
    assert (dx.asnumpy() == expected_dx).all()
| |
| |
| @with_seed() |
| def test_detach_updated_grad(): |
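    # detach() cuts the graph so x no longer receives a gradient while y still does;
    # _fresh_grad tracks which variables were updated by the last backward pass.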
| x = nd.ones((2, 2)) |
| dx = nd.zeros_like(x) |
| y = nd.ones_like(x) |
| dy = nd.zeros_like(x) |
| mark_variables([x, y], [dx, dy]) |
    assert not x._fresh_grad
    assert not y._fresh_grad
| |
| with train_section(): |
| x2 = x + 2 |
| y2 = x2 + y |
| y2.backward() |
| assert (dx.asnumpy() == 1).all() |
    assert x._fresh_grad
    assert y._fresh_grad
| |
| dx[:] = 0 |
| x._fresh_grad = False |
| y._fresh_grad = False |
    assert not x._fresh_grad
    assert not y._fresh_grad
| with train_section(): |
| x2 = x + 2 |
| x2 = x2.detach() |
| y2 = x2 + y |
| y2.backward() |
| assert (dx.asnumpy() == 0).all() |
    assert y._fresh_grad
    assert not x._fresh_grad
| |
| |
| @with_seed() |
| def test_retain_grad(): |
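    # With grad_reqs='add', gradients accumulate across backward calls; differentiating
    # the same graph twice is only legal when the first call used retain_graph=True.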
| x = mx.nd.ones((2, 2)) |
| dx = mx.nd.zeros((2, 2)) |
| mark_variables([x], [dx], grad_reqs='add') |
| with train_section(): |
| y = x + 1 |
| y.backward(retain_graph=False) |
| assert (dx.asnumpy() == 1).all() |
| |
| dx[:] = 0 |
| with train_section(): |
| y = x + 1 |
| y.backward(retain_graph=True) |
| y.backward(retain_graph=False) |
| assert (dx.asnumpy() == 2).all() |
| |
| # The following sequence should throw an exception. We discard the expected |
| # stderr stack trace output for this operation to keep the test logs clean. |
| with discard_stderr(): |
| try: |
| with train_section(): |
| y = x + 1 |
| y.backward() |
| y.backward() |
| except Exception: |
| return |
| |
| raise AssertionError( |
| "differentiating the same graph twice without retain_graph should fail") |
| |
| |
| if __name__ == "__main__": |
| import nose |
| nose.runmodule() |