# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

use strict;
use warnings;
use AI::MXNet qw(mx);
use AI::MXNet::AutoGrad qw(autograd);
use AI::MXNet::TestUtils qw(same almost_equal rand_ndarray);
use AI::MXNet::Base qw(:DEFAULT pones);
use Test::More tests => 246;
$ENV{MXNET_STORAGE_FALLBACK_LOG_VERBOSE} = 0;

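# Helper: the trailing hashref holds func, grad_func and an optional argnum.
# Runs func through autograd->grad_and_loss and checks both the recorded output
# and every computed gradient against the reference grad_func.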
sub autograd_assert
{
    my $kwargs = {};
    if(ref $_[-1] eq 'HASH') { $kwargs = pop(@_) };
    my @args = @_;
    my $func = $kwargs->{func};
    my $grad_f = $kwargs->{grad_func};
    my $argnum = $kwargs->{argnum};
    my $grad_func = autograd->grad_and_loss($func, $argnum);
    my ($grad_vals, $output) = $grad_func->(@args);
    my $res = $func->(@args);
    ok(same($output->aspdl, $res->aspdl));
    my $grad_res = $grad_f->(@args);
    ok(@$grad_vals == @$grad_res);
    for(zip($grad_vals, $grad_res)) {
        my ($a, $b) = @$_;
        ok(same($a->aspdl, $b->aspdl));
    }
}

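# Unary ops exp(x), x/2 and x**2 with their analytic gradients, for a dense
# input and for row_sparse/csr storage types.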
sub test_unary_func
{
    my $check_unary_func = sub {
        my ($x) = @_;
        my $f_exp = sub { $_[0]->exp };
        my $f_exp_grad = sub { [$_[0]->exp] };
        autograd_assert($x, { func => $f_exp, grad_func => $f_exp_grad });
        my $f_half = sub { $_[0]/2 };
        my $f_half_grad = sub { [mx->nd->ones($_[0]->shape) * 0.5] };
        autograd_assert($x, { func => $f_half, grad_func => $f_half_grad });
        my $f_square = sub { $_[0]**2 };
        my $f_square_grad = sub { [2*$_[0]] };
        autograd_assert($x, { func => $f_square, grad_func => $f_square_grad });
    };
    my $uniform = mx->nd->uniform(shape=>[4, 5]);
    $check_unary_func->($uniform);
    my $stypes = ['row_sparse', 'csr', 'default'];
    for my $stype (@$stypes)
    {
        $check_unary_func->($uniform->tostype($stype));
    }
}

test_unary_func();

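# Binary ops a+b, a*b and a+a*b with their analytic gradients, over every
# combination of storage types for the two inputs.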
sub test_binary_func
{
    my $check_binary_func = sub {
        my ($x, $y) = @_;
        my $f_add = sub { $_[0]+$_[1] };
        my $f_add_grad = sub { [map { mx->nd->ones($_->shape) } @_] };
        autograd_assert($x, $y, { func => $f_add, grad_func => $f_add_grad });
        my $f_mul = sub { $_[0]*$_[1] };
        my $f_mul_grad = sub { [reverse(@_)] };
        autograd_assert($x, $y, { func => $f_mul, grad_func => $f_mul_grad });
        my $f_compose = sub { $_[0]+$_[0]*$_[1] };
        my $f_compose_grad = sub { [mx->nd->ones($_[0]->shape) + $y, $x] };
        autograd_assert($x, $y, { func => $f_compose, grad_func => $f_compose_grad });
    };
    my $uniform_x = mx->nd->uniform(shape=>[4, 5]);
    my $uniform_y = mx->nd->uniform(shape=>[4, 5]);
    $check_binary_func->($uniform_x, $uniform_y);
    my $stypes = ['row_sparse', 'csr', 'default'];
    for my $stype_x (@$stypes)
    {
        for my $stype_y (@$stypes)
        {
            my $x = $uniform_x->tostype($stype_x);
            my $y = $uniform_y->tostype($stype_y);
            $check_binary_func->($x, $y);
        }
    }
}

test_binary_func();

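# FullyConnected (an operator with state) inside grad_and_loss: only checks
# that recording and the gradient computation run without error.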
sub test_operator_with_state
{
    my $f_fc = sub {
        my ($a, $b, $weight, $bias) = @_;
        my $x = $a*$b;
        my $fc = mx->nd->FullyConnected(
            $x, $weight, $bias, num_hidden=>32);
        return $fc;
    };

    my $a = mx->nd->uniform(shape=>[64, 50]);
    my $b = mx->nd->uniform(shape=>[64, 50]);
    my $weight = mx->nd->uniform(shape=>[32, 50]);
    my $bias = mx->nd->uniform(shape=>[32]);

    my $grad_func = autograd->grad_and_loss($f_fc);
    my ($grad_vals, $outputs) = $grad_func->($a, $b, $weight, $bias);
}

test_operator_with_state();

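# argnum=>[0, 1] restricts grad_and_loss to the first two arguments; the third
# argument only selects between a+b and a*b.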
sub test_argnum
{
    my $f_with_mode = sub {
        my ($a, $b, $mode) = @_;
        if($mode)
        {
            return $a+$b;
        }
        else
        {
            return $a*$b;
        }
    };
    my $a = mx->nd->uniform(shape=>[3, 2]);
    my $b = mx->nd->uniform(shape=>[3, 2]);
    my $f_add_grad = sub { [map { mx->nd->ones($_->shape) } @_[0,1]] };
    my $f_mul_grad = sub { [reverse(@_[0,1])] };
    autograd_assert($a, $b, 1,
        { argnum=>[0, 1], func=>$f_with_mode, grad_func=>$f_add_grad });
    autograd_assert($a, $b, 0,
        { argnum=>[0, 1], func=>$f_with_mode, grad_func=>$f_mul_grad });
}

test_argnum();

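# Dropout behaves stochastically inside record() and acts as an identity
# inside pause().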
sub test_training
{
    my $x = mx->nd->ones([10, 10]);
    autograd->record(sub {
        my $y = mx->nd->Dropout($x, p=>0.5);
        ok(not ($y->aspdl == $x->aspdl)->all);
        autograd->pause(sub {
            my $y = mx->nd->Dropout($x, p=>0.5);
            ok(($y->aspdl == $x->aspdl)->all);
        });
    });
}

test_training();

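# backward() with explicit head_grads for each row of $x; an undef head grad
# defaults to ones, giving the expected $dx pattern.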
sub test_out_grads
{
    my $x = mx->nd->ones([3, 5]);
    my $dx = mx->nd->zeros_like($x);
    autograd->mark_variables([$x], [$dx]);
    my $da;
    my $db = mx->nd->array([1,2,3,4,5]);
    my $dc = mx->nd->array([5,4,3,2,1]);

    autograd->record(sub {
        my ($a, $b, $c) = @{ $x };
        autograd->backward([$a, $b, $c], head_grads => [$da, $db, $dc]);
    });
    ok(($dx->aspdl == pdl(
        [[1,1,1,1,1],
         [1,2,3,4,5],
         [5,4,3,2,1]]))->all);
}

test_out_grads();

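# detach() cuts the recorded graph: with the intermediate node detached,
# backward() leaves $dx at zero, and _fresh_grad tracks which variables
# actually received gradients.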
sub test_detach_updated_grad
{
    my $x = mx->nd->ones([2, 2]);
    my $dx = mx->nd->zeros_like($x);
    my $y = mx->nd->ones_like($x);
    my $dy = mx->nd->zeros_like($x);
    autograd->mark_variables([$x, $y], [$dx, $dy]);
    ok($x->_fresh_grad == 0);
    ok($y->_fresh_grad == 0);

    autograd->record(sub {
        my $x2 = $x + 2;
        my $y2 = $x2 + $y;
        $y2->backward();
    });
    ok(($dx->aspdl == 1)->all);
    ok($x->_fresh_grad == 1);
    ok($y->_fresh_grad == 1);

    $dx .= 0;
    $x->_fresh_grad(0);
    $y->_fresh_grad(0);
    ok($x->_fresh_grad == 0);
    ok($y->_fresh_grad == 0);

    autograd->record(sub {
        my $x2 = $x + 2;
        $x2 = $x2->detach;
        my $y2 = $x2 + $y;
        $y2->backward();
    });
    ok(($dx->aspdl == 0)->all);
    ok($x->_fresh_grad == 0);
    ok($y->_fresh_grad == 1);
}

test_detach_updated_grad();

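# With grad_reqs=>'add' gradients accumulate across backward passes; calling
# backward() twice without retain_graph=>1 is expected to die (stderr is
# silenced around the eval).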
sub test_retain_grad
{
    my $x = mx->nd->ones([2, 2]);
    my $dx = mx->nd->zeros([2, 2]);
    autograd->mark_variables([$x], [$dx], grad_reqs=>'add');
    autograd->record(sub {
        my $y = $x + 1;
        $y->backward(retain_graph=>0);
    });
    ok(($dx->aspdl == 1)->all);

    $dx .= 0;
    autograd->record(sub {
        my $y = $x + 1;
        $y->backward(retain_graph=>1);
        $y->backward(retain_graph=>0);
    });
    ok(($dx->aspdl == 2)->all);
    no warnings;
    open(CPERR, ">&STDERR");
    open(STDERR, ">/dev/null");
    eval {
        autograd->record(sub {
            my $y = $x + 1;
            $y->backward();
            $y->backward();
        });
    };
    open(STDERR, ">&CPERR");
    ok($@);
}

test_retain_grad();

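# grad is undef until attach_grad() is called; after y = 2*x and backward(),
# x's gradient is 2, for default, row_sparse and csr arrays alike.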
sub test_attach_grad
{
    my $check_attach_grad = sub {
        my ($x) = @_;
        ok(not defined $x->grad);
        $x->attach_grad();
        autograd->record(sub {
            my $y = $x * 2;
            ok(not defined $y->grad);
            $y->backward;
        });
        ok(($x->grad->aspdl == 2)->all);
    };
    my $zeros = mx->nd->zeros([10, 10]);
    $check_attach_grad->($zeros);
    my @stypes = ('default', 'row_sparse', 'csr');
    for my $stype (@stypes)
    {
        my $x = $zeros->tostype($stype);
        $check_attach_grad->($x);
    }
}

test_attach_grad();

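# How is_recording/is_training interact with record(train_mode=>...),
# predict_mode(), train_mode() and backward(train_mode=>...), probed with
# Dropout (identity in predict mode, scaling kept values to 2 with p=0.5).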
sub test_is_train
{
    my $x = mx->nd->ones([10, 10]);
    $x->attach_grad();
    autograd->record(sub {
        ok(autograd->is_recording());
        ok(autograd->is_training());
        my $y = mx->nd->Dropout($x, p=>0.5);
        ok($y->aspdl->max == 2 and $y->aspdl->min == 0);
        $y->backward();
        ok(($x->grad->aspdl == $y->aspdl)->all);
        autograd->predict_mode(sub {
            ok(autograd->is_recording());
            ok(not autograd->is_training());
            my $y = mx->nd->Dropout($x, p=>0.5);
            ok(($y->aspdl == $x->aspdl)->all);
            $y->backward(train_mode=>0);
            ok(($x->grad->aspdl == $x->aspdl)->all);
        });
    }, train_mode => 1);

    autograd->record(sub {
        ok(autograd->is_recording());
        ok(not autograd->is_training());
        my $y = mx->nd->Dropout($x, p=>0.5);
        ok(($y->aspdl == $x->aspdl)->all);
        $y->backward(train_mode=>0);
        ok(($x->grad->aspdl == $x->aspdl)->all);

        autograd->train_mode(sub {
            ok(autograd->is_recording);
            ok(autograd->is_training);
            my $y = mx->nd->Dropout($x, p=>0.5);
            ok($y->aspdl->max == 2 and $y->aspdl->min == 0);
            $y->backward;
            ok(($x->grad->aspdl == $y->aspdl)->all);
        });
    }, train_mode => 0);

    ok(not autograd->is_recording);
    ok(not autograd->is_training);
    my $y = mx->nd->Dropout($x, p=>0.5);
    ok(($y->aspdl == $x->aspdl)->all);

    autograd->train_mode(sub {
        ok(not autograd->is_recording);
        ok(autograd->is_training);
        my $y = mx->nd->Dropout($x, p=>0.5);
        ok($y->aspdl->max == 2 and $y->aspdl->min == 0);
    });
}

test_is_train();

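# get_symbol() recovers a symbolic graph from a recorded NDArray;
# list_arguments has one entry when only $x is used and two once $z joins in.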
sub test_get_symbol
{
    my $x = mx->nd->ones([1]);
    $x->attach_grad;
    my $y;
    autograd->record(sub {
        $y = $x*$x + 2*$x - 1;
    });
    ok(@{ autograd->get_symbol($y)->list_arguments } == 1);

    my $z = mx->nd->ones([1]);
    $z->attach_grad;
    autograd->record(sub {
        $y = $x*$x + 2*$z - 1;
    });
    ok(@{ autograd->get_symbol($y)->list_arguments } == 2);
}

test_get_symbol();

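# First- and second-order gradients of exp(x) + x at x = 1 via
# grad(..., create_graph=>1): e + 1, then e.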
sub test_gradient
{
    my $x = mx->nd->ones([1]);
    $x->attach_grad;
    my $z;
    mx->autograd->record(sub {
        $z = mx->nd->elemwise_add($x->exp, $x);
    });
    my $dx = mx->autograd->grad($z, $x, create_graph=>1);
    ok(abs($dx->asscalar - 3.71828175) < 1e-7);
    $dx->backward;
    ok(abs($x->grad->asscalar - 2.71828175) < 1e-7);
}

test_gradient();

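# The gradient's storage type defaults to the array's own stype and can be
# overridden via attach_grad(stype=>...); detach() keeps the array's stype.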
sub test_grad_with_stype
{
    my $check_grad_with_stype = sub {
        my ($array_stype, $grad_stype, $expected_stype) = @_;
        my $x = mx->nd->zeros([1, 1], stype=>$array_stype);
        $x->attach_grad(stype=>$grad_stype);
        # check grad attached
        ok($x->grad->stype eq $expected_stype);
        my $y = $x->detach();
        # check array detached
        ok($y->stype eq $array_stype);
    };
    my @stypes = ('default', 'csr', 'row_sparse');
    for my $stype (@stypes)
    {
        # check the default stype of the gradient (same as the array stype)
        $check_grad_with_stype->($stype, undef, $stype);
        for my $grad_stype (@stypes)
        {
            # check the stype of the gradient when provided
            $check_grad_with_stype->($stype, $grad_stype, $grad_stype);
        }
    }
}

test_grad_with_stype();

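# Gradient of dot(csr_lhs, rhs) w.r.t. rhs equals lhs->transpose x ones and is
# stored as row_sparse, both for a row_sparse rhs and for a dense rhs that was
# given a row_sparse grad buffer.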
sub test_sparse_dot_grad
{
    my $check_sparse_dot_grad = sub {
        my ($rhs) = @_;
        my $lhs = rand_ndarray([2, 8], 'csr');
        my $y;
        mx->autograd->record(sub {
            $y = mx->nd->dot($lhs, $rhs);
        });
        $y->backward();
        my $grad = $rhs->grad;
        my $grad_pdl = $lhs->aspdl->transpose x pones($rhs->shape->[1], $lhs->shape->[0]);
        ok($grad->stype eq 'row_sparse');
        ok(almost_equal($grad->aspdl, $grad_pdl));
    };

    # check grad with row_sparse weight
    my $shape = [8, 3];
    my $rsp = mx->nd->ones($shape)->tostype('row_sparse');
    $rsp->attach_grad();
    $check_sparse_dot_grad->($rsp);

    # check grad with dense weight
    my $dns = mx->nd->ones($shape);
    $dns->attach_grad(stype=>'row_sparse');
    $check_sparse_dot_grad->($dns);
}

test_sparse_dot_grad();