# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
use strict;
use warnings;
use Test::More tests => 30;
use AI::MXNet qw(mx);
use AI::MXNet::Gluon qw(gluon);
use AI::MXNet::Gluon::NN qw(nn);
use AI::MXNet::TestUtils qw(almost_equal dies_ok);
use Scalar::Util qw(refaddr);
use AI::MXNet::Base;
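# quiet sparse storage-fallback warnings and subgraph logging while the tests run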
$ENV{MXNET_STORAGE_FALLBACK_LOG_VERBOSE} = 0;
$ENV{MXNET_SUBGRAPH_VERBOSE} = 0;
sub test_multi_trainer
{
    my $x = gluon->Parameter('x', shape=>[10], stype=>'row_sparse');
    $x->initialize();
    # test set trainer
    my $trainer0 = gluon->Trainer([$x], 'sgd');
    ok(refaddr($x->_trainer) == refaddr($trainer0));
    # test unset trainer
    $x->_set_trainer(undef);
    ok(not defined $x->_trainer);
    $x->_set_trainer($trainer0);
    # multiple trainers for a sparse Parameter are not allowed
    dies_ok(sub { gluon->Trainer([$x], 'sgd') });
}
test_multi_trainer();
sub test_trainer
{
    my $dict_equ = sub {
        my ($a, $b) = @_;
        is_deeply({ map { $_ => 1 } keys %$a }, { map { $_ => 1 } keys %$b });
        for my $k (keys %$a)
        {
            ok(($a->{$k}->aspdl == $b->{$k}->aspdl)->all);
        }
    };
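    # one SGD-with-momentum step on a parameter replicated across two CPU
    # contexts: y = w + 1 gives grad 1 per context, the kvstore sums them,
    # so x = 0 - 1.0 * 2 = -2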
    my $x = gluon->Parameter('x', shape=>[10]);
    $x->initialize(ctx=>[mx->cpu(0), mx->cpu(1)], init=>'zeros');
    my $trainer = gluon->Trainer([$x], 'sgd', {learning_rate => 1.0, momentum => 0.5});
    my $y;
    mx->autograd->record(sub {
        for my $w (@{ $x->list_data() })
        {
            $y = $w + 1;
            $y->backward();
        }
    });
    $trainer->step(1);
    ok(($x->data(mx->cpu(1))->aspdl == -2)->all);
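    # lr_mult 0.5 halves the effective learning rate: the momentum term
    # carries 0.5 * -2 = -1 and the gradient term adds -0.5 * 2 = -1,
    # taking x from -2 to -4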
    $x->lr_mult(0.5);
    mx->autograd->record(sub {
        for my $w (@{ $x->list_data() })
        {
            $y = $w + 1;
            $y->backward();
        }
    });
    $trainer->step(1);
    ok(($x->data(mx->cpu(1))->aspdl == -4)->all);
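    # snapshot the updater states, round-trip them through
    # save_states/load_states, and check that nothing changed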
    $trainer->save_states('test_trainer.states');
    my $states;
    if($trainer->update_on_kvstore)
    {
        $states = { %{ $trainer->kvstore->_updater->states } };
    }
    else
    {
        $states = { %{ $trainer->_updaters->[0]->states } };
    }
    $trainer->load_states('test_trainer.states');
    if($trainer->update_on_kvstore)
    {
        $dict_equ->($trainer->kvstore->_updater->states, $states);
        ok($trainer->_optimizer eq $trainer->kvstore->_updater->optimizer);
    }
    else
    {
        for my $updater (@{ $trainer->_updaters })
        {
            $dict_equ->($updater->states, $states);
        }
        ok($trainer->_optimizer eq $trainer->_updaters->[0]->optimizer);
    }
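    # update() and allreduce_grads() die while updates happen on the kvstore;
    # they are only supported with update_on_kvstore=>0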
    dies_ok(sub { $trainer->update(1) });
    dies_ok(sub { $trainer->allreduce_grads() });
    $x = gluon->Parameter('x', shape=>[10]);
    $x->initialize(ctx=>[mx->cpu(0), mx->cpu(1)], init=>'zeros');
    my $trainer2 = gluon->Trainer([$x], 'sgd', {learning_rate => 1.0, momentum => 0.5},
                                  update_on_kvstore=>0);
    mx->autograd->record(sub {
        for(enumerate($x->list_data))
        {
            my ($i, $w) = @$_;
            my $y = $i * $w;
            $y->backward;
        }
    });
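    # y = $i * $w gives grad 0 on cpu(0) and grad 1 on cpu(1); the grads
    # differ until allreduce_grads() sums them across contexts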
    ok(($x->grad(mx->cpu(0))->aspdl != $x->grad(mx->cpu(1))->aspdl)->all);
    $trainer2->allreduce_grads;
    ok(($x->grad(mx->cpu(0))->aspdl == $x->grad(mx->cpu(1))->aspdl)->all);
    $trainer2->update(1);
    ok(($x->data(mx->cpu(1))->aspdl == -1)->all);
}
test_trainer();
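# sparse parameters are updated on the kvstore; after save_states/load_states
# the optimizer must still be tied to the parameter dict, so a later lr_mult
# change shows up in _get_lr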
sub test_trainer_sparse_save_load
{
    my $x = gluon->Parameter('x', shape=>[10, 1], lr_mult=>1.0, stype=>'row_sparse');
    $x->initialize(ctx=>[mx->cpu(0)], init=>'zeros');
    my $trainer = gluon->Trainer([$x], 'sgd', {learning_rate => 0.1});
    my $all_rows = mx->nd->arange(start => 0, stop => 10, ctx => mx->cpu(0));
    mx->autograd->record(sub {
        for my $w (@{ $x->list_row_sparse_data($all_rows) })
        {
            my $y = $w * 1;
            $y->backward();
        }
    });
    $trainer->step(1);
    ok($trainer->kvstore->_updater->optimizer->_get_lr(0) == 0.1);
    $trainer->save_states('test_trainer_save_load.states');
    $trainer->load_states('test_trainer_save_load.states');
    $x->lr_mult(2.0);
    # check if parameter dict is correctly associated with optimizer after load_states
    ok($trainer->kvstore->_updater->optimizer->_get_lr(0) == 0.2);
}
test_trainer_sparse_save_load();
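# a Block mixing a row_sparse embedding weight with a dense layer: parameters
# still deferred when the Trainer is created must all be initialized by the
# first step()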
sub test_trainer_multi_layer_init
{
    local($ENV{MXNET_STORAGE_FALLBACK_LOG_VERBOSE}) = 0;
    package Net {
        use AI::MXNet::Gluon::Mouse;
        extends 'AI::MXNet::Gluon::Block';
        use AI::MXNet::Function::Parameters;
        sub BUILD {
            my $self = shift;
            $self->name_scope(sub {
                # sparse param
                $self->embed_weight($self->params->get('embed_weight', stype=>'row_sparse',
                                                       shape=>[4,3], grad_stype=>'row_sparse'));
                # dense param from a hybrid block
                $self->dense0(nn->Dense(2));
            });
        }
        method forward($x)
        {
            my $embed_weight = $self->embed_weight->row_sparse_data($x);
            my $embed = mx->nd->Embedding(data=>$x, weight=>$embed_weight,
                                          input_dim=>4, output_dim=>3, sparse_grad=>1);
            return $self->dense0->($embed);
        }
    };
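    # with One() init and Dense(2) weights of ones, each occurrence of a row
    # index receives a gradient of 2 per element: rows 0 and 1 appear once
    # (1 - 2 = -1), row 2 twice (1 - 4 = -3), row 3 never (stays 1)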
    my $check_init = sub {
        my ($ctxes) = @_;
        my $net = Net->new(prefix=>'net_');
        $net->initialize(mx->init->One(), ctx=>$ctxes);
        my $trainer = gluon->Trainer($net->collect_params(), 'sgd', {learning_rate => 1});
        my $data = mx->nd->array([[0,2], [1,2]]);
        my $xs = gluon->utils->split_and_load($data, ctx_list => $ctxes);
        my @ys;
        mx->autograd->record(sub {
            for my $x (@{ $xs })
            {
                my $y = $net->($x);
                push @ys, $y;
            }
        });
        for my $y (@ys)
        {
            $y->backward;
        }
        $trainer->step(1);
        # all parameters should be initialized
        ok(not @{ $trainer->_params_to_init });
        my $all_rows = mx->nd->arange(start => 0, stop => 4, ctx=>mx->cpu(1));
        # check the updated weights
        my $weight = $net->embed_weight->row_sparse_data($all_rows)->aspdl;
        ok(($weight->at(0) == -1)->all);
        ok(($weight->at(1) == -1)->all);
        ok(($weight->at(2) == -3)->all);
        ok(($weight->at(3) == 1)->all);
    };
    $check_init->([mx->cpu(1), mx->cpu(2)]);
    $check_init->([mx->cpu(1)]);
}
test_trainer_multi_layer_init();
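# Parameter load() resets the trainer's kvstore: kvstore becomes undef and
# _kv_initialized false until the next step() re-creates them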
sub test_trainer_reset_kv
{
    my $check_trainer_reset_kv = sub {
        my ($kv) = @_;
        my $params = gluon->ParameterDict();
        my $x = $params->get('x', shape=>[10], lr_mult=>1.0);
        $params->initialize(ctx=>[mx->cpu(0), mx->cpu(1)], init=>'zeros');
        my $trainer = gluon->Trainer($params, 'sgd', {learning_rate => 0.1}, kvstore=>$kv);
        $params->save('test_trainer_reset_kv.params');
        mx->autograd->record(sub {
            for my $w (@{ $x->list_data })
            {
                my $y = $w + 1;
                $y->backward;
            }
        });
        $trainer->step(1);
        is($trainer->kvstore->type, $kv);
        # load would reset kvstore
        $params->load('test_trainer_reset_kv.params', ctx => [mx->cpu(0), mx->cpu(1)]);
        ok(not defined $trainer->kvstore);
        ok(defined $trainer->_kv_initialized and not $trainer->_kv_initialized);
        mx->autograd->record(sub {
            for my $w (@{ $x->list_data })
            {
                my $y = $w + 1;
                $y->backward;
            }
        });
        $trainer->step(1);
        # the updated parameter should be based on the loaded checkpoint:
        # x restarts at 0, grads of 1 per context sum to 2, lr 0.1 => -0.2
        ok(($x->data(mx->cpu()) == -0.2)->aspdl->all);
    };
    my @kvs = ('local', 'device');
    for my $kv (@kvs)
    {
        $check_trainer_reset_kv->($kv);
    }
}
test_trainer_reset_kv();