| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| use strict; |
| use warnings; |
| use Test::More tests => 428; |
| use AI::MXNet qw(mx); |
| use AI::MXNet::Base; |
use AI::MXNet::TestUtils qw(almost_equal enumerate same_array dies_like rand_ndarray);
use Hash::Ordered;
| $ENV{MXNET_STORAGE_FALLBACK_LOG_VERBOSE} = 0; |
| |
| sub test_module_layout |
| { |
| my $sym = mx->sym->Variable('data'); |
| $sym = mx->sym->Activation(data=>$sym, act_type=>'relu', __layout__=>'TNC'); |
| |
| my $dshape = [3, 8, 7]; |
| my $mod = mx->mod->Module( |
| $sym, |
| data_names=>['data'], |
| context=>[mx->cpu(0), mx->cpu(1)] |
| ); |
| $mod->bind( |
| data_shapes=>[mx->io->DataDesc('data', $dshape, layout=>'TNC')] |
| ); |
| $mod->init_params(); |
| $mod->forward( |
| mx->io->DataBatch( |
| data=>[mx->nd->ones($dshape)] |
| ), |
| is_train => 1 |
| ); |
| $mod->backward([mx->nd->ones($dshape)]); |
| is_deeply($mod->get_outputs()->[0]->shape, $dshape); |
| |
| my $hdshape = [3, 4, 7]; |
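    # unmerged outputs are split across the two contexts along the batch (N)
    # axis of the TNC layout: 8 -> 4 per device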
| for my $x (@{ $mod->get_outputs(0)->[0] }) |
| { |
| is_deeply($x->shape, $hdshape); |
| } |
| } |
| |
| sub test_save_load |
| { |
    my $dict_equ = sub {
        my ($a, $b) = @_;
        is_deeply([sort keys %$a], [sort keys %$b]);
| for my $k (keys %$a) |
| { |
| ok(($a->{$k}->aspdl == $b->{$k}->aspdl)->all); |
| } |
| }; |
| my $sym = mx->sym->Variable('data'); |
| $sym = mx->sym->FullyConnected($sym, num_hidden=>100); |
| |
| # single device |
| my $mod = mx->mod->Module($sym, data_names=>['data']); |
| $mod->bind(data_shapes=>[['data', [10, 10]]]); |
| $mod->init_params(); |
| $mod->init_optimizer(optimizer_params=>{learning_rate => 0.1, momentum => 0.9}); |
| $mod->update(); |
| $mod->save_checkpoint('test', 0, 1); |
| |
| my $mod2 = mx->mod->Module->load('test', 0, 1, data_names=>['data']); |
| $mod2->bind(data_shapes=>[['data', [10, 10]]]); |
| $mod2->init_optimizer(optimizer_params=>{learning_rate => 0.1, momentum => 0.9}); |
| is($mod->_symbol->tojson(), $mod2->_symbol->tojson()); |
| $dict_equ->(($mod->get_params())[0], ($mod2->get_params())[0]); |
| $dict_equ->($mod->_updater->states, $mod2->_updater->states); |
| |
| # multi device |
| $mod = mx->mod->Module($sym, data_names=>['data'], context=>[mx->cpu(0), mx->cpu(1)]); |
| $mod->bind(data_shapes=>[['data', [10, 10]]]); |
| $mod->init_params(); |
| $mod->init_optimizer(optimizer_params=>{learning_rate => 0.1, momentum => 0.9}); |
| $mod->update(); |
| $mod->save_checkpoint('test', 0, 1); |
| |
| $mod2 = mx->mod->Module->load('test', 0, 1, data_names=>['data']); |
| $mod2->bind(data_shapes=>[['data', [10, 10]]]); |
| $mod2->init_optimizer(optimizer_params=>{learning_rate => 0.1, momentum => 0.9}); |
| is($mod->_symbol->tojson(), $mod2->_symbol->tojson()); |
| $dict_equ->(($mod->get_params())[0], ($mod2->get_params())[0]); |
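    # for multi-device modules the updater is owned by the kvstore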
| $dict_equ->($mod->_kvstore->_updater->states, $mod2->_updater->states); |
| unlink('test-0000.params'); |
| unlink('test-0000.states'); |
| unlink('test-symbol.json'); |
| } |
| |
| |
| sub test_module_reshape |
| { |
| my $data = mx->sym->Variable('data'); |
| my $sym = mx->sym->FullyConnected($data, num_hidden=>20, name=>'fc'); |
| |
| my $dshape = [7, 20]; |
| my $mod = mx->mod->Module($sym, data_names=>['data'], context=>[mx->cpu(0), mx->cpu(1)]); |
| $mod->bind(data_shapes=>[['data', $dshape]]); |
| $mod->init_params(); |
| $mod->init_optimizer(optimizer_params=>{learning_rate => 1}); |
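    # init_optimizer defaults rescale_grad to 1/batch_size (1/7 here), so the
    # aggregated all-ones bias gradient of 7 becomes 1: one step at lr=1 moves
    # fc_bias from 0 to -1, and the later batch of 14 moves it on to -3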
| |
| $mod->forward( |
| mx->io->DataBatch( |
| data=>[mx->nd->ones($dshape)] |
| ), |
| is_train => 1 |
| ); |
| $mod->backward([mx->nd->ones($dshape)]); |
| $mod->update(); |
| is_deeply($mod->get_outputs()->[0]->shape, $dshape); |
| ok((($mod->get_params())[0]{fc_bias}->aspdl == -1)->all); |
| |
| $dshape = [14, 20]; |
| $mod->reshape(data_shapes=>[['data', $dshape]]); |
| $mod->forward( |
| mx->io->DataBatch( |
| data=>[mx->nd->ones($dshape)] |
| ), |
| is_train => 1 |
| ); |
| $mod->backward([mx->nd->ones($dshape)]); |
| $mod->update(); |
| is_deeply($mod->get_outputs()->[0]->shape, $dshape); |
| ok((($mod->get_params())[0]{fc_bias}->aspdl == -3)->all); |
| } |
| |
| |
| sub test_module_states |
| { |
| my $stack = mx->rnn->SequentialRNNCell(); |
| for my $i (0..1) |
| { |
| $stack->add(mx->rnn->LSTMCell(num_hidden=>20, prefix=>"lstm_l${i}_")); |
| } |
| my $begin_state = $stack->begin_state(func=>mx->sym->can('Variable')); |
| my (undef, $states) = $stack->unroll(10, begin_state=>$begin_state, inputs=>mx->sym->Variable('data')); |
| |
| my $state_names = [map { $_->name } @$begin_state]; |
| my $mod = mx->mod->Module( |
| mx->sym->Group($states), context=>[mx->cpu(0), mx->cpu(1)], |
| state_names=>$state_names |
| ); |
| $mod->bind(data_shapes=>[['data', [5, 10]]], for_training=>0); |
| $mod->init_params(); |
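    # get_outputs(0) keeps per-device outputs separate, get_outputs(1) merges
    # them; feeding the first pass's states back in must change the outputs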
| my $batch = mx->io->DataBatch(data=>[mx->nd->zeros([5, 10])], label=>[]); |
| |
| $mod->set_states(value=>1); |
| $mod->forward($batch); |
| my $out = $mod->get_outputs(0); |
| my $out1 = $mod->get_outputs(1); |
| |
| $mod->set_states(states=>$out); |
| $mod->forward($batch); |
| my $out2 = $mod->get_outputs(1); |
| |
| for(zip($out1, $out2)) { |
| my ($x1, $x2) = @$_; |
| ok(not almost_equal($x1->aspdl, $x2->aspdl, 1e-3)); |
| } |
| } |
| |
| sub test_module_switch_bucket |
| { |
| my $vocab_dim = 5000; |
| my $num_hidden = 100; |
| my $num_embedding = 100; |
| my $num_layer = 2; |
| my $default_key = 10; |
| my $test_key = 5; |
| my $batch_size = 32; |
| my $contexts = [mx->cpu(0)]; |
| my $initializer = mx->init->Xavier(factor_type=>"in", magnitude=>2.34); |
| |
    # generate symbols for an LSTM network
| my $gen_sym = sub { |
| my $seq_len = shift; |
| my $data = mx->sym->Variable('data'); |
| my $label = mx->sym->Variable('softmax_label'); |
| my $embed = mx->sym->Embedding(data=>$data, input_dim=>$vocab_dim, |
| output_dim=>$num_embedding, name=>'embed'); |
| my $stack = mx->rnn->SequentialRNNCell(); |
| for my $i (0..$num_layer-1) |
| { |
| $stack->add(mx->rnn->LSTMCell(num_hidden=>$num_hidden, prefix=>"lstm_l${i}_")); |
| } |
| my ($outputs, $states) = $stack->unroll($seq_len, inputs=>$embed, merge_outputs=>1); |
| |
| my $pred = mx->sym->Reshape($outputs, shape=>[-1, $num_hidden]); |
| $pred = mx->sym->FullyConnected(data=>$pred, num_hidden=>$vocab_dim, name=>'pred'); |
| |
| $label = mx->sym->Reshape($label, shape=>[-1]); |
| $pred = mx->sym->SoftmaxOutput(data=>$pred, label=>$label, name=>'softmax'); |
| |
| return ($pred, ['data'], ['softmax_label']); |
| }; |
| my $create_bucketing_module = sub { my $key = shift; |
| my $model = mx->mod->BucketingModule( |
| sym_gen => $gen_sym, |
| default_bucket_key => $key, |
| context => $contexts |
| ); |
| $model->bind(data_shapes=>[['data', [$batch_size, $key]]], |
| label_shapes=>[['softmax_label', [$batch_size, $key]]] |
| ); |
| $model->init_params(initializer=>$initializer); |
| return $model; |
| }; |
    # initialize the bucketing module with the default bucket key
    my $bucketing_model = $create_bucketing_module->($default_key);
    # switch to test_key
| $bucketing_model->switch_bucket( |
| bucket_key => $test_key, |
| data_shapes => [['data', [$batch_size, $test_key]]], |
| label_shapes => [['softmax_label', [$batch_size, $test_key]]] |
| ); |
| |
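    # remove the cached bucket and switch to it again so the bucketing module
    # has to re-create it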
| delete $bucketing_model->_buckets->{$test_key}; |
| |
| $bucketing_model->switch_bucket( |
| bucket_key => $test_key, |
| data_shapes => [['data', [$batch_size, $test_key]]], |
| label_shapes => [['softmax_label', [$batch_size, $test_key]]] |
| ); |
| } |
| |
| sub test_monitor |
| { |
| mx->random->seed(11); |
| my $data = mx->nd->array([[0.05, .10]]); |
| my $label = mx->nd->array([[.01, 0.99]]); |
| my $train_data = mx->io->NDArrayIter($data, label => $label, batch_size=>1); |
| |
| # symbols |
| my $x = mx->symbol->Variable('data'); |
| $x = mx->symbol->FullyConnected(name=>'fc_0', data=>$x, num_hidden=>2); |
| $x = mx->symbol->Activation(name=>"act_0", data=>$x, act_type=>'sigmoid'); |
| $x = mx->symbol->FullyConnected(name=>'fc_1', data=>$x, num_hidden=>2); |
| $x = mx->symbol->Activation(name=>"act_1", data=>$x, act_type=>'sigmoid'); |
| $x = mx->symbol->LinearRegressionOutput(data=>$x, name=>'softmax', grad_scale=>2); |
| |
| # create monitor |
| my $mean_abs = sub { my ($x) = @_; |
| return $x->abs->sum/$x->size; |
| }; |
| my $mon = mx->mon->Monitor(1, stat_func=>$mean_abs, pattern=>'.*', sort=>1); |
| |
| # create module |
| my $mod = mx->mod->Module($x, context=>[mx->cpu()]); |
| $mod->bind(data_shapes=>$train_data->provide_data, label_shapes=>$train_data->provide_label, |
| for_training=>1); |
| $mod->install_monitor($mon); |
| my $arg_params = {fc_0_weight => mx->nd->array([[.15, .20], [.25, .30]]), |
| fc_0_bias => mx->nd->array([.35, .35]), |
| fc_1_weight => mx->nd->array([[.40, .45], [.50, .55]]), |
| fc_1_bias => mx->nd->array([.60, .60])}; |
| $mod->init_params(arg_params=>$arg_params); |
| |
| my $data_batch = <$train_data>; |
| $mon->tic(); |
| $mod->forward_backward($data_batch); |
| my $res = $mon->toc(); |
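    # toc() yields [batch number, array name, stat value] triples; count how
    # many monitored arrays fall under each name prefix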
| my $keys = ['act_0', 'act_1', 'data', 'fc_0', 'fc_1', 'softmax']; |
| my $mon_result_counts = [0, 0, 0, 0, 0, 0]; |
| ok(@$res == 21); |
| for my $r (@$res) |
| { |
| my ($n, $k, $v) = @$r; |
| enumerate(sub { |
| my ($idx, $key) = @_; |
| if($k =~ /^$key/) |
| { |
| $mon_result_counts->[$idx] += 1; |
| return; |
| } |
| }, $keys); |
| } |
| is_deeply($mon_result_counts, [2, 2, 1, 6, 6, 4]); |
| } |
| |
| sub test_module_dtype |
| { |
| my $dtype = 'float16'; |
| my $dshape = [3, 8, 7]; |
| |
| my $sym = mx->sym->Variable('data'); |
| $sym = mx->sym->Activation(data=>$sym, act_type=>'relu', __layout__=>'TNC'); |
| |
| my $mod = mx->mod->Module($sym, data_names=>['data'], context => [mx->cpu(0), mx->cpu(1)]); |
| $mod->bind(data_shapes=>[ |
| mx->io->DataDesc('data', $dshape, dtype => $dtype, layout=>'TNC') |
| ]); |
| $mod->init_params(); |
| $mod->forward( |
| mx->io->DataBatch( |
| data=>[mx->nd->ones($dshape, dtype=>$dtype)] |
| ) |
| ); |
| $mod->backward([mx->nd->ones($dshape, dtype=>$dtype)]); |
| |
| for my $x (@{ $mod->get_outputs() }) |
| { |
| is($x->dtype, $dtype); |
| } |
| } |
| |
| sub test_module_input_grads |
| { |
| my $a = mx->sym->Variable('a', __layout__=>'NC'); |
| my $b = mx->sym->Variable('b', __layout__=>'NC'); |
| my $c = mx->sym->Variable('c', __layout__=>'NC'); |
| |
| $c = $a + 2 * $b + 3 * $c; |
| my $net = mx->mod->Module( |
| $c, data_names=>['b', 'c', 'a'], |
| context=>[mx->cpu(0), mx->cpu(1)] |
| ); |
| $net->bind( |
| data_shapes => [['b', [5, 5]], ['c', [5, 5]], ['a', [5, 5]]], |
| inputs_need_grad => 1 |
| ); |
| $net->init_params(); |
| |
| $net->forward( |
| mx->io->DataBatch(data => [ |
| mx->nd->ones([5, 5]), |
| mx->nd->ones([5, 5]), |
| mx->nd->ones([5, 5]) |
| ]) |
| ); |
| $net->backward([mx->nd->ones([5, 5])]); |
| my $input_grads = $net->get_input_grads(); |
| my $b_grad = $input_grads->[0]->aspdl; |
| my $c_grad = $input_grads->[1]->aspdl; |
| my $a_grad = $input_grads->[2]->aspdl; |
| ok(($a_grad == 1)->all); |
| ok(($b_grad == 2)->all); |
| ok(($c_grad == 3)->all); |
| } |
| |
| sub test_executor_group |
| { |
| my $get_rnn_sym = sub { my ($num_layers, $num_words, $num_hidden, $num_embed, $seq_len, $sparse_embedding) = @_; |
| my $stack = mx->rnn->SequentialRNNCell(); |
| for my $i (0..$num_layers-1) |
| { |
| $stack->add(mx->rnn->LSTMCell(num_hidden=>$num_hidden, prefix=>"lstm_l${i}_")); |
| } |
| my $data = mx->sym->Variable('data'); |
| my $label = mx->sym->Variable('softmax_label'); |
| my $embed; |
| if($sparse_embedding) |
| { |
| my $embed_weight = mx->sym->Variable('embed_weight', stype=>'row_sparse'); |
| $embed = mx->sym->contrib->SparseEmbedding(data=>$data, input_dim=>$num_words, |
| weight=>$embed_weight, output_dim=>$num_embed, |
| name=>'embed'); |
| |
| } |
| else |
| { |
| $embed = mx->sym->Embedding(data=>$data, input_dim=>$num_words, |
| output_dim=>$num_embed, name=>'embed'); |
| } |
| |
| $stack->reset(); |
| my ($outputs, $states) = $stack->unroll($seq_len, inputs=>$embed, merge_outputs=>1); |
| |
| my $pred = mx->sym->Reshape($outputs, shape=>[-1, $num_hidden]); |
| $pred = mx->sym->FullyConnected(data=>$pred, num_hidden=>$num_words, name=>'pred'); |
| |
| $label = mx->sym->Reshape($label, shape=>[-1]); |
| $pred = mx->sym->SoftmaxOutput(data=>$pred, label=>$label, name=>'softmax'); |
| return $pred; |
| }; |
| |
| my $test_shared_exec_group = sub { my ($exec_grp_shared, $exec_grp_created, $shared_arg_names, $extra_args, $check_grads) = @_; |
| # Test shared data arrays |
| for my $i (0..@{ $exec_grp_shared->execs }-1) |
| { |
| # test same shared_data_arrays for two exec groups |
| my $shared_data_array1 = $exec_grp_shared->shared_data_arrays->[$i]; |
| my $shared_data_array2 = $exec_grp_created->shared_data_arrays->[$i]; |
| if(defined $extra_args) |
| { |
| ok(keys(%$shared_data_array1) == @$extra_args); |
| } |
| ok(keys(%$shared_data_array1) == keys(%$shared_data_array2)); |
| while(my ($k, $v) = each %{ $shared_data_array1 }) |
| { |
| if(defined $extra_args) |
| { |
| ok(grep { $_ eq $k } @$extra_args); |
| } |
| ok(exists $shared_data_array2->{$k}); |
| ok(same_array($v, $shared_data_array2->{$k})); |
| } |
| # Test shared argument arrays and gradient arrays |
| my $exec_shared = $exec_grp_shared->execs->[$i]; |
| my $exec_created = $exec_grp_created->execs->[$i]; |
| if(defined $shared_arg_names) |
| { |
| # test shared arguments |
| for my $arg_name (@$shared_arg_names) |
| { |
| ok(exists $exec_created->arg_dict->{$arg_name}); |
| ok(same_array($exec_shared->arg_dict->{$arg_name}, $exec_created->arg_dict->{$arg_name})); |
| } |
| # test shared argument gradients |
| for my $arg_name (@$shared_arg_names) |
| { |
| if($check_grads) |
| { |
| ok(exists $exec_created->grad_dict->{$arg_name}); |
| ok(same_array($exec_shared->grad_dict->{$arg_name}, $exec_created->grad_dict->{$arg_name})); |
| } |
| } |
| } |
| my $grad_req = $exec_grp_shared->grad_req; |
| while(my ($arg_name, $grad) = each %{ $grad_req }) |
| { |
| ok($grad eq $exec_grp_created->grad_req->{$arg_name}); |
| } |
| } |
| }; |
| |
| for my $sparse_embedding (0, 1) |
| { |
| my $contexts = [mx->cpu(0), mx->cpu(1)]; |
| my $workload = [(1) x scalar(@$contexts)]; |
| my $batch_size = 32; |
| my $max_bucket_size = 80; |
| my $num_words = 1000; |
| my $num_hidden = 100; |
| my $num_embed = 200; |
| my $data_shapes = [['data', [$batch_size, $max_bucket_size]]]; |
| my $label_shapes = [['softmax_label', [$batch_size, $max_bucket_size]]]; |
| |
        # generate an rnn sym with #layers=3
| my $sym = $get_rnn_sym->(3, $num_words, $num_hidden, |
| $num_embed, $max_bucket_size, $sparse_embedding); |
| my $arg_names1 = $sym->list_arguments(); |
| my $input_names = ['data', 'softmax_label']; |
| my $shared_arg_names = [grep { !/^(?:data|softmax_label)$/ } @$arg_names1]; |
| my $exec_group1 = AI::MXNet::DataParallelExecutorGroup->new( |
| symbol=>$sym, contexts=>$contexts, |
| workload=>$workload, data_shapes=>$data_shapes, |
| label_shapes=>$label_shapes, param_names=>$shared_arg_names, |
| for_training=>1, inputs_need_grad=>0 |
| ); |
| # shared_data_arrays should only have input "data" and "softmax_label" arrays |
| for my $i (0..@{$contexts}-1) |
| { |
| ok(keys(%{$exec_group1->shared_data_arrays->[$i]}) == @$input_names); |
| for my $name (@$input_names) |
| { |
| ok(exists $exec_group1->shared_data_arrays->[$i]->{$name}); |
| } |
| } |
| # generate an rnn sym with #layers=5 |
| $sym = $get_rnn_sym->(5, $num_words, $num_hidden, |
| $num_embed, $max_bucket_size, $sparse_embedding); |
| my $arg_names2 = $sym->list_arguments(); |
| my $exec_group2 = AI::MXNet::DataParallelExecutorGroup->new(symbol=>$sym, contexts=>$contexts, |
| workload=>$workload, data_shapes=>$data_shapes, |
| label_shapes=>$label_shapes, param_names=>$shared_arg_names, |
| for_training=>1, inputs_need_grad=>0, |
| shared_group=>$exec_group1); |
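        # arguments unique to the 5-layer sym (its inputs plus the two extra
        # layers' params) are the "extra" entries expected in shared_data_arrays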
| my %shared_arg_names = map { $_ => 1 } @$shared_arg_names; |
| my $extra_args = [grep { not exists $shared_arg_names{$_} } @$arg_names2]; |
| $test_shared_exec_group->( |
| $exec_group1, $exec_group2, |
| $shared_arg_names, $extra_args, not $sparse_embedding |
| ); |
| } |
| } |
| |
| sub test_factorization_machine_module |
| { |
| mx->random->seed(11); |
| my $check_factorization_machine_module = sub { my ($optimizer, $num_epochs) = @_; |
| my $fm = sub { my ($factor_size, $feature_dim, $init) = @_; |
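            # factorization machine: linear term dot(x, w1_weight) + w1_bias plus
            # pairwise term 0.5*(sum((x.V)^2) - dot(x^2, rowsum(V^2))), all built
            # from sparse (csr/row_sparse) symbols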
| my $x = mx->symbol->Variable("data", stype=>'csr'); |
| my $v = mx->symbol->Variable("v", shape=>[$feature_dim, $factor_size], |
| init=>$init, stype=>'row_sparse'); |
| |
| my $w1_weight = mx->symbol->var('w1_weight', shape=>[$feature_dim, 1], |
| init=>$init, stype=>'row_sparse'); |
| my $w1_bias = mx->symbol->var('w1_bias', shape=>[1]); |
| my $w1 = mx->symbol->broadcast_add(mx->symbol->dot($x, $w1_weight), $w1_bias); |
| |
| my $v_s = mx->symbol->_square_sum(data=>$v, axis=>1, keepdims=>1); |
| my $x_s = mx->symbol->square(data=>$x); |
| my $bd_sum = mx->sym->dot($x_s, $v_s); |
| |
| my $w2 = mx->symbol->dot($x, $v); |
| my $w2_squared = 0.5 * mx->symbol->square(data=>$w2); |
| |
| my $w_all = mx->symbol->Concat($w1, $w2_squared, dim=>1); |
| my $sum1 = mx->symbol->sum(data=>$w_all, axis=>1, keepdims=>1); |
| my $sum2 = 0.5 * mx->symbol->negative($bd_sum); |
| my $model = mx->sym->elemwise_add($sum1, $sum2); |
| |
| my $y = mx->symbol->Variable("label"); |
| $model = mx->symbol->LinearRegressionOutput(data=>$model, label=>$y); |
            return $model;
| }; |
| |
| # model |
| my $init = mx->initializer->Normal(sigma=>0.01); |
| my $factor_size = 4; |
| my $feature_dim = 10000; |
| my $model = $fm->($factor_size, $feature_dim, $init); |
| |
| # data iter |
| my $num_batches = 5; |
| my $batch_size = 64; |
| my $num_samples = $batch_size * $num_batches; |
| # generate some random csr data |
| my $csr_nd = rand_ndarray([$num_samples, $feature_dim], 'csr', 0.1); |
| my $label = mx->nd->ones([$num_samples,1]); |
| # the alternative is to use LibSVMIter |
| my $train_iter = mx->io->NDArrayIter(data=>$csr_nd, |
| label=>Hash::Ordered->new(label => $label), |
| batch_size=>$batch_size, |
| last_batch_handle=>'discard'); |
| # create module |
| my $mod = mx->mod->Module(symbol=>$model, data_names=>['data'], label_names=>['label']); |
        # allocate memory given the input data and label shapes
| $mod->bind(data_shapes=>$train_iter->provide_data, label_shapes=>$train_iter->provide_label); |
        # initialize parameters with the Normal initializer defined above
| $mod->init_params(initializer=>$init); |
| my $expected_accuracy; |
| if($optimizer eq 'sgd') |
| { |
            # use Sparse SGD with learning rate 0.01 to train
| my $sgd = mx->optimizer->SGD(momentum=>0.1, clip_gradient=>5.0, learning_rate=>0.01, |
| rescale_grad=>1.0/$batch_size); |
| $mod->init_optimizer(optimizer=>$sgd); |
| $num_epochs //= 10; |
| $expected_accuracy = 0.02; |
| } |
| elsif($optimizer eq 'adam') |
| { |
| # use Sparse Adam to train |
| my $adam = mx->optimizer->Adam(clip_gradient=>5.0, learning_rate=>0.0005, |
| rescale_grad=>1.0/$batch_size); |
| $mod->init_optimizer(optimizer=>$adam); |
| $num_epochs //= 10; |
| $expected_accuracy = 0.05; |
| } |
| elsif($optimizer eq 'adagrad') |
| { |
            # use Sparse AdaGrad with learning rate 0.01 to train
| my $adagrad = mx->optimizer->AdaGrad(clip_gradient=>5.0, learning_rate=>0.01, |
| rescale_grad=>1.0/$batch_size); |
| $mod->init_optimizer(optimizer=>$adagrad); |
| $num_epochs //= 20; |
| $expected_accuracy = 0.09; |
| } |
| else |
| { |
| die "Unsupported optimizer type $optimizer specified"; |
| } |
        # use mean squared error as the metric
| my $metric = mx->metric->create('MSE'); |
        # train for 'num_epochs' epochs
| for my $epoch (1..$num_epochs) |
| { |
| $train_iter->reset(); |
| $metric->reset(); |
| while(my $batch = <$train_iter>) |
| { |
| $mod->forward($batch, is_train=>1); # compute predictions |
                $mod->update_metric($metric, $batch->label); # accumulate the prediction error
| $mod->backward(); # compute gradients |
| $mod->update(); # update parameters |
| } |
| } |
| if($num_epochs > 1) |
| { |
| ok(($metric->get)[1] < $expected_accuracy); |
| } |
| }; |
| |
| $check_factorization_machine_module->('sgd'); |
| $check_factorization_machine_module->('adam'); |
| $check_factorization_machine_module->('adagrad'); |
| } |
| |
| |
| sub test_module_initializer |
| { |
| my $regression_model = sub { my ($m) = @_; |
| my $x = mx->symbol->var("data", stype=>'csr'); |
| my $v = mx->symbol->var("v", shape=>[$m, 1], init=>mx->init->Uniform(scale=>.1), |
| stype=>'row_sparse'); |
| my $model = mx->symbol->dot(lhs=>$x, rhs=>$v); |
| my $y = mx->symbol->Variable("label"); |
| $model = mx->symbol->LinearRegressionOutput(data=>$model, label=>$y, name=>"out"); |
        return $model;
| }; |
| |
| my ($n, $m) = (128, 100); |
| my $model = $regression_model->($m); |
| |
| my $data = mx->nd->zeros([$n, $m], stype=>'csr'); |
| my $label = mx->nd->zeros([$n, 1]); |
| my $iterator = mx->io->NDArrayIter(data=>$data, label=>Hash::Ordered->new(label => $label), |
| batch_size=>$n, last_batch_handle=>'discard'); |
| |
| # create module |
| my $mod = mx->mod->Module(symbol=>$model, data_names=>['data'], label_names=>['label']); |
| $mod->bind(data_shapes=>$iterator->provide_data, label_shapes=>$iterator->provide_label); |
| $mod->init_params(); |
| my $v = $mod->_arg_params->{v}; |
| ok($v->stype eq 'row_sparse'); |
| ok($v->aspdl->sum != 0); |
| } |
| |
| sub test_module_set_params |
| { |
| # data iter |
| mx->random->seed(11); |
| my $data = mx->nd->array([[0.05, .10]]); |
| my $label = mx->nd->array([[.01, 0.99]]); |
| my $train_data = mx->io->NDArrayIter(data => $data, label => $label, batch_size => 1); |
| |
| # symbols |
| my $x = mx->symbol->Variable('data'); |
| $x = mx->symbol->FullyConnected(name=>'fc_0', data=>$x, num_hidden=>2); |
| $x = mx->symbol->Activation(name=>"act_0", data=>$x, act_type=>'sigmoid'); |
| $x = mx->symbol->FullyConnected(name=>'fc_1', data=>$x, num_hidden=>2); |
| $x = mx->symbol->Activation(name=>"act_1", data=>$x, act_type=>'sigmoid'); |
| $x = mx->symbol->LinearRegressionOutput(data=>$x, name=>'softmax', grad_scale=>2); |
| |
| # create module |
| my $mod = mx->mod->Module($x, context=>[mx->cpu()]); |
| $mod->bind(data_shapes => $train_data->provide_data, label_shapes=>$train_data->provide_label, |
| for_training=>1); |
| |
| my $arg_params_correct = {fc_0_weight => mx->nd->array([[.15, .20], [.25, .30]]), |
| fc_0_bias => mx->nd->array([.35, .35]), |
| fc_1_weight => mx->nd->array([[.40, .45], [.50, .55]]), |
| fc_1_bias => mx->nd->array([.60, .60])}; |
| |
| my $arg_params_missing = {fc_0_weight => mx->nd->array([[.15, .20], [.25, .30]]), |
| fc_0_bias => mx->nd->array([.35, .35]), |
| fc_1_weight => mx->nd->array([[.40, .45], [.50, .55]])}; |
| |
| my $arg_params_extra = {fc_0_weight => mx->nd->array([[.15, .20], [.25, .30]]), |
| fc_0_bias => mx->nd->array([.35, .35]), |
| fc_1_weight=> mx->nd->array([[.40, .45], [.50, .55]]), |
| fc_1_bias => mx->nd->array([.60, .60]), |
| fc_2_weight => mx->nd->array([.60, .60])}; |
| |
| my $arg_params_missing_extra = {fc_3_weight => mx->nd->array([.60, .60])}; |
| |
| # test regular set_params |
| $mod->set_params($arg_params_correct, {}, force_init=>1); |
| |
| # test allow missing |
| $mod->set_params($arg_params_missing, {}, allow_missing=>1, force_init=>1); |
| ok(dies_like(sub { $mod->set_params($arg_params_missing, {}, force_init=>1, allow_missing=>0); }, qr/fc_/)); |
| |
| # test allow extra |
| $mod->set_params($arg_params_extra, {}, force_init=>1, allow_missing=>1, allow_extra=>1); |
| ok(dies_like(sub { $mod->set_params($arg_params_extra, {}, force_init=>1, allow_missing=>1, allow_extra=>0); }, qr/fc_/)); |
| |
| # test allow missing + extra, this will throw a runtime error |
| ok(dies_like(sub { $mod->set_params($arg_params_missing_extra, {}, force_init=>1, allow_missing=>1, allow_extra=>0); }, qr/fc_/)); |
| } |
| |
| sub test_forward_reshape |
| { |
| my $num_class = 10; |
| my $data1 = mx->sym->Variable('data1'); |
| my $data2 = mx->sym->Variable('data2'); |
| my $conv1 = mx->sym->Convolution(data=>$data1, kernel=>[2, 2], num_filter=>2, stride=>[2, 2]); |
| my $conv2 = mx->sym->Convolution(data=>$data2, kernel=>[3, 3], num_filter=>3, stride=>[1, 1]); |
| my $pooling1 = mx->sym->Pooling(data=>$conv1, kernel=>[2, 2], stride=>[1, 1], pool_type=>"avg"); |
| my $pooling2 = mx->sym->Pooling(data=>$conv2, kernel=>[2, 2], stride=>[1, 1], pool_type=>"max"); |
| my $flatten1 = mx->sym->flatten(data=>$pooling1); |
| my $flatten2 = mx->sym->flatten(data=>$pooling2); |
| my $sum = mx->sym->sum(data=>$flatten1, axis=>1) + mx->sym->sum(data=>$flatten2, axis=>1); |
| my $fc = mx->sym->FullyConnected(data=>$sum, num_hidden=>$num_class); |
| my $sym = mx->sym->SoftmaxOutput(data=>$fc, name=>'softmax'); |
| |
| my $dshape1 = [10, 3, 64, 64]; |
| my $dshape2 = [10, 3, 32, 32]; |
| my $lshape = [10]; |
| |
| my $mod = mx->mod->Module(symbol=>$sym, data_names=>['data1', 'data2'], |
| label_names=>['softmax_label']); |
| $mod->bind(data_shapes=>[['data1', $dshape1], ['data2', $dshape2]], |
| label_shapes=>[['softmax_label', $lshape]]); |
| $mod->init_params(); |
| $mod->init_optimizer(optimizer_params=>{learning_rate => 0.01}); |
| |
| # Train with original data shapes |
| my $data_batch = mx->io->DataBatch(data=>[mx->nd->random_uniform(0, 9, $dshape1), |
| mx->nd->random_uniform(5, 15, $dshape2)], |
| label=>[mx->nd->ones($lshape)]); |
| $mod->forward($data_batch); |
| is_deeply($mod->get_outputs->[0]->shape, [$lshape->[0], $num_class]); |
| $mod->backward(); |
| $mod->update(); |
| |
| # Train with different batch size |
| $dshape1 = [3, 3, 64, 64]; |
| $dshape2 = [3, 3, 32, 32]; |
| $lshape = [3]; |
| $data_batch = mx->io->DataBatch(data=>[mx->nd->random_uniform(0, 9, $dshape1), |
| mx->nd->random_uniform(5, 15, $dshape2)], |
| label=>[mx->nd->ones($lshape)]); |
| $mod->forward($data_batch); |
| is_deeply($mod->get_outputs->[0]->shape, [$lshape->[0], $num_class]); |
| $mod->backward(); |
| $mod->update(); |
| |
| $dshape1 = [20, 3, 64, 64]; |
| $dshape2 = [20, 3, 32, 32]; |
| $lshape = [20]; |
| $data_batch = mx->io->DataBatch(data=>[mx->nd->random_uniform(3, 5, $dshape1), |
| mx->nd->random_uniform(10, 25, $dshape2)], |
| label=>[mx->nd->ones($lshape)]); |
| $mod->forward($data_batch); |
| is_deeply($mod->get_outputs->[0]->shape, [$lshape->[0], $num_class]); |
| $mod->backward(); |
| $mod->update(); |
| |
    # Train with both a different batch size and different data shapes
| $dshape1 = [20, 3, 120, 120]; |
| $dshape2 = [20, 3, 32, 64]; |
| $lshape = [20]; |
| $data_batch = mx->io->DataBatch(data=>[mx->nd->random_uniform(0, 9, $dshape1), |
| mx->nd->random_uniform(5, 15, $dshape2)], |
| label=>[mx->nd->ones($lshape)]); |
| $mod->forward($data_batch); |
| is_deeply($mod->get_outputs->[0]->shape, [$lshape->[0], $num_class]); |
| $mod->backward(); |
| $mod->update(); |
| |
| $dshape1 = [5, 3, 28, 40]; |
| $dshape2 = [5, 3, 24, 16]; |
| $lshape = [5]; |
| $data_batch = mx->io->DataBatch(data=>[mx->nd->random_uniform(0, 9, $dshape1), |
| mx->nd->random_uniform(15, 25, $dshape2)], |
| label=>[mx->nd->ones($lshape)]); |
| $mod->forward($data_batch); |
| is_deeply($mod->get_outputs->[0]->shape, [$lshape->[0], $num_class]); |
| $mod->backward(); |
| $mod->update(); |
| |
    # Test score
| my $dataset_shape1 = [30, 3, 30, 30]; |
| my $dataset_shape2 = [30, 3, 20, 40]; |
| my $labelset_shape = [30]; |
| |
| my $eval_dataiter = mx->io->NDArrayIter(data=>[mx->nd->random_uniform(0, 9, $dataset_shape1), |
| mx->nd->random_uniform(15, 25, $dataset_shape2)], |
| label=>[mx->nd->ones($labelset_shape)], |
| batch_size=>5); |
| ok(keys %{ $mod->score($eval_dataiter, 'acc') } == 1); |
| |
    # Test prediction
| $dshape1 = [1, 3, 30, 30]; |
| $dshape2 = [1, 3, 20, 40]; |
| $dataset_shape1 = [10, 3, 30, 30]; |
| $dataset_shape2 = [10, 3, 20, 40]; |
| |
| my $pred_dataiter = mx->io->NDArrayIter(data=>[mx->nd->random_uniform(0, 9, $dataset_shape1), |
| mx->nd->random_uniform(15, 25, $dataset_shape2)]); |
| $mod->bind(data_shapes=>[['data1', $dshape1], ['data2', $dshape2]], |
| for_training=>0, force_rebind=>1); |
| is_deeply($mod->predict($pred_dataiter)->shape, [10, $num_class]); |
| |
| } |
| |
| sub test_forward_acceptable_input |
| { |
| my $data = mx->sym->Variable('data'); |
| my $out = $data * 2; |
| my $mod = mx->mod->Module(symbol => $out); |
| $mod->bind(data_shapes => [['data', [1, 10]]]); |
| $mod->init_params(); |
| is_deeply($mod->predict(mx->nd->ones([1, 10]))->shape, [1, 10]); |
| is_deeply($mod->predict(mx->nd->ones([1, 10])->aspdl)->shape, [1, 10]); |
| } |
| |
| test_module_input_grads(); |
| test_module_dtype(); |
| test_monitor(); |
| test_module_switch_bucket(); |
| test_module_layout(); |
| test_module_states(); |
| test_module_reshape(); |
| test_save_load(); |
| test_executor_group(); |
| test_module_set_params(); |
| test_forward_reshape(); |
| test_module_initializer(); |
| test_factorization_machine_module(); |
| test_forward_acceptable_input(); |