# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
package AI::MXNet::InitDesc;
use Mouse;
use AI::MXNet::Function::Parameters;
=head1 NAME
AI::MXNet::InitDesc - A container describing the initialization pattern of a variable.
=head2 new
Parameters
----------
name : str
name of variable
attrs : hash ref of str to str
attributes of this variable taken from AI::MXNet::Symbol->attr_dict
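For example, a minimal sketch (the 'fc1_weight' name is made up; a single positional argument is also accepted as the name):

    my $desc = AI::MXNet::InitDesc->new(name => 'fc1_weight');
    print "$desc\n";    # stringifies to its name: fc1_weight
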
=cut
has 'name' => (is => 'ro', isa => 'Str', required => 1);
has 'attrs' => (is => 'rw', isa => 'HashRef[Str]', lazy => 1, default => sub { +{} });
use overload '""' => sub { shift->name };
around BUILDARGS => sub {
my $orig = shift;
my $class = shift;
return $class->$orig(name => $_[0]) if @_ == 1;
return $class->$orig(@_);
};
# Base class for Initializers
package AI::MXNet::Initializer;
use Mouse;
use AI::MXNet::NS;
use AI::MXNet::Base qw(:DEFAULT pzeros pceil);
use AI::MXNet::NDArray;
use JSON::PP;
use overload "&{}" => sub { my $self = shift; sub { $self->call(@_) } },
'""' => sub {
my $self = shift;
my ($name) = ref($self) =~ /::(\w+)$/;
encode_json(
[lc $name,
$self->kwargs//{ map { $_ => "".$self->$_ } $self->meta->get_attribute_list }
]);
},
fallback => 1;
has 'kwargs' => (is => 'rw', init_arg => undef, isa => 'HashRef');
has '_verbose' => (is => 'rw', isa => 'Bool', lazy => 1, default => 0);
has '_print_func' => (is => 'rw', isa => 'CodeRef', lazy => 1,
default => sub {
return sub {
my $x = shift;
return ($x->norm/sqrt($x->size))->asscalar;
};
}
);
=head1 NAME
AI::MXNet::Initializer - Base class for all Initializers
=head1 DESCRIPTION
The base class AI::MXNet::Initializer defines default behaviors for initializing the various non-weight
parameters, such as setting a bias to 1; subclasses then define how to initialize the weights.
The following classes are currently available:
mx->init->Uniform Initializes weights with random values uniformly sampled from a given range.
mx->init->Normal Initializes weights with random values sampled from a normal distribution with a mean of zero and standard deviation of sigma.
mx->init->Load Initializes variables by loading data from a file or a hash ref.
mx->init->Mixed Initializes parameters using multiple initializers.
mx->init->Zero Initializes weights to zero.
mx->init->One Initializes weights to one.
mx->init->Constant Initializes the weights to a given value.
mx->init->Orthogonal Initializes the weight as an orthogonal matrix.
mx->init->Xavier Returns an initializer performing Xavier initialization for weights.
mx->init->MSRAPrelu Initializes the weight according to the MSRA paper.
mx->init->Bilinear Initializes the weight for upsampling layers.
mx->init->FusedRNN Initializes parameters for fused RNN layers.
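A hedged usage sketch (the array shape and the 'fc1_weight' name below are made up for illustration; assumes the usual mx alias for AI::MXNet):

    use AI::MXNet qw(mx);
    my $arr  = mx->nd->zeros([64, 128]);
    my $init = mx->init->Xavier(rnd_type => 'gaussian', factor_type => 'in', magnitude => 2);
    # initializers are callable; dispatch happens on the InitDesc (or name)
    $init->(mx->init->InitDesc('fc1_weight'), $arr);
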
=head2 register
Register an initializer class to the AI::MXNet::Initializer factory.
=cut
=head2 set_verbosity
Switch on/off verbose mode
Parameters
----------
$verbose : bool
switch on/off verbose mode
$print_func : CodeRef
A function that computes statistics of initialized arrays.
Takes an AI::MXNet::NDArray and returns a scalar. Defaults to the
2-norm scaled by the square root of the array size: norm(x)/sqrt(size(x)).
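A hedged example (assumes $init is an existing initializer object; the callback below swaps in a mean-absolute-value statistic, relying on NDArray's abs/sum/asscalar methods):

    $init->set_verbosity(
        1,
        sub { my $x = shift; ($x->abs->sum / $x->size)->asscalar }
    );
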
=cut
method set_verbosity(Bool $verbose=0, CodeRef $print_func=)
{
$self->_verbose($verbose);
$self->_print_func($print_func) if defined $print_func;
}
method _verbose_print($desc, $init, $arr)
{
if($self->_verbose and defined $self->_print_func)
{
AI::MXNet::Logging->info('Initialized %s as %s: %s', $desc, $init, $self->_print_func->($arr));
}
}
my %init_registry;
method get_init_registry()
{
return \%init_registry;
}
method register()
{
my ($name) = $self =~ /::(\w+)$/;
my $orig_name = $name;
$name = lc $name;
if(exists $init_registry{ $name })
{
my $existing = $init_registry{ $name };
warn(
"WARNING: New initializer $self.$name"
."is overriding existing initializer $existing.$name"
);
}
$init_registry{ $name } = $self;
{
no strict 'refs';
no warnings 'redefine';
*{"$orig_name"} = sub { shift; $self->new(@_) };
*InitDesc = sub { shift; AI::MXNet::InitDesc->new(@_) };
}
}
=head2 call
Parameters
----------
$desc : AI::MXNet::InitDesc|str
the name of the corresponding ndarray,
or an object that describes the initializer.
$arr : AI::MXNet::NDArray
an ndarray to be initialized.
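For example, a sketch ($init is any concrete initializer, $arr a pre-allocated NDArray, and 'conv0_weight' a made-up name; the object is callable through its overloaded &{}):

    my $desc = mx->init->InitDesc('conv0_weight');
    $init->($desc, $arr);    # equivalent to $init->call($desc, $arr)
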
=cut
method call(Str|AI::MXNet::InitDesc $desc, AI::MXNet::NDArray $arr)
{
return $self->_legacy_init($desc, $arr) unless blessed $desc;
my $init = $desc->attrs->{ __init__ };
if($init)
{
my ($klass, $kwargs);
if(exists $self->get_init_registry->{ lc $init })
{
$klass = $init;
$kwargs = {};
}
else
{
($klass, $kwargs) = @{ decode_json($init) };
}
$self->get_init_registry->{ lc $klass }->new(%{ $kwargs })->_init_weight("$desc", $arr);
$self->_verbose_print($desc, $init, $arr);
}
else
{
$desc = "$desc";
if($desc =~ /(weight|bias|gamma|beta)$/)
{
my $method = "_init_$1";
$self->$method($desc, $arr);
$self->_verbose_print($desc, $1, $arr);
}
elsif($desc =~ /min$/)
{
$self->_init_zero($desc, $arr);
$self->_verbose_print($desc, 'min', $arr);
}
elsif($desc =~ /max$/)
{
$self->_init_one($desc, $arr);
$self->_verbose_print($desc, 'max', $arr);
}
else
{
$self->_init_default($desc, $arr)
}
}
}
method _legacy_init(Str $name, AI::MXNet::NDArray $arr)
{
warnings::warnif(
'deprecated',
'Calling initializer with init($str, $NDArray) has been deprecated. '.
'Please use init(mx->init->InitDesc(...), $NDArray) instead.'
);
if($name =~ /^upsampling/)
{
$self->_init_bilinear($name, $arr);
}
elsif($name =~ /^stn_loc/ and $name =~ /weight$/)
{
$self->_init_zero($name, $arr);
}
elsif($name =~ /^stn_loc/ and $name =~ /bias$/)
{
$self->_init_loc_bias($name, $arr);
}
elsif($name =~ /bias$/)
{
$self->_init_bias($name, $arr);
}
elsif($name =~ /gamma$/)
{
$self->_init_gamma($name, $arr);
}
elsif($name =~ /beta$/)
{
$self->_init_beta($name, $arr);
}
elsif($name =~ /weight$/)
{
$self->_init_weight($name, $arr);
}
elsif($name =~ /moving_mean$/)
{
$self->_init_zero($name, $arr);
}
elsif($name =~ /moving_var$/)
{
$self->_init_one($name, $arr);
}
elsif($name =~ /moving_inv_var$/)
{
$self->_init_zero($name, $arr);
}
elsif($name =~ /moving_avg$/)
{
$self->_init_zero($name, $arr);
}
elsif($name =~ /min$/)
{
$self->_init_zero($name, $arr);
}
elsif($name =~ /max$/)
{
$self->_init_one($name, $arr);
}
else
{
$self->_init_default($name, $arr);
}
}
*slice = *call;
method _init_bilinear($name, $arr)
{
my $pdl_type = PDL::Type->new(DTYPE_MX_TO_PDL->{ 'float32' });
my $weight = pzeros($pdl_type, $arr->size);
my $shape = $arr->shape;
my $size = $arr->size;
# bilinear interpolation kernel: $f is the upsampling factor, $c its center
my $f = pceil($shape->[3] / 2)->at(0);
my $c = (2 * $f - 1 - $f % 2) / (2 * $f);
for my $i (0..($size-1))
{
my $x = $i % $shape->[3];
my $y = ($i / $shape->[3]) % $shape->[2];
$weight->index($i) .= (1 - abs($x / $f - $c)) * (1 - abs($y / $f - $c));
}
$arr .= $weight->reshape(reverse @{ $shape });
}
method _init_loc_bias($name, $arr)
{
confess("assert error shape[0] == 6")
unless $arr->shape->[0] == 6;
$arr .= [1.0, 0, 0, 0, 1.0, 0];
}
method _init_zero($name, $arr)
{
$arr .= 0;
}
method _init_one($name, $arr)
{
$arr .= 1;
}
method _init_bias($name, $arr)
{
$arr .= 0;
}
method _init_gamma($name, $arr)
{
$arr .= 1;
}
method _init_beta($name, $arr)
{
$arr .= 0;
}
method _init_weight($name, $arr)
{
confess("Virtual method, subclass must override it");
}
method _init_default($name, $arr)
{
confess(
"Unknown initialization pattern for $name. "
.'Default initialization is now limited to '
.'"weight", "bias", "gamma" (1.0), and "beta" (0.0).'
.'Please use mx.sym.Variable(init=mx.init.*) to set initialization pattern'
);
}
=head1 NAME
AI::MXNet::Load - Initialize by loading pretrained parameters from a hash ref or a file.
=cut
=head2 new
Parameters
----------
param: str or HashRef[AI::MXNet::NDArray]
a hash ref of NDArrays, or the name of a file to load them from.
default_init: Initializer
default initializer when a name is not found in the param hash ref.
verbose: bool
log the names when initializing.
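A construction sketch ('model.params' is a hypothetical file of saved NDArrays):

    my $init = AI::MXNet::Load->new(
        param        => AI::MXNet::NDArray->load('model.params'),
        default_init => mx->init->Uniform(0.07),
        verbose      => 1
    );
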
=cut
package AI::MXNet::Load;
use Mouse;
extends 'AI::MXNet::Initializer';
has 'param' => (is => "rw", isa => 'Str|HashRef[AI::MXNet::NDArray]', required => 1);
has 'default_init' => (is => "rw", isa => "AI::MXNet::Initializer");
has 'verbose' => (is => "rw", isa => "Int", default => 0);
sub BUILD
{
my $self = shift;
# accept either a hash ref of NDArrays or a file name to load them from
my $param = ref($self->param) ? $self->param : AI::MXNet::NDArray->load($self->param);
my %self_param;
while(my ($name, $arr) = each %{ $param })
{
$name =~ s/^(?:arg|aux)://;
$self_param{ $name } = $arr;
}
$self->param(\%self_param);
}
method call(Str $name, AI::MXNet::NDArray $arr)
{
if(exists $self->param->{ $name })
{
my $target_shape = join(',', @{ $arr->shape });
my $param_shape = join(',', @{ $self->param->{ $name }->shape });
confess(
"Parameter $name cannot be initialized from loading. "
."Shape mismatch, target $target_shape vs loaded $param_shape"
) unless $target_shape eq $param_shape;
$arr .= $self->param->{ $name };
AI::MXNet::Log->info("Initialized $name by loading") if $self->verbose;
}
else
{
confess(
"Cannot Initialize $name. Not found in loaded param "
."and no default Initializer is provided."
) unless defined $self->default_init;
$self->default_init->($name, $arr);
AI::MXNet::Logging->info("Initialized $name by default") if $self->verbose;
}
}
*slice = *call;
=head1 NAME
AI::MXNet::Mixed - A container with multiple initializer patterns.
=cut
=head2 new
Parameters
----------
patterns: array ref of str
array ref of regular expression patterns to match parameter names.
initializers: array ref of AI::MXNet::Initializer objects.
array ref of Initializers corresponding to the patterns.
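A usage sketch; note that patterns are tried in hash-key order, which is unspecified, so with overlapping patterns such as the ".*" fallback below, which initializer is applied to a matching name is not guaranteed:

    my $init = AI::MXNet::Mixed->new(
        patterns     => ['bias$', '.*'],
        initializers => [mx->init->Zero(), mx->init->Uniform(0.1)]
    );
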
=cut
package AI::MXNet::Mixed;
use Mouse;
extends 'AI::MXNet::Initializer';
has "map" => (is => "rw", init_arg => undef);
has "patterns" => (is => "ro", isa => 'ArrayRef[Str]');
has "initializers" => (is => "ro", isa => 'ArrayRef[AI::MXnet::Initializer]');
sub BUILD
{
my $self = shift;
confess("patterns count != initializers count")
unless (@{ $self->patterns } == @{ $self->initializers });
my %map;
@map{ @{ $self->patterns } } = @{ $self->initializers };
$self->map(\%map);
}
method call(Str $name, AI::MXNet::NDArray $arr)
{
for my $pattern (keys %{ $self->map })
{
if($name =~ /$pattern/)
{
$self->map->{$pattern}->($name, $arr);
return;
}
}
confess(
"Parameter name $name did not match any pattern. Consider"
."add a \".*\" pattern at the and with default Initializer."
);
}
package AI::MXNet::Zero;
use Mouse;
extends 'AI::MXNet::Initializer';
method _init_weight(Str $name, AI::MXNet::NDArray $arr)
{
$arr .= 0;
}
__PACKAGE__->register;
package AI::MXNet::Zeros;
use Mouse;
extends 'AI::MXNet::Zero';
__PACKAGE__->register;
package AI::MXNet::One;
use Mouse;
extends 'AI::MXNet::Initializer';
method _init_weight(Str $name, AI::MXNet::NDArray $arr)
{
$arr .= 1;
}
__PACKAGE__->register;
package AI::MXNet::Ones;
use Mouse;
extends 'AI::MXNet::One';
__PACKAGE__->register;
package AI::MXNet::Constant;
use Mouse;
extends 'AI::MXNet::Initializer';
has 'value' => (is => 'ro', isa => 'Num', required => 1);
around BUILDARGS => sub {
my $orig = shift;
my $class = shift;
return $class->$orig(value => $_[0]) if @_ == 1;
return $class->$orig(@_);
};
method _init_weight(Str $name, AI::MXNet::NDArray $arr)
{
$arr .= $self->value;
}
__PACKAGE__->register;
=head1 NAME
AI::MXNet::Uniform - Initialize the weight with uniform random values.
=cut
=head1 DESCRIPTION
Initialize the weight with uniform random values drawn from the range [-scale, scale].
Parameters
----------
scale : float, optional
The scale of the uniform distribution.
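For example (a single positional argument is taken as the scale):

    my $init = mx->init->Uniform(0.07);    # same as ->Uniform(scale => 0.07)
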
=cut
package AI::MXNet::Uniform;
use Mouse;
extends 'AI::MXNet::Initializer';
has "scale" => (is => "ro", isa => "Num", default => 0.07);
around BUILDARGS => sub {
my $orig = shift;
my $class = shift;
return $class->$orig(scale => $_[0]) if @_ == 1;
return $class->$orig(@_);
};
method _init_weight(Str $name, AI::MXNet::NDArray $arr)
{
AI::MXNet::Random->uniform(-$self->scale, $self->scale, { out => $arr });
}
__PACKAGE__->register;
=head1 NAME
AI::MXNet::Normal - Initialize the weight with gaussian random values.
=cut
=head1 DESCRIPTION
Initialize the weight with gaussian random values sampled from a normal distribution
with mean 0 and standard deviation sigma.
Parameters
----------
sigma : float, optional
Standard deviation for the gaussian distribution.
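For example (a single positional argument is taken as sigma):

    my $init = mx->init->Normal(0.01);    # mean 0, standard deviation 0.01
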
=cut
package AI::MXNet::Normal;
use Mouse;
extends 'AI::MXNet::Initializer';
has "sigma" => (is => "ro", isa => "Num", default => 0.01);
around BUILDARGS => sub {
my $orig = shift;
my $class = shift;
return $class->$orig(sigma => $_[0]) if @_ == 1;
return $class->$orig(@_);
};
method _init_weight(Str $name, AI::MXNet::NDArray $arr)
{
AI::MXNet::Random->normal(0, $self->sigma, { out => $arr });
}
__PACKAGE__->register;
=head1 NAME
AI::MXNet::Orthogonal - Initialize the weight as an orthogonal matrix.
=cut
=head1 DESCRIPTION
Initialize the weight as an orthogonal matrix.
Parameters
----------
scale : float, optional
scaling factor of the weight
rand_type : str, optional
use "uniform" or "normal" random numbers to initialize the weight
Reference
---------
Saxe et al., Exact solutions to the nonlinear dynamics of learning in deep linear neural networks.
arXiv preprint arXiv:1312.6120 (2013).
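For example:

    my $init = mx->init->Orthogonal(scale => 1.414, rand_type => 'normal');
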
=cut
package AI::MXNet::Orthogonal;
use AI::MXNet::Base;
use Mouse;
use AI::MXNet::Types;
extends 'AI::MXNet::Initializer';
has "scale" => (is => "ro", isa => "Num", default => 1.414);
has "rand_type" => (is => "ro", isa => enum([qw/uniform normal/]), default => 'uniform');
method _init_weight(Str $name, AI::MXNet::NDArray $arr)
{
my @shape = @{ $arr->shape };
my $nout = $shape[0];
my $nin = AI::MXNet::NDArray->size([@shape[1..$#shape]]);
my $tmp = AI::MXNet::NDArray->zeros([$nout, $nin]);
if($self->rand_type eq 'uniform')
{
AI::MXNet::Random->uniform(-1, 1, { out => $tmp });
}
else
{
AI::MXNet::Random->normal(0, 1, { out => $tmp });
}
$tmp = $tmp->aspdl;
my ($u, $s, $v) = svd($tmp);
my $q;
if(join(',', @{ $u->shape->unpdl }) eq join(',', @{ $tmp->shape->unpdl }))
{
$q = $u;
}
else
{
$q = $v;
}
$q = $self->scale * $q->reshape(reverse(@shape));
$arr .= $q;
}
*slice = *call;
__PACKAGE__->register;
=head1 NAME
AI::MXNet::Xavier - Initialize the weight with Xavier or similar initialization scheme.
=cut
=head1 DESCRIPTION
Parameters
----------
rnd_type: str, optional
Use gaussian or uniform.
factor_type: str, optional
Use avg, in, or out.
magnitude: float, optional
The scale of the random number range.
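For example (the sampling scale works out to sqrt(magnitude / factor), where factor is derived from the fan-in/fan-out according to factor_type):

    my $init = mx->init->Xavier(rnd_type => 'gaussian', factor_type => 'in', magnitude => 2);
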
=cut
package AI::MXNet::Xavier;
use Mouse;
use AI::MXNet::Types;
extends 'AI::MXNet::Initializer';
has "magnitude" => (is => "rw", isa => "Num", default => 3);
has "rnd_type" => (is => "ro", isa => enum([qw/uniform gaussian/]), default => 'uniform');
has "factor_type" => (is => "ro", isa => enum([qw/avg in out/]), default => 'avg');
method _init_weight(Str $name, AI::MXNet::NDArray $arr)
{
my @shape = @{ $arr->shape };
confess(__PACKAGE__." initializer cannot be applied to a tensor with fewer than 2 dimensions")
if @shape < 2;
my $hw_scale = 1;
if(@shape > 2)
{
$hw_scale = AI::MXNet::NDArray->size([@shape[2..$#shape]]);
}
my ($fan_in, $fan_out) = ($shape[1] * $hw_scale, $shape[0] * $hw_scale);
my $factor;
if($self->factor_type eq "avg")
{
$factor = ($fan_in + $fan_out) / 2;
}
elsif($self->factor_type eq "in")
{
$factor = $fan_in;
}
else
{
$factor = $fan_out;
}
my $scale = sqrt($self->magnitude / $factor);
if($self->rnd_type eq "iniform")
{
AI::MXNet::Random->uniform(-$scale, $scale, { out => $arr });
}
else
{
AI::MXNet::Random->normal(0, $scale, { out => $arr });
}
}
__PACKAGE__->register;
=head1 NAME
AI::MXNet::MSRAPrelu - Custom initialization scheme.
=cut
=head1 DESCRIPTION
Initialize the weight with initialization scheme from
Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification.
Parameters
----------
factor_type: str, optional
Use avg, in, or out.
slope: float, optional
initial slope of any PReLU (or similar) nonlinearities.
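For example (magnitude is derived internally as 2 / (1 + slope**2)):

    my $init = mx->init->MSRAPrelu(slope => 0.25);
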
=cut
package AI::MXNet::MSRAPrelu;
use Mouse;
extends 'AI::MXNet::Xavier';
has '+rnd_type' => (default => "gaussian");
has '+factor_type' => (default => "avg");
has 'slope' => (is => 'ro', isa => 'Num', default => 0.25);
sub BUILD
{
my $self = shift;
my $magnitude = 2 / (1 + $self->slope ** 2);
$self->magnitude($magnitude);
$self->kwargs({ slope => $self->slope, factor_type => $self->factor_type });
}
__PACKAGE__->register;
package AI::MXNet::Bilinear;
use Mouse;
use AI::MXNet::Base;
extends 'AI::MXNet::Initializer';
method _init_weight($name, $arr)
{
my $pdl_type = PDL::Type->new(DTYPE_MX_TO_PDL->{ 'float32' });
my $weight = pzeros($pdl_type, $arr->size);
my $shape = $arr->shape;
my $size = $arr->size;
# bilinear interpolation kernel: $f is the upsampling factor, $c its center
my $f = pceil($shape->[3] / 2)->at(0);
my $c = (2 * $f - 1 - $f % 2) / (2 * $f);
for my $i (0..($size-1))
{
my $x = $i % $shape->[3];
my $y = ($i / $shape->[3]) % $shape->[2];
$weight->index($i) .= (1 - abs($x / $f - $c)) * (1 - abs($y / $f - $c));
}
$arr .= $weight->reshape(reverse @{ $shape });
}
__PACKAGE__->register;
package AI::MXNet::LSTMBias;
=head1 NAME
AI::MXNet::LSTMBias - Custom initializer for LSTM cells.
=cut
=head1 DESCRIPTION
Initializes all biases of an LSTMCell to 0.0 except for
the forget gate's bias, which is set to a custom value.
Parameters
----------
forget_bias : float
a bias for the forget gate; Jozefowicz et al. (2015) recommend setting this to 1.0.
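For example:

    my $init = mx->init->LSTMBias(forget_bias => 1.0);
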
=cut
use Mouse;
extends 'AI::MXNet::Initializer';
has 'forget_bias' => (is => 'ro', isa => 'Num', required => 1);
around BUILDARGS => \&AI::MXNet::Base::process_arguments;
method python_constructor_arguments() { ['forget_bias'] }
method _init_weight(Str $name, AI::MXNet::NDArray $arr)
{
$arr .= 0;
# in the case of LSTMCell the forget gate is the second
# of the four LSTM gates; set the corresponding slice
my $num_hidden = int($arr->shape->[0] / 4);
$arr->slice([$num_hidden, 2*$num_hidden-1]) .= $self->forget_bias;
}
__PACKAGE__->register;
package AI::MXNet::FusedRNN;
use Mouse;
use JSON::PP;
extends 'AI::MXNet::Initializer';
=head1 NAME
AI::MXNet::FusedRNN - Custom initializer for fused RNN cells.
=cut
=head1 DESCRIPTION
Initializes parameters for a fused RNN layer.
Parameters
----------
init : Initializer
initializer applied to the unpacked weights.
All parameters below must be exactly the same as the ones passed to the
FusedRNNCell constructor.
num_hidden : int
num_layers : int
mode : str
bidirectional : bool
forget_bias : float
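A construction sketch (the sizes here are made up; all cell parameters must match the FusedRNNCell being initialized):

    my $init = mx->init->FusedRNN(
        init        => mx->init->Xavier(),
        num_hidden  => 512,
        num_layers  => 2,
        mode        => 'lstm',
        forget_bias => 1.0
    );
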
=cut
has 'init' => (is => 'rw', isa => 'Str|AI::MXNet::Initializer', required => 1);
has 'forget_bias' => (is => 'ro', isa => 'Num', default => 1);
has [qw/num_hidden
num_layers/] => (is => 'ro', isa => 'Int', required => 1);
has 'mode' => (is => 'ro', isa => 'Str', required => 1);
has 'bidirectional' => (is => 'ro', isa => 'Bool', default => 0);
sub BUILD
{
my $self = shift;
if(not blessed $self->init)
{
my ($klass, $kwargs);
eval {
($klass, $kwargs) = @{ decode_json($self->init) };
};
confess("FusedRNN failed to init $@") if $@;
$self->init($self->get_init_registry->{ lc $klass }->new(%$kwargs));
}
}
method _init_weight($name, $arr)
{
my $cell = AI::MXNet::RNN::FusedCell->new(
num_hidden => $self->num_hidden,
num_layers => $self->num_layers,
mode => $self->mode,
bidirectional => $self->bidirectional,
forget_bias => $self->forget_bias,
prefix => ''
);
my $args = $cell->unpack_weights({ parameters => $arr });
for my $name (keys %{ $args })
{
my $desc = AI::MXNet::InitDesc->new(name => $name);
# the forget gate bias is set directly to the configured
# forget_bias value instead of going through the initializer
if($self->mode eq 'lstm' and $name =~ /f_bias$/)
{
$args->{$name} .= $self->forget_bias;
}
else
{
$self->init->($desc, $args->{$name});
}
}
$arr .= $cell->pack_weights($args)->{parameters};
}
__PACKAGE__->register;
1;