# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
use strict;
use warnings;
use Hash::Ordered;
package AI::MXNet::Gluon::Parameter;
use AI::MXNet::NS;
use AI::MXNet::Function::Parameters;
=head1 NAME
AI::MXNet::Gluon::Parameter - A container holding parameters (weights) of AI::MXNet::Gluon::Block(s).
=cut
=head1 DESCRIPTION
AI::MXNet::Gluon::Parameter holds a copy of the parameter on each AI::MXNet::Context after
it is initialized with AI::MXNet::Gluon::Parameter->initialize(...). If grad_req is
not 'null', it will also hold a gradient array on each AI::MXNet::Context.
$ctx = mx->gpu(0);
$x = mx->nd->zeros([16, 100], ctx=>$ctx);
$w = mx->gluon->Parameter('fc_weight', shape=>[64, 100], init=>mx->init->Xavier());
$b = mx->gluon->Parameter('fc_bias', shape=>[64], init=>mx->init->Zero());
$w->initialize(ctx=>$ctx);
$b->initialize(ctx=>$ctx);
$out = mx->nd->FullyConnected($x, $w->data($ctx), $b->data($ctx), num_hidden=>64);
Parameters
----------
name : str
Name of this parameter.
grad_req : {'write', 'add', 'null'}, default 'write'
Specifies how the gradient is updated into the grad arrays.
- 'write' means the gradient is written to the grad NDArray on each backward pass.
- 'add' means the gradient is added to the grad NDArray on each backward pass. You need
to manually call zero_grad() to clear the gradient buffer before each
iteration when using this option.
- 'null' means gradient is not requested for this parameter. Gradient arrays
will not be allocated.
shape : array ref of int or int, default undef
Shape of this parameter. By default the shape is not specified. A Parameter with
an unknown shape can be used with the `Symbol` API, but `initialize` will throw an error
when using the `NDArray` API.
dtype : Dtype, default 'float32'
Data type of this parameter. For example, 'float64'.
lr_mult : float, default 1.0
Learning rate multiplier. Learning rate will be multiplied by lr_mult
when updating this parameter with optimizer.
wd_mult : float, default 1.0
Weight decay multiplier (L2 regularizer coefficient). Works similar to lr_mult.
init : Initializer, default undef
Initializer of this parameter. Will use the global initializer by default.
stype : {'default', 'row_sparse', 'csr'}, default 'default'
The storage type of the parameter.
grad_stype : {'default', 'row_sparse', 'csr'}, default 'default'
The storage type of the parameter's gradient.
Attributes
----------
grad_req : {'write', 'add', 'null'}
This can be set before or after initialization. Setting grad_req to 'null'
with $x->grad_req('null') saves memory and computation when you don't
need the gradient w.r.t. $x.
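A minimal sketch (contexts and shapes are illustrative):
$w = mx->gluon->Parameter('w', shape=>[2, 2], grad_req=>'add');
$w->initialize(ctx=>mx->cpu(0));
# with grad_req 'add', clear the accumulated gradient before each iteration
$w->zero_grad();
# disable gradients entirely once they are no longer needed
$w->grad_req('null');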
=cut
use Mouse;
use AI::MXNet::Base;
use overload '""' => sub {
my $self = shift;
"Parameter " . $self->name.
" (shape=(" . join(', ', @{ $self->shape//[] }) .")".
", dtype=" . $self->dtype.
", stype=" . $self->stype.")"
},
fallback => 1;
around BUILDARGS => sub {
my $orig = shift;
my $class = shift;
if(@_ % 2)
{
my $name = shift;
return $class->$orig(name => $name, @_);
}
else
{
return $class->$orig(@_);
}
};
sub BUILD
{
my $self = shift;
$self->grad_req($self->_grad_req);
$self->_shape([$self->_shape]) if defined $self->_shape and not ref $self->_shape;
$self->_deferred_init([]);
}
has 'name' => (is => 'ro', isa => 'Str', required => 1);
has '_grad_req' => (is => 'rw', isa => 'GradReq', init_arg => 'grad_req', default => 'write');
has '_shape' => (is => 'rw', isa => 'Maybe[Shape|Int]', init_arg => 'shape');
has 'dtype' => (is => 'rw', isa => 'Dtype', default => 'float32');
has ['stype',
'grad_stype'] => (is => 'rw', isa => 'Stype', default => 'default');
has [qw/lr_mult wd_mult/] => (is => 'rw', isa => 'Num', default => 1);
has 'init' => (is => 'rw', isa => 'Maybe[Initializer]');
has 'allow_deferred_init' => (is => 'rw', isa => 'Bool', default => 0);
has 'differentiable' => (is => 'rw', isa => 'Bool', default => 1);
has [qw/_var _data _grad
_deferred_init _trainer
_ctx_list _ctx_map/] => (is => 'rw', init_arg => undef);
method grad_req(Maybe[GradReq] $req=)
{
return $self->_grad_req unless defined $req;
if(not $self->differentiable)
{
$req = 'null';
}
return if $self->_grad_req eq $req;
$self->_grad_req($req);
if($req eq 'null' and defined $self->_grad)
{
$self->_grad(undef);
$self->_data([map { $_->detach } @{ $self->_data }]);
}
elsif(defined $self->_data)
{
$self->_init_grad();
}
}
method shape(@args)
{
return $self->_shape unless @args;
if(not defined $args[0])
{
$self->_shape(undef);
return undef;
}
if(not defined $self->_shape and defined $args[0])
{
$self->_shape(ref $args[0] ? $args[0] : [$args[0]]);
return $self->_shape;
}
my $new_shape = ref $args[0] ? $args[0] : [$args[0]];
my $shape_validated = 0;
if(@{ $self->_shape } == @{ $new_shape })
{
$shape_validated = 1;
zip(sub {
my ($i, $j) = @_;
return unless $i;
return if $i == $j;
$shape_validated = 0;
}, $self->_shape, $new_shape);
}
assert($shape_validated, 'Expected shape is incompatible with given shape');
$self->_shape($new_shape);
return $self->_shape;
}
method _set_trainer($trainer)
{
if($self->stype ne 'default' and $self->_trainer and $trainer and Scalar::Util::refaddr($self->_trainer) ne Scalar::Util::refaddr($trainer))
{
confess(
"Failed to set the trainer for Parameter '${\ $self->name }' because it was already set. ".
"More than one trainers for a ${\ $self->stype } Parameter is not supported."
);
}
$self->_trainer($trainer);
}
method _get_row_sparse($arr_list, $ctx, AI::MXNet::NDArray $row_id)
{
if(not $self->_trainer)
{
confess(
"Cannot get row_sparse data for Parameter '${\ $self->name }' when no ".
"Trainer is created with it."
);
}
my $results = $self->_check_and_get($arr_list, $ctx);
# fetch row sparse params from the trainer
$self->_trainer->_row_sparse_pull($self, $results, $row_id);
return $results;
}
method _check_and_get($arr_list, $ctx)
{
if(defined $arr_list)
{
if(ref $ctx eq 'ARRAY')
{
return $arr_list;
}
if(not defined $ctx)
{
if(@{ $arr_list } == 1)
{
return $arr_list->[0];
}
else
{
$ctx = AI::MXNet::Context->current_ctx;
}
}
my $ctx_list = $self->_ctx_map->[$ctx->device_type_id&1];
if($ctx->device_id < @{ $ctx_list })
{
my $idx = $ctx_list->[$ctx->device_id];
if(defined $idx)
{
return $arr_list->[$idx];
}
}
confess(
"Parameter '${\ $self->name }' was not initialized on context $ctx. ".
"It was only initialized on @{ $self->_ctx_list }."
);
}
if(@{ $self->_deferred_init })
{
confess("DeferredInitializationError: ".
"Parameter '${\ $self->name }' has not been initialized yet because initialization was ".
"deferred. Actual initialization happens during the first forward pass. ".
"Please pass one batch of data through the network before accessing Parameters. ".
"You can also avoid deferred initialization by specifying in_units, ".
"num_features, etc., for network layers.");
}
confess(
"Parameter '${\ $self->name }' has not been initialized. Note that ".
"you should initialize parameters and create Trainer ".
"with Block.collect_params() instead of Block.params ".
"because the later does not include Parameters of ".
"nested child Blocks"
);
}
# (Re)initializes by loading from data.
method _load_init($data, $ctx)
{
if($self->shape)
{
for(zip($self->shape, $data->shape)) {
my ($self_dim, $data_dim) = @$_;
assert(
($self_dim == 0 or $self_dim == $data_dim),
sprintf(
"Failed loading Parameter '%s' from saved params: ".
"shape incompatible expected (%s) vs saved (%s)",
$self->name, "@{$self->shape}", "@{$data->shape}"
)
);
}
$self->shape([map { $_->[0] ? $_->[0] : $_->[1] } zip($self->shape, $data->shape)]);
}
if($self->dtype)
{
assert(
($self->dtype eq $data->dtype),
sprintf(
"Failed loading Parameter '%s' from saved params: ".
"dtype incompatible expected %s vs saved %s",
$self->name, $self->dtype, $data->dtype
)
);
}
if($self->stype ne $data->stype)
{
$data = $data->tostype($self->stype);
}
if(blessed ($ctx) and $ctx->isa('AI::MXNet::Context'))
{
$ctx = [$ctx];
}
if(not defined $self->_data)
{
if(@{ $self->_deferred_init })
{
assert(
(not defined $ctx or join('', @{ $ctx }) eq join('', @{ $self->_deferred_init->[1] })),
sprintf(
"Failed to load Parameter '%s' on %s because it was ".
"previously initialized on %s.",
$self->name, "@{$ctx//[]}", "@{$self->list_ctx}"
)
);
$ctx = $self->_deferred_init->[1];
}
elsif(not defined $ctx)
{
$ctx = [AI::MXNet::Context->cpu];
}
$self->_init_impl($data, $ctx);
}
else
{
assert(
(not defined $ctx or join('', @{ $ctx }) eq join('', @{ $self->list_ctx })),
sprintf(
"Failed to load Parameter '%s' on %s because it was ".
"previously initialized on %s.",
$self->name, "@$ctx", "@{$self->list_ctx}"
)
);
$self->set_data($data);
}
$self->_deferred_init([]);
}
# Finishes deferred initialization.
method _finish_deferred_init()
{
return unless @{ $self->_deferred_init };
my ($init, $ctx, $default_init, $data) = @{ $self->_deferred_init };
$self->_deferred_init([]);
assert(
(defined($self->shape) and product(@{ $self->shape }) > 0),
sprintf(
"Cannot initialize Parameter '%s' because it has ".
"invalid shape: %s. Please specify in_units, ".
"in_channels, etc for `Block`s.",
$self->name, $self->shape
)
);
AI::MXNet::AutoGrad->pause(sub {
if(not defined $data)
{
$data = AI::MXNet::NDArray->zeros(
$self->shape,
dtype => $self->dtype,
ctx => AI::MXNet::Context->cpu,
stype => $self->stype
);
AI::MXNet::Initializer->new->(
AI::MXNet::InitDesc->new(
name => $self->name,
attrs => { __init__ => defined $init ? "$init" : "$default_init" }
),
$data
);
}
$self->_init_impl($data, $ctx);
});
}
# Sets data and grad.
method _init_impl($data, $ctx_list)
{
$self->_ctx_list([@{ $ctx_list }]);
$self->_ctx_map([[], []]);
enumerate(sub {
my ($i, $ctx) = @_;
my $dev_list = $self->_ctx_map->[$ctx->device_type_id&1];
while(@{ $dev_list } <= $ctx->device_id)
{
push @{ $dev_list }, undef;
}
$dev_list->[$ctx->device_id] = $i;
}, $self->_ctx_list);
$self->_data([map { $data->copyto($_) } @{ $self->_ctx_list }]);
$self->_init_grad;
}
# Initialize grad buffers.
method _init_grad()
{
if($self->grad_req eq 'null')
{
$self->_grad(undef);
return;
}
$self->_grad([
map {
AI::MXNet::NDArray->zeros(
$_->shape, dtype => $_->dtype,
ctx => $_->context, stype => $self->grad_stype
)
} @{ $self->_data }
]);
AI::MXNet::AutoGrad->mark_variables(
$self->_check_and_get($self->_data, []),
$self->_grad,
grad_reqs => $self->grad_req
);
}
# Reduce data from multiple contexts to cpu.
method _reduce()
{
my $data;
my $ctx = AI::MXNet::Context->cpu;
if($self->stype eq 'default')
{
my $block = $self->list_data;
$data = AI::MXNet::NDArray->add_n(map { $_->copyto($ctx) } @{ $block }) / @{ $block };
}
else
{
my $all_row_ids = AI::MXNet::NDArray->arange(stop => $self->shape->[0], dtype=>'int64', ctx=>$ctx);
$data = AI::MXNet::NDArray->zeros($self->shape, stype=>'row_sparse', ctx=>$ctx);
$self->_trainer->_row_sparse_pull($self, $data, $all_row_ids, 1);
}
return $data;
}
=head2 initialize
Initializes parameter and gradient arrays. Only used for `NDArray` API.
Parameters
----------
:$init : Initializer
The initializer to use. Overrides AI::MXNet::Gluon::Parameter->init and default_init.
:$ctx : AI::MXNet::Context or array ref of AI::MXNet::Context, defaults to AI::MXNet::Context->current_ctx().
Initialize Parameter on given context. If ctx is a list of Context, a
copy will be made for each context.
Copies are independent arrays. The user is responsible for keeping
their values consistent when updating. Normally gluon->Trainer does this for you.
:$default_init : Initializer
Default initializer is used when both 'init' and AI::MXNet::Gluon::Parameter->init are undefined.
:$force_reinit : bool, default False
Whether to force re-initialization if parameter is already initialized.
Examples
--------
>>> $weight = mx->gluon->Parameter('weight', shape=>[2, 2]);
>>> $weight->initialize(ctx=>mx->cpu(0));
>>> print $weight->data
[[-0.01068833 0.01729892]
[ 0.02042518 -0.01618656]]
<NDArray 2x2 @cpu(0)>
>>> print $weight->grad()
[[ 0. 0.]
[ 0. 0.]]
<NDArray 2x2 @cpu(0)>
>>> $weight->initialize(ctx=>[mx->gpu(0), mx->gpu(1)]);
>>> print $weight->data(mx->gpu(0));
[[-0.00873779 -0.02834515]
[ 0.05484822 -0.06206018]]
<NDArray 2x2 @gpu(0)>
>>> print $weight->data(mx->gpu(1))
[[-0.00873779 -0.02834515]
[ 0.05484822 -0.06206018]]
<NDArray 2x2 @gpu(1)>
=cut
method initialize(
Maybe[Initializer] :$init=,
Maybe[AI::MXNet::Context|ArrayRef[AI::MXNet::Context]] :$ctx=AI::MXNet::Context->current_ctx,
Initializer :$default_init=AI::MXNet::Initializer->Uniform,
Bool :$force_reinit=0
)
{
$ctx //= AI::MXNet::Context->current_ctx;
if(defined $self->_data and not $force_reinit)
{
AI::MXNet::Logging->warning(
"Parameter '%s' is already initialized, ignoring. ".
"Set force_reinit=True to re-initialize.", $self->name
);
return;
}
$self->_data(undef);
$self->_grad(undef);
if(blessed($ctx) and $ctx->isa('AI::MXNet::Context'))
{
$ctx = [$ctx];
}
if(not defined $init)
{
if(defined $self->init)
{
$init = $self->init;
}
else
{
$init = $default_init;
}
}
if(not defined $self->shape or not @{ $self->shape } or product(@{ $self->shape }) <= 0)
{
if($self->allow_deferred_init)
{
$self->_deferred_init([$init, $ctx, $default_init, undef]);
return;
}
confess("Cannot initialize Parameter '${\ $self->name }' because it has ".
"invalid shape: @{$self->shape//[]}.");
}
$self->_deferred_init([$init, $ctx, $default_init, undef]);
$self->_finish_deferred_init;
}
=head2 reset_ctx
Re-assign Parameter to other contexts.
:$ctx : AI::MXNet::Context or array ref of AI::MXNet::Context, default AI::MXNet::Context->current_ctx.
Assign Parameter to given context. If ctx is a list of Context, a
copy will be made for each context.
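A minimal usage sketch (contexts are illustrative):
$w = mx->gluon->Parameter('w', shape=>[2, 2]);
$w->initialize(ctx=>mx->cpu(0));
# current values are gathered and copied onto each of the new contexts
$w->reset_ctx(ctx=>[mx->cpu(0), mx->cpu(1)]);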
=cut
method reset_ctx(Maybe[AI::MXNet::Context|ArrayRef[AI::MXNet::Context]] :$ctx=AI::MXNet::Context->current_ctx)
{
if(blessed($ctx) and $ctx->isa('AI::MXNet::Context'))
{
$ctx = [$ctx];
}
if(defined $self->_data)
{
my $data = $self->_reduce;
AI::MXNet::AutoGrad->pause(sub {
$self->_init_impl($data, $ctx);
});
}
elsif(@{ $self->_deferred_init })
{
my ($init, undef, $default_init, $data) = @{ $self->_deferred_init };
$self->_deferred_init([$init, $ctx, $default_init, $data]);
}
else
{
confess("Cannot reset context for Parameter '${ \ $self->name }' because it ".
"has not been initialized.");
}
}
=head2 set_data
Sets this parameter's value on all contexts to data.
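For example (a minimal sketch):
$w = mx->gluon->Parameter('w', shape=>[2, 2]);
$w->initialize(ctx=>mx->cpu(0));
$w->set_data(mx->nd->ones([2, 2]));
# the copy on every context now holds all ones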
=cut
method set_data($data)
{
$self->shape($data->shape);
if(not defined $self->_data)
{
assert(
(scalar(@{ $self->_deferred_init })),
"Parameter '${\ $self->name }' has not been initialized"
);
$self->_deferred_init->[3] = $data;
return;
}
# if update_on_kvstore, we need to make sure the copy stored in kvstore is in sync
if($self->_trainer and $self->_trainer->_kv_initialized and $self->_trainer->update_on_kvstore)
{
if(!grep { Scalar::Util::refaddr($self) == Scalar::Util::refaddr($_) } @{ $self->_trainer->_params_to_init })
{
$self->_trainer->_reset_kvstore();
}
}
for my $arr (@{ $self->_check_and_get($self->_data, []) })
{
$arr .= $data;
}
}
=head2 row_sparse_data
Returns a copy of the 'row_sparse' parameter on the same context as row_id's.
The copy only retains rows whose ids occur in provided row ids.
The parameter must have been initialized on this context before.
Parameters
----------
$row_id: AI::MXNet::NDArray
Row ids to retain for the 'row_sparse' parameter.
Returns
-------
AI::MXNet::NDArray on row_id's context
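A minimal sketch; it assumes a gluon Trainer has already been created over
this parameter (required for 'row_sparse' storage); the shape and row ids are illustrative:
$embed = mx->gluon->Parameter('embed_weight', shape=>[100, 8],
stype=>'row_sparse', grad_stype=>'row_sparse');
$embed->initialize(ctx=>mx->cpu(0));
# ... create a gluon Trainer that manages $embed here ...
$row_id = mx->nd->array([0, 2, 5], dtype=>'int64');
$rows = $embed->row_sparse_data($row_id); # retains only rows 0, 2 and 5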
=cut
method row_sparse_data(AI::MXNet::NDArray $row_id)
{
if($self->stype ne 'row_sparse')
{
confess(
"Cannot return a copy of Parameter ${\ $self->name } via row_sparse_data() ".
"because its storage type is ${\ $self->stype }. Please use data() instead."
);
}
return $self->_get_row_sparse($self->_data, $row_id->context, $row_id);
}
=head2 list_row_sparse_data
Returns copies of the 'row_sparse' parameter on all contexts, in the same order
as creation. The copy only retains rows whose ids occur in provided row ids.
The parameter must have been initialized before.
Parameters
----------
$row_id: AI::MXNet::NDArray
Row ids to retain for the 'row_sparse' parameter.
Returns
-------
array ref of AI::MXNet::NDArrays
=cut
method list_row_sparse_data(AI::MXNet::NDArray $row_id)
{
if($self->stype ne 'row_sparse')
{
confess(
"Cannot return copies of Parameter '${\ $self->name }' on all contexts via ".
"list_row_sparse_data() because its storage type is ${\ $self->stype }. Please ".
"use data() instead."
);
}
return $self->_get_row_sparse($self->_data, [], $row_id);
}
=head2 data
Returns a copy of this parameter on one context. Must have been
initialized on this context before. For sparse parameters, use
row_sparse_data instead.
Parameters
----------
ctx : Context
Desired context.
Returns
-------
NDArray on ctx
=cut
method data(Maybe[AI::MXNet::Context] $ctx=)
{
if($self->stype ne 'default')
{
$ctx //= AI::MXNet::Context->current_ctx;
confess(
"Cannot return a copy of Parameter '${\ $self->name }' on ctx $ctx via data() ".
"because its storage type is ${\ $self->stype }. Please use row_sparse_data() ".
"instead."
);
}
return $self->_check_and_get($self->_data, $ctx);
}
=head2 list_data
Returns copies of this parameter on all contexts, in the same order
as creation. For sparse parameters, use list_row_sparse_data
instead.
=cut
method list_data()
{
if($self->stype ne 'default')
{
confess(
"Cannot return a copies of Parameter '${\ $self->data }' on all contexts via list_data() ".
"because its storage type is ${\ $self->stype }. Please use row_sparse_data() ".
"instead."
);
}
return $self->_check_and_get($self->_data, [])
}
=head2 grad
Returns a gradient buffer for this parameter on one context.
Parameters
----------
ctx : Context
Desired context.
=cut
method grad(Maybe[AI::MXNet::Context] $ctx=)
{
if(defined $self->_data and not defined $self->_grad)
{
confess(
"Cannot get gradient array for Parameter ${\ $self->name } ".
"because grad_req='null'"
);
}
return $self->_check_and_get($self->_grad, $ctx);
}
=head2 list_grad
Returns gradient buffers on all contexts, in the same order
as 'values'.
=cut
method list_grad()
{
if(defined $self->_data and not defined $self->_grad)
{
confess(
"Cannot get gradient array for Parameter ${\ $self->name } ".
"because grad_req='null'"
);
}
return $self->_check_and_get($self->_grad, []);
}
=head2 list_ctx
Returns a list of contexts this parameter is initialized on.
=cut
method list_ctx()
{
if(not defined $self->_data)
{
if(@{ $self->_deferred_init })
{
return $self->_deferred_init->[1];
}
confess("Parameter ${\ $self->name } has not been initialized");
}
return $self->_ctx_list;
}
=head2 zero_grad
Sets gradient buffer on all contexts to 0. No action is taken if
parameter is uninitialized or doesn't require gradient.
=cut
method zero_grad()
{
return unless defined $self->_grad;
AI::MXNet::NDArray->zeros_like($_, { out => $_ }) for @{ $self->_grad };
}
=head2 var
Returns a symbol representing this parameter.
=cut
method var()
{
if(not defined $self->_var)
{
$self->_var(
AI::MXNet::Symbol->var(
$self->name, shape => $self->shape, dtype => $self->dtype,
lr_mult => $self->lr_mult, wd_mult => $self->wd_mult,
init => $self->init, stype => $self->stype
)
);
}
return $self->_var;
}
=head2 cast
Cast data and gradient of this Parameter to a new data type.
Parameters
----------
$dtype : Dtype
The new data type.
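For example (a minimal sketch):
$w = mx->gluon->Parameter('w', shape=>[2, 2]);
$w->initialize(ctx=>mx->cpu(0));
$w->cast('float16');
# both the data and the gradient arrays are now float16
print $w->data->dtype; # float16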
=cut
method cast(Dtype $dtype)
{
$self->dtype($dtype);
return unless defined $self->_data;
AI::MXNet::AutoGrad->pause(sub {
$self->_data([map { $_->astype($dtype) } @{ $self->_data }]);
return unless defined $self->_grad;
$self->_grad([map { $_->astype($dtype) } @{ $self->_grad }]);
AI::MXNet::AutoGrad->mark_variables($self->_data, $self->_grad, grad_reqs => $self->grad_req);
});
}
__PACKAGE__->AI::MXNet::NS::register('AI::MXNet::Gluon');
package AI::MXNet::Gluon::Constant;
use strict;
use warnings;
use Mouse;
extends 'AI::MXNet::Gluon::Parameter';
=head1 NAME
AI::MXNet::Gluon::Constant - A constant parameter for holding immutable tensors.
=cut
=head1 DESCRIPTION
A constant parameter for holding immutable tensors.
Constants are ignored by autograd and Trainer, thus their values
will not change during training. But you can still update their values
manually with the set_data method.
Constants can be created with either
$const = mx->gluon->Constant('const', [[1,2],[3,4]]);
or
package Block;
use AI::MXNet::Gluon::Mouse;
extends 'AI::MXNet::Gluon::Block';
sub BUILD
{
$self->const($self->params->get_constant('const', [[1,2],[3,4]]));
}
Constructor Attributes
----------
name : str
Name of the parameter.
value : AcceptableInput (perl array, pdl, ndarray, etc)
Initial value for the constant.
=cut
use Mouse;
use AI::MXNet::Base;
use Scalar::Util qw(refaddr);
around BUILDARGS => \&AI::MXNet::Base::process_arguments;
method python_constructor_arguments() { ['name', 'value'] }
has 'value' => (is => 'rw', isa => 'AcceptableInput');
has '+_grad_req' => (is => 'rw', default => 'null');
use overload '""' => sub {
my $self = shift;
"Constant " . $self->name.
" (shape=(" . join(', ', @{ $self->shape//[] }) .")".
", dtype=" . $self->dtype.
", stype=" . $self->stype.")"
},
fallback => 1;
sub BUILD
{
my $self = shift;
if(not (blessed $self->value and $self->value->isa('AI::MXNet::NDArray')))
{
$self->value(AI::MXNet::NDArray->array($self->value, dtype => $self->dtype));
}
$self->shape($self->value->shape);
my $init = "AI::MXNet::Gluon::Constant::Init_${\ $self->name }_${\ refaddr($self) }";
my $tmp =<<"EOP";
package $init;
use Mouse;
extends 'AI::MXNet::Initializer';
sub _init_weight
{
\$self->value->copyto(\$_[2]);
}
$init->register;
1;
EOP
eval $tmp;
$self->init($init->new);
}
method grad_req($req=)
{
if(defined $req and $req ne 'null')
{
AI::MXNet::Logging->warning(
'Constant parameter "%s" does not support '.
'grad_req other than "null", and new value "%s" '.
'is ignored.',
$self->name, $req
);
}
return 'null';
}
package AI::MXNet::Gluon::ParameterDict;
use AI::MXNet::Base;
=head1 NAME
AI::MXNet::Gluon::ParameterDict - A dictionary managing a set of parameters.
=cut
=head1 DESCRIPTION
Parameters
----------
prefix : str, default ''
The prefix to be prepended to all Parameters' names created by this dict.
shared : ParameterDict or undef
If not undef, when this dict's 'get' method creates a new parameter, it
will first try to retrieve it from the 'shared' dict. Usually used for sharing
parameters with another `Block`.
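For example (a minimal sketch; the prefixes are illustrative):
$params = mx->gluon->ParameterDict('block_');
$w = $params->get('weight', shape=>[2, 2]); # creates 'block_weight'
# a second dict that shares the first one's parameters
$shared = mx->gluon->ParameterDict('block_', shared=>$params);
$w2 = $shared->get('weight'); # retrieves the same Parameter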
=cut
use Mouse;
has _prefix => (is => 'ro', isa => 'Str', init_arg => 'prefix', default => '');
has _shared => (is => 'rw', isa => 'Maybe[AI::MXNet::Gluon::ParameterDict]', init_arg => 'shared');
has _params => (is => 'rw', init_arg => undef);
around BUILDARGS => \&AI::MXNet::Base::process_arguments;
method python_constructor_arguments() { [qw/prefix shared/] }
sub BUILD
{
my $self = shift;
$self->_params(Hash::Ordered->new);
}
use overload
'""' => sub {
my $self = shift;
my $name = $self->_prefix ? $self->_prefix." " : '';
my $content = join("\n", map { AI::MXNet::Base::_indent(" $_", 2) } $self->values);
return "$name(\n$content\n)";
},
'@{}' => sub { my @tmp = shift->_params->as_list; \@tmp },
fallback => 1;
method items()
{
return @{$self};
}
method keys()
{
return $self->_params->keys;
}
method values()
{
return $self->_params->values;
}
method prefix()
{
$self->_prefix;
}
method params()
{
$self->_params;
}
method _get_impl($name)
{
if($self->_params->exists($name))
{
return $self->_params->get($name);
}
if(defined $self->_shared and $self->_shared->_params->exists($name))
{
$self->_params->set($name => $self->_shared->_params->get($name));
return $self->_params->get($name);
}
return undef;
}
=head2 get
Retrieves an AI::MXNet::Gluon::Parameter with name $self->prefix.$name. If not found,
'get' will first try to retrieve it from the 'shared' dict. If still not
found, 'get' will create a new AI::MXNet::Gluon::Parameter with the given keyword arguments
and insert it into self.
Parameters
----------
name : str
Name of the desired Parameter. It will be prepended with this dictionary's
prefix.
%kwargs : hash
The rest of key-word arguments for the created `Parameter`.
Returns
-------
Parameter
The created or retrieved `Parameter`.
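For example (a minimal sketch; the shapes are illustrative):
$params = mx->gluon->ParameterDict('dense_');
# dimension 0 marks an unknown size that can be inferred later
$w = $params->get('weight', shape=>[64, 0], allow_deferred_init=>1);
# a later call with a compatible shape refines the unknown dimension
$w = $params->get('weight', shape=>[64, 128]);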
=cut
use Data::Dumper;
method get(Str $name, %kwargs)
{
$name = $self->prefix . $name;
my $param = $self->_get_impl($name);
if(not defined $param)
{
$param = AI::MXNet::Gluon::Parameter->new($name, %kwargs);
$self->_params->set($name => $param);
}
else
{
while(my ($k, $v) = each %kwargs)
{
if($param->can($k))
{
if(defined $param->$k)
{
my $existing = $param->$k;
if($k eq 'shape' and @{$v} == @{$existing})
{
my @inferred_shape;
my $matched = 1;
for(zip($v, $existing))
{
my ($dim1, $dim2) = @$_;
if($dim1 != $dim2 and $dim1 * $dim2 != 0)
{
$matched = 0;
last;
}
elsif($dim1 == $dim2)
{
push @inferred_shape, $dim1;
}
elsif($dim1 == 0)
{
push @inferred_shape, $dim2;
}
else
{
push @inferred_shape, $dim1;
}
}
if($matched)
{
$param->_shape(\@inferred_shape);
next;
}
}
elsif($k eq 'dtype' and ($v//'') eq ($existing//''))
{
next;
}
assert(
(not defined $v or Dumper($v) eq Dumper($param->$k)),
"Cannot retrieve Parameter $name because desired attribute ".
"does not match with stored for attribute $k: ".
"desired ".Dumper($v)." vs stored ". Dumper($param->$k)
);
}
else
{
$param->$k($v);
}
}
else
{
confess("unknown param $k, $v");
}
}
}
return $param;
}
=head2 update
Copies all Parameters in $other to self.
=cut
method update($other, Maybe[Str] $select=)
{
my @keys = $other->keys;
for my $k (grep { not defined $select or /$select/ } @keys)
{
if($self->_params->exists($k))
{
assert(
(Scalar::Util::refaddr($self->_params->get($k)) == Scalar::Util::refaddr($other->_params->get($k))),
"Cannot update self with other because they have different ".
"Parameters with the same name $k"
);
}
else
{
$self->_params->set($k => $other->_params->get($k));
}
}
}
=head2 get_constant
Retrieves AI::MXNet::Gluon::Constant with name $self->prefix.$name. If not found,
'get' will first try to retrieve it from the 'shared' dictionary. If still not
found, 'get' will create a new Constant with the given keyword
arguments and insert it into self.
Parameters
----------
name : str
Name of the desired Constant. It will be prepended with this dictionary's
prefix.
value : array-like
Initial value of constant.
Returns
-------
Constant
The created or retrieved Constant.
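For example (a minimal sketch):
$params = mx->gluon->ParameterDict('block_');
# the first call creates the Constant from the given value
$const = $params->get_constant('const', [[1, 2], [3, 4]]);
# later calls retrieve it by name; passing a value again is an error
$same = $params->get_constant('const');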
=cut
method get_constant(Str $name, Maybe[AcceptableInput] $value=)
{
$name = $self->prefix . $name;
my $param = $self->_get_impl($name);
if(not defined $param)
{
if(not defined $value)
{
confess(
"No constant named '$name'. Please specify value ".
"if you want to create a new constant."
);
}
$param = AI::MXNet::Gluon::Constant->new($name, $value);
$self->_params->set($name, $param);
}
elsif(defined $value)
{
confess("reinit of Constant $name is not allowed");
}
return $param;
}
=head2 initialize
Initializes all Parameters managed by this dictionary to be used for 'NDArray'
API. It has no effect when using 'Symbol' API.
Parameters
----------
:$init : Initializer
Global default Initializer to be used when AI::MXNet::Gluon::Parameter->init is undef.
Otherwise, AI::MXNet::Gluon::Parameter->init takes precedence.
:$ctx : AI::MXNet::Context or array ref of AI::MXNet::Context objects
Keeps a copy of Parameters on one or many context(s).
:$force_reinit : bool, default False
Whether to force re-initialization if parameter is already initialized.
:$verbose : bool, default False
Whether to verbosely print initialization details.
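For example (a minimal sketch):
$params = mx->gluon->ParameterDict('block_');
$params->get('weight', shape=>[2, 2]);
$params->initialize(init=>mx->init->Xavier(), ctx=>mx->cpu(0));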
=cut
method initialize(
Initializer :$init=AI::MXNet::Initializer->Uniform(),
Maybe[AI::MXNet::Context|ArrayRef[AI::MXNet::Context]] :$ctx=,
Bool :$verbose=0,
Bool :$force_reinit=0
)
{
if($verbose)
{
$init->set_verbosity(verbose=>$verbose);
}
$_->initialize(ctx => $ctx, default_init => $init, force_reinit => $force_reinit) for $self->values;
}
=head2 zero_grad
Sets all Parameters' gradient buffer to 0.
=cut
method zero_grad()
{
$_->zero_grad for $self->values;
}
=head2 reset_ctx
Re-assign all Parameters to other contexts.
$ctx : AI::MXNet::Context or array ref of AI::MXNet::Context objects, defaults to AI::MXNet::Context->current_ctx().
Assign Parameter to given context. If $ctx is an array ref of AI::MXNet::Context objects, a
copy will be made for each context.
=cut
method reset_ctx(AI::MXNet::Context|ArrayRef[AI::MXNet::Context] $ctx=AI::MXNet::Context->current_ctx)
{
$_->reset_ctx($ctx) for $self->values;
}
=head2 setattr
Set an attribute to a new value for all Parameters.
For example, set grad_req to 'null' if you don't need gradients w.r.t. a
model's Parameters::
$model->collect_params()->setattr(grad_req => 'null');
or change the learning rate multiplier::
$model->collect_params()->setattr(lr_mult => 0.5);
Parameters
----------
$name : str
Name of the attribute.
$value : valid type for attribute name
The new value for the attribute.
=cut
method setattr($name, $value)
{
$_->$name($value) for $self->values;
}
=head2 save
Save parameters to file.
$filename : str
Path to parameter file.
$strip_prefix : str, default ''
Strip prefix from parameter names before saving.
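For example (a minimal sketch; the file name is illustrative):
$params = mx->gluon->ParameterDict('block_');
$params->get('weight', shape=>[2, 2]);
$params->initialize(ctx=>mx->cpu(0));
$params->save('block.params', 'block_'); # stored under the name 'weight'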
=cut
method save(Str $filename, Str $strip_prefix='')
{
my %arg_dict = ();
for my $param ($self->values())
{
my $weight = $param->_reduce();
if(not $param->name =~ /^\Q$strip_prefix\E/)
{
confess(
"Prefix $strip_prefix is to be striped before saving, but Parameter ".
"${\ $param->name } does not start with $strip_prefix. If you are using Block.save_params, ".
"This may be due to your Block shares parameters from other ".
"Blocks or you forgot to use `with name_scope()`` during init. ".
"Consider switching to Block.collect_params.save and ".
"Block.collect_params.load instead."
);
}
$arg_dict{ substr($param->name, length $strip_prefix) } = $weight;
}
AI::MXNet::NDArray->save($filename, \%arg_dict);
}
=head2 load
Load parameters from file.
$filename : str
Path to parameter file.
:$ctx : AI::MXNet::Context or array ref of AI::MXNet::Context objects
Context(s) to initialize loaded parameters on.
:$allow_missing : bool, default False
Whether to silently skip loading parameters not present in the file.
:$ignore_extra : bool, default False
Whether to silently ignore parameters from the file that are not
present in this ParameterDict.
:$restore_prefix : str, default ''
Prepend this prefix to names of stored parameters before loading.
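A matching load for the save example above (a minimal sketch):
$params = mx->gluon->ParameterDict('block_');
$params->get('weight', shape=>[2, 2]);
# restore_prefix must match the prefix that was stripped at save time
$params->load('block.params', ctx=>mx->cpu(0), restore_prefix=>'block_');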
=cut
method load(
Str $filename,
AI::MXNet::Context|ArrayRef[AI::MXNet::Context] :$ctx=AI::MXNet::Context->current_ctx,
Bool :$allow_missing=0,
Bool :$ignore_extra=0,
Str :$restore_prefix=''
)
{
if($restore_prefix)
{
for my $name ($self->keys())
{
assert(
($name =~ /^\Q$restore_prefix\E/),
"restore_prefix is $restore_prefix but Parameters name $name does not start ".
"with $restore_prefix"
);
}
}
my $lprefix = length $restore_prefix;
my %orig_load = %{ AI::MXNet::NDArray->load($filename) };
my %arg_dict = map { my $k = $_; s/^(?:arg|aux)://; ($restore_prefix.$_, $orig_load{$k}) } keys %orig_load;
if(not $allow_missing)
{
for my $name ($self->keys())
{
assert(
(exists $arg_dict{ $name }),
sprintf("Parameter %s is missing in file %s", substr($name, $lprefix), $filename)
);
}
}
for my $name (keys %arg_dict)
{
if(not $self->_params->exists($name))
{
assert(
$ignore_extra,
sprintf(
"Parameter %s loaded from file %s is not present in ParameterDict",
substr($name, $lprefix),
$filename
)
);
next;
}
$self->_params->get($name)->_load_init($arg_dict{$name}, $ctx);
}
}
__PACKAGE__->AI::MXNet::NS::register('AI::MXNet::Gluon');
1;