# coding: utf-8
# pylint: disable=
"""Neural network parameter."""
from collections import OrderedDict
import numpy as np
from ..base import mx_real_t, MXNetError
from .. import symbol, ndarray, initializer, context
from ..context import Context
from .. import autograd
# pylint: disable= invalid-name
tensor_types = (symbol.Symbol, ndarray.NDArray)
# pylint: enable= invalid-name
class DeferredInitializationError(MXNetError):
"""Error for unfinished deferred initialization."""
pass
class Parameter(object):
"""A Container holding parameters (weights) of layers.
`Parameter` can be used with both `Symbol` and `NDArray` API. For `Symbol` API,
`Parameter.var()` will return a `Symbol` representing this parameter. It
can then be used for composing networks::
x = mx.sym.Variable('data')
w = mx.nn.Parameter('fc_weight', init=mx.init.Xavier())
b = mx.nn.Parameter('fc_bias', init=mx.init.Zero())
out = mx.sym.FullyConnected(x, w.var(), b.var(), num_hidden=64)
For the `NDArray` API, `Parameter` must be initialized with `Parameter.initialize`. It
will then hold a copy of the parameter on each `Context`. If `grad_req` is
not `null`, it will also hold a gradient array on each `Context`::
ctx = mx.gpu(0)
x = mx.nd.zeros((16, 100), ctx=ctx)
w = mx.nn.Parameter('fc_weight', shape=(64, 100), init=mx.init.Xavier())
b = mx.nn.Parameter('fc_bias', shape=(64,), init=mx.init.Zero())
w.initialize(ctx=ctx)
b.initialize(ctx=ctx)
out = mx.nd.FullyConnected(x, w.data(ctx), b.data(ctx), num_hidden=64)
Parameters
----------
name : str
Name of this parameter.
grad_req : {'write', 'add', 'null'}, default 'write'
Specifies how to update gradient to grad arrays.
- 'write' means the gradient is written to the grad `NDArray` every time it
is computed.
- 'add' means the gradient is added to the grad `NDArray` every time it is
computed. You need to manually call `zero_grad()` to clear the gradient
buffer before each iteration when using this option.
- 'null' means gradient is not requested for this parameter. Gradient arrays
will not be allocated.
shape : tuple of int, default None
Shape of this parameter. By default shape is not specified. A Parameter with
unknown shape can be used with the `Symbol` API, but initialization will raise
an error when using the `NDArray` API.
dtype : numpy.dtype or str, default 'float32'
Data type of this parameter. For example, numpy.float32 or 'float32'.
lr_mult : float, default 1.0
Learning rate multiplier. Learning rate will be multiplied by lr_mult
when updating this parameter with optimizer.
wd_mult : float, default 1.0
Weight decay multiplier (L2 regularizer coefficient). Works similarly to lr_mult.
init : Initializer, default None
Initializer of this parameter. Will use the global initializer by default.
"""
def __init__(self, name, grad_req='write', shape=None, dtype=mx_real_t,
lr_mult=1.0, wd_mult=1.0, init=None):
self.name = name
self.shape = shape
self.dtype = dtype
self.lr_mult = lr_mult
self.wd_mult = wd_mult
self.grad_req = grad_req
self.init = init
self._var = None
self._data = None
self._grad = None
self._defered_init = ()
def initialize(self, init=None, ctx=None, default_init=initializer.Xavier(),
allow_deferring=True):
"""Intialize parameter and gradient arrays. Only used for `NDArray` API.
Parameters
----------
init : Initializer
The initializer to use. Overrides `Parameter.init` and default_init.
ctx : Context or list of Context, defaults to `context.current_context()`.
Initialize Parameter on given context. If ctx is a list of Context, a
copy will be made for each context.
.. note:: Copies are independent arrays. User is responsible for keeping
their values consistent when updating. Normally nn.Trainer does this for you.
default_init : Initializer
Default initializer is used when both `init` and `Parameter.init` are None.
allow_deferring : bool, default True
Whether initialization may be deferred when `shape` is unknown (None or
contains a 0 dimension). If True, initialization is postponed until the
shape is specified (e.g. on the first forward pass); if False, a
`ValueError` is raised instead.
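Examples
--------
A minimal sketch, not part of the original documentation; it assumes
``mxnet`` is imported as ``mx`` and that this module is exposed as ``mx.nn``::
w = mx.nn.Parameter('fc_weight', shape=(64, 100), init=mx.init.Xavier())
w.initialize(ctx=[mx.cpu(0), mx.cpu(1)])  # one independent copy per context
v = mx.nn.Parameter('fc2_weight')         # shape unknown
v.initialize(ctx=mx.cpu(0))               # deferred until the shape is set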
"""
if ctx is None:
ctx = [context.current_context()]
if isinstance(ctx, Context):
ctx = [ctx]
if self.shape is None or np.prod(self.shape) <= 0:
if allow_deferring:
self._defered_init = (init, ctx, default_init)
return
raise ValueError("Cannot initialize Parameter %s because it has " \
"invalid shape: %s. Please specify in_units, " \
"in_filters, num_features etc for Layers or " \
"set allow_deferring to True to defer initialization " \
"to first forward pass."%(self.name, str(self.shape)))
self._defered_init = (init, ctx, default_init)
self._finish_deferred_init()
def _load_init(self, data, ctx):
"""(Re)init by loading from data."""
if self.shape:
for i, j in zip(self.shape, data.shape):
assert i == 0 or i == j, \
"Failed loading Parameter %s from saved params: " \
"shape incompatible expacted %s vs saved %s"%(
self.name, str(self.shape), str(data.shape))
if self.dtype:
assert np.dtype(self.dtype).type == data.dtype, \
"Failed loading Parameter %s from saved params: " \
"dtype incompatible expacted %s vs saved %s"%(
self.name, str(self.dtype), str(data.dtype))
if isinstance(ctx, Context):
ctx = [ctx]
if self._data is None:
if self._defered_init:
assert set(ctx) == set(self._defered_init[1]), \
"Failed to load Parameter %s on %s because it was " \
"previous initialized on %s."%(
self.name, str(ctx), str(self.list_ctx()))
self._init_impl(data, ctx)
else:
assert set(ctx) == set(self.list_ctx()), \
"Failed to load Parameter %s on %s because it was " \
"previous initialized on %s."%(
self.name, str(ctx), str(self.list_ctx()))
self.set_data(data)
self._defered_init = ()
def _finish_deferred_init(self):
"""Finish deferred initialization."""
if not self._defered_init:
return
init, ctx, default_init = self._defered_init
self._defered_init = ()
assert self.shape is not None and np.prod(self.shape) > 0, \
"Cannot initialize Parameter %s because it has " \
"invalid shape: %s. Please specify in_units, " \
"in_filters, num_features etc for Layers."%(
self.name, str(self.shape))
with autograd.test_section():
data = ndarray.zeros(shape=self.shape, dtype=self.dtype,
ctx=context.cpu())
if init is None:
init = self.init
initializer.create(default_init)(
initializer.InitDesc(self.name, {'__init__': init}), data)
self._init_impl(data, ctx)
def _init_impl(self, data, ctx):
"""Set data and grad."""
self._data = OrderedDict()
for i in ctx:
self._data[i] = data.copyto(i)
if self.grad_req == 'null':
self._grad = None
return
self._grad = OrderedDict()
for i in ctx:
self._grad[i] = ndarray.zeros_like(self._data[i])
autograd.mark_variables(self.list_data(), self.list_grad(), self.grad_req)
def set_data(self, data):
"""Set this parameter's value on all contexts to data."""
assert self._data is not None, \
"Parameter %s has not been initialized"%self.name
for arr in self.list_data():
arr[:] = data
def _check_initialized(self, ctx=None):
if self._data is not None:
if ctx is not None and ctx not in self._data:
raise RuntimeError(
"Parameter %s was not initialized on context %s. "
"It was only initialized on %s."%(
self.name, str(ctx), str(self.list_ctx())))
return
if self._defered_init:
raise DeferredInitializationError
raise RuntimeError(
"Parameter %s has not been initialized. Note that " \
"you should initialize parameters and create Trainer " \
"with Layer.all_params() instead of Layer.params " \
"because the later does not include parameters of " \
"nested child layers "%(self.name))
def data(self, ctx=None):
"""Returns a copy of this parameter on one context. Must have been
initialized on this context before.
Parameters
----------
ctx : Context
Desired context.
Returns
-------
NDArray on ctx
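Examples
--------
Illustrative only, not from the original documentation; assumes ``mxnet`` is
imported as ``mx`` and this module is exposed as ``mx.nn``::
w = mx.nn.Parameter('fc_weight', shape=(64, 100), init=mx.init.Xavier())
w.initialize(ctx=mx.cpu(0))
arr = w.data(mx.cpu(0))  # the parameter's NDArray on cpu(0)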
"""
if ctx is None:
ctx = context.current_context()
self._check_initialized(ctx)
return self._data[ctx]
def list_data(self):
"""Returns copies of this parameter on all contexts, in the same order
as creation."""
self._check_initialized()
return list(self._data.values())
def grad(self, ctx=None):
"""Returns a gradient buffer for this parameter on one context.
Parameters
----------
ctx : Context
Desired context.
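Examples
--------
A sketch, not from the original documentation; assumes ``mxnet`` is imported
as ``mx`` and this module is exposed as ``mx.nn``::
w = mx.nn.Parameter('fc_weight', shape=(64, 100), init=mx.init.Xavier())
w.initialize(ctx=mx.cpu(0))
g = w.grad(mx.cpu(0))  # gradient buffer on cpu(0), initially all zeros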
"""
if ctx is None:
ctx = context.current_context()
self._check_initialized(ctx)
if self._grad is None:
raise RuntimeError(
"Cannot get gradient array for Parameter %s " \
"because grad_req='null'"%(self.name))
return self._grad[ctx]
def list_grad(self):
"""Returns gradient buffers on all contexts, in the same order
as `values`."""
self._check_initialized()
assert self._grad is not None, \
"Parameter %s does not have gradients because grad_req='null'"%self.name
return list(self._grad.values())
def list_ctx(self):
"""Returns a list of contexts this parameter is initialized on"""
if self._data is None:
if self._defered_init:
return self._defered_init[1]
raise RuntimeError("Parameter %s has not been initialized"%self.name)
return list(self._data.keys())
def zero_grad(self):
"""Set gradient buffer on all contexts to 0. No action is taken if
parameter is uninitialized or doesn't require gradient."""
if self._grad is None:
return
# `_grad` maps Context -> NDArray; zero each gradient buffer in place.
for arr in self._grad.values():
arr[:] = 0
def var(self):
"""Returns a symbol representing this parameter."""
if self._var is None:
self._var = symbol.var(self.name, shape=self.shape, dtype=self.dtype,
lr_mult=self.lr_mult, wd_mult=self.wd_mult,
init=self.init)
return self._var
class ParameterDict(object):
"""A dictionary managing a set of parameters.
Parameters
----------
prefix : str, default ''
The prefix to be prepended to all Parameters' name created by this dict.
shared : ParameterDict or None
If not None, when this dict's `get` method creates a new parameter, it
will first try to retrieve it from the `shared` dict. Usually used for
sharing parameters with another layer.
"""
def __init__(self, prefix='', shared=None):
self._prefix = prefix
self._params = {}
self._shared = shared
def __getitem__(self, key):
return self._params[key]
def items(self):
return self._params.items()
def keys(self):
return self._params.keys()
def values(self):
return self._params.values()
@property
def prefix(self):
"""Prefix of this dict. It will be prepended to Parameters' name created
with `get`"""
return self._prefix
def _get_impl(self, name):
if name in self._params:
return self._params[name]
if self._shared is not None and name in self._shared._params:
return self._shared._params[name]
return None
def get(self, name, **kwargs):
"""Retrieve a Parameter with name `self.prefix+name`. If not found,
`get` will first try to retrive it from `shared` dict. If still not
found, `get` will create a new Parameter with key-word arguments and
insert it to self.
Parameters
----------
name : str
name of the desired Parameter. It will be prepended with this dictionary's
prefix.
**kwargs : dict
The rest of the keyword arguments for the created Parameter.
Returns
-------
Parameter
The created or retrieved Parameter.
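Examples
--------
A minimal sketch, not from the original documentation; assumes ``mxnet`` is
imported as ``mx`` and this module is exposed as ``mx.nn``::
params = mx.nn.ParameterDict(prefix='fc_')
w = params.get('weight', shape=(64, 100))  # creates Parameter 'fc_weight'
w2 = params.get('weight')                  # retrieves the same Parameter
assert w is w2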
"""
name = self.prefix + name
param = self._get_impl(name)
if param is None:
param = Parameter(name, **kwargs)
self._params[name] = param
else:
for k, v in kwargs.items():
if hasattr(param, k) and getattr(param, k) is not None:
assert v is None or v == getattr(param, k), \
"Cannot retrieve Parameter %s because desired attribute " \
"does not match with stored for attribute %s: " \
"desired %s vs stored %s."%(
name, k, str(v), str(getattr(param, k)))
else:
setattr(param, k, v)
return param
def update(self, other):
"""Copy all Parameters in `other` to self."""
for k, v in other.items():
if k in self._params:
assert self._params[k] is v, \
"Cannot update self with other because they have different " \
"Parameters with the same name %s"%k
else:
self._params[k] = v
def initialize(self, init=initializer.Xavier(), ctx=None):
"""Intialize all Parameters manage by this dictionary to be used for `NDArray`
API. Has no effect when using `Symbol` API.
Parameters
----------
init : Initializer
Global default Initializer to be used when `Parameter.init` is None.
Otherwise `Parameter.init` takes precedence.
ctx : Context or list of Context
Keep a copy of Parameters on one or many context(s).
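Examples
--------
Illustrative sketch, not from the original documentation; assumes ``mxnet``
is imported as ``mx`` and this module is exposed as ``mx.nn``::
params = mx.nn.ParameterDict(prefix='fc_')
params.get('weight', shape=(64, 100))
params.get('bias', shape=(64,))
params.initialize(init=mx.init.Xavier(), ctx=[mx.cpu(0), mx.cpu(1)])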
"""
for _, v in self.items():
v.initialize(None, ctx, init)
def zero_grad(self):
"""Set all Parameters' gradient buffer to 0."""
for i in self.values():
i.zero_grad()
def save(self, filename):
"""Saves all Parameters in this dict to a file. Each parameter's values
are averaged across its contexts before being written with `ndarray.save`."""
arg_dict = {}
for param in self.values():
block = param.list_data()
weight = sum(w.copyto(context.cpu()) for w in block) / len(block)
arg_dict[param.name] = weight
ndarray.save(filename, arg_dict)
def load(self, filename, ctx, allow_missing=False, ignore_extra=False):
"""Loads Parameters from a file saved by `save` and initializes them on `ctx`.
Unless `allow_missing` is True, every Parameter in this dict must be present
in the file; unless `ignore_extra` is True, every entry in the file must be
present in this dict."""
arg_dict = ndarray.load(filename)
if not allow_missing:
for name in self.keys():
assert name in arg_dict, \
"Parameter %s is missing in file %s"%(name, filename)
for name in arg_dict:
if name not in self._params:
assert ignore_extra, \
"Parameter %s loaded from file %s is not present in ParameterDict"%(
name, filename)
continue
self[name]._load_init(arg_dict[name], ctx)