# coding: utf-8
# pylint: disable=
"""Neural network parameter."""
from collections import OrderedDict
import numpy as np
from ..base import mx_real_t, MXNetError
from .. import symbol, ndarray, initializer, context
from ..context import Context
from .. import autograd
# pylint: disable= invalid-name
tensor_types = (symbol.Symbol, ndarray.NDArray)
# pylint: enable= invalid-name
class DeferredInitializationError(MXNetError):
"""Error for unfinished deferred initialization."""
pass
class Parameter(object):
"""A Container holding parameters (weights) of layers.
`Parameter` can be used with both `Symbol` and `NDArray` API. For `Symbol` API,
`Parameter.var()` will return a `Symbol` representing this parameter. It
can then be used for composing networks::
x = mx.sym.Variable('data')
w = mx.nn.Parameter('fc_weight', init=mx.init.Xavier())
b = mx.nn.Parameter('fc_bias', init=mx.init.Zero())
out = mx.sym.FullyConnected(x, w.var(), b.var(), num_hidden=64)
For the `NDArray` API, `Parameter` must be initialized with `Parameter.initialize`. It
will then hold a copy of the parameter on each `Context`. If `grad_req` is
not `null`, it will also hold a gradient array on each `Context`::
ctx = mx.gpu(0)
x = mx.nd.zeros((16, 100), ctx=ctx)
w = mx.nn.Parameter('fc_weight', shape=(64, 100), init=mx.init.Xavier())
b = mx.nn.Parameter('fc_bias', shape=(64,), init=mx.init.Zero())
w.initialize(ctx=ctx)
b.initialize(ctx=ctx)
out = mx.nd.FullyConnected(x, w.data(ctx), b.data(ctx), num_hidden=64)
Parameters
----------
name : str
Name of this parameter.
grad_req : {'write', 'add', 'null'}, default 'write'
Specifies how to update gradient to grad arrays.
- 'write' means the gradient is written to the grad `NDArray` every time it
is computed.
- 'add' means the gradient is added to the grad `NDArray` every time it is
computed. You need to manually call `zero_grad()` to clear the gradient
buffer before each iteration when using this option.
- 'null' means gradient is not requested for this parameter. Gradient arrays
will not be allocated.
shape : tuple of int, default None
Shape of this parameter. By default shape is not specified. A Parameter with
unknown shape can be used with the `Symbol` API, but initialization will raise
an error when using the `NDArray` API.
dtype : numpy.dtype or str, default 'float32'
Data type of this parameter. For example, numpy.float32 or 'float32'.
lr_mult : float, default 1.0
Learning rate multiplier. Learning rate will be multiplied by lr_mult
when updating this parameter with optimizer.
wd_mult : float, default 1.0
Weight decay multiplier (L2 regularizer coefficient). Works similarly to lr_mult.
init : Initializer, default None
Initializer of this parameter. Will use the global initializer by default.
"""
def __init__(self, name, grad_req='write', shape=None, dtype=mx_real_t,
lr_mult=1.0, wd_mult=1.0, init=None):
self.name = name
self.shape = shape
self.dtype = dtype
self.lr_mult = lr_mult
self.wd_mult = wd_mult
self.grad_req = grad_req
self.init = init
self._var = None
self._data = None
self._grad = None
self._defered_init = ()
def initialize(self, init=None, ctx=None, default_init=initializer.Xavier(),
allow_deferring=True):
"""Intialize parameter and gradient arrays. Only used for `NDArray` API.
Parameters
----------
init : Initializer
The initializer to use. Overrides `Parameter.init` and default_init.
ctx : Context or list of Context, defaults to `context.current_context()`.
Initialize Parameter on given context. If ctx is a list of Context, a
copy will be made for each context.
.. note:: Copies are independent arrays. User is responsible for keeping
their values consistent when updating. Normally nn.Trainer does this for you.
default_init : Initializer
Default initializer is used when both `init` and `Parameter.init` are None.
allow_deferring : bool, default True
Whether initialization may be deferred when `shape` is unknown (None or
contains a 0 dimension). If True, initialization is postponed until the
shape is specified (e.g. on the first forward pass); if False, a
`ValueError` is raised instead.
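Examples
--------
A minimal sketch, not part of the original documentation; it assumes
``mxnet`` is imported as ``mx`` and that this module is exposed as ``mx.nn``::
w = mx.nn.Parameter('fc_weight', shape=(64, 100), init=mx.init.Xavier())
w.initialize(ctx=[mx.cpu(0), mx.cpu(1)])  # one independent copy per context
v = mx.nn.Parameter('fc2_weight')         # shape unknown
v.initialize(ctx=mx.cpu(0))               # deferred until the shape is set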
"""
if ctx is None:
ctx = [context.current_context()]
if isinstance(ctx, Context):
ctx = [ctx]
if self.shape is None or np.prod(self.shape) <= 0:
if allow_deferring:
self._defered_init = (init, ctx, default_init)
return
raise ValueError("Cannot initialize Parameter %s because it has " \
"invalid shape: %s. Please specify in_units, " \
"in_filters, num_features etc for Layers or " \
"set allow_deferring to True to defer initialization " \
"to first forward pass."%(self.name, str(self.shape)))
self._defered_init = (init, ctx, default_init)
self._finish_deferred_init()
def _load_init(self, data, ctx):
"""(Re)init by loading from data."""
if self.shape:
for i, j in zip(self.shape, data.shape):
assert i == 0 or i == j, \
"Failed loading Parameter %s from saved params: " \
"shape incompatible expacted %s vs saved %s"%(
self.name, str(self.shape), str(data.shape))
if self.dtype:
assert np.dtype(self.dtype).type == data.dtype, \
"Failed loading Parameter %s from saved params: " \
"dtype incompatible expacted %s vs saved %s"%(
self.name, str(self.dtype), str(data.dtype))
if isinstance(ctx, Context):
ctx = [ctx]
if self._data is None:
if self._defered_init:
assert set(ctx) == set(self._defered_init[1]), \
"Failed to load Parameter %s on %s because it was " \
"previous initialized on %s."%(
self.name, str(ctx), str(self.list_ctx()))
self._init_impl(data, ctx)
else:
assert set(ctx) == set(self.list_ctx()), \
"Failed to load Parameter %s on %s because it was " \
"previous initialized on %s."%(
self.name, str(ctx), str(self.list_ctx()))
self.set_data(data)
self._defered_init = ()
def _finish_deferred_init(self):
"""Finish deferred initialization."""
if not self._defered_init:
return
init, ctx, default_init = self._defered_init
self._defered_init = ()
assert self.shape is not None and np.prod(self.shape) > 0, \
"Cannot initialize Parameter %s because it has " \
"invalid shape: %s. Please specify in_units, " \
"in_filters, num_features etc for Layers."%(
self.name, str(self.shape))
with autograd.test_section():
data = ndarray.zeros(shape=self.shape, dtype=self.dtype,
ctx=context.cpu())
if init is None:
init = self.init
initializer.create(default_init)(
initializer.InitDesc(self.name, {'__init__': init}), data)
self._init_impl(data, ctx)
def _init_impl(self, data, ctx):
"""Set data and grad."""
self._data = OrderedDict()
for i in ctx:
self._data[i] = data.copyto(i)
if self.grad_req == 'null':
self._grad = None
return
self._grad = OrderedDict()
for i in ctx:
self._grad[i] = ndarray.zeros_like(self._data[i])
autograd.mark_variables(self.list_data(), self.list_grad(), self.grad_req)
def set_data(self, data):
"""Set this parameter's value on all contexts to data."""
assert self._data is not None, \
"Parameter %s has not been initialized"%self.name
for arr in self.list_data():
arr[:] = data
def _check_initialized(self, ctx=None):
if self._data is not None:
if ctx is not None and ctx not in self._data:
raise RuntimeError(
"Parameter %s was not initialized on context %s. "
"It was only initialized on %s."%(
self.name, str(ctx), str(self.list_ctx())))
return
if self._defered_init:
raise DeferredInitializationError
raise RuntimeError(
"Parameter %s has not been initialized. Note that " \
"you should initialize parameters and create Trainer " \
"with Layer.all_params() instead of Layer.params " \
"because the later does not include parameters of " \
"nested child layers "%(self.name))
def data(self, ctx=None):
"""Returns a copy of this parameter on one context. Must have been
initialized on this context before.
Parameters
----------
ctx : Context
Desired context.
Returns
-------
NDArray on ctx
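Examples
--------
Illustrative only, not from the original documentation; assumes ``mxnet`` is
imported as ``mx`` and this module is exposed as ``mx.nn``::
w = mx.nn.Parameter('fc_weight', shape=(64, 100), init=mx.init.Xavier())
w.initialize(ctx=mx.cpu(0))
arr = w.data(mx.cpu(0))  # the parameter's NDArray on cpu(0)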
"""
if ctx is None:
ctx = context.current_context()
self._check_initialized(ctx)
return self._data[ctx]
def list_data(self):
"""Returns copies of this parameter on all contexts, in the same order
as creation."""
self._check_initialized()
return list(self._data.values())
def grad(self, ctx=None):
"""Returns a gradient buffer for this parameter on one context.
Parameters
----------
ctx : Context
Desired context.
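Examples
--------
A sketch, not from the original documentation; assumes ``mxnet`` is imported
as ``mx`` and this module is exposed as ``mx.nn``::
w = mx.nn.Parameter('fc_weight', shape=(64, 100), init=mx.init.Xavier())
w.initialize(ctx=mx.cpu(0))
g = w.grad(mx.cpu(0))  # gradient buffer on cpu(0), initially all zeros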
"""
if ctx is None:
ctx = context.current_context()
self._check_initialized(ctx)
if self._grad is None:
raise RuntimeError(
"Cannot get gradient array for Parameter %s " \
"because grad_req='null'"%(self.name))
return self._grad[ctx]
def list_grad(self):
"""Returns gradient buffers on all contexts, in the same order
as `values`."""
self._check_initialized()
assert self._grad is not None, \
"Parameter %s does not have gradients because grad_req='null'"%self.name
return list(self._grad.values())
def list_ctx(self):
"""Returns a list of contexts this parameter is initialized on"""
if self._data is None:
if self._defered_init:
return self._defered_init[1]
raise RuntimeError("Parameter %s has not been initialized"%self.name)
return list(self._data.keys())
def zero_grad(self):
"""Set gradient buffer on all contexts to 0. No action is taken if
parameter is uninitialized or doesn't require gradient."""
if self._grad is None:
return
# `_grad` maps Context -> NDArray; zero each gradient buffer in place.
for arr in self._grad.values():
arr[:] = 0
def var(self):
"""Returns a symbol representing this parameter."""
if self._var is None:
self._var = symbol.var(self.name, shape=self.shape, dtype=self.dtype,
lr_mult=self.lr_mult, wd_mult=self.wd_mult,
init=self.init)
return self._var
class ParameterDict(object):
"""A dictionary managing a set of parameters.
Parameters
----------
prefix : str, default ''
The prefix to be prepended to all Parameters' name created by this dict.
shared : ParameterDict or None
If not None, when this dict's `get` method creates a new parameter, it
will first try to retrieve it from the `shared` dict. Usually used for
sharing parameters with another layer.
"""
def __init__(self, prefix='', shared=None):
self._prefix = prefix
self._params = {}
self._shared = shared
def __getitem__(self, key):
return self._params[key]
def items(self):
return self._params.items()
def keys(self):
return self._params.keys()
def values(self):
return self._params.values()
@property
def prefix(self):
"""Prefix of this dict. It will be prepended to Parameters' name created
with `get`"""
return self._prefix
def _get_impl(self, name):
if name in self._params:
return self._params[name]
if self._shared is not None and name in self._shared._params:
return self._shared._params[name]
return None
def get(self, name, **kwargs):
"""Retrieve a Parameter with name `self.prefix+name`. If not found,
`get` will first try to retrive it from `shared` dict. If still not
found, `get` will create a new Parameter with key-word arguments and
insert it to self.
Parameters
----------
name : str
name of the desired Parameter. It will be prepended with this dictionary's
prefix.
**kwargs : dict
The rest of the keyword arguments for the created Parameter.
Returns
-------
Parameter
The created or retrieved Parameter.
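Examples
--------
A minimal sketch, not from the original documentation; assumes ``mxnet`` is
imported as ``mx`` and this module is exposed as ``mx.nn``::
params = mx.nn.ParameterDict(prefix='fc_')
w = params.get('weight', shape=(64, 100))  # creates Parameter 'fc_weight'
w2 = params.get('weight')                  # retrieves the same Parameter
assert w is w2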
"""
name = self.prefix + name
param = self._get_impl(name)
if param is None:
param = Parameter(name, **kwargs)
self._params[name] = param
else:
for k, v in kwargs.items():
if hasattr(param, k) and getattr(param, k) is not None:
assert v is None or v == getattr(param, k), \
"Cannot retrieve Parameter %s because desired attribute " \
"does not match with stored for attribute %s: " \
"desired %s vs stored %s."%(
name, k, str(v), str(getattr(param, k)))
else:
setattr(param, k, v)
return param
def update(self, other):
"""Copy all Parameters in `other` to self."""
for k, v in other.items():
if k in self._params:
assert self._params[k] is v, \
"Cannot update self with other because they have different " \
"Parameters with the same name %s"%k
else:
self._params[k] = v
def initialize(self, init=initializer.Xavier(), ctx=None):
"""Intialize all Parameters manage by this dictionary to be used for `NDArray`
API. Has no effect when using `Symbol` API.
Parameters
----------
init : Initializer
Global default Initializer to be used when `Parameter.init` is None.
Otherwise `Parameter.init` takes precedence.
ctx : Context or list of Context
Keep a copy of Parameters on one or many context(s).
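Examples
--------
Illustrative sketch, not from the original documentation; assumes ``mxnet``
is imported as ``mx`` and this module is exposed as ``mx.nn``::
params = mx.nn.ParameterDict(prefix='fc_')
params.get('weight', shape=(64, 100))
params.get('bias', shape=(64,))
params.initialize(init=mx.init.Xavier(), ctx=[mx.cpu(0), mx.cpu(1)])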
"""
for _, v in self.items():
v.initialize(None, ctx, init)
def zero_grad(self):
"""Set all Parameters' gradient buffer to 0."""
for i in self.values():
i.zero_grad()
def save(self, filename):
"""Saves all Parameters in this dict to a file. Each parameter's values
are averaged across its contexts before being written with `ndarray.save`."""
arg_dict = {}
for param in self.values():
block = param.list_data()
weight = sum(w.copyto(context.cpu()) for w in block) / len(block)
arg_dict[param.name] = weight
ndarray.save(filename, arg_dict)
def load(self, filename, ctx, allow_missing=False, ignore_extra=False):
"""Loads Parameters from a file saved by `save` and initializes them on `ctx`.
Unless `allow_missing` is True, every Parameter in this dict must be present
in the file; unless `ignore_extra` is True, every entry in the file must be
present in this dict."""
arg_dict = ndarray.load(filename)
if not allow_missing:
for name in self.keys():
assert name in arg_dict, \
"Parameter %s is missing in file %s"%(name, filename)
for name in arg_dict:
if name not in self._params:
assert ignore_extra, \
"Parameter %s loaded from file %s is not present in ParameterDict"%(
name, filename)
continue
self[name]._load_init(arg_dict[name], ctx)