| # pylint: disable=too-many-instance-attributes, too-many-arguments |
| """Provide some handy classes for user to implement a simple computation module |
| in Python easily. |
| """ |
| import logging |
| |
| from .base_module import BaseModule |
| from ..initializer import Uniform |
| from .. import ndarray as nd |
| |
| class PythonModule(BaseModule): |
| """A convenient module class that implements many of the module APIs as |
| empty functions. |
| |
| Parameters |
| ---------- |
| data_names : list of str |
| Names of the data expected by the module. |
| label_names : list of str |
| Names of the labels expected by the module. Could be ``None`` if the |
| module does not need labels. |
| output_names : list of str |
| Names of the outputs. |
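
    Examples
    --------
    A minimal subclass sketch (hypothetical names; a subclass holding parameters
    would also override the parameter-related methods)::

        class IdentityModule(PythonModule):
            # a module that simply passes its single input through
            def __init__(self):
                super(IdentityModule, self).__init__(
                    data_names=['data'], label_names=None,
                    output_names=['identity_output'])

            def _compute_output_shapes(self):
                # the single output has the same shape as the single input
                return [('identity_output', self._data_shapes[0][1])]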
| """ |
| def __init__(self, data_names, label_names, output_names, logger=logging): |
| super(PythonModule, self).__init__(logger=logger) |
| |
| if isinstance(data_names, tuple): |
| data_names = list(data_names) |
| if isinstance(label_names, tuple): |
| label_names = list(label_names) |
| |
| self._data_names = data_names |
| self._label_names = label_names |
| self._output_names = output_names |
| |
| self._data_shapes = None |
| self._label_shapes = None |
| self._output_shapes = None |
| |
| ################################################################################ |
| # Symbol information |
| ################################################################################ |
| @property |
| def data_names(self): |
| """A list of names for data required by this module.""" |
| return self._data_names |
| |
| @property |
| def output_names(self): |
| """A list of names for the outputs of this module.""" |
| return self._output_names |
| |
| ################################################################################ |
| # Input/Output information |
| ################################################################################ |
| @property |
| def data_shapes(self): |
| """A list of (name, shape) pairs specifying the data inputs to this module.""" |
| return self._data_shapes |
| |
| @property |
| def label_shapes(self): |
| """A list of (name, shape) pairs specifying the label inputs to this module. |
| If this module does not accept labels -- either it is a module without loss |
| function, or it is not bound for training, then this should return an empty |
| list ``[]```. |
| """ |
| return self._label_shapes |
| |
| @property |
| def output_shapes(self): |
| """A list of (name, shape) pairs specifying the outputs of this module.""" |
| return self._output_shapes |
| |
| ################################################################################ |
| # Parameters of a module |
| ################################################################################ |
| def get_params(self): |
| """Get parameters, those are potentially copies of the the actual parameters used |
| to do computation on the device. |
| |
| Returns |
| ------- |
| ``({}, {})``, a pair of empty dict. Subclass should override this method if |
| contains parameters. |
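
        Examples
        --------
        A sketch of what an overriding subclass might return (``self._weight``
        is a hypothetical parameter attribute; no auxiliary states)::

            return ({'weight': self._weight.copy()}, {})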
| """ |
| return (dict(), dict()) |
| |
| def init_params(self, initializer=Uniform(0.01), arg_params=None, aux_params=None, |
| allow_missing=False, force_init=False): |
| """Initialize the parameters and auxiliary states. By default this function |
| does nothing. Subclass should override this method if contains parameters. |
| |
| Parameters |
| ---------- |
| initializer : Initializer |
| Called to initialize parameters if needed. |
        arg_params : dict
            If not ``None``, should be a dictionary of existing ``arg_params``. Initialization
            will copy values from it.
        aux_params : dict
            If not ``None``, should be a dictionary of existing ``aux_params``. Initialization
            will copy values from it.
| allow_missing : bool |
| If ``True``, params could contain missing values, and the initializer will be |
| called to fill those missing params. |
| force_init : bool |
| If ``True``, will force re-initialize even if already initialized. |
| """ |
| pass |
| |
| def update(self): |
| """Update parameters according to the installed optimizer and the gradients computed |
| in the previous forward-backward batch. Currently we do nothing here. Subclass should |
| override this method if contains parameters. |
| """ |
| pass |
| |
| def update_metric(self, eval_metric, labels): |
| """Evaluate and accumulate evaluation metric on outputs of the last forward computation. |
        Subclass should override this method if needed.
| |
| Parameters |
| ---------- |
| eval_metric : EvalMetric |
| labels : list of NDArray |
| Typically ``data_batch.label``. |
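
        Examples
        --------
        A hedged sketch, assuming ``mxnet`` is imported as ``mx`` and ``batch``
        is the last batch fed to ``forward`` (hypothetical names)::

            metric = mx.metric.Accuracy()
            mod.update_metric(metric, batch.label)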
| """ |
| if self._label_shapes is None: |
| # since we do not need labels, we are probably not a module with a loss |
| # function or predictions, so just ignore this call |
| return |
| |
| # by default we expect our outputs are some scores that could be evaluated |
| eval_metric.update(labels, self.get_outputs()) |
| |
| ################################################################################ |
| # module setup |
| ################################################################################ |
| def bind(self, data_shapes, label_shapes=None, for_training=True, |
| inputs_need_grad=False, force_rebind=False, shared_module=None, |
| grad_req='write'): |
| """Bind the symbols to construct executors. This is necessary before one |
| can perform computation with the module. |
| |
| Parameters |
| ---------- |
| data_shapes : list of (str, tuple) |
| Typically is ``data_iter.provide_data``. |
| label_shapes : list of (str, tuple) |
| Typically is ``data_iter.provide_label``. |
| for_training : bool |
            Default is ``True``. Whether the executors should be bound for training.
| inputs_need_grad : bool |
| Default is ``False``. Whether the gradients to the input data need to be computed. |
| Typically this is not needed. But this might be needed when implementing composition |
| of modules. |
| force_rebind : bool |
            Default is ``False``. This function does nothing if the executors are already
            bound. But if this is ``True``, the executors will be forced to rebind.
| shared_module : Module |
| Default is ``None``. This is used in bucketing. When not ``None``, the shared module |
| essentially corresponds to a different bucket -- a module with different symbol |
| but with the same sets of parameters (e.g. unrolled RNNs with different lengths). |
| grad_req : str, list of str, dict of str to str |
| Requirement for gradient accumulation. Can be 'write', 'add', or 'null' |
| (default to 'write'). |
| Can be specified globally (str) or for each argument (list, dict). |
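
        Examples
        --------
        A hedged sketch, assuming the module expects a single ``'data'`` input
        and a single ``'softmax_label'`` label (hypothetical shapes)::

            mod.bind(data_shapes=[('data', (32, 10))],
                     label_shapes=[('softmax_label', (32,))],
                     for_training=True)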
| """ |
| if self.binded and not force_rebind: |
| self.logger.warning('Already bound, ignoring bind()') |
| return |
| |
        assert grad_req == 'write', "Python module only supports 'write' gradient"
| self.for_training = for_training |
| self.inputs_need_grad = inputs_need_grad |
| |
| assert len(data_shapes) == len(self._data_names) |
| assert [x[0] for x in data_shapes] == self._data_names |
| self._data_shapes = data_shapes |
| |
| self._label_shapes = label_shapes |
| if label_shapes is not None: |
| assert self._label_names is not None |
| assert len(self._label_names) == len(label_shapes) |
| assert [x[0] for x in label_shapes] == self._label_names |
| |
| self._output_shapes = self._compute_output_shapes() |
| |
| def _compute_output_shapes(self): |
| """The subclass should implement this method to compute the shape of |
| outputs. This method can assume that the ``data_shapes`` and ``label_shapes`` |
| are already initialized. |
| """ |
| raise NotImplementedError() |
| |
| def init_optimizer(self, kvstore='local', optimizer='sgd', |
| optimizer_params=(('learning_rate', 0.01),), force_init=False): |
| """Install and initialize optimizers. By default we do nothing. Subclass |
| should |
| |
| Parameters |
| ---------- |
        kvstore : str or KVStore
            Default ``'local'``.
        optimizer : str or Optimizer
            Default ``'sgd'``.
        optimizer_params : dict
            Default ``(('learning_rate', 0.01),)``. The default value is a tuple of pairs
            rather than a dictionary, only to avoid pylint's dangerous-default-value warning.
        force_init : bool
            Default ``False``, indicating whether to force re-initializing the
            optimizer in case an optimizer is already installed.
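
        Examples
        --------
        A hedged sketch of the expected call signature in an overriding subclass
        (hypothetical settings; note the tuple-of-pairs format)::

            mod.init_optimizer(optimizer='sgd',
                               optimizer_params=(('learning_rate', 0.1),))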
| """ |
| pass |
| |
| |
| class PythonLossModule(PythonModule): |
| """A convenient module class that implements many of the module APIs as |
| empty functions. |
| |
| Parameters |
| ---------- |
| name : str |
        Name of the module. The outputs will be named ``[name + '_output']``.
| data_names : list of str |
| Defaults to ``['data']``. Names of the data expected by this module. |
| Should be a list of only one name. |
| label_names : list of str |
        Defaults to ``['softmax_label']``. Names of the labels expected by the module.
| Should be a list of only one name. |
| grad_func : function |
        Optional. If not ``None``, should be a function that takes `scores`
        and `labels`, both of type `NDArray`, and returns the gradient of the
        loss with respect to the scores. The return value could be a numpy
        array or an `NDArray`.
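
    Examples
    --------
    A minimal sketch, assuming a mean-squared-error style loss whose gradient
    with respect to the scores is ``scores - labels`` (``mse_grad`` and the
    reshape guard are illustrative, not part of the API)::

        def mse_grad(scores, labels):
            # gradient of 0.5 * (scores - labels)^2 w.r.t. scores
            return scores - labels.reshape(scores.shape)

        mod = PythonLossModule(name='mse', grad_func=mse_grad)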
| """ |
| def __init__(self, name='pyloss', data_names=('data',), label_names=('softmax_label',), |
| logger=logging, grad_func=None): |
| super(PythonLossModule, self).__init__(data_names, label_names, |
| [name + '_output'], logger=logger) |
| self._name = name |
| assert len(data_names) == 1 |
| assert len(label_names) == 1 |
| |
| self._scores = None |
| self._labels = None |
| self._scores_grad = None |
| |
        if grad_func is not None:
            assert callable(grad_func)
        # keep the reference even when grad_func is None, so that
        # _backward_impl can test for its presence without an AttributeError
        self._grad_func = grad_func
| |
| def _compute_output_shapes(self): |
| """Compute the shapes of outputs. As a loss module with outputs, we simply |
| output whatever we receive as inputs (i.e. the scores). |
| """ |
| return [(self._name + '_output', self._data_shapes[0][1])] |
| |
| def forward(self, data_batch, is_train=None): |
| """Forward computation. Here we do nothing but to keep a reference to |
| the scores and the labels so that we can do backward computation. |
| |
| Parameters |
| ---------- |
| data_batch : DataBatch |
| Could be anything with similar API implemented. |
| is_train : bool |
| Default is ``None``, which means `is_train` takes the value of ``self.for_training``. |
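
        Examples
        --------
        A hedged sketch, assuming ``mxnet`` is imported as ``mx`` and its
        ``DataBatch`` is used to wrap the arrays (hypothetical shapes)::

            batch = mx.io.DataBatch(data=[mx.nd.ones((4, 3))],
                                    label=[mx.nd.zeros((4,))])
            mod.forward(batch, is_train=True)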
| """ |
| self._scores = data_batch.data[0] |
| |
| if is_train is None: |
| is_train = self.for_training |
| |
| if is_train: |
| self._labels = data_batch.label[0] |
| |
| def get_outputs(self, merge_multi_context=True): |
| """Get outputs of the previous forward computation. As a output loss module, |
| we treat the inputs to this module as scores, and simply return them. |
| |
| Parameters |
| ---------- |
| merge_multi_context : bool |
| Should always be ``True``, because we do not use multiple contexts for computing. |
| """ |
| assert merge_multi_context is True |
| return [self._scores] |
| |
| def backward(self, out_grads=None): |
| """Backward computation. |
| |
| Parameters |
| ---------- |
| out_grads : NDArray or list of NDArray, optional |
| Gradient on the outputs to be propagated back. |
| This parameter is only needed when bind is called |
| on outputs that are not a loss function. |
| """ |
| assert out_grads is None, 'For a loss module, out_grads should be None' |
| assert self.for_training |
| |
| self._backward_impl() |
| |
| def _backward_impl(self): |
| """Actual implementation of the backward computation. The computation |
| should take ``self._scores`` and ``self._labels`` and then compute the |
        gradients with respect to the scores, and store the result as an `NDArray` in
| ``self._scores_grad``. |
| |
| Instead of defining a subclass and overriding this function, |
| a more convenient way is to pass in a `grad_func` when constructing |
| the module object. Then it will be called to compute the gradients. |
| """ |
| if self._grad_func is not None: |
| grad = self._grad_func(self._scores, self._labels) |
| if not isinstance(grad, nd.NDArray): |
| grad = nd.array(grad) |
| self._scores_grad = grad |
| else: |
| raise NotImplementedError() |
| |
| def get_input_grads(self, merge_multi_context=True): |
| """Get the gradients to the inputs, computed in the previous backward computation. |
| |
| Parameters |
| ---------- |
| merge_multi_context : bool |
            Should always be ``True`` because we do not use multiple contexts for computation.
| """ |
| assert merge_multi_context is True |
| return [self._scores_grad] |
| |
| def install_monitor(self, mon): |
| """Install monitor on all executors.""" |
| raise NotImplementedError() |