# pylint: disable=too-many-instance-attributes, too-many-arguments
"""Provide some handy classes for user to implement a simple computation module
in Python easily.
"""
import logging
from .base_module import BaseModule
from ..initializer import Uniform
from .. import ndarray as nd
class PythonModule(BaseModule):
"""A convenient module class that implements many of the module APIs as
empty functions.
Parameters
----------
data_names : list of str
Names of the data expected by the module.
label_names : list of str
Names of the labels expected by the module. Could be ``None`` if the
module does not need labels.
output_names : list of str
Names of the outputs.
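
    Examples
    --------
    A minimal sketch of a subclass (the class name and the pass-through shape
    logic are hypothetical, for illustration only):

    >>> class IdentityModule(PythonModule):
    ...     def __init__(self):
    ...         super(IdentityModule, self).__init__(
    ...             data_names=['data'], label_names=None,
    ...             output_names=['identity_output'])
    ...     def _compute_output_shapes(self):
    ...         # pass the input shape through unchanged
    ...         return [('identity_output', self._data_shapes[0][1])]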
"""
def __init__(self, data_names, label_names, output_names, logger=logging):
super(PythonModule, self).__init__(logger=logger)
if isinstance(data_names, tuple):
data_names = list(data_names)
if isinstance(label_names, tuple):
label_names = list(label_names)
self._data_names = data_names
self._label_names = label_names
self._output_names = output_names
self._data_shapes = None
self._label_shapes = None
self._output_shapes = None
################################################################################
# Symbol information
################################################################################
@property
def data_names(self):
"""A list of names for data required by this module."""
return self._data_names
@property
def output_names(self):
"""A list of names for the outputs of this module."""
return self._output_names
################################################################################
# Input/Output information
################################################################################
@property
def data_shapes(self):
"""A list of (name, shape) pairs specifying the data inputs to this module."""
return self._data_shapes
@property
def label_shapes(self):
"""A list of (name, shape) pairs specifying the label inputs to this module.
        If this module does not accept labels, either because it is a module without
        a loss function or because it is not bound for training, this should return
        an empty list ``[]``.
"""
return self._label_shapes
@property
def output_shapes(self):
"""A list of (name, shape) pairs specifying the outputs of this module."""
return self._output_shapes
################################################################################
# Parameters of a module
################################################################################
def get_params(self):
"""Get parameters, those are potentially copies of the the actual parameters used
to do computation on the device.
Returns
-------
        ``({}, {})``, a pair of empty dicts. Subclasses should override this method
        if they contain parameters.
"""
return (dict(), dict())
def init_params(self, initializer=Uniform(0.01), arg_params=None, aux_params=None,
allow_missing=False, force_init=False):
"""Initialize the parameters and auxiliary states. By default this function
        does nothing. Subclasses should override this method if they contain parameters.
Parameters
----------
initializer : Initializer
Called to initialize parameters if needed.
arg_params : dict
If not ``None``, should be a dictionary of existing `arg_params`. Initialization
will be copied from that.
aux_params : dict
If not ``None``, should be a dictionary of existing `aux_params`. Initialization
will be copied from that.
allow_missing : bool
If ``True``, params could contain missing values, and the initializer will be
called to fill those missing params.
force_init : bool
If ``True``, will force re-initialize even if already initialized.
"""
pass
def update(self):
"""Update parameters according to the installed optimizer and the gradients computed
        in the previous forward-backward batch. Currently we do nothing here. Subclasses
        should override this method if they contain parameters.
"""
pass
def update_metric(self, eval_metric, labels):
"""Evaluate and accumulate evaluation metric on outputs of the last forward computation.
        Subclasses should override this method if needed.
Parameters
----------
eval_metric : EvalMetric
labels : list of NDArray
Typically ``data_batch.label``.
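
        Examples
        --------
        A hypothetical call, assuming ``mod`` is a bound subclass instance that
        has just run ``forward`` on ``batch``:

        >>> import mxnet as mx
        >>> metric = mx.metric.create('acc')
        >>> mod.update_metric(metric, batch.label)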
"""
if self._label_shapes is None:
# since we do not need labels, we are probably not a module with a loss
# function or predictions, so just ignore this call
return
# by default we expect our outputs are some scores that could be evaluated
eval_metric.update(labels, self.get_outputs())
################################################################################
# module setup
################################################################################
def bind(self, data_shapes, label_shapes=None, for_training=True,
inputs_need_grad=False, force_rebind=False, shared_module=None,
grad_req='write'):
"""Bind the symbols to construct executors. This is necessary before one
can perform computation with the module.
Parameters
----------
data_shapes : list of (str, tuple)
Typically is ``data_iter.provide_data``.
label_shapes : list of (str, tuple)
Typically is ``data_iter.provide_label``.
for_training : bool
            Default is ``True``. Whether the executors should be bound for training.
inputs_need_grad : bool
Default is ``False``. Whether the gradients to the input data need to be computed.
Typically this is not needed. But this might be needed when implementing composition
of modules.
force_rebind : bool
Default is ``False``. This function does nothing if the executors are already
bound. But with this ``True``, the executors will be forced to rebind.
shared_module : Module
Default is ``None``. This is used in bucketing. When not ``None``, the shared module
essentially corresponds to a different bucket -- a module with different symbol
but with the same sets of parameters (e.g. unrolled RNNs with different lengths).
grad_req : str, list of str, dict of str to str
Requirement for gradient accumulation. Can be 'write', 'add', or 'null'
(default to 'write').
Can be specified globally (str) or for each argument (list, dict).
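
        Examples
        --------
        A hypothetical call, assuming a module expecting a single input named
        ``'data'`` with batch size 32 and 10 features:

        >>> mod.bind(data_shapes=[('data', (32, 10))], for_training=False)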
"""
if self.binded and not force_rebind:
self.logger.warning('Already bound, ignoring bind()')
return
        assert grad_req == 'write', "Python module only supports write gradient"
self.for_training = for_training
self.inputs_need_grad = inputs_need_grad
assert len(data_shapes) == len(self._data_names)
assert [x[0] for x in data_shapes] == self._data_names
self._data_shapes = data_shapes
self._label_shapes = label_shapes
if label_shapes is not None:
assert self._label_names is not None
assert len(self._label_names) == len(label_shapes)
assert [x[0] for x in label_shapes] == self._label_names
self._output_shapes = self._compute_output_shapes()
def _compute_output_shapes(self):
"""The subclass should implement this method to compute the shape of
outputs. This method can assume that the ``data_shapes`` and ``label_shapes``
are already initialized.
"""
raise NotImplementedError()
def init_optimizer(self, kvstore='local', optimizer='sgd',
optimizer_params=(('learning_rate', 0.01),), force_init=False):
"""Install and initialize optimizers. By default we do nothing. Subclass
should
Parameters
----------
kvstore : str or KVStore
            Default ``'local'``.
        optimizer : str or Optimizer
            Default ``'sgd'``.
        optimizer_params : dict
            Default ``(('learning_rate', 0.01),)``. The default value is not a dictionary,
            just to avoid a pylint warning about dangerous default values.
        force_init : bool
            Default ``False``, indicating whether we should force re-initializing the
            optimizer in case one is already installed.
"""
pass
class PythonLossModule(PythonModule):
"""A convenient module class that implements many of the module APIs as
empty functions.
Parameters
----------
name : str
        Name of the module. The outputs will be named ``[name + '_output']``.
data_names : list of str
Defaults to ``['data']``. Names of the data expected by this module.
Should be a list of only one name.
label_names : list of str
Default ``['softmax_label']``. Names of the labels expected by the module.
Should be a list of only one name.
grad_func : function
        Optional. If not ``None``, should be a function that takes `scores`
        and `labels`, both of type `NDArray`, and returns the gradients with
        respect to the scores according to this loss function. The return
        value could be a numpy array or an `NDArray`.
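
    Examples
    --------
    A sketch of constructing a loss module with a ``grad_func``; the gradient
    shown is that of a squared loss, chosen only for illustration:

    >>> mod = PythonLossModule(name='l2loss',
    ...                        grad_func=lambda scores, labels: scores - labels)
    >>> mod.bind(data_shapes=[('data', (32, 10))],
    ...          label_shapes=[('softmax_label', (32, 10))])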
"""
def __init__(self, name='pyloss', data_names=('data',), label_names=('softmax_label',),
logger=logging, grad_func=None):
super(PythonLossModule, self).__init__(data_names, label_names,
[name + '_output'], logger=logger)
self._name = name
assert len(data_names) == 1
assert len(label_names) == 1
self._scores = None
self._labels = None
self._scores_grad = None
if grad_func is not None:
assert callable(grad_func)
self._grad_func = grad_func
def _compute_output_shapes(self):
"""Compute the shapes of outputs. As a loss module with outputs, we simply
output whatever we receive as inputs (i.e. the scores).
"""
return [(self._name + '_output', self._data_shapes[0][1])]
def forward(self, data_batch, is_train=None):
"""Forward computation. Here we do nothing but to keep a reference to
the scores and the labels so that we can do backward computation.
Parameters
----------
data_batch : DataBatch
Could be anything with similar API implemented.
is_train : bool
Default is ``None``, which means `is_train` takes the value of ``self.for_training``.
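
        Examples
        --------
        A hypothetical call, assuming ``mod`` is bound with input ``'data'`` and
        label ``'softmax_label'``, both of shape ``(32, 10)``:

        >>> import mxnet as mx
        >>> batch = mx.io.DataBatch(data=[mx.nd.ones((32, 10))],
        ...                         label=[mx.nd.zeros((32, 10))])
        >>> mod.forward(batch)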
"""
self._scores = data_batch.data[0]
if is_train is None:
is_train = self.for_training
if is_train:
self._labels = data_batch.label[0]
def get_outputs(self, merge_multi_context=True):
"""Get outputs of the previous forward computation. As a output loss module,
we treat the inputs to this module as scores, and simply return them.
Parameters
----------
merge_multi_context : bool
Should always be ``True``, because we do not use multiple contexts for computing.
"""
assert merge_multi_context is True
return [self._scores]
def backward(self, out_grads=None):
"""Backward computation.
Parameters
----------
out_grads : NDArray or list of NDArray, optional
Gradient on the outputs to be propagated back.
This parameter is only needed when bind is called
on outputs that are not a loss function.
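
        Examples
        --------
        A hypothetical call after ``forward`` on a module bound for training:

        >>> mod.backward()
        >>> grads = mod.get_input_grads()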
"""
assert out_grads is None, 'For a loss module, out_grads should be None'
assert self.for_training
self._backward_impl()
def _backward_impl(self):
"""Actual implementation of the backward computation. The computation
should take ``self._scores`` and ``self._labels`` and then compute the
gradients with respect to the scores, store it as an `NDArray` in
``self._scores_grad``.
Instead of defining a subclass and overriding this function,
a more convenient way is to pass in a `grad_func` when constructing
the module object. Then it will be called to compute the gradients.
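
        Examples
        --------
        A minimal sketch of the subclassing alternative described above; the
        squared-loss gradient is illustrative only:

        >>> class L2LossModule(PythonLossModule):
        ...     def _backward_impl(self):
        ...         # gradient of 0.5 * ||scores - labels||^2 w.r.t. scores
        ...         self._scores_grad = self._scores - self._labels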
"""
if self._grad_func is not None:
grad = self._grad_func(self._scores, self._labels)
if not isinstance(grad, nd.NDArray):
grad = nd.array(grad)
self._scores_grad = grad
else:
raise NotImplementedError()
def get_input_grads(self, merge_multi_context=True):
"""Get the gradients to the inputs, computed in the previous backward computation.
Parameters
----------
merge_multi_context : bool
            Should always be ``True`` because we do not use multiple contexts for computation.
"""
assert merge_multi_context is True
return [self._scores_grad]
def install_monitor(self, mon):
"""Install monitor on all executors."""
raise NotImplementedError()