# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Weight initializer."""
import re
import logging
import warnings
import json
from math import sqrt
import numpy as np
from .base import string_types
from .ndarray import NDArray, load
from . import random
from . import registry
from . import ndarray
from .util import is_np_array
from . import numpy as _mx_np # pylint: disable=reimported
# inherit str for backward compatibility
class InitDesc(str):
"""
Descriptor for the initialization pattern.
Parameters
----------
name : str
Name of variable.
attrs : dict of str to str
Attributes of this variable taken from ``Symbol.attr_dict``.
global_init : Initializer
        Global initializer to fall back to.
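    Example
    -------
    >>> # Illustrative sketch: a descriptor is the variable name plus optional attrs;
    ... # calling an initializer on it fills the array according to the name suffix.
    ...
    >>> desc = mx.init.InitDesc('fc1_weight')
    >>> arr = mx.nd.empty((2, 3))
    >>> mx.init.Uniform(0.1)(desc, arr)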
"""
def __new__(cls, name, attrs=None, global_init=None):
ret = super(InitDesc, cls).__new__(cls, name)
ret.attrs = attrs or {}
ret.global_init = global_init
return ret
class Initializer(object):
"""The base class of an initializer."""
def __init__(self, **kwargs):
self._kwargs = kwargs
self._verbose = False
self._print_func = None
def set_verbosity(self, verbose=False, print_func=None):
"""Switch on/off verbose mode
Parameters
----------
verbose : bool
switch on/off verbose mode
print_func : function
A function that computes statistics of initialized arrays.
Takes an `NDArray` and returns an `str`. Defaults to mean
absolute value str((abs(x)/size(x)).asscalar()).
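        Example
        -------
        >>> # Illustrative sketch: set_verbosity returns self, so the configured
        ... # initializer can be passed on directly; each initialized array's
        ... # root-mean-square value is logged.
        ...
        >>> init = mx.init.Xavier().set_verbosity(verbose=True)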
"""
self._verbose = verbose
if print_func is None:
def asum_stat(x):
"""returns |x|/size(x), async execution."""
return str((ndarray.norm(x)/sqrt(x.size)).asscalar())
print_func = asum_stat
self._print_func = print_func
return self
def _verbose_print(self, desc, init, arr):
"""Internal verbose print function
Parameters
----------
desc : InitDesc or str
name of the array
init : str
initializer pattern
arr : NDArray
initialized array
"""
if self._verbose and self._print_func:
logging.info('Initialized %s as %s: %s', desc, init, self._print_func(arr))
def dumps(self):
"""Saves the initializer to string
Returns
-------
str
JSON formatted string that describes the initializer.
Examples
--------
>>> # Create initializer and retrieve its parameters
...
>>> init = mx.init.Normal(0.5)
>>> init.dumps()
'["normal", {"sigma": 0.5}]'
>>> init = mx.init.Xavier(factor_type="in", magnitude=2.34)
>>> init.dumps()
'["xavier", {"rnd_type": "uniform", "magnitude": 2.34, "factor_type": "in"}]'
"""
return json.dumps([self.__class__.__name__.lower(), self._kwargs])
def __call__(self, desc, arr):
"""Initialize an array
Parameters
----------
desc : InitDesc
Initialization pattern descriptor.
arr : NDArray
The array to be initialized.
"""
if not isinstance(desc, InitDesc):
self._legacy_init(desc, arr)
return
if desc.global_init is None:
desc.global_init = self
init = desc.attrs.get('__init__', "")
if init:
# when calling Variable initializer
create(init)._init_weight(desc, arr)
self._verbose_print(desc, init, arr)
else:
# register nnvm::FSetInputVariableAttrs in the backend for new patterns
# don't add new cases here.
if desc.endswith('weight'):
self._init_weight(desc, arr)
self._verbose_print(desc, 'weight', arr)
elif desc.endswith('bias'):
self._init_bias(desc, arr)
self._verbose_print(desc, 'bias', arr)
elif desc.endswith('gamma'):
self._init_gamma(desc, arr)
self._verbose_print(desc, 'gamma', arr)
elif desc.endswith('beta'):
self._init_beta(desc, arr)
self._verbose_print(desc, 'beta', arr)
elif desc.endswith('min'):
self._init_zero(desc, arr)
self._verbose_print(desc, 'min', arr)
elif desc.endswith('max'):
self._init_one(desc, arr)
self._verbose_print(desc, 'max', arr)
elif desc.endswith('weight_quantize'):
self._init_quantized_weight(desc, arr)
self._verbose_print(desc, 'weight_quantize', arr)
elif desc.endswith('bias_quantize'):
self._init_quantized_bias(desc, arr)
self._verbose_print(desc, 'bias_quantize', arr)
else:
self._init_default(desc, arr)
def _legacy_init(self, name, arr):
"""Legacy initialization method.
Parameters
----------
name : str
Name of corresponding NDArray.
arr : NDArray
NDArray to be initialized.
"""
warnings.warn(
"\033[91mCalling initializer with init(str, NDArray) has been deprecated." \
"please use init(mx.init.InitDesc(...), NDArray) instead.\033[0m",
DeprecationWarning, stacklevel=3)
if not isinstance(name, string_types):
raise TypeError('name must be string')
if not isinstance(arr, NDArray):
raise TypeError('arr must be NDArray')
if name.startswith('upsampling'):
self._init_bilinear(name, arr)
elif name.startswith('stn_loc') and name.endswith('weight'):
self._init_zero(name, arr)
elif name.startswith('stn_loc') and name.endswith('bias'):
self._init_loc_bias(name, arr)
elif name.endswith('bias'):
self._init_bias(name, arr)
elif name.endswith('gamma'):
self._init_gamma(name, arr)
elif name.endswith('beta'):
self._init_beta(name, arr)
elif name.endswith('weight'):
self._init_weight(name, arr)
elif name.endswith("moving_mean"):
self._init_zero(name, arr)
elif name.endswith("moving_var"):
self._init_one(name, arr)
elif name.endswith("moving_inv_var"):
self._init_zero(name, arr)
elif name.endswith("moving_avg"):
self._init_zero(name, arr)
elif name.endswith('min'):
self._init_zero(name, arr)
elif name.endswith('max'):
self._init_one(name, arr)
else:
self._init_default(name, arr)
def _init_bilinear(self, _, arr):
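        # Fill the array with a separable bilinear interpolation kernel:
        # f is half the kernel width (rounded up) and c the kernel centre, so each
        # entry is (1 - |x/f - c|) * (1 - |y/f - c|) over the last two axes.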
weight = np.zeros(np.prod(arr.shape), dtype='float32')
shape = arr.shape
f = np.ceil(shape[3] / 2.)
c = (2 * f - 1 - f % 2) / (2. * f)
for i in range(np.prod(shape)):
x = i % shape[3]
y = (i // shape[3]) % shape[2]
weight[i] = (1 - abs(x / f - c)) * (1 - abs(y / f - c))
arr[:] = weight.reshape(shape)
def _init_loc_bias(self, _, arr):
shape = arr.shape
        assert shape[0] == 6
arr[:] = np.array([1.0, 0, 0, 0, 1.0, 0])
def _init_zero(self, _, arr):
arr[:] = 0.0
def _init_one(self, _, arr):
arr[:] = 1.0
def _init_bias(self, _, arr):
arr[:] = 0.0
def _init_quantized_bias(self, _, arr):
arr[:] = 0
def _init_gamma(self, _, arr):
arr[:] = 1.0
def _init_beta(self, _, arr):
arr[:] = 0.0
def _init_weight(self, name, arr):
"""Abstract method to Initialize weight."""
raise NotImplementedError("Must override it")
def _init_quantized_weight(self, _, arr):
_arr = random.randint(-127, 127, dtype='int32').asnumpy()
arr[:] = np.int8(_arr)
def _init_default(self, name, _):
raise ValueError(
f'Unknown initialization pattern for {name}. ' \
'Default initialization is now limited to '\
'"weight", "bias", "gamma" (1.0), and "beta" (0.0).' \
'Please use mx.sym.Variable(init=mx.init.*) to set initialization pattern')
def __eq__(self, other):
if not isinstance(other, Initializer):
return NotImplemented
# pylint: disable=unidiomatic-typecheck
return type(self) is type(other) and self._kwargs == other._kwargs
# pylint: disable=invalid-name
_register = registry.get_register_func(Initializer, 'initializer')
alias = registry.get_alias_func(Initializer, 'initializer')
create = registry.get_create_func(Initializer, 'initializer')
# pylint: enable=invalid-name
def register(klass):
"""Registers a custom initializer.
Custom initializers can be created by extending `mx.init.Initializer` and implementing the
required functions like `_init_weight` and `_init_bias`. The created initializer must be
registered using `mx.init.register` before it can be called by name.
Parameters
----------
klass : class
A subclass of `mx.init.Initializer` that needs to be registered as a custom initializer.
Example
-------
>>> # Create and register a custom initializer that
... # initializes weights to 0.1 and biases to 1.
...
>>> @mx.init.register
... @alias('myinit')
... class CustomInit(mx.init.Initializer):
... def __init__(self):
... super(CustomInit, self).__init__()
... def _init_weight(self, _, arr):
... arr[:] = 0.1
... def _init_bias(self, _, arr):
... arr[:] = 1
...
>>> # block is an instance of 'mxnet.gluon.Block'
...
>>> block.initialize(CustomInit())
"""
return _register(klass)
class Load(object):
"""Initializes variables by loading data from file or dict.
    **Note** Load strips the ``arg:`` and ``aux:`` prefixes from the names in
    `param` and initializes the variables whose names match the stripped names.
Parameters
----------
param: str or dict of str->`NDArray`
Parameter file or dict mapping name to NDArray.
default_init: Initializer
Default initializer when name is not found in `param`.
verbose: bool
Flag for enabling logging of source when initializing.
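    Example
    -------
    >>> # Illustrative sketch: 'params.ndarray' is a hypothetical file saved with
    ... # mx.nd.save; parameters missing from it fall back to Uniform initialization.
    ...
    >>> init = mx.init.Load('params.ndarray', default_init=mx.init.Uniform(0.1),
    ...                     verbose=True)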
"""
def __init__(self, param, default_init=None, verbose=False):
if isinstance(param, str):
param = load(param)
assert isinstance(param, dict)
self.param = {}
for name, arr in param.items():
if name.startswith('arg:') or name.startswith('aux:'):
self.param[name[4:]] = arr
else:
self.param[name] = arr
self.default_init = default_init
self.verbose = verbose
def __call__(self, name, arr):
if name in self.param:
assert arr.shape == self.param[name].shape, \
f'Parameter {name} cannot be initialized from loading. ' + \
f'Shape mismatch, target {str(arr.shape)} vs loaded {self.param[name].shape}'
arr[:] = self.param[name]
if self.verbose:
logging.info('Initialized %s by loading', name)
else:
assert self.default_init is not None, \
f"Cannot Initialize {name}. Not found in loaded param " + \
"and no default Initializer is provided."
self.default_init(name, arr)
if self.verbose:
logging.info('Initialized %s by default', name)
class Mixed(object):
"""Initialize parameters using multiple initializers.
Parameters
----------
patterns: list of str
List of regular expressions matching parameter names.
initializers: list of Initializer
List of initializers corresponding to `patterns`.
Example
-------
>>> # Given 'block', an instance of 'mxnet.gluon.Block', initialize biases to zero
... # and every other parameter to random values with uniform distribution.
...
>>> init = mx.initializer.Mixed(['bias', '.*'], [mx.init.Zero(), mx.init.Uniform(0.1)])
>>> block.initialize(init)
>>>
>>> for dictionary in module.get_params():
... for key in dictionary:
... print(key)
... print(dictionary[key].asnumpy())
...
fullyconnected1_weight
[[ 0.0097627 0.01856892 0.04303787]]
fullyconnected1_bias
[ 0.]
"""
def __init__(self, patterns, initializers):
assert len(patterns) == len(initializers)
self.map = list(zip([re.compile(p) for p in patterns], initializers))
def __call__(self, name, arr):
for prog, init in self.map:
if prog.match(name):
init(name, arr)
return
        raise ValueError(f'Parameter name {name} did not match any pattern. Consider '
                         'adding a ".*" pattern at the end with a default Initializer.')
@register
@alias("zeros")
class Zero(Initializer):
"""Initializes weights to zero.
Example
-------
>>> # Given 'block', an instance of 'mxnet.gluon.Block', initialize weights to zero.
...
>>> init = mx.initializer.Zero()
>>> module.initialize(init)
>>> for dictionary in module.get_params():
... for key in dictionary:
... print(key)
... print(dictionary[key].asnumpy())
...
fullyconnected0_weight
[[ 0. 0. 0.]]
"""
def __init__(self):
super(Zero, self).__init__()
def _init_weight(self, _, arr):
arr[:] = 0
@register
@alias("ones")
class One(Initializer):
"""Initializes weights to one.
Example
-------
>>> # Given 'block', an instance of 'mxnet.gluon.Block', initialize weights to one.
...
>>> init = mx.initializer.One()
>>> module.initialize(init)
>>> for dictionary in module.get_params():
... for key in dictionary:
... print(key)
... print(dictionary[key].asnumpy())
...
fullyconnected0_weight
[[ 1. 1. 1.]]
"""
def __init__(self):
super(One, self).__init__()
def _init_weight(self, _, arr):
arr[:] = 1
@register
class Constant(Initializer):
"""Initializes the weights to a given value.
    The value passed in can be a scalar or an NDArray that matches the shape
    of the parameter to be set.
Parameters
----------
value : float, NDArray
Value to set.
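    Example
    -------
    >>> # Illustrative sketch: 'block' is assumed to be a 'mxnet.gluon.Block'.
    ...
    >>> init = mx.init.Constant(0.3)
    >>> block.initialize(init)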
"""
def __init__(self, value):
super(Constant, self).__init__(value=value)
self.value = value
def _init_weight(self, _, arr):
arr[:] = self.value
def dumps(self):
val = self._kwargs['value']
if not np.isscalar(val):
self._kwargs['value'] = val.tolist() if isinstance(val, np.ndarray) else val.asnumpy().tolist()
return json.dumps([self.__class__.__name__.lower(), self._kwargs])
@register
class Uniform(Initializer):
"""Initializes weights with random values uniformly sampled from a given range.
Parameters
----------
scale : float, optional
The bound on the range of the generated random values.
Values are generated from the range [-`scale`, `scale`].
Default scale is 0.07.
Example
-------
>>> # Given 'block', an instance of 'mxnet.gluon.Block', initialize weights
>>> # to random values uniformly sampled between -0.1 and 0.1.
...
>>> init = mx.init.Uniform(0.1)
>>> module.initialize(init)
>>> for dictionary in module.get_params():
... for key in dictionary:
... print(key)
... print(dictionary[key].asnumpy())
...
fullyconnected0_weight
[[ 0.01360891 -0.02144304 0.08511933]]
"""
def __init__(self, scale=0.07):
super(Uniform, self).__init__(scale=scale)
self.scale = scale
def _init_weight(self, _, arr):
uniform_fn = _mx_np.random.uniform if is_np_array() else random.uniform
uniform_fn(-self.scale, self.scale, arr.shape, dtype=arr.dtype, out=arr)
@register
class Normal(Initializer):
"""Initializes weights with random values sampled from a normal distribution
with a mean of zero and standard deviation of `sigma`.
Parameters
----------
sigma : float, optional
Standard deviation of the normal distribution.
Default standard deviation is 0.01.
Example
-------
>>> # Given 'block', an instance of 'mxnet.gluon.Block', initialize weights
>>> # to random values sampled from a normal distribution.
...
>>> init = mx.init.Normal(0.5)
>>> module.initialize(init)
>>> for dictionary in module.get_params():
... for key in dictionary:
... print(key)
... print(dictionary[key].asnumpy())
...
fullyconnected0_weight
[[-0.3214761 -0.12660924 0.53789419]]
"""
def __init__(self, sigma=0.01):
super(Normal, self).__init__(sigma=sigma)
self.sigma = sigma
def _init_weight(self, _, arr):
normal_fn = _mx_np.random.normal if is_np_array() else random.normal
normal_fn(0, self.sigma, arr.shape, dtype=arr.dtype, out=arr)
@register
class Orthogonal(Initializer):
"""Initialize weight as orthogonal matrix.
This initializer implements *Exact solutions to the nonlinear dynamics of
learning in deep linear neural networks*, available at
https://arxiv.org/abs/1312.6120.
Parameters
----------
    scale : float, optional
        Scaling factor of the weight.
    rand_type : str, optional
        Use "uniform" or "normal" random numbers to initialize the weight.
"""
def __init__(self, scale=1.414, rand_type="uniform"):
super(Orthogonal, self).__init__(scale=scale, rand_type=rand_type)
self.scale = scale
self.rand_type = rand_type
def _init_weight(self, _, arr):
nout = arr.shape[0]
nin = np.prod(arr.shape[1:])
if self.rand_type == "uniform":
tmp = random.uniform(-1.0, 1.0, shape=(nout, nin)).asnumpy()
elif self.rand_type == "normal":
tmp = random.normal(0.0, 1.0, shape=(nout, nin)).asnumpy()
u, _, v = np.linalg.svd(tmp, full_matrices=False) # pylint: disable=invalid-name
if u.shape == tmp.shape:
res = u
else:
res = v
res = self.scale * res.reshape(arr.shape)
arr[:] = res
@register
class Xavier(Initializer):
"""Returns an initializer performing "Xavier" initialization for weights.
This initializer is designed to keep the scale of gradients roughly the same
in all layers.
    By default, `rnd_type` is ``'uniform'`` and `factor_type` is ``'avg'``, so
    the initializer fills the weights with random numbers in the range
    of :math:`[-c, c]`, where :math:`c = \\sqrt{\\frac{3.}{0.5 * (n_{in} + n_{out})}}`.
:math:`n_{in}` is the number of neurons feeding into weights, and :math:`n_{out}` is
the number of neurons the result is fed to.
    If `rnd_type` is ``'uniform'`` and `factor_type` is ``'in'``,
    then :math:`c = \\sqrt{\\frac{3.}{n_{in}}}`.
    Similarly, when `factor_type` is ``'out'``, :math:`c = \\sqrt{\\frac{3.}{n_{out}}}`.
    If `rnd_type` is ``'gaussian'`` and `factor_type` is ``'avg'``,
    the initializer fills the weights with numbers drawn from a normal distribution
    with a standard deviation of :math:`\\sqrt{\\frac{3.}{0.5 * (n_{in} + n_{out})}}`.
Parameters
----------
rnd_type: str, optional
Random generator type, can be ``'gaussian'`` or ``'uniform'``.
factor_type: str, optional
Can be ``'avg'``, ``'in'``, or ``'out'``.
magnitude: float, optional
Scale of random number.
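    Example
    -------
    >>> # Illustrative sketch: 'block' is assumed to be a 'mxnet.gluon.Block'.
    ...
    >>> init = mx.init.Xavier(rnd_type="gaussian", factor_type="in", magnitude=2)
    >>> block.initialize(init)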
"""
def __init__(self, rnd_type="uniform", factor_type="avg", magnitude=3):
super(Xavier, self).__init__(rnd_type=rnd_type, factor_type=factor_type,
magnitude=magnitude)
self.rnd_type = rnd_type
self.factor_type = factor_type
self.magnitude = float(magnitude)
def _init_weight(self, name, arr):
shape = arr.shape
hw_scale = 1.
if len(shape) < 2:
raise ValueError('Xavier initializer cannot be applied to vector {0}. It requires at'
' least 2D.'.format(name))
if len(shape) > 2:
hw_scale = np.prod(shape[2:])
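        # fan_in / fan_out are the per-unit connection counts: input and output
        # channels, each scaled by the spatial kernel size for conv weights.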
fan_in, fan_out = shape[1] * hw_scale, shape[0] * hw_scale
factor = 1.
if self.factor_type == "avg":
factor = (fan_in + fan_out) / 2.0
elif self.factor_type == "in":
factor = fan_in
elif self.factor_type == "out":
factor = fan_out
else:
raise ValueError("Incorrect factor type")
scale = np.sqrt(self.magnitude / factor)
if self.rnd_type == "uniform":
uniform_fn = _mx_np.random.uniform if is_np_array() else random.uniform
uniform_fn(-scale, scale, arr.shape, dtype=arr.dtype, out=arr)
elif self.rnd_type == "gaussian":
normal_fn = _mx_np.random.normal if is_np_array() else random.normal
normal_fn(0, scale, arr.shape, dtype=arr.dtype, out=arr)
else:
raise ValueError("Unknown random type")
@register
class MSRAPrelu(Xavier):
"""Initialize the weight according to a MSRA paper.
This initializer implements *Delving Deep into Rectifiers: Surpassing
Human-Level Performance on ImageNet Classification*, available at
https://arxiv.org/abs/1502.01852.
    This initializer is proposed for initialization related to the ReLU activation;
    it makes some changes on top of the Xavier method.
Parameters
----------
factor_type: str, optional
Can be ``'avg'``, ``'in'``, or ``'out'``.
slope: float, optional
initial slope of any PReLU (or similar) nonlinearities.
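    Example
    -------
    >>> # Illustrative sketch: 'block' is assumed to be a 'mxnet.gluon.Block'.
    ...
    >>> init = mx.init.MSRAPrelu(factor_type='in', slope=0.25)
    >>> block.initialize(init)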
"""
def __init__(self, factor_type="avg", slope=0.25):
magnitude = 2. / (1 + slope ** 2)
super(MSRAPrelu, self).__init__("gaussian", factor_type, magnitude)
self._kwargs = {'factor_type': factor_type, 'slope': slope}
@register
class Bilinear(Initializer):
"""Initialize weight for upsampling layers."""
def __init__(self):
super(Bilinear, self).__init__()
def _init_weight(self, _, arr):
weight = np.zeros(np.prod(arr.shape), dtype='float32')
shape = arr.shape
f = np.ceil(shape[3] / 2.)
c = (2 * f - 1 - f % 2) / (2. * f)
for i in range(np.prod(shape)):
x = i % shape[3]
y = (i // shape[3]) % shape[2]
weight[i] = (1 - abs(x / f - c)) * (1 - abs(y / f - c))
arr[:] = weight.reshape(shape)
@register
class LSTMBias(Initializer):
"""Initialize all biases of an LSTMCell to 0.0 except for
the forget gate whose bias is set to custom value.
Parameters
----------
forget_bias: float, default 1.0
        Bias for the forget gate. Jozefowicz et al. (2015) recommend
        setting this to 1.0.
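    Example
    -------
    >>> # Illustrative sketch: pass as the bias initializer of a gluon LSTM cell,
    ... # which is assumed here; the hidden size of 20 is arbitrary.
    ...
    >>> cell = mx.gluon.rnn.LSTMCell(20, i2h_bias_initializer=mx.init.LSTMBias(1.0))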
"""
def __init__(self, forget_bias=1.0):
super(LSTMBias, self).__init__(forget_bias=forget_bias)
self.forget_bias = forget_bias
def _init_weight(self, name, arr):
arr[:] = 0.0
        # In the case of LSTMCell, the forget gate is the second of the
        # 4 LSTM gates; we modify the corresponding values.
num_hidden = int(arr.shape[0] / 4)
arr[num_hidden:2*num_hidden] = self.forget_bias
@register
class RNNFused(Initializer):
"""Initialize RNN fused parameter with bias part initialized to 0.0 and
weight initialized with random values uniformly sampled from a given range.
Parameters
----------
    mode : {'gru', 'lstm', 'rnn_relu', 'rnn_tanh'}, required
        The type of RNN to compute.
    num_layers : int (non-negative), required
        Number of stacked layers.
    state_size : int (non-negative), required
        Size of the state for each layer.
    bidirectional : boolean, optional, default=False
        Whether to use bidirectional recurrent layers.
    projection_size : int or None, optional, default=None
        Size of the projection applied to the hidden state, if any.
    i2h_weight_initializer, h2h_weight_initializer : str or Initializer, optional
        Initializers for the input-to-hidden and hidden-to-hidden weight slices.
    i2h_bias_initializer, h2h_bias_initializer : str or Initializer, optional
        Initializers for the input-to-hidden and hidden-to-hidden bias slices.
    h2r_weight_initializer : str or Initializer, optional
        Initializer for the hidden-to-projection weight slices, used when
        `projection_size` is set.
"""
def __init__(self, mode, num_layers, state_size, bidirectional=False,
projection_size=None, i2h_weight_initializer=None,
h2h_weight_initializer=None, i2h_bias_initializer=None,
h2h_bias_initializer=None, h2r_weight_initializer=None):
super(RNNFused, self).__init__(mode=mode, num_layers=num_layers,
state_size=state_size,
bidirectional=bidirectional,
projection_size=projection_size,
i2h_weight_initializer=i2h_weight_initializer,
h2h_weight_initializer=h2h_weight_initializer,
i2h_bias_initializer=i2h_bias_initializer,
h2h_bias_initializer=h2h_bias_initializer,
h2r_weight_initializer=h2r_weight_initializer)
self.gates = {'rnn_relu': 1, 'rnn_tanh': 1, 'lstm': 4, 'gru': 3}[mode]
self.num_layers = num_layers
self.num_hidden = state_size
self.dir = 2 if bidirectional else 1
self.projection_size = projection_size
self._i2h_weight_initializer = i2h_weight_initializer
self._h2h_weight_initializer = h2h_weight_initializer
self._i2h_bias_initializer = i2h_bias_initializer
self._h2h_bias_initializer = h2h_bias_initializer
self._h2r_weight_initializer = h2r_weight_initializer
# pylint: disable=too-many-nested-blocks
def _init_weight(self, name, arr):
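        # The fused parameter is one flat vector: all weight slices for every
        # layer and direction come first, followed by all bias slices. Recover
        # the input size from the total length before slicing the vector below.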
arr_len = arr.shape[0]
size = self.num_hidden * self.dir * self.gates
if not self.projection_size:
# second layer size
size2 = (self.num_hidden * self.dir + self.num_hidden + 2) * size
input_size = (arr_len - (self.num_layers - 1) * size2) // \
size - 2 - self.num_hidden
else:
# second layer size
size2 = (self.projection_size * self.dir + self.projection_size + 2) * size
size_projection = self.projection_size * self.num_hidden * self.num_layers * self.dir
input_size = (arr_len - size_projection - (self.num_layers - 1) * size2) // \
size - 2 - self.projection_size
begin = 0
if not self.projection_size:
for param in ['weight', 'bias']:
for layer_num in range(self.num_layers):
for _ in range(self.dir):
for connect in ['i2h', 'h2h']:
num_inputs = input_size
if layer_num != 0:
num_inputs = self.num_hidden * self.dir
if connect == 'h2h':
num_inputs = self.num_hidden
shape0 = self.gates * self.num_hidden
if param == 'weight':
cur_len = shape0 * num_inputs
else:
cur_len = shape0
self._init_util(param, connect, arr[begin:begin+cur_len])
begin += cur_len
else:
for param in ['weight', 'bias']:
for layer_num in range(self.num_layers):
for _ in range(self.dir):
for connect in ['i2h', 'h2h', 'h2r']:
if connect != 'h2r' or param != 'bias':
if connect == 'h2r':
cur_len = self.projection_size * self.num_hidden
else:
num_inputs = input_size
if layer_num != 0:
num_inputs = self.projection_size * self.dir
if connect == 'h2h':
num_inputs = self.projection_size
shape0 = self.gates * self.num_hidden
if param == 'weight':
cur_len = shape0 * num_inputs
else:
cur_len = shape0
self._init_util(param, connect, arr[begin:begin+cur_len])
begin += cur_len
def _init_util(self, param, connect, arr):
name = "_{}_{}_initializer".format(connect, param)
init = getattr(self, name)
create(init)(InitDesc(name, {'__init__': init}), arr)
def set_initializer(self, init):
self._i2h_weight_initializer = \
init if not self._i2h_weight_initializer else 'uniform'
self._h2h_weight_initializer = \
init if not self._h2h_weight_initializer else 'uniform'
self._i2h_bias_initializer = \
init if not self._i2h_bias_initializer else 'zero'
        self._h2h_bias_initializer = \
            init if not self._h2h_bias_initializer else 'zero'
self._h2r_weight_initializer = \
init if not self._h2r_weight_initializer else 'uniform'