| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| """Weight initializer.""" |
| |
| import re |
| import logging |
| import warnings |
| import json |
| from math import sqrt |
| import numpy as np |
| from .base import string_types |
| from .ndarray import NDArray, load |
| from . import random |
| from . import registry |
| from . import ndarray |
from .util import is_np_array
| from . import numpy as _mx_np # pylint: disable=reimported |
| |
| |
| # inherit str for backward compatibility |
| class InitDesc(str): |
| """ |
| Descriptor for the initialization pattern. |
| |
| Parameters |
| ---------- |
| name : str |
| Name of variable. |
| attrs : dict of str to str |
| Attributes of this variable taken from ``Symbol.attr_dict``. |
| global_init : Initializer |
        Global initializer to fall back to.
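
    Example
    -------
    >>> # A minimal construction sketch; the attribute dict shown is illustrative.
    >>> desc = mx.init.InitDesc('fc1_weight', attrs={'__init__': 'zeros'})
    >>> desc
    'fc1_weight'
    >>> desc.attrs
    {'__init__': 'zeros'}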
| """ |
| def __new__(cls, name, attrs=None, global_init=None): |
| ret = super(InitDesc, cls).__new__(cls, name) |
| ret.attrs = attrs or {} |
| ret.global_init = global_init |
| return ret |
| |
| |
| class Initializer(object): |
| """The base class of an initializer.""" |
| def __init__(self, **kwargs): |
| self._kwargs = kwargs |
| self._verbose = False |
| self._print_func = None |
| |
| def set_verbosity(self, verbose=False, print_func=None): |
| """Switch on/off verbose mode |
| |
| Parameters |
| ---------- |
| verbose : bool |
| switch on/off verbose mode |
        print_func : function
            A function that computes statistics of initialized arrays.
            Takes an `NDArray` and returns a `str`. Defaults to the RMS
            of the array, i.e. ``str((norm(x)/sqrt(x.size)).asscalar())``.
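
        Example
        -------
        >>> # A minimal sketch: log a summary statistic of each array as it is initialized.
        >>> init = mx.init.Uniform(0.07).set_verbosity(verbose=True)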
| """ |
| self._verbose = verbose |
| if print_func is None: |
| def asum_stat(x): |
| """returns |x|/size(x), async execution.""" |
| return str((ndarray.norm(x)/sqrt(x.size)).asscalar()) |
| print_func = asum_stat |
| self._print_func = print_func |
| return self |
| |
| def _verbose_print(self, desc, init, arr): |
| """Internal verbose print function |
| |
| Parameters |
| ---------- |
| desc : InitDesc or str |
| name of the array |
| init : str |
| initializer pattern |
| arr : NDArray |
| initialized array |
| """ |
| if self._verbose and self._print_func: |
| logging.info('Initialized %s as %s: %s', desc, init, self._print_func(arr)) |
| |
| def dumps(self): |
| """Saves the initializer to string |
| |
| Returns |
| ------- |
| str |
| JSON formatted string that describes the initializer. |
| |
| Examples |
| -------- |
| >>> # Create initializer and retrieve its parameters |
| ... |
| >>> init = mx.init.Normal(0.5) |
| >>> init.dumps() |
| '["normal", {"sigma": 0.5}]' |
| >>> init = mx.init.Xavier(factor_type="in", magnitude=2.34) |
| >>> init.dumps() |
| '["xavier", {"rnd_type": "uniform", "magnitude": 2.34, "factor_type": "in"}]' |
| """ |
| return json.dumps([self.__class__.__name__.lower(), self._kwargs]) |
| |
| def __call__(self, desc, arr): |
| """Initialize an array |
| |
| Parameters |
| ---------- |
| desc : InitDesc |
| Initialization pattern descriptor. |
| |
| arr : NDArray |
| The array to be initialized. |
| """ |
| if not isinstance(desc, InitDesc): |
| self._legacy_init(desc, arr) |
| return |
| |
| if desc.global_init is None: |
| desc.global_init = self |
| init = desc.attrs.get('__init__', "") |
| |
| if init: |
| # when calling Variable initializer |
| create(init)._init_weight(desc, arr) |
| self._verbose_print(desc, init, arr) |
| else: |
| # register nnvm::FSetInputVariableAttrs in the backend for new patterns |
| # don't add new cases here. |
| if desc.endswith('weight'): |
| self._init_weight(desc, arr) |
| self._verbose_print(desc, 'weight', arr) |
| elif desc.endswith('bias'): |
| self._init_bias(desc, arr) |
| self._verbose_print(desc, 'bias', arr) |
| elif desc.endswith('gamma'): |
| self._init_gamma(desc, arr) |
| self._verbose_print(desc, 'gamma', arr) |
| elif desc.endswith('beta'): |
| self._init_beta(desc, arr) |
| self._verbose_print(desc, 'beta', arr) |
| elif desc.endswith('min'): |
| self._init_zero(desc, arr) |
| self._verbose_print(desc, 'min', arr) |
| elif desc.endswith('max'): |
| self._init_one(desc, arr) |
| self._verbose_print(desc, 'max', arr) |
| elif desc.endswith('weight_quantize'): |
| self._init_quantized_weight(desc, arr) |
| self._verbose_print(desc, 'weight_quantize', arr) |
| elif desc.endswith('bias_quantize'): |
| self._init_quantized_bias(desc, arr) |
| self._verbose_print(desc, 'bias_quantize', arr) |
| else: |
| self._init_default(desc, arr) |
| |
| def _legacy_init(self, name, arr): |
| """Legacy initialization method. |
| |
| Parameters |
| ---------- |
| name : str |
| Name of corresponding NDArray. |
| |
| arr : NDArray |
| NDArray to be initialized. |
| """ |
| warnings.warn( |
| "\033[91mCalling initializer with init(str, NDArray) has been deprecated." \ |
| "please use init(mx.init.InitDesc(...), NDArray) instead.\033[0m", |
| DeprecationWarning, stacklevel=3) |
| if not isinstance(name, string_types): |
| raise TypeError('name must be string') |
| if not isinstance(arr, NDArray): |
| raise TypeError('arr must be NDArray') |
| if name.startswith('upsampling'): |
| self._init_bilinear(name, arr) |
| elif name.startswith('stn_loc') and name.endswith('weight'): |
| self._init_zero(name, arr) |
| elif name.startswith('stn_loc') and name.endswith('bias'): |
| self._init_loc_bias(name, arr) |
| elif name.endswith('bias'): |
| self._init_bias(name, arr) |
| elif name.endswith('gamma'): |
| self._init_gamma(name, arr) |
| elif name.endswith('beta'): |
| self._init_beta(name, arr) |
| elif name.endswith('weight'): |
| self._init_weight(name, arr) |
| elif name.endswith("moving_mean"): |
| self._init_zero(name, arr) |
| elif name.endswith("moving_var"): |
| self._init_one(name, arr) |
| elif name.endswith("moving_inv_var"): |
| self._init_zero(name, arr) |
| elif name.endswith("moving_avg"): |
| self._init_zero(name, arr) |
| elif name.endswith('min'): |
| self._init_zero(name, arr) |
| elif name.endswith('max'): |
| self._init_one(name, arr) |
| else: |
| self._init_default(name, arr) |
| |
    def _init_bilinear(self, _, arr):
        # Build a separable 2D bilinear-interpolation kernel and copy it into ``arr``
        # (used for legacy 'upsampling' weights).
        weight = np.zeros(np.prod(arr.shape), dtype='float32')
| shape = arr.shape |
| f = np.ceil(shape[3] / 2.) |
| c = (2 * f - 1 - f % 2) / (2. * f) |
| for i in range(np.prod(shape)): |
| x = i % shape[3] |
| y = (i // shape[3]) % shape[2] |
| weight[i] = (1 - abs(x / f - c)) * (1 - abs(y / f - c)) |
| arr[:] = weight.reshape(shape) |
| |
| def _init_loc_bias(self, _, arr): |
| shape = arr.shape |
| assert(shape[0] == 6) |
| arr[:] = np.array([1.0, 0, 0, 0, 1.0, 0]) |
| |
| def _init_zero(self, _, arr): |
| arr[:] = 0.0 |
| |
| def _init_one(self, _, arr): |
| arr[:] = 1.0 |
| |
| def _init_bias(self, _, arr): |
| arr[:] = 0.0 |
| |
| def _init_quantized_bias(self, _, arr): |
| arr[:] = 0 |
| |
| def _init_gamma(self, _, arr): |
| arr[:] = 1.0 |
| |
| def _init_beta(self, _, arr): |
| arr[:] = 0.0 |
| |
| def _init_weight(self, name, arr): |
| """Abstract method to Initialize weight.""" |
| raise NotImplementedError("Must override it") |
| |
| def _init_quantized_weight(self, _, arr): |
| _arr = random.randint(-127, 127, dtype='int32').asnumpy() |
| arr[:] = np.int8(_arr) |
| |
| def _init_default(self, name, _): |
        raise ValueError(
            'Unknown initialization pattern for %s. ' \
            'Default initialization is now limited to ' \
            '"weight", "bias", "gamma" (1.0), and "beta" (0.0). ' \
            'Please use mx.sym.Variable(init=mx.init.*) to set the initialization pattern.' % name)
| |
| def __eq__(self, other): |
| if not isinstance(other, Initializer): |
| return NotImplemented |
| # pylint: disable=unidiomatic-typecheck |
| return type(self) is type(other) and self._kwargs == other._kwargs |
| |
| # pylint: disable=invalid-name |
| _register = registry.get_register_func(Initializer, 'initializer') |
| alias = registry.get_alias_func(Initializer, 'initializer') |
| create = registry.get_create_func(Initializer, 'initializer') |
| # pylint: enable=invalid-name |
| |
| def register(klass): |
| """Registers a custom initializer. |
| |
| Custom initializers can be created by extending `mx.init.Initializer` and implementing the |
| required functions like `_init_weight` and `_init_bias`. The created initializer must be |
| registered using `mx.init.register` before it can be called by name. |
| |
| Parameters |
| ---------- |
| klass : class |
| A subclass of `mx.init.Initializer` that needs to be registered as a custom initializer. |
| |
| Example |
| ------- |
| >>> # Create and register a custom initializer that |
| ... # initializes weights to 0.1 and biases to 1. |
| ... |
| >>> @mx.init.register |
    ... @mx.init.alias('myinit')
| ... class CustomInit(mx.init.Initializer): |
| ... def __init__(self): |
| ... super(CustomInit, self).__init__() |
| ... def _init_weight(self, _, arr): |
| ... arr[:] = 0.1 |
| ... def _init_bias(self, _, arr): |
| ... arr[:] = 1 |
| ... |
| >>> # block is an instance of 'mxnet.gluon.Block' |
| ... |
| >>> block.initialize(CustomInit()) |
| """ |
| return _register(klass) |
| |
| |
| class Load(object): |
| """Initializes variables by loading data from file or dict. |
| |
    **Note** ``Load`` drops the ``arg:`` or ``aux:`` prefix from parameter names and
    initializes the variables whose names match once the prefix is dropped.
| |
| Parameters |
| ---------- |
| param: str or dict of str->`NDArray` |
| Parameter file or dict mapping name to NDArray. |
| default_init: Initializer |
| Default initializer when name is not found in `param`. |
| verbose: bool |
| Flag for enabling logging of source when initializing. |
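
    Example
    -------
    >>> # A minimal construction sketch; 'resnet-0000.params' is a hypothetical
    >>> # parameter file saved beforehand.
    >>> init = mx.initializer.Load('resnet-0000.params',
    ...                            default_init=mx.init.Uniform(0.07), verbose=True)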
| |
| """ |
| def __init__(self, param, default_init=None, verbose=False): |
| if isinstance(param, str): |
| param = load(param) |
| assert isinstance(param, dict) |
| self.param = {} |
| for name, arr in param.items(): |
| if name.startswith('arg:') or name.startswith('aux:'): |
| self.param[name[4:]] = arr |
| else: |
| self.param[name] = arr |
| self.default_init = default_init |
| self.verbose = verbose |
| |
| def __call__(self, name, arr): |
| if name in self.param: |
| assert arr.shape == self.param[name].shape, \ |
| 'Parameter %s cannot be initialized from loading. '%name + \ |
| 'Shape mismatch, target %s vs loaded %s'%(str(arr.shape), |
| self.param[name].shape) |
| arr[:] = self.param[name] |
| if self.verbose: |
| logging.info('Initialized %s by loading', name) |
| else: |
| assert self.default_init is not None, \ |
| "Cannot Initialize %s. Not found in loaded param "%name + \ |
| "and no default Initializer is provided." |
| self.default_init(name, arr) |
| if self.verbose: |
| logging.info('Initialized %s by default', name) |
| |
| |
| class Mixed(object): |
| """Initialize parameters using multiple initializers. |
| |
| Parameters |
| ---------- |
| patterns: list of str |
| List of regular expressions matching parameter names. |
| initializers: list of Initializer |
| List of initializers corresponding to `patterns`. |
| |
| Example |
| ------- |
    >>> # Given 'block', an instance of 'mxnet.gluon.Block', initialize biases to zero
    ... # and every other parameter to random values from a uniform distribution.
    ...
    >>> init = mx.initializer.Mixed(['bias', '.*'], [mx.init.Zero(), mx.init.Uniform(0.1)])
    >>> block.initialize(init)
    >>>
    >>> for name, param in block.collect_params().items():
    ...     print(name)
    ...     print(param.data().asnumpy())
    ...
    fullyconnected1_weight
    [[ 0.0097627   0.01856892  0.04303787]]
    fullyconnected1_bias
    [ 0.]
| |
| """ |
| def __init__(self, patterns, initializers): |
| assert len(patterns) == len(initializers) |
| self.map = list(zip([re.compile(p) for p in patterns], initializers)) |
| |
| def __call__(self, name, arr): |
| for prog, init in self.map: |
| if prog.match(name): |
| init(name, arr) |
| return |
        raise ValueError('Parameter name %s did not match any pattern. Consider '
                         'adding a ".*" pattern at the end with a default Initializer.' % name)
| |
| @register |
| @alias("zeros") |
| class Zero(Initializer): |
| """Initializes weights to zero. |
| |
| Example |
| ------- |
    >>> # Given 'block', an instance of 'mxnet.gluon.Block', initialize weights to zero.
    ...
    >>> init = mx.initializer.Zero()
    >>> block.initialize(init)
    >>> for name, param in block.collect_params().items():
    ...     print(name)
    ...     print(param.data().asnumpy())
    ...
    fullyconnected0_weight
    [[ 0.  0.  0.]]
| """ |
| def __init__(self): |
| super(Zero, self).__init__() |
| |
| def _init_weight(self, _, arr): |
| arr[:] = 0 |
| |
| @register |
| @alias("ones") |
| class One(Initializer): |
| """Initializes weights to one. |
| |
| Example |
| ------- |
    >>> # Given 'block', an instance of 'mxnet.gluon.Block', initialize weights to one.
    ...
    >>> init = mx.initializer.One()
    >>> block.initialize(init)
    >>> for name, param in block.collect_params().items():
    ...     print(name)
    ...     print(param.data().asnumpy())
    ...
    fullyconnected0_weight
    [[ 1.  1.  1.]]
| """ |
| def __init__(self): |
| super(One, self).__init__() |
| |
| def _init_weight(self, _, arr): |
| arr[:] = 1 |
| |
| @register |
| class Constant(Initializer): |
| """Initializes the weights to a given value. |
    The value passed in can be a scalar or an NDArray that matches the shape
| of the parameter to be set. |
| |
| Parameters |
| ---------- |
| value : float, NDArray |
| Value to set. |
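
    Example
    -------
    >>> # A minimal sketch; 'block' is assumed to be an instance of 'mxnet.gluon.Block'.
    >>> init = mx.init.Constant(0.3)
    >>> block.initialize(init)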
| """ |
| def __init__(self, value): |
| super(Constant, self).__init__(value=value) |
| self.value = value |
| |
| def _init_weight(self, _, arr): |
| arr[:] = self.value |
| |
| def dumps(self): |
| val = self._kwargs['value'] |
| if not np.isscalar(val): |
| self._kwargs['value'] = val.tolist() if isinstance(val, np.ndarray) else val.asnumpy().tolist() |
| return json.dumps([self.__class__.__name__.lower(), self._kwargs]) |
| |
| @register |
| class Uniform(Initializer): |
| """Initializes weights with random values uniformly sampled from a given range. |
| |
| Parameters |
| ---------- |
| scale : float, optional |
| The bound on the range of the generated random values. |
| Values are generated from the range [-`scale`, `scale`]. |
| Default scale is 0.07. |
| |
| Example |
| ------- |
    >>> # Given 'block', an instance of 'mxnet.gluon.Block', initialize weights
    >>> # to random values uniformly sampled between -0.1 and 0.1.
    ...
    >>> init = mx.init.Uniform(0.1)
    >>> block.initialize(init)
    >>> for name, param in block.collect_params().items():
    ...     print(name)
    ...     print(param.data().asnumpy())
    ...
    fullyconnected0_weight
    [[ 0.01360891 -0.02144304  0.08511933]]
| """ |
| def __init__(self, scale=0.07): |
| super(Uniform, self).__init__(scale=scale) |
| self.scale = scale |
| |
| def _init_weight(self, _, arr): |
| uniform_fn = _mx_np.random.uniform if is_np_array() else random.uniform |
| uniform_fn(-self.scale, self.scale, arr.shape, dtype=arr.dtype, out=arr) |
| |
| @register |
| class Normal(Initializer): |
| """Initializes weights with random values sampled from a normal distribution |
| with a mean of zero and standard deviation of `sigma`. |
| |
| Parameters |
| ---------- |
| sigma : float, optional |
| Standard deviation of the normal distribution. |
| Default standard deviation is 0.01. |
| |
| Example |
| ------- |
    >>> # Given 'block', an instance of 'mxnet.gluon.Block', initialize weights
    >>> # to random values sampled from a normal distribution.
    ...
    >>> init = mx.init.Normal(0.5)
    >>> block.initialize(init)
    >>> for name, param in block.collect_params().items():
    ...     print(name)
    ...     print(param.data().asnumpy())
    ...
    fullyconnected0_weight
    [[-0.3214761  -0.12660924  0.53789419]]
| """ |
| def __init__(self, sigma=0.01): |
| super(Normal, self).__init__(sigma=sigma) |
| self.sigma = sigma |
| |
| def _init_weight(self, _, arr): |
| normal_fn = _mx_np.random.normal if is_np_array() else random.normal |
| normal_fn(0, self.sigma, arr.shape, dtype=arr.dtype, out=arr) |
| |
| @register |
| class Orthogonal(Initializer): |
| """Initialize weight as orthogonal matrix. |
| |
| This initializer implements *Exact solutions to the nonlinear dynamics of |
| learning in deep linear neural networks*, available at |
| https://arxiv.org/abs/1312.6120. |
| |
| Parameters |
| ---------- |
    scale : float, optional
        Scaling factor of the weight.

    rand_type : str, optional
        Use "uniform" or "normal" random numbers to initialize the weight.
| |
| """ |
| def __init__(self, scale=1.414, rand_type="uniform"): |
| super(Orthogonal, self).__init__(scale=scale, rand_type=rand_type) |
| self.scale = scale |
| self.rand_type = rand_type |
| |
| def _init_weight(self, _, arr): |
| nout = arr.shape[0] |
| nin = np.prod(arr.shape[1:]) |
| if self.rand_type == "uniform": |
| tmp = random.uniform(-1.0, 1.0, shape=(nout, nin)).asnumpy() |
| elif self.rand_type == "normal": |
| tmp = random.normal(0.0, 1.0, shape=(nout, nin)).asnumpy() |
| u, _, v = np.linalg.svd(tmp, full_matrices=False) # pylint: disable=invalid-name |
| if u.shape == tmp.shape: |
| res = u |
| else: |
| res = v |
| res = self.scale * res.reshape(arr.shape) |
| arr[:] = res |
| |
| @register |
| class Xavier(Initializer): |
| """Returns an initializer performing "Xavier" initialization for weights. |
| |
| This initializer is designed to keep the scale of gradients roughly the same |
| in all layers. |
| |
    By default (`rnd_type` is ``'uniform'`` and `factor_type` is ``'avg'``),
    the initializer fills the weights with random numbers in the range
    of :math:`[-c, c]`, where :math:`c = \\sqrt{\\frac{3.}{0.5 * (n_{in} + n_{out})}}`.
    :math:`n_{in}` is the number of neurons feeding into the weights, and :math:`n_{out}` is
    the number of neurons the result is fed to.

    If `rnd_type` is ``'uniform'`` and `factor_type` is ``'in'``,
    then :math:`c = \\sqrt{\\frac{3.}{n_{in}}}`.
    Similarly, when `factor_type` is ``'out'``, :math:`c = \\sqrt{\\frac{3.}{n_{out}}}`.

    If `rnd_type` is ``'gaussian'`` and `factor_type` is ``'avg'``,
    the initializer fills the weights with numbers drawn from a normal distribution
    with a standard deviation of :math:`\\sqrt{\\frac{3.}{0.5 * (n_{in} + n_{out})}}`.
| |
| Parameters |
| ---------- |
| rnd_type: str, optional |
| Random generator type, can be ``'gaussian'`` or ``'uniform'``. |
| |
| factor_type: str, optional |
| Can be ``'avg'``, ``'in'``, or ``'out'``. |
| |
| magnitude: float, optional |
| Scale of random number. |
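
    Example
    -------
    >>> # A minimal sketch; 'block' is assumed to be an instance of 'mxnet.gluon.Block'.
    >>> init = mx.init.Xavier(rnd_type='gaussian', factor_type='in', magnitude=2)
    >>> block.initialize(init)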
| """ |
| def __init__(self, rnd_type="uniform", factor_type="avg", magnitude=3): |
| super(Xavier, self).__init__(rnd_type=rnd_type, factor_type=factor_type, |
| magnitude=magnitude) |
| self.rnd_type = rnd_type |
| self.factor_type = factor_type |
| self.magnitude = float(magnitude) |
| |
| |
| def _init_weight(self, name, arr): |
| shape = arr.shape |
| hw_scale = 1. |
| if len(shape) < 2: |
| raise ValueError('Xavier initializer cannot be applied to vector {0}. It requires at' |
| ' least 2D.'.format(name)) |
| if len(shape) > 2: |
| hw_scale = np.prod(shape[2:]) |
| fan_in, fan_out = shape[1] * hw_scale, shape[0] * hw_scale |
| factor = 1. |
| if self.factor_type == "avg": |
| factor = (fan_in + fan_out) / 2.0 |
| elif self.factor_type == "in": |
| factor = fan_in |
| elif self.factor_type == "out": |
| factor = fan_out |
| else: |
| raise ValueError("Incorrect factor type") |
| scale = np.sqrt(self.magnitude / factor) |
| if self.rnd_type == "uniform": |
| uniform_fn = _mx_np.random.uniform if is_np_array() else random.uniform |
| uniform_fn(-scale, scale, arr.shape, dtype=arr.dtype, out=arr) |
| elif self.rnd_type == "gaussian": |
| normal_fn = _mx_np.random.normal if is_np_array() else random.normal |
| normal_fn(0, scale, arr.shape, dtype=arr.dtype, out=arr) |
| else: |
| raise ValueError("Unknown random type") |
| |
| @register |
| class MSRAPrelu(Xavier): |
| """Initialize the weight according to a MSRA paper. |
| |
| This initializer implements *Delving Deep into Rectifiers: Surpassing |
| Human-Level Performance on ImageNet Classification*, available at |
| https://arxiv.org/abs/1502.01852. |
| |
    This initializer is proposed for initialization related to ReLU activations;
    it makes some changes on top of the Xavier method.
| |
| Parameters |
| ---------- |
| factor_type: str, optional |
| Can be ``'avg'``, ``'in'``, or ``'out'``. |
| |
| slope: float, optional |
        Initial slope of any PReLU (or similar) nonlinearities.
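
    Example
    -------
    >>> # A minimal sketch; 'block' is assumed to be an instance of 'mxnet.gluon.Block'.
    >>> init = mx.init.MSRAPrelu(factor_type='in', slope=0.25)
    >>> block.initialize(init)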
| """ |
| def __init__(self, factor_type="avg", slope=0.25): |
| magnitude = 2. / (1 + slope ** 2) |
| super(MSRAPrelu, self).__init__("gaussian", factor_type, magnitude) |
| self._kwargs = {'factor_type': factor_type, 'slope': slope} |
| |
| @register |
| class Bilinear(Initializer): |
| """Initialize weight for upsampling layers.""" |
| def __init__(self): |
| super(Bilinear, self).__init__() |
| |
| def _init_weight(self, _, arr): |
| weight = np.zeros(np.prod(arr.shape), dtype='float32') |
| shape = arr.shape |
| f = np.ceil(shape[3] / 2.) |
| c = (2 * f - 1 - f % 2) / (2. * f) |
| for i in range(np.prod(shape)): |
| x = i % shape[3] |
| y = (i // shape[3]) % shape[2] |
| weight[i] = (1 - abs(x / f - c)) * (1 - abs(y / f - c)) |
| arr[:] = weight.reshape(shape) |
| |
| |
| @register |
| class LSTMBias(Initializer): |
| """Initialize all biases of an LSTMCell to 0.0 except for |
| the forget gate whose bias is set to custom value. |
| |
| Parameters |
| ---------- |
| forget_bias: float, default 1.0 |
        Bias for the forget gate. Jozefowicz et al. (2015) recommend
        setting this to 1.0.
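
    Example
    -------
    >>> # A minimal sketch, typically passed as a bias initializer of an LSTM cell;
    >>> # the cell construction below is illustrative.
    >>> cell = mx.gluon.rnn.LSTMCell(100, i2h_bias_initializer=mx.init.LSTMBias(forget_bias=1.0))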
| """ |
| def __init__(self, forget_bias=1.0): |
| super(LSTMBias, self).__init__(forget_bias=forget_bias) |
| self.forget_bias = forget_bias |
| |
| def _init_weight(self, name, arr): |
| arr[:] = 0.0 |
        # In an LSTMCell the forget gate is the second of the four gates,
        # so set the corresponding slice of the bias to ``forget_bias``.
| num_hidden = int(arr.shape[0] / 4) |
| arr[num_hidden:2*num_hidden] = self.forget_bias |
| |
| |
| @register |
| class RNNFused(Initializer): |
| """Initialize RNN fused parameter with bias part initialized to 0.0 and |
| weight initialized with random values uniformly sampled from a given range. |
| |
| Parameters |
| ---------- |
    mode : {'gru', 'lstm', 'rnn_relu', 'rnn_tanh'}, required
        The type of RNN to compute.
    num_layers : int (non-negative), required
        Number of stacked layers.
    state_size : int (non-negative), required
        Size of the state for each layer.
    bidirectional : boolean, optional, default=False
        Whether to use bidirectional recurrent layers.
    projection_size : int or None, optional, default=None
        Size of the LSTM projection, if used.
    i2h_weight_initializer, h2h_weight_initializer : str or Initializer, optional
        Initializers for the input-to-hidden and hidden-to-hidden weight slices.
    i2h_bias_initializer, h2h_bias_initializer : str or Initializer, optional
        Initializers for the input-to-hidden and hidden-to-hidden bias slices.
    h2r_weight_initializer : str or Initializer, optional
        Initializer for the hidden-to-projection weight slices (only used when
        ``projection_size`` is set).
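
    Example
    -------
    >>> # A minimal construction sketch for the fused parameter of a 2-layer LSTM
    >>> # with a state size of 100.
    >>> init = mx.init.RNNFused('lstm', num_layers=2, state_size=100)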
| """ |
| def __init__(self, mode, num_layers, state_size, bidirectional=False, |
| projection_size=None, i2h_weight_initializer=None, |
| h2h_weight_initializer=None, i2h_bias_initializer=None, |
| h2h_bias_initializer=None, h2r_weight_initializer=None): |
| super(RNNFused, self).__init__(mode=mode, num_layers=num_layers, |
| state_size=state_size, |
| bidirectional=bidirectional, |
| projection_size=projection_size, |
| i2h_weight_initializer=i2h_weight_initializer, |
| h2h_weight_initializer=h2h_weight_initializer, |
| i2h_bias_initializer=i2h_bias_initializer, |
| h2h_bias_initializer=h2h_bias_initializer, |
| h2r_weight_initializer=h2r_weight_initializer) |
| self.gates = {'rnn_relu': 1, 'rnn_tanh': 1, 'lstm': 4, 'gru': 3}[mode] |
| self.num_layers = num_layers |
| self.num_hidden = state_size |
| self.dir = 2 if bidirectional else 1 |
| self.projection_size = projection_size |
| self._i2h_weight_initializer = i2h_weight_initializer |
| self._h2h_weight_initializer = h2h_weight_initializer |
| self._i2h_bias_initializer = i2h_bias_initializer |
| self._h2h_bias_initializer = h2h_bias_initializer |
| self._h2r_weight_initializer = h2r_weight_initializer |
| |
| # pylint: disable=too-many-nested-blocks |
    def _init_weight(self, name, arr):
        # The fused parameter packs all weight slices (i2h, h2h[, h2r]) for every
        # layer and direction first, followed by all bias slices. Recover the input
        # size from the total length, then walk the blob slice by slice.
        arr_len = arr.shape[0]
| size = self.num_hidden * self.dir * self.gates |
| if not self.projection_size: |
| # second layer size |
| size2 = (self.num_hidden * self.dir + self.num_hidden + 2) * size |
| input_size = (arr_len - (self.num_layers - 1) * size2) // \ |
| size - 2 - self.num_hidden |
| else: |
| # second layer size |
| size2 = (self.projection_size * self.dir + self.projection_size + 2) * size |
| size_projection = self.projection_size * self.num_hidden * self.num_layers * self.dir |
| input_size = (arr_len - size_projection - (self.num_layers - 1) * size2) // \ |
| size - 2 - self.projection_size |
| begin = 0 |
| if not self.projection_size: |
| for param in ['weight', 'bias']: |
| for layer_num in range(self.num_layers): |
| for _ in range(self.dir): |
| for connect in ['i2h', 'h2h']: |
| num_inputs = input_size |
| if layer_num != 0: |
| num_inputs = self.num_hidden * self.dir |
| if connect == 'h2h': |
| num_inputs = self.num_hidden |
| shape0 = self.gates * self.num_hidden |
| if param == 'weight': |
| cur_len = shape0 * num_inputs |
| else: |
| cur_len = shape0 |
| self._init_util(param, connect, arr[begin:begin+cur_len]) |
| begin += cur_len |
| else: |
| for param in ['weight', 'bias']: |
| for layer_num in range(self.num_layers): |
| for _ in range(self.dir): |
| for connect in ['i2h', 'h2h', 'h2r']: |
| if connect != 'h2r' or param != 'bias': |
| if connect == 'h2r': |
| cur_len = self.projection_size * self.num_hidden |
| else: |
| num_inputs = input_size |
| if layer_num != 0: |
| num_inputs = self.projection_size * self.dir |
| if connect == 'h2h': |
| num_inputs = self.projection_size |
| shape0 = self.gates * self.num_hidden |
| if param == 'weight': |
| cur_len = shape0 * num_inputs |
| else: |
| cur_len = shape0 |
| self._init_util(param, connect, arr[begin:begin+cur_len]) |
| begin += cur_len |
| |
| def _init_util(self, param, connect, arr): |
| name = "_{}_{}_initializer".format(connect, param) |
| init = getattr(self, name) |
| create(init)(InitDesc(name, {'__init__': init}), arr) |
| |
| def set_initializer(self, init): |
| self._i2h_weight_initializer = \ |
| init if not self._i2h_weight_initializer else 'uniform' |
| self._h2h_weight_initializer = \ |
| init if not self._h2h_weight_initializer else 'uniform' |
| self._i2h_bias_initializer = \ |
| init if not self._i2h_bias_initializer else 'zero' |
        self._h2h_bias_initializer = \
            init if not self._h2h_bias_initializer else 'zero'
| self._h2r_weight_initializer = \ |
| init if not self._h2r_weight_initializer else 'uniform' |