# coding: utf-8
# pylint: disable=too-many-branches
"""Initialization helper for mxnet"""
from __future__ import absolute_import
import re
import logging
import numpy as np
from .base import string_types
from .ndarray import NDArray, load
from . import random


class Initializer(object):
    """Base class for Initializers."""

    def __call__(self, name, arr):
        """Initialize an NDArray in place, dispatching on the parameter name.

        Parameters
        ----------
        name : str
            Name of the corresponding NDArray.
        arr : NDArray
            NDArray to be initialized.
        """
if not isinstance(name, string_types):
raise TypeError('name must be string')
if not isinstance(arr, NDArray):
raise TypeError('arr must be NDArray')
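        # Dispatch on naming conventions: well-known prefixes/suffixes
        # (upsampling, stn_loc, bias, gamma, beta, weight, moving_* statistics)
        # map to fixed initialization rules; unrecognized names raise via
        # _init_default.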
if name.startswith('upsampling'):
self._init_bilinear(name, arr)
elif name.startswith('stn_loc') and name.endswith('weight'):
self._init_zero(name, arr)
elif name.startswith('stn_loc') and name.endswith('bias'):
self._init_loc_bias(name, arr)
elif name.endswith('bias'):
self._init_bias(name, arr)
elif name.endswith('gamma'):
self._init_gamma(name, arr)
elif name.endswith('beta'):
self._init_beta(name, arr)
elif name.endswith('weight'):
self._init_weight(name, arr)
elif name.endswith("moving_mean"):
self._init_zero(name, arr)
elif name.endswith("moving_var"):
self._init_one(name, arr)
elif name.endswith("moving_inv_var"):
self._init_zero(name, arr)
elif name.endswith("moving_avg"):
self._init_zero(name, arr)
else:
self._init_default(name, arr)
# pylint: disable=no-self-use, missing-docstring, invalid-name
    def _init_bilinear(self, _, arr):
        # Fill a 4-D conv weight with the standard bilinear upsampling kernel.
        weight = np.zeros(np.prod(arr.shape), dtype='float32')
        shape = arr.shape
        f = np.ceil(shape[3] / 2.)
        c = (2 * f - 1 - f % 2) / (2. * f)
        for i in range(np.prod(shape)):
            x = i % shape[3]
            # Floor division keeps the row index integral on Python 3.
            y = (i // shape[3]) % shape[2]
            weight[i] = (1 - abs(x / f - c)) * (1 - abs(y / f - c))
        arr[:] = weight.reshape(shape)
    def _init_loc_bias(self, _, arr):
        shape = arr.shape
        assert shape[0] == 6, "spatial transformer localization bias must have 6 elements"
        # Identity affine transform: [[1, 0, 0], [0, 1, 0]] flattened.
        arr[:] = np.array([1.0, 0, 0, 0, 1.0, 0])
def _init_zero(self, _, arr):
arr[:] = 0.0
def _init_one(self, _, arr):
arr[:] = 1.0
def _init_bias(self, _, arr):
arr[:] = 0.0
def _init_gamma(self, _, arr):
arr[:] = 1.0
def _init_beta(self, _, arr):
arr[:] = 0.0
    def _init_weight(self, name, arr):
        """Abstract method to initialize weight."""
        raise NotImplementedError("Must override it")
def _init_default(self, name, _):
raise ValueError('Unknown initialization pattern for %s' % name)
# pylint: enable=no-self-use, missing-docstring, invalid-name


class Load(object):
    """Initialize by loading pretrained parameters from a file or dict.

    Parameters
    ----------
    param: str or dict of str->NDArray
        Parameter file path, or dict mapping name to NDArray.
    default_init: Initializer
        Default initializer used when a name is not found in param.
    verbose: bool
        Log the source when initializing.
    """
    def __init__(self, param, default_init=None, verbose=False):
        if isinstance(param, string_types):
            param = load(param)
        assert isinstance(param, dict)
        self.param = {}
        for name, arr in param.items():
            # Saved checkpoints prefix names with 'arg:'/'aux:'; strip those.
            if name.startswith('arg:') or name.startswith('aux:'):
                self.param[name[4:]] = arr
            else:
                self.param[name] = arr
        self.default_init = default_init
        self.verbose = verbose
    def __call__(self, name, arr):
        if name in self.param:
            assert arr.shape == self.param[name].shape, \
                'Parameter %s cannot be initialized from loading. ' % name + \
                'Shape mismatch, target %s vs loaded %s' % (str(arr.shape),
                                                            str(self.param[name].shape))
            arr[:] = self.param[name]
            if self.verbose:
                logging.info('Initialized %s by loading', name)
        else:
            assert self.default_init is not None, \
                "Cannot initialize %s: not found in loaded param " % name + \
                "and no default Initializer is provided."
            self.default_init(name, arr)
            if self.verbose:
                logging.info('Initialized %s by default', name)
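
# A minimal usage sketch (the file name is hypothetical, not from this
# module): resume from a saved checkpoint, falling back to Uniform for any
# parameter the checkpoint does not contain.
#
#     init = Load('model-0010.params', default_init=Uniform(0.07), verbose=True)
#     init('fc1_weight', arr)  # copied from the checkpoint if present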


class Mixed(object):
    """Initialize with a mix of Initializers, selected by name pattern.

    Parameters
    ----------
    patterns: list of str
        List of regular expression patterns to match parameter names.
    initializers: list of Initializer
        List of Initializers corresponding to the patterns.
    """
def __init__(self, patterns, initializers):
assert len(patterns) == len(initializers)
self.map = list(zip([re.compile(p) for p in patterns], initializers))
def __call__(self, name, arr):
for prog, init in self.map:
if prog.match(name):
init(name, arr)
return
        raise ValueError('Parameter name %s did not match any pattern. Consider '
                         'adding a ".*" pattern at the end with a default '
                         'Initializer.' % name)
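
# A minimal usage sketch: patterns are tried in order via re.match, so the
# catch-all '.*' should come last.
#
#     init = Mixed(['^fc.*', '.*'], [Normal(sigma=0.1), Uniform(0.07)])
#     init('fc1_weight', arr)    # matches '^fc.*' -> Normal
#     init('conv0_weight', arr)  # falls through to '.*' -> Uniform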


class Uniform(Initializer):
    """Initialize the weight with uniform [-scale, scale].

    Parameters
    ----------
    scale : float, optional
        The scale of the uniform distribution.
    """
    def __init__(self, scale=0.07):
        self.scale = scale
def _init_weight(self, _, arr):
random.uniform(-self.scale, self.scale, out=arr)


class Normal(Initializer):
    """Initialize the weight with normal(0, sigma).

    Parameters
    ----------
    sigma : float, optional
        Standard deviation of the Gaussian distribution.
    """
    def __init__(self, sigma=0.01):
        self.sigma = sigma
def _init_weight(self, _, arr):
random.normal(0, self.sigma, out=arr)


class Orthogonal(Initializer):
    """Initialize the weight as an orthogonal matrix.

    Parameters
    ----------
    scale : float, optional
        Scaling factor of the weight.
    rand_type: string, optional
        Use "uniform" or "normal" random numbers to initialize the weight.

    Reference
    ---------
    Saxe et al., "Exact solutions to the nonlinear dynamics of learning in
    deep linear neural networks", arXiv preprint arXiv:1312.6120 (2013).
    """
    def __init__(self, scale=1.414, rand_type="uniform"):
        self.scale = scale
        self.rand_type = rand_type
# pylint: disable=invalid-name
    def _init_weight(self, _, arr):
        nout = arr.shape[0]
        nin = np.prod(arr.shape[1:])
        if self.rand_type == "uniform":
            tmp = np.random.uniform(-1.0, 1.0, (nout, nin))
        elif self.rand_type == "normal":
            tmp = np.random.normal(0.0, 1.0, (nout, nin))
        else:
            raise ValueError("Unknown rand_type %s" % self.rand_type)
        # SVD yields orthonormal factors; keep the one that matches tmp's shape.
        u, _, v = np.linalg.svd(tmp, full_matrices=False)
        if u.shape == tmp.shape:
            q = u
        else:
            q = v
        q = self.scale * q.reshape(arr.shape)
        arr[:] = q
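
# Note: the default scale of 1.414 approximates sqrt(2), the gain commonly
# paired with ReLU activations. A minimal usage sketch:
#
#     init = Orthogonal(scale=1.414, rand_type="normal")
#     init('fc1_weight', arr)  # arr is treated as a (nout, nin) matrix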


class Xavier(Initializer):
    """Initialize the weight with Xavier or a similar scheme.

    Parameters
    ----------
    rnd_type: str, optional
        Use ``gaussian`` or ``uniform`` random numbers.
    factor_type: str, optional
        Scale by ``avg``, ``in``, or ``out`` fan.
    magnitude: float, optional
        Scale of the random number range.
    """
    def __init__(self, rnd_type="uniform", factor_type="avg", magnitude=3):
        self.rnd_type = rnd_type
        self.factor_type = factor_type
        self.magnitude = float(magnitude)
    def _init_weight(self, _, arr):
        shape = arr.shape
        hw_scale = 1.
        if len(shape) > 2:
            # For conv weights (out, in, h, w, ...), fold the spatial extent
            # into the fan computation.
            hw_scale = np.prod(shape[2:])
        fan_in, fan_out = shape[1] * hw_scale, shape[0] * hw_scale
factor = 1.
if self.factor_type == "avg":
factor = (fan_in + fan_out) / 2.0
elif self.factor_type == "in":
factor = fan_in
elif self.factor_type == "out":
factor = fan_out
else:
raise ValueError("Incorrect factor type")
scale = np.sqrt(self.magnitude / factor)
if self.rnd_type == "uniform":
random.uniform(-scale, scale, out=arr)
elif self.rnd_type == "gaussian":
random.normal(0, scale, out=arr)
else:
raise ValueError("Unknown random type")
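
# Worked example (hypothetical shape): for a conv weight of shape
# (64, 32, 3, 3) with the defaults rnd_type="uniform", factor_type="avg",
# magnitude=3: hw_scale = 9, fan_in = 288, fan_out = 576,
# factor = (288 + 576) / 2 = 432, scale = sqrt(3 / 432) = 1/12 ~= 0.083,
# so weights are drawn from uniform(-0.083, 0.083).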


class MSRAPrelu(Xavier):
    """Initialize the weight according to "Delving Deep into Rectifiers:
    Surpassing Human-Level Performance on ImageNet Classification".

    Parameters
    ----------
    factor_type: str, optional
        Scale by ``avg``, ``in``, or ``out`` fan.
    slope: float, optional
        Initial slope of any PReLU (or similar) nonlinearity.
    """
    def __init__(self, factor_type="avg", slope=0.25):
        # Gain 2 / (1 + a^2) from He et al. (2015), where a is the PReLU slope.
        magnitude = 2. / (1 + slope ** 2)
        super(MSRAPrelu, self).__init__("gaussian", factor_type, magnitude)
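
# A minimal end-to-end sketch (assumes mxnet is importable as mx; the names
# below are illustrative, not from this module):
#
#     import mxnet as mx
#     init = MSRAPrelu(factor_type="in", slope=0.25)
#     w = mx.nd.empty((128, 64, 3, 3))
#     init('conv1_weight', w)  # suffix 'weight' dispatches to _init_weight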