# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
| |
"""
    AbstractInitializer

Abstract supertype shared by every initializer.

A new initializer is created by declaring a subtype and adding methods for
whichever of these hooks it needs to customize:

    _init_weight(self :: AbstractInitializer, name :: Base.Symbol, array :: NDArray)
    _init_bias(self :: AbstractInitializer, name :: Base.Symbol, array :: NDArray)
    _init_gamma(self :: AbstractInitializer, name :: Base.Symbol, array :: NDArray)
    _init_beta(self :: AbstractInitializer, name :: Base.Symbol, array :: NDArray)

When the hooks are not flexible enough, take over the whole procedure by
overriding the entry point instead:

    init(self :: AbstractInitializer, name :: Base.Symbol, array :: NDArray)
"""
abstract type AbstractInitializer end
| |
"""
    init(self, name, array)

Initialize `array` with the hook selected from the textual form of `name`.

The rules are tried in this order and the first match wins:

* starts with `"upsampling"`: `_init_bilinear`
* starts with `"stn_loc"` and ends with `"weight"`: `_init_zero`
* starts with `"stn_loc"` and ends with `"bias"`: `_init_loc_bias`
* ends with `"bias"`: `_init_bias`
* ends with `"gamma"`: `_init_gamma`
* ends with `"beta"`: `_init_beta`
* ends with `"weight"`: `_init_weight`
* ends with `"moving_mean"` or `"moving_var"`: `_init_zero`
* anything else: `_init_default`

The value returned is whatever the selected hook returns.
"""
function init(self :: T, name :: Base.Symbol, array :: NDArray) where T<:AbstractInitializer
    label = string(name)

    if startswith(label, "upsampling")
        return _init_bilinear(self, name, array)
    end

    # "stn_loc" names get special treatment only for their weight and bias;
    # any other suffix falls through to the generic suffix rules below.
    if startswith(label, "stn_loc")
        endswith(label, "weight") && return _init_zero(self, name, array)
        endswith(label, "bias") && return _init_loc_bias(self, name, array)
    end

    endswith(label, "bias") && return _init_bias(self, name, array)
    endswith(label, "gamma") && return _init_gamma(self, name, array)
    endswith(label, "beta") && return _init_beta(self, name, array)
    endswith(label, "weight") && return _init_weight(self, name, array)

    if endswith(label, "moving_mean") || endswith(label, "moving_var")
        return _init_zero(self, name, array)
    end

    return _init_default(self, name, array)
end
| |
"""
    _init_loc_bias(self, name, array)

Overwrite the six-element `array` with `[1.0, 0, 0, 0, 1.0, 0]`.

`init` selects this hook for names that start with `"stn_loc"` and end with
`"bias"`. An `AssertionError` is raised when `size(array)` is not `(6,)`.
"""
function _init_loc_bias(self :: AbstractInitializer, name :: Base.Symbol, array :: NDArray)
    # Use the `@assert` macro (as `_init_bilinear` already does); the function
    # form `assert(...)` is not available in Julia 1.0 and later.
    @assert size(array) == (6,)
    # NOTE(review): the values look like a flattened 2x3 identity affine
    # transform -- confirm against the consumer of this parameter.
    array[:] = [1.0, 0, 0, 0, 1.0, 0]
end
| |
"""
    _init_bilinear(self, name, array)

Fill every `[:, :, j, i]` slice of the 4-dimensional `array` with one and the
same square filter.

With `W, H, C, N = size(array)`, `f = ceil(Int, W / 2)` and
`c = (2f - 1 - f % 2) / (2f)`, the filter entry at zero-based position `(x, y)`
is `(1 - abs(x / f - c)) * (1 - abs(y / f - c))`, converted to `eltype(array)`.

Raises an `AssertionError` unless `array` has four dimensions and `H == W`.
"""
function _init_bilinear(self :: AbstractInitializer, name :: Base.Symbol, array :: NDArray)
    @assert ndims(array) == 4

    W, H, C, N = size(array) # Inverse of NCHW layout
    @assert H == W

    f = ceil(Int, W / 2)              # factor
    c = (2 * f - 1 - f % 2) / (2 * f) # center

    # One-dimensional profile; the 2-D filter is the product of two of them.
    profile(k) = 1 - abs(k / f - c)
    kernel = eltype(array)[profile(x) * profile(y) for x in 0:(W-1), y in 0:(H-1)]

    # Inside the macro block `array` is written to as a read-write array.
    @nd_as_jl rw=array begin
        for i in 1:N, j in 1:C
            array[:, :, j, i] = kernel
        end
    end
end
| |
"""
    _init_bias(self, name, array)

Default bias hook: set every element of `array` to `0`.
"""
_init_bias(self :: AbstractInitializer, name :: Base.Symbol, array :: NDArray) =
    (array[:] = 0)
"""
    _init_gamma(self, name, array)

Default gamma hook: set every element of `array` to `1`.
"""
_init_gamma(self :: AbstractInitializer, name :: Base.Symbol, array :: NDArray) =
    (array[:] = 1)
"""
    _init_beta(self, name, array)

Default beta hook: set every element of `array` to `0`.
"""
_init_beta(self :: AbstractInitializer, name :: Base.Symbol, array :: NDArray) =
    (array[:] = 0)
"""
    _init_zero(self, name, array)

Set every element of `array` to `0`.
"""
_init_zero(self :: AbstractInitializer, name :: Base.Symbol, array :: NDArray) =
    (array[:] = 0)
| |
"""
    _init_default(self, name, array)

Fallback hook used by `init` when no naming rule matches `name`.
Always throws an `ErrorException` that mentions `name`.
"""
function _init_default(self :: AbstractInitializer, name :: Base.Symbol, array :: NDArray)
    throw(ErrorException("Do not know how to init $name"))
end
| |
"""
    UniformInitializer

Initializer that draws weights from a uniform distribution; its
`_init_weight` method samples between `-scale` and `scale`.
"""
struct UniformInitializer <: AbstractInitializer
    # Half-width of the sampling interval.
    scale::AbstractFloat
end
"""
    UniformInitializer(scale=0.07)

Construct a `UniformInitializer` with the specified scale. Calling it without
arguments uses a scale of `0.07`.
"""
function UniformInitializer()
    return UniformInitializer(0.07)
end
| |
# Weight hook for `UniformInitializer`: fill `x` in place via `rand!`,
# sampling between `-i.scale` and `i.scale`.
function _init_weight(i::UniformInitializer, name::Symbol, x::NDArray)
    bound = i.scale
    return rand!(x; low = -bound, high = bound)
end
| |
"""
    NormalInitializer

Initializer that draws weights from a univariate Gaussian distribution.
The fields `μ` and `σ` are forwarded unchanged to `randn!` by `_init_weight`.
"""
struct NormalInitializer <: AbstractInitializer
    μ::AbstractFloat
    σ::AbstractFloat
end
"""
    NormalInitializer(; mu=0, sigma=0.01)

Construct a `NormalInitializer` with mean `mu` and standard deviation `sigma`.

Both keywords are stored in the `μ` and `σ` fields, which `_init_weight`
passes to `randn!` under the same names.
"""
function NormalInitializer(; mu = 0, sigma = 0.01)
    return NormalInitializer(mu, sigma)
end
| |
# Weight hook for `NormalInitializer`: fill `x` in place via `randn!`
# using the initializer's stored `μ` and `σ`.
function _init_weight(i::NormalInitializer, name::Symbol, x::NDArray)
    center, spread = i.μ, i.σ
    return randn!(x; μ = center, σ = spread)
end
| |
# Sampling distribution used by `XavierInitializer`.
# The explicit values equal the numbering `@enum` assigns implicitly.
@enum XavierDistribution xv_uniform=0 xv_normal=1

# Which fan count normalizes the magnitude in `XavierInitializer`:
# the average of both, the input one, or the output one.
@enum XavierRegularization xv_avg=0 xv_in=1 xv_out=2
| |
| |
"""
    XavierInitializer

The initializer documented in the paper [Bengio and Glorot 2010]: *Understanding
the difficulty of training deep feedforward neural networks*.

There are several different versions of the XavierInitializer used in the wild.
The general idea is that the variance of the initialization distribution is controlled
by the dimensionality of the input and output. As a distribution one can either choose
a normal distribution with μ = 0 and σ² or a uniform distribution from -σ to σ.

Several different ways of calculating the variance are given in the literature or are
used by various libraries.

* [Bengio and Glorot 2010]: `mx.XavierInitializer(distribution = mx.xv_uniform, regularization = mx.xv_avg, magnitude = 1)`
* [K. He, X. Zhang, S. Ren, and J. Sun 2015]: `mx.XavierInitializer(distribution = mx.xv_normal, regularization = mx.xv_in, magnitude = 2)`
* caffe_avg: `mx.XavierInitializer(distribution = mx.xv_uniform, regularization = mx.xv_avg, magnitude = 3)`

# Fields

* `distribution`: `xv_uniform` or `xv_normal`.
* `regularization`: `xv_avg`, `xv_in` or `xv_out`; selects the fan count that
  `magnitude` is divided by.
* `magnitude`: numerator of the ratio whose square root is used as σ.
"""
struct XavierInitializer <: AbstractInitializer
    distribution :: XavierDistribution
    regularization :: XavierRegularization
    magnitude :: Float64
end
| |
# Keyword constructor. With no arguments it yields the uniform / averaged /
# magnitude-3 variant.
function XavierInitializer(;
    distribution = xv_uniform,
    regularization = xv_avg,
    magnitude = 3.0,
)
    return XavierInitializer(distribution, regularization, magnitude)
end
| |
"""
    _init_weight(self::XavierInitializer, name, array)

Fill `array` with random values whose spread is `σ = √(magnitude / factor)`.

`factor` is chosen by `self.regularization`: the input fan (`xv_in`), the
output fan (`xv_out`) or the mean of both (`xv_avg`). The output fan is the
first entry of `size(array)` and the input fan is the product of the rest.
`self.distribution` then selects `rand!` on `[-σ, σ]` (`xv_uniform`) or
`randn!` with `μ = 0.0` and that `σ` (`xv_normal`).
"""
function _init_weight(self :: XavierInitializer, name :: Base.Symbol, array :: NDArray)
    shape = size(array)
    # NOTE(review): `_init_bilinear` treats `size` as the inverse of NCHW;
    # confirm that the first dimension is really the output fan here.
    fan_out = shape[1]
    fan_in = prod(shape[2:end])

    mode = self.regularization
    if mode == xv_in
        factor = fan_in
    elseif mode == xv_out
        factor = fan_out
    elseif mode == xv_avg
        factor = (fan_in + fan_out) / 2
    end

    spread = sqrt(self.magnitude / factor)

    family = self.distribution
    family == xv_uniform && return rand!(array; low = -spread, high = spread)
    family == xv_normal && return randn!(array; μ = 0.0, σ = spread)
    return nothing
end