#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
#-------------------------------------------------------------
# This is a simple example of how to use the ResNets; here,
# ResNet18 is used. Random data is generated only to showcase
# the usage of the ResNets and their implemented utility
# functions. Utility functions are provided for the different
# optimizers. To switch the optimizer, comment and uncomment
# the corresponding lines marked with '***' in the
# initialization and training sections.
#-------------------------------------------------------------
source("scripts/nn/networks/resnet_util.dml") as resnet_util
source("scripts/nn/networks/resnet18.dml") as resnet18
source("scripts/nn/layers/softmax.dml") as softmax
source("scripts/nn/layers/cross_entropy_loss.dml") as cross_entropy
source("scripts/nn/layers/logcosh_loss.dml") as logcosh
source("nn/optim/adam.dml") as adam
# model
classes = 1000
# get initial model parameters
[model, ema_means_vars] = resnet18::init(classes, -1)
# get initial optimizer parameters
# choose your optimizer:
# *** adagrad
# optimizer_params = resnet18::init_adagrad_optim_params(classes)
# *** adam
# optimizer_params = resnet18::init_adam_optim_params(classes)
# *** rmsprop
# optimizer_params = resnet18::init_rmsprop_optim_params(classes)
# *** sgd
# optimizer_params = list()
# *** sgd momentum
# optimizer_params = resnet18::init_sgd_momentum_optim_params(classes)
# *** sgd nesterov
# optimizer_params = resnet18::init_sgd_nesterov_optim_params(classes)
# *** lars
optimizer_params = resnet18::init_lars_optim_params(classes)
# create random data
N = 100
Hin = 32
Win = 32
C = 3 # input channels
X = rand(rows=N, cols=Hin*Win*C)
Y = rand(rows=N, cols=classes, min=0, max=1, pdf="uniform")
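# to train on real data instead of random matrices, X and Y could be
# read from disk (a minimal sketch; the CSV paths below are
# hypothetical and the shapes must match N x Hin*Win*C and N x classes):
# X = read("data/images.csv", format="csv")
# Y = read("data/labels.csv", format="csv")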
# train ResNet
epochs = 20
batch_size = 16
[learned_model, learned_emas] = train(X, Y, model, ema_means_vars, N, Hin, Win, epochs, batch_size, optimizer_params)
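# inference (a minimal sketch): after training, class scores can be
# computed with a forward pass using the learned parameters; the mode
# string "test" is assumed here to make batch norm use its EMA
# statistics, and softmax turns the scores into class probabilities.
# [scores, learned_emas, cache_out, cache_mv] = resnet18::forward(X, Hin, Win, learned_model, "test", learned_emas)
# probs = softmax::forward(scores)
# predictions = rowIndexMax(probs)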
train = function(matrix[double] X, matrix[double] Y, list[unknown] model, list[unknown] emas, int samples, int Hin,
int Win, int epochs, int batch_size, list[unknown] optim_params)
return (list[unknown] learned_model, list[unknown] learned_emas) {
/*
 * Trains the given ResNet18 model on (X, Y) for the given number of
 * epochs with mini batches of size batch_size and returns the
 * learned parameters and batch norm EMA statistics.
 */
# optimizer hyper parameters
lr = 0.001
epsilon = 1e-8
# *** adam
beta1 = 0.9
beta2 = 0.999
t = 0
# *** rmsprop
decay_rate = 0.99
# *** sgd momentum & nesterov
momentum = 0.8
# *** lars
# note: this momentum assignment overwrites the sgd momentum above;
# keep the value that matches the chosen optimizer
trust_coeff = 0.001
momentum = 0.9
weight_decay = 0.0001
decay_power = 2
learned_model = list()
learned_emas = list()
iterations = ceil(samples/batch_size)
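# e.g., N=100 samples with batch_size=16 yield ceil(100/16) = 7 iterations
# per epoch, where the last mini batch holds only the remaining 4 samples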
mode = "train"
for (epoch in 1:epochs) {
loss_avg = 0.0
print("Start epoch: " + epoch)
for (i in 1:iterations) {
print(" - Iteration: " + i)
# get batch
start = (i - 1) * batch_size + 1
end = min(samples, i * batch_size)
X_batch = X[start:end,]
Y_batch = Y[start:end,]
# forward pass
[out, emas, cached_out, cached_means_vars] = resnet18::forward(X_batch, Hin, Win, model, mode, emas)
# loss (log-cosh), tracked as a running average over the epoch
loss = logcosh::forward(out, Y_batch)
loss_avg = (loss_avg * (i - 1) + loss) / i
# backward
dOut = logcosh::backward(out, Y_batch)
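# *** alternative loss (a minimal sketch, assuming one-hot labels in
# Y_batch): the imported softmax and cross-entropy layers could be used
# instead of log-cosh by replacing the logcosh calls above with:
# probs = softmax::forward(out)
# loss = cross_entropy::forward(probs, Y_batch)
# dprobs = cross_entropy::backward(probs, Y_batch)
# dOut = softmax::backward(dprobs, out)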
[dX, gradients] = resnet18::backward(dOut, cached_out, model, cached_means_vars)
# update parameters
# choose your optimizer
# *** adagrad
# [model, optim_params] = resnet18::update_params_with_adagrad(model, gradients, lr, epsilon, optim_params)
# *** adam
# [model, optim_params] = resnet18::update_params_with_adam(model, gradients, lr, beta1, beta2, epsilon,
# t, optim_params)
# t = t + 1
# *** rmsprop
# [model, optim_params] = resnet18::update_params_with_rmsprop(model, gradients, lr, decay_rate, epsilon,
# optim_params)
# *** sgd
# [model, optim_params] = resnet18::update_params_with_sgd(model, gradients, lr)
# *** sgd momentum
# [model, optim_params] = resnet18::update_params_with_sgd_momentum(model, gradients, lr, momentum,
# optim_params)
# *** sgd nesterov
# [model, optim_params] = resnet18::update_params_with_sgd_nesterov(model, gradients, lr, momentum,
# optim_params)
# *** lars
[model, optim_params] = resnet18::update_params_with_lars(model, gradients, lr, momentum, weight_decay, trust_coeff,
optim_params)
}
# reshuffle the data for the next epoch: order X and Y by the same
# random key column so features and labels stay aligned
r = rand(rows=nrow(Y), cols=1, min=0, max=1, pdf="uniform")
X_tmp = order(target=cbind(r, X), by=1)
Y_tmp = order(target=cbind(r, Y), by=1)
X = X_tmp[,2:ncol(X_tmp)]
Y = Y_tmp[,2:ncol(Y_tmp)]
print("Epoch Avg. Loss: " + loss_avg)
}
learned_model = model
learned_emas = emas
}