#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------

#-------------------------------------------------------------
# This is a simple example of how to use the ResNets. Here,
# ResNet18 is used. Random data is used only to showcase the
# usage of the ResNets and their implemented utility functions.
# There are utility functions for different optimizers. To
# switch the used optimizer, comment and uncomment the
# corresponding lines marked with '***' in the initialization
# and training sections.
#-------------------------------------------------------------

source("scripts/nn/networks/resnet_util.dml") as resnet_util
source("scripts/nn/networks/resnet18.dml") as resnet18
source("scripts/nn/layers/softmax.dml") as softmax
source("scripts/nn/layers/cross_entropy_loss.dml") as cross_entropy
source("scripts/nn/layers/logcosh_loss.dml") as logcosh
source("scripts/nn/optim/adam.dml") as adam

# model
classes = 1000

# get initial model parameters
[model, ema_means_vars] = resnet18::init(classes, -1)
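# 'model' is a list with the network's trainable parameters (weights, biases,
# batch norm parameters); 'ema_means_vars' holds the exponential moving averages
# of the batch norm means and variances. The second argument of init() is the
# seed for the weight initialization (here -1, i.e. a random seed).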

# get initial optimizer parameters
# choose your optimizer:
# *** adagrad
# optimizer_params = resnet18::init_adagrad_optim_params(classes)
# *** adam
# optimizer_params = resnet18::init_adam_optim_params(classes)
# *** rmsprop
# optimizer_params = resnet18::init_rmsprop_optim_params(classes)
# *** sgd
# optimizer_params = list()
# *** sgd momentum
# optimizer_params = resnet18::init_sgd_momentum_optim_params(classes)
# *** sgd nesterov
# optimizer_params = resnet18::init_sgd_nesterov_optim_params(classes)
# *** lars
optimizer_params = resnet18::init_lars_optim_params(classes)
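# 'optimizer_params' is a list with the optimizer's internal state (e.g.
# accumulators or velocity matrices for the model parameters); plain sgd is
# stateless, hence the empty list above.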

# create random data
N = 100
Hin = 32
Win = 32
C = 3 # input channels
X = rand(rows=N, cols=Hin*Win*C)
Y = rand(rows=N, cols=classes, min=0, max=1, pdf="normal")

# train ResNet
epochs = 20
batch_size = 16

[learned_model, learned_emas] = train(X, Y, model, ema_means_vars, N, Hin, Win, epochs, batch_size, optimizer_params)
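
# The lines below are a minimal sketch of how the learned parameters might be
# used for inference. It assumes the forward pass accepts mode="test" (the
# counterpart of the "train" mode used during training), in which the tracked
# EMA means/variances are used instead of the per-batch statistics.
[out_eval, emas_eval, cache_eval, cache_mv_eval] = resnet18::forward(X, Hin, Win, learned_model, "test", learned_emas)
eval_loss = logcosh::forward(out_eval, Y)
print("Log-cosh loss on the random data after training: " + eval_loss)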


train = function(matrix[double] X, matrix[double] Y, list[unknown] model, list[unknown] emas, int samples, int Hin,
    int Win, int epochs, int batch_size, list[unknown] optim_params)
  return (list[unknown] learned_model, list[unknown] learned_emas) {
  /*
   * Trains the model and returns the learned parameters.
   */
  # optimizer hyper parameters
  lr = 0.001
  epsilon = 1e-8
  # *** adam
  beta1 = 0.9
  beta2 = 0.999
  t = 0
  # *** rmsprop
  decay_rate = 0.99
  # *** sgd momentum & nesterov
  momentum = 0.8
  # *** lars (note: overrides the momentum value above)
  trust_coeff = 0.001
  momentum = 0.9
  weight_decay = 0.0001
  decay_power = 2

  learned_model = list()
  learned_emas = list()

  iterations = ceil(samples/batch_size)
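  # ceil() ensures a final, possibly smaller, mini batch covers the remaining samples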
  mode = "train"

  for (epoch in 1:epochs) {
    loss_avg = 0.0

    print("Start epoch: " + epoch)

    for (i in 1:iterations) {
      print(" - Iteration: " + i)

      # get batch
      start = (i - 1) * batch_size + 1
      end = min(samples, i * batch_size)
      X_batch = X[start:end,]
      Y_batch = Y[start:end,]

      # forward pass
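      # returns the network output, the updated EMA means/variances, and the
      # cached intermediate outputs/statistics needed by the backward pass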
      [out, emas, cached_out, cached_means_vars] = resnet18::forward(X_batch, Hin, Win, model, mode, emas)

      # loss
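      # log-cosh loss between network output and the random targets;
      # loss_avg keeps a running mean of the mini batch losses within the epoch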
      loss = logcosh::forward(out, Y_batch)
      loss_avg = (loss_avg * (i - 1) + loss) / i

      # backward
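      # the loss gradient is propagated through the network; 'gradients' holds
      # one gradient per parameter in 'model'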
      dOut = logcosh::backward(out, Y_batch)
      [dX, gradients] = resnet18::backward(dOut, cached_out, model, cached_means_vars)

      # update parameters
      # choose your optimizer
      # *** adagrad
      # [model, optim_params] = resnet18::update_params_with_adagrad(model, gradients, lr, epsilon, optim_params)
      # *** adam
      # [model, optim_params] = resnet18::update_params_with_adam(model, gradients, lr, beta1, beta2, epsilon,
      #   t, optim_params)
      # t = t + 1
      # *** rmsprop
      # [model, optim_params] = resnet18::update_params_with_rmsprop(model, gradients, lr, decay_rate, epsilon,
      #   optim_params)
      # *** sgd
      # [model, optim_params] = resnet18::update_params_with_sgd(model, gradients, lr)
      # *** sgd momentum
      # [model, optim_params] = resnet18::update_params_with_sgd_momentum(model, gradients, lr, momentum,
      #   optim_params)
      # *** sgd nesterov
      # [model, optim_params] = resnet18::update_params_with_sgd_nesterov(model, gradients, lr, momentum,
      #   optim_params)
      # *** lars
      [model, optim_params] = resnet18::update_params_with_lars(model, gradients, lr, momentum, weight_decay, trust_coeff,
        optim_params)
    }

    # reshuffle mini batches
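    # the same random permutation is applied to X and Y by prepending a shared
    # random key column to each and sorting both by it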
    r = rand(rows=nrow(Y), cols=1, min=0, max=1, pdf="uniform")
    X_tmp = order(target=cbind(r, X), by=1)
    Y_tmp = order(target=cbind(r, Y), by=1)
    X = X_tmp[,2:ncol(X_tmp)]
    Y = Y_tmp[,2:ncol(Y_tmp)]

    print("Epoch Avg. Loss: " + loss_avg)
  }

  learned_model = model
  learned_emas = emas
}