#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------

#-------------------------------------------------------------
# This is a simple example of how to use the ResNets. Here,
# ResNet18 is used. Random data is used only to showcase the
# usage of the ResNets and their implemented utility functions.
# There are utility functions for different optimizers. To
# switch the used optimizer, comment and uncomment the
# corresponding lines marked with '***' in the initialization
# and training sections.
#-------------------------------------------------------------

source("scripts/nn/networks/resnet_util.dml") as resnet_util
source("scripts/nn/networks/resnet18.dml") as resnet18
source("scripts/nn/layers/softmax.dml") as softmax
source("scripts/nn/layers/cross_entropy_loss.dml") as cross_entropy
source("scripts/nn/layers/logcosh_loss.dml") as logcosh
source("scripts/nn/optim/adam.dml") as adam

# model
classes = 1000

# get initial model parameters
[model, ema_means_vars] = resnet18::init(classes, -1)
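# 'model' is a list with the network's trainable parameters (weights, biases,
# batch norm parameters); 'ema_means_vars' holds the exponential moving averages
# of the batch norm means and variances. The second argument of init() is the
# seed for the weight initialization (here -1, i.e. a random seed).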

# get initial optimizer parameters
# choose your optimizer:
# *** adagrad
# optimizer_params = resnet18::init_adagrad_optim_params(classes)
# *** adam
# optimizer_params = resnet18::init_adam_optim_params(classes)
# *** rmsprop
# optimizer_params = resnet18::init_rmsprop_optim_params(classes)
# *** sgd
# optimizer_params = list()
# *** sgd momentum
# optimizer_params = resnet18::init_sgd_momentum_optim_params(classes)
# *** sgd nesterov
# optimizer_params = resnet18::init_sgd_nesterov_optim_params(classes)
# *** lars
optimizer_params = resnet18::init_lars_optim_params(classes)
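# 'optimizer_params' is a list with the optimizer's internal state (e.g.
# accumulators or velocity matrices for the model parameters); plain sgd is
# stateless, hence the empty list above.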

# create random data
N = 100
Hin = 32
Win = 32
C = 3 # input channels
X = rand(rows=N, cols=Hin*Win*C)
Y = rand(rows=N, cols=classes, min=0, max=1, pdf="normal")

# train ResNet
epochs = 20
batch_size = 16

[learned_model, learned_emas] = train(X, Y, model, ema_means_vars, N, Hin, Win, epochs, batch_size, optimizer_params)
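
# The lines below are a minimal sketch of how the learned parameters might be
# used for inference. It assumes the forward pass accepts mode="test" (the
# counterpart of the "train" mode used during training), in which the tracked
# EMA means/variances are used instead of the per-batch statistics.
[out_eval, emas_eval, cache_eval, cache_mv_eval] = resnet18::forward(X, Hin, Win, learned_model, "test", learned_emas)
eval_loss = logcosh::forward(out_eval, Y)
print("Log-cosh loss on the random data after training: " + eval_loss)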


train = function(matrix[double] X, matrix[double] Y, list[unknown] model, list[unknown] emas, int samples, int Hin,
    int Win, int epochs, int batch_size, list[unknown] optim_params)
  return (list[unknown] learned_model, list[unknown] learned_emas) {
  /*
   * Trains the model and returns the learned parameters.
   */
  # optimizer hyper parameters
  lr = 0.001
  epsilon = 1e-8
  # *** adam
  beta1 = 0.9
  beta2 = 0.999
  t = 0
  # *** rmsprop
  decay_rate = 0.99
  # *** sgd momentum & nesterov
  momentum = 0.8
  # *** lars (note: overrides the momentum value above)
  trust_coeff = 0.001
  momentum = 0.9
  weight_decay = 0.0001
  decay_power = 2

  learned_model = list()
  learned_emas = list()

  iterations = ceil(samples/batch_size)
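  # ceil() ensures a final, possibly smaller, mini batch covers the remaining samples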
  mode = "train"

  for (epoch in 1:epochs) {
    loss_avg = 0.0

    print("Start epoch: " + epoch)

    for (i in 1:iterations) {
      print(" - Iteration: " + i)

      # get batch
      start = (i - 1) * batch_size + 1
      end = min(samples, i * batch_size)
      X_batch = X[start:end,]
      Y_batch = Y[start:end,]

      # forward pass
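      # returns the network output, the updated EMA means/variances, and the
      # cached intermediate outputs/statistics needed by the backward pass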
      [out, emas, cached_out, cached_means_vars] = resnet18::forward(X_batch, Hin, Win, model, mode, emas)

      # loss
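      # log-cosh loss between network output and the random targets;
      # loss_avg keeps a running mean of the mini batch losses within the epoch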
      loss = logcosh::forward(out, Y_batch)
      loss_avg = (loss_avg * (i - 1) + loss) / i

      # backward
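      # the loss gradient is propagated through the network; 'gradients' holds
      # one gradient per parameter in 'model'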
      dOut = logcosh::backward(out, Y_batch)
      [dX, gradients] = resnet18::backward(dOut, cached_out, model, cached_means_vars)

      # update parameters
      # choose your optimizer
      # *** adagrad
      # [model, optim_params] = resnet18::update_params_with_adagrad(model, gradients, lr, epsilon, optim_params)
      # *** adam
      # [model, optim_params] = resnet18::update_params_with_adam(model, gradients, lr, beta1, beta2, epsilon,
      #   t, optim_params)
      # t = t + 1
      # *** rmsprop
      # [model, optim_params] = resnet18::update_params_with_rmsprop(model, gradients, lr, decay_rate, epsilon,
      #   optim_params)
      # *** sgd
      # [model, optim_params] = resnet18::update_params_with_sgd(model, gradients, lr)
      # *** sgd momentum
      # [model, optim_params] = resnet18::update_params_with_sgd_momentum(model, gradients, lr, momentum,
      #   optim_params)
      # *** sgd nesterov
      # [model, optim_params] = resnet18::update_params_with_sgd_nesterov(model, gradients, lr, momentum,
      #   optim_params)
      # *** lars
      [model, optim_params] = resnet18::update_params_with_lars(model, gradients, lr, momentum, weight_decay, trust_coeff,
        optim_params)
    }

    # reshuffle mini batches
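    # the same random permutation is applied to X and Y by prepending a shared
    # random key column to each and sorting both by it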
    r = rand(rows=nrow(Y), cols=1, min=0, max=1, pdf="uniform")
    X_tmp = order(target=cbind(r, X), by=1)
    Y_tmp = order(target=cbind(r, Y), by=1)
    X = X_tmp[,2:ncol(X_tmp)]
    Y = Y_tmp[,2:ncol(Y_tmp)]

    print("Epoch Avg. Loss: " + loss_avg)
  }

  learned_model = model
  learned_emas = emas
}