# blob: ba165c269e477682f835eedd84ca41b7954f9d9d [file] [log] [blame]
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
source("nn/layers/affine.dml") as affine
source("nn/layers/leaky_relu.dml") as leaky_relu
source("nn/layers/log_loss.dml") as log_loss
source("nn/layers/softmax.dml") as softmax
source("nn/layers/sigmoid.dml") as sigmoid
source("nn/layers/tanh.dml") as tanh
source("nn/optim/adam.dml") as adam
train = function(matrix[double] X, int iterations)
return (matrix[double] GW_1, matrix[double] Gb_1, matrix[double] GW_2, matrix[double] Gb_2, matrix[double] DW_1,
matrix[double] Db_1, matrix[double] DW_2, matrix[double] Db_2)
{
  /*
   * Trains the generator and the discriminator of the GAN.
   *
   * The input matrix, X, has N examples, each with 784 features.
   *
   * Inputs:
   *  - X: Input data matrix, of shape (N, 784).
   *  - iterations: number of iterations for training
   *
   * Outputs:
   *  - GW_1: Generator 1st layer weights (parameters) matrix, of shape (100, 128).
   *  - Gb_1: Generator 1st layer biases vector, of shape (1, 128).
   *  - GW_2: Generator 2nd layer weights (parameters) matrix, of shape (128, 784).
   *  - Gb_2: Generator 2nd layer biases vector, of shape (1, 784).
   *  - DW_1: Discriminator 1st layer weights (parameters) matrix, of shape (784, 128).
   *  - Db_1: Discriminator 1st layer biases vector, of shape (1, 128).
   *  - DW_2: Discriminator 2nd layer weights (parameters) matrix, of shape (128, 1).
   *  - Db_2: Discriminator 2nd layer biases vector, of shape (1, 1).
   */
  N = nrow(X)
  batch_size = 128
  half_batch = batch_size / 2
  # Interval at which one generated image is recorded (~10 snapshots per run).
  # Guarded with max(1, ...) so that iterations < 10 cannot produce an
  # interval of 0 and a modulo-by-zero below.
  snap_interval = max(1, floor(iterations / 10))
  #Define Generator:
  [GW_1, Gb_1] = affine::init(100, 128, -1)
  [GW_2, Gb_2] = affine::init(128, 28*28, -1)
  [mGW_1, vGW_1] = adam::init(GW_1)
  [mGb_1, vGb_1] = adam::init(Gb_1)
  [mGW_2, vGW_2] = adam::init(GW_2)
  [mGb_2, vGb_2] = adam::init(Gb_2)
  gen_model = list(GW_1, Gb_1, GW_2, Gb_2)
  gen_grad = list(mGW_1, vGW_1, mGb_1, vGb_1, mGW_2, vGW_2, mGb_2, vGb_2)
  #Define Discriminator:
  [DW_1, Db_1] = affine::init(28*28, 128, -1)
  [DW_2, Db_2] = affine::init(128, 1, -1)
  [mDW_1, vDW_1] = adam::init(DW_1)
  [mDb_1, vDb_1] = adam::init(Db_1)
  [mDW_2, vDW_2] = adam::init(DW_2)
  [mDb_2, vDb_2] = adam::init(Db_2)
  disc_model = list(DW_1, Db_1, DW_2, Db_2)
  disc_grad = list(mDW_1, vDW_1, mDb_1, vDb_1, mDW_2, vDW_2, mDb_2, vDb_2)
  # accumulates one sample generated image per snapshot interval
  fake = matrix(0, 0, 784)
  for(i in 1:iterations)
  {
    print('step ' + toString(i) + ' / ' + toString(iterations))
    #generate fake samples and draw a random half-batch of real ones
    noise = rand(rows = half_batch, cols = 100, min = 0.0, max = 1.0)
    [fake_images, gen_params] = gen_forward(noise, gen_model)
    # 'idx' (previously named 'rand') avoids shadowing the rand() builtin used above
    idx = sample(N, half_batch)
    real_images = matrix(0, half_batch, 784)
    for(r in 1:half_batch)
    {
      real_images[r,] = X[as.scalar(idx[r]),]
    }
    #train discriminator: real images with target 1, fake images with target 0
    [decision, disc_params] = disc_forward(real_images, disc_model)
    targets = matrix(1, half_batch, 1)
    dloss1 = log_loss::forward(decision, targets)
    [dX, disc_model, disc_grad] = disc_backward(decision, targets, FALSE, i, disc_model, disc_grad, disc_params)
    [decision, disc_params] = disc_forward(fake_images, disc_model)
    targets = matrix(0, half_batch, 1)
    dloss2 = log_loss::forward(decision, targets)
    [dX, disc_model, disc_grad] = disc_backward(decision, targets, FALSE, i, disc_model, disc_grad, disc_params)
    print('discriminator_loss: ' + toString((dloss1 + dloss2)))
    #train generator: fresh fake batch, discriminator locked (lock=TRUE), target 1
    noise = rand(rows = batch_size, cols = 100, min = 0.0, max = 1.0)
    [fake_images, gen_params] = gen_forward(noise, gen_model)
    [decision, disc_params] = disc_forward(fake_images, disc_model)
    targets = matrix(1, batch_size, 1)
    gloss = log_loss::forward(decision, targets)
    [dX, disc_model, disc_grad] = disc_backward(decision, targets, TRUE, i, disc_model, disc_grad, disc_params)
    [gen_model, gen_grad] = gen_backward(dX, i, gen_model, gen_grad, gen_params)
    print('generator_loss: ' + toString(gloss))
    # get sample generated image to observe evolution of generated images
    if(i %% snap_interval == 0)
    {
      fake = rbind(fake, fake_images[1])
    }
  }
  out_dir = "target/testTemp/applications/GAN/GANTest/"
  # rescale from tanh output range [-1, 1] to [0, 1] before writing
  fake = 0.5 * fake + 0.5
  write(fake, out_dir+"/evo")
  DW_1 = as.matrix(disc_model[1])
  Db_1 = as.matrix(disc_model[2])
  DW_2 = as.matrix(disc_model[3])
  Db_2 = as.matrix(disc_model[4])
  GW_1 = as.matrix(gen_model[1])
  Gb_1 = as.matrix(gen_model[2])
  GW_2 = as.matrix(gen_model[3])
  Gb_2 = as.matrix(gen_model[4])
}
gen_forward = function(matrix[double] noise, list[unknown] model)
return(matrix[double] images, list[unknown] params)
{
  /*
   * Forward pass of the generator: maps input noise to fake images
   * through affine -> leaky ReLU -> affine -> tanh.
   *
   * Inputs:
   *  - noise: Randomly generated noise, of shape (N, 100).
   *  - model: List containing the generator weights and biases.
   *
   * Outputs:
   *  - images: Generated images, of shape (N, 784).
   *  - params: List of outputs of the generator layers, needed for backward pass.
   */
  # unpack weights and biases
  W1 = as.matrix(model[1])
  b1 = as.matrix(model[2])
  W2 = as.matrix(model[3])
  b2 = as.matrix(model[4])
  # hidden layer: affine + leaky ReLU
  hidden_lin = affine::forward(noise, W1, b1)
  hidden_act = leaky_relu::forward(hidden_lin)
  # output layer: affine + tanh (pixels end up in [-1, 1])
  out_lin = affine::forward(hidden_act, W2, b2)
  images = tanh::forward(out_lin)
  # intermediate outputs needed by gen_backward
  params = list(noise, hidden_lin, hidden_act, out_lin)
}
disc_forward = function(matrix[double] X, list[unknown] model)
return(matrix[double] decision, list[unknown] params)
{
  /*
   * Forward pass of the discriminator: scores input images as real or fake
   * through affine -> leaky ReLU -> affine -> sigmoid.
   *
   * Inputs:
   *  - X: Input matrix containing sample images, of shape (N, 784).
   *  - model: List containing the discriminator weights and biases.
   *
   * Outputs:
   *  - decision: Decisions for realness of input, of shape (N, 1).
   *  - params: List of outputs of the discriminator layers, needed for backward pass.
   */
  # unpack weights and biases
  W1 = as.matrix(model[1])
  b1 = as.matrix(model[2])
  W2 = as.matrix(model[3])
  b2 = as.matrix(model[4])
  # hidden layer: affine + leaky ReLU
  hidden_lin = affine::forward(X, W1, b1)
  hidden_act = leaky_relu::forward(hidden_lin)
  # output layer: affine + sigmoid (probability-like score per image)
  out_lin = affine::forward(hidden_act, W2, b2)
  decision = sigmoid::forward(out_lin)
  # intermediate outputs needed by disc_backward
  params = list(X, hidden_lin, hidden_act, out_lin)
}
disc_backward = function(matrix[double] decision, matrix[double] targets, boolean lock, int iteration, list[unknown] model, list[unknown] gradients,
list[unknown] params)
return(matrix[double] dX, list[unknown] model, list[unknown] gradients)
{
  /*
   * Backward pass of the discriminator: backpropagates the log loss through
   * sigmoid -> affine -> leaky ReLU -> affine and, unless locked, applies
   * one Adam step to every discriminator parameter.
   *
   * Inputs:
   *  - decision: Input matrix containing discriminator decisions, of shape (N, 1).
   *  - targets: Target values for the decisions, of shape (N, 1).
   *  - lock: Boolean that governs if discriminator weights are to be updated,
   *      TRUE means the weights are not updated (model/gradients pass through unchanged).
   *  - iteration: Current iteration of the training.
   *  - model: List containing the discriminator weights and biases.
   *  - gradients: List containing the discriminator Adam state.
   *  - params: List of outputs of the discriminator layers from the forward pass.
   *
   * Outputs:
   *  - dX: Gradient wrt `X`, of shape (N, 784).
   *  - model: List containing the (possibly updated) discriminator weights and biases.
   *  - gradients: List containing the (possibly updated) discriminator Adam state.
   */
  # Adam hyperparameters
  lr = 0.0002
  beta1 = 0.5
  beta2 = 0.999
  epsilon = 1e-07
  # unpack weights/biases and Adam first/second moment estimates
  W1 = as.matrix(model[1])
  b1 = as.matrix(model[2])
  W2 = as.matrix(model[3])
  b2 = as.matrix(model[4])
  mW1 = as.matrix(gradients[1])
  vW1 = as.matrix(gradients[2])
  mb1 = as.matrix(gradients[3])
  vb1 = as.matrix(gradients[4])
  mW2 = as.matrix(gradients[5])
  vW2 = as.matrix(gradients[6])
  mb2 = as.matrix(gradients[7])
  vb2 = as.matrix(gradients[8])
  # backprop layer 2: loss -> sigmoid -> affine
  dpred = log_loss::backward(decision, targets)
  dlin2 = sigmoid::backward(dpred, as.matrix(params[4]))
  [dact1, dW2, db2] = affine::backward(dlin2, as.matrix(params[3]), W2, b2)
  # backprop layer 1: leaky ReLU -> affine
  dlin1 = leaky_relu::backward(dact1, as.matrix(params[2]))
  [dX, dW1, db1] = affine::backward(dlin1, as.matrix(params[1]), W1, b1)
  if(!lock)
  {
    # apply one Adam step per parameter and repack the updated lists
    [W1, mW1, vW1] = adam::update(W1, dW1, lr, beta1, beta2, epsilon, iteration, mW1, vW1)
    [b1, mb1, vb1] = adam::update(b1, db1, lr, beta1, beta2, epsilon, iteration, mb1, vb1)
    [W2, mW2, vW2] = adam::update(W2, dW2, lr, beta1, beta2, epsilon, iteration, mW2, vW2)
    [b2, mb2, vb2] = adam::update(b2, db2, lr, beta1, beta2, epsilon, iteration, mb2, vb2)
    model = list(W1, b1, W2, b2)
    gradients = list(mW1, vW1, mb1, vb1, mW2, vW2, mb2, vb2)
  }
}
gen_backward = function(matrix[double] dX, int iteration, list[unknown] model, list[unknown] gradients, list[unknown] params)
return(list[unknown] model, list[unknown] gradients)
{
  /*
   * Backward pass of the generator: backpropagates the gradient received
   * from the discriminator through tanh -> affine -> leaky ReLU -> affine
   * and applies one Adam step to every generator parameter.
   *
   * Inputs:
   *  - dX: Gradient wrt `X`, of shape (N, 784).
   *  - iteration: Current iteration of the training.
   *  - model: List containing the generator weights and biases.
   *  - gradients: List containing the generator Adam state.
   *  - params: List of outputs of the generator layers from the forward pass.
   *
   * Outputs:
   *  - model: List containing the updated generator weights and biases.
   *  - gradients: List containing the updated generator Adam state.
   */
  # Adam hyperparameters
  lr = 0.0002
  beta1 = 0.5
  beta2 = 0.999
  epsilon = 1e-07
  # unpack weights/biases and Adam first/second moment estimates
  W1 = as.matrix(model[1])
  b1 = as.matrix(model[2])
  W2 = as.matrix(model[3])
  b2 = as.matrix(model[4])
  mW1 = as.matrix(gradients[1])
  vW1 = as.matrix(gradients[2])
  mb1 = as.matrix(gradients[3])
  vb1 = as.matrix(gradients[4])
  mW2 = as.matrix(gradients[5])
  vW2 = as.matrix(gradients[6])
  mb2 = as.matrix(gradients[7])
  vb2 = as.matrix(gradients[8])
  # backprop layer 2: tanh -> affine
  dlin2 = tanh::backward(dX, as.matrix(params[4]))
  [dact1, dW2, db2] = affine::backward(dlin2, as.matrix(params[3]), W2, b2)
  # backprop layer 1: leaky ReLU -> affine
  dlin1 = leaky_relu::backward(dact1, as.matrix(params[2]))
  [dnoise, dW1, db1] = affine::backward(dlin1, as.matrix(params[1]), W1, b1)
  # apply one Adam step per parameter and repack the updated lists
  [W1, mW1, vW1] = adam::update(W1, dW1, lr, beta1, beta2, epsilon, iteration, mW1, vW1)
  [b1, mb1, vb1] = adam::update(b1, db1, lr, beta1, beta2, epsilon, iteration, mb1, vb1)
  [W2, mW2, vW2] = adam::update(W2, dW2, lr, beta1, beta2, epsilon, iteration, mW2, vW2)
  [b2, mb2, vb2] = adam::update(b2, db2, lr, beta1, beta2, epsilon, iteration, mb2, vb2)
  model = list(W1, b1, W2, b2)
  gradients = list(mW1, vW1, mb1, vb1, mW2, vW2, mb2, vb2)
}
generate = function(int amount, matrix[double] GW_1, matrix[double] Gb_1, matrix[double] GW_2, matrix[double] Gb_2)
return(matrix[double] images)
{
  /*
   * Generates `amount` images from random noise using the trained generator.
   *
   * Inputs:
   *  - amount: Amount of images to be generated.
   *  - GW_1: Generator 1st layer weights (parameters) matrix, of shape (100, 128).
   *  - Gb_1: Generator 1st layer biases vector, of shape (1, 128).
   *  - GW_2: Generator 2nd layer weights (parameters) matrix, of shape (128, 784).
   *  - Gb_2: Generator 2nd layer biases vector, of shape (1, 784).
   *
   * Outputs:
   *  - images: Matrix of generated images, of shape (amount, D).
   */
  z = rand(rows = amount, cols = 100, min = 0.0, max = 1.0)
  gen_model = list(GW_1, Gb_1, GW_2, Gb_2)
  # forward-pass intermediates are not needed here
  [images, unused_params] = gen_forward(z, gen_model)
}
eval = function(matrix[double] images, matrix[double] DW_1, matrix[double] Db_1, matrix[double] DW_2, matrix[double] Db_2)
return(matrix[double] decision)
{
  /*
   * Predicts if a set of input images is real or fake using the trained
   * discriminator.
   *
   * Inputs:
   *  - images: Matrix of sample images of shape (N, D).
   *  - DW_1: Discriminator 1st layer weights (parameters) matrix, of shape (784, 128).
   *  - Db_1: Discriminator 1st layer biases vector, of shape (1, 128).
   *  - DW_2: Discriminator 2nd layer weights (parameters) matrix, of shape (128, 1).
   *  - Db_2: Discriminator 2nd layer biases vector, of shape (1, 1).
   *
   * Outputs:
   *  - decision: Matrix of realness decisions, of shape (N, 1).
   */
  disc_model = list(DW_1, Db_1, DW_2, Db_2)
  # forward-pass intermediates are not needed here
  [decision, unused_params] = disc_forward(images, disc_model)
}