# blob: ba165c269e477682f835eedd84ca41b7954f9d9d [file] [log] [blame]
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
source("nn/layers/affine.dml") as affine
source("nn/layers/leaky_relu.dml") as leaky_relu
source("nn/layers/log_loss.dml") as log_loss
source("nn/layers/softmax.dml") as softmax
source("nn/layers/sigmoid.dml") as sigmoid
source("nn/layers/tanh.dml") as tanh
source("nn/optim/adam.dml") as adam
train = function(matrix[double] X, int iterations)
return (matrix[double] GW_1, matrix[double] Gb_1, matrix[double] GW_2, matrix[double] Gb_2, matrix[double] DW_1,
matrix[double] Db_1, matrix[double] DW_2, matrix[double] Db_2)
{
  /*
   * Trains the generator and the discriminator of the GAN.
   *
   * The input matrix, X, has N examples, each with 784 features.
   *
   * Inputs:
   *  - X: Input data matrix, of shape (N, 784).
   *  - iterations: number of iterations for training
   *
   * Outputs:
   *  - GW_1: Generator 1st layer weights (parameters) matrix, of shape (100, 128).
   *  - Gb_1: Generator 1st layer biases vector, of shape (1, 128).
   *  - GW_2: Generator 2nd layer weights (parameters) matrix, of shape (128, 784).
   *  - Gb_2: Generator 2nd layer biases vector, of shape (1, 784).
   *  - DW_1: Discriminator 1st layer weights (parameters) matrix, of shape (784, 128).
   *  - Db_1: Discriminator 1st layer biases vector, of shape (1, 128).
   *  - DW_2: Discriminator 2nd layer weights (parameters) matrix, of shape (128, 1).
   *  - Db_2: Discriminator 2nd layer biases vector, of shape (1, 1).
   */
  N = nrow(X)
  batch_size = 128
  half_batch = batch_size / 2
  # Interval at which one generated image is recorded (~10 snapshots per run).
  # Guarded with max(1, ...) so that iterations < 10 cannot produce an
  # interval of 0 and a modulo-by-zero below.
  snap_interval = max(1, floor(iterations / 10))
  #Define Generator:
  [GW_1, Gb_1] = affine::init(100, 128, -1)
  [GW_2, Gb_2] = affine::init(128, 28*28, -1)
  [mGW_1, vGW_1] = adam::init(GW_1)
  [mGb_1, vGb_1] = adam::init(Gb_1)
  [mGW_2, vGW_2] = adam::init(GW_2)
  [mGb_2, vGb_2] = adam::init(Gb_2)
  gen_model = list(GW_1, Gb_1, GW_2, Gb_2)
  gen_grad = list(mGW_1, vGW_1, mGb_1, vGb_1, mGW_2, vGW_2, mGb_2, vGb_2)
  #Define Discriminator:
  [DW_1, Db_1] = affine::init(28*28, 128, -1)
  [DW_2, Db_2] = affine::init(128, 1, -1)
  [mDW_1, vDW_1] = adam::init(DW_1)
  [mDb_1, vDb_1] = adam::init(Db_1)
  [mDW_2, vDW_2] = adam::init(DW_2)
  [mDb_2, vDb_2] = adam::init(Db_2)
  disc_model = list(DW_1, Db_1, DW_2, Db_2)
  disc_grad = list(mDW_1, vDW_1, mDb_1, vDb_1, mDW_2, vDW_2, mDb_2, vDb_2)
  # accumulates one sample generated image per snapshot interval
  fake = matrix(0, 0, 784)
  for(i in 1:iterations)
  {
    print('step ' + toString(i) + ' / ' + toString(iterations))
    #generate fake samples and draw a random half-batch of real ones
    noise = rand(rows = half_batch, cols = 100, min = 0.0, max = 1.0)
    [fake_images, gen_params] = gen_forward(noise, gen_model)
    # 'idx' (previously named 'rand') avoids shadowing the rand() builtin used above
    idx = sample(N, half_batch)
    real_images = matrix(0, half_batch, 784)
    for(r in 1:half_batch)
    {
      real_images[r,] = X[as.scalar(idx[r]),]
    }
    #train discriminator: real images with target 1, fake images with target 0
    [decision, disc_params] = disc_forward(real_images, disc_model)
    targets = matrix(1, half_batch, 1)
    dloss1 = log_loss::forward(decision, targets)
    [dX, disc_model, disc_grad] = disc_backward(decision, targets, FALSE, i, disc_model, disc_grad, disc_params)
    [decision, disc_params] = disc_forward(fake_images, disc_model)
    targets = matrix(0, half_batch, 1)
    dloss2 = log_loss::forward(decision, targets)
    [dX, disc_model, disc_grad] = disc_backward(decision, targets, FALSE, i, disc_model, disc_grad, disc_params)
    print('discriminator_loss: ' + toString((dloss1 + dloss2)))
    #train generator: fresh fake batch, discriminator locked (lock=TRUE), target 1
    noise = rand(rows = batch_size, cols = 100, min = 0.0, max = 1.0)
    [fake_images, gen_params] = gen_forward(noise, gen_model)
    [decision, disc_params] = disc_forward(fake_images, disc_model)
    targets = matrix(1, batch_size, 1)
    gloss = log_loss::forward(decision, targets)
    [dX, disc_model, disc_grad] = disc_backward(decision, targets, TRUE, i, disc_model, disc_grad, disc_params)
    [gen_model, gen_grad] = gen_backward(dX, i, gen_model, gen_grad, gen_params)
    print('generator_loss: ' + toString(gloss))
    # get sample generated image to observe evolution of generated images
    if(i %% snap_interval == 0)
    {
      fake = rbind(fake, fake_images[1])
    }
  }
  out_dir = "target/testTemp/applications/GAN/GANTest/"
  # rescale from tanh output range [-1, 1] to [0, 1] before writing
  fake = 0.5 * fake + 0.5
  write(fake, out_dir+"/evo")
  DW_1 = as.matrix(disc_model[1])
  Db_1 = as.matrix(disc_model[2])
  DW_2 = as.matrix(disc_model[3])
  Db_2 = as.matrix(disc_model[4])
  GW_1 = as.matrix(gen_model[1])
  Gb_1 = as.matrix(gen_model[2])
  GW_2 = as.matrix(gen_model[3])
  Gb_2 = as.matrix(gen_model[4])
}
gen_forward = function(matrix[double] noise, list[unknown] model)
return(matrix[double] images, list[unknown] params)
{
  /*
   * Forward pass of the generator: maps input noise to fake images
   * through affine -> leaky ReLU -> affine -> tanh.
   *
   * Inputs:
   *  - noise: Randomly generated noise, of shape (N, 100).
   *  - model: List containing the generator weights and biases.
   *
   * Outputs:
   *  - images: Generated images, of shape (N, 784).
   *  - params: List of outputs of the generator layers, needed for backward pass.
   */
  # unpack weights and biases
  W1 = as.matrix(model[1])
  b1 = as.matrix(model[2])
  W2 = as.matrix(model[3])
  b2 = as.matrix(model[4])
  # hidden layer: affine + leaky ReLU
  hidden_lin = affine::forward(noise, W1, b1)
  hidden_act = leaky_relu::forward(hidden_lin)
  # output layer: affine + tanh (pixels end up in [-1, 1])
  out_lin = affine::forward(hidden_act, W2, b2)
  images = tanh::forward(out_lin)
  # intermediate outputs needed by gen_backward
  params = list(noise, hidden_lin, hidden_act, out_lin)
}
disc_forward = function(matrix[double] X, list[unknown] model)
return(matrix[double] decision, list[unknown] params)
{
  /*
   * Forward pass of the discriminator: scores input images as real or fake
   * through affine -> leaky ReLU -> affine -> sigmoid.
   *
   * Inputs:
   *  - X: Input matrix containing sample images, of shape (N, 784).
   *  - model: List containing the discriminator weights and biases.
   *
   * Outputs:
   *  - decision: Decisions for realness of input, of shape (N, 1).
   *  - params: List of outputs of the discriminator layers, needed for backward pass.
   */
  # unpack weights and biases
  W1 = as.matrix(model[1])
  b1 = as.matrix(model[2])
  W2 = as.matrix(model[3])
  b2 = as.matrix(model[4])
  # hidden layer: affine + leaky ReLU
  hidden_lin = affine::forward(X, W1, b1)
  hidden_act = leaky_relu::forward(hidden_lin)
  # output layer: affine + sigmoid (probability-like score per image)
  out_lin = affine::forward(hidden_act, W2, b2)
  decision = sigmoid::forward(out_lin)
  # intermediate outputs needed by disc_backward
  params = list(X, hidden_lin, hidden_act, out_lin)
}
disc_backward = function(matrix[double] decision, matrix[double] targets, boolean lock, int iteration, list[unknown] model, list[unknown] gradients,
list[unknown] params)
return(matrix[double] dX, list[unknown] model, list[unknown] gradients)
{
  /*
   * Backward pass of the discriminator: backpropagates the log loss through
   * sigmoid -> affine -> leaky ReLU -> affine and, unless locked, applies
   * one Adam step to every discriminator parameter.
   *
   * Inputs:
   *  - decision: Input matrix containing discriminator decisions, of shape (N, 1).
   *  - targets: Target values for the decisions, of shape (N, 1).
   *  - lock: Boolean that governs if discriminator weights are to be updated,
   *      TRUE means the weights are not updated (model/gradients pass through unchanged).
   *  - iteration: Current iteration of the training.
   *  - model: List containing the discriminator weights and biases.
   *  - gradients: List containing the discriminator Adam state.
   *  - params: List of outputs of the discriminator layers from the forward pass.
   *
   * Outputs:
   *  - dX: Gradient wrt `X`, of shape (N, 784).
   *  - model: List containing the (possibly updated) discriminator weights and biases.
   *  - gradients: List containing the (possibly updated) discriminator Adam state.
   */
  # Adam hyperparameters
  lr = 0.0002
  beta1 = 0.5
  beta2 = 0.999
  epsilon = 1e-07
  # unpack weights/biases and Adam first/second moment estimates
  W1 = as.matrix(model[1])
  b1 = as.matrix(model[2])
  W2 = as.matrix(model[3])
  b2 = as.matrix(model[4])
  mW1 = as.matrix(gradients[1])
  vW1 = as.matrix(gradients[2])
  mb1 = as.matrix(gradients[3])
  vb1 = as.matrix(gradients[4])
  mW2 = as.matrix(gradients[5])
  vW2 = as.matrix(gradients[6])
  mb2 = as.matrix(gradients[7])
  vb2 = as.matrix(gradients[8])
  # backprop layer 2: loss -> sigmoid -> affine
  dpred = log_loss::backward(decision, targets)
  dlin2 = sigmoid::backward(dpred, as.matrix(params[4]))
  [dact1, dW2, db2] = affine::backward(dlin2, as.matrix(params[3]), W2, b2)
  # backprop layer 1: leaky ReLU -> affine
  dlin1 = leaky_relu::backward(dact1, as.matrix(params[2]))
  [dX, dW1, db1] = affine::backward(dlin1, as.matrix(params[1]), W1, b1)
  if(!lock)
  {
    # apply one Adam step per parameter and repack the updated lists
    [W1, mW1, vW1] = adam::update(W1, dW1, lr, beta1, beta2, epsilon, iteration, mW1, vW1)
    [b1, mb1, vb1] = adam::update(b1, db1, lr, beta1, beta2, epsilon, iteration, mb1, vb1)
    [W2, mW2, vW2] = adam::update(W2, dW2, lr, beta1, beta2, epsilon, iteration, mW2, vW2)
    [b2, mb2, vb2] = adam::update(b2, db2, lr, beta1, beta2, epsilon, iteration, mb2, vb2)
    model = list(W1, b1, W2, b2)
    gradients = list(mW1, vW1, mb1, vb1, mW2, vW2, mb2, vb2)
  }
}
gen_backward = function(matrix[double] dX, int iteration, list[unknown] model, list[unknown] gradients, list[unknown] params)
return(list[unknown] model, list[unknown] gradients)
{
  /*
   * Backward pass of the generator: backpropagates the gradient received
   * from the discriminator through tanh -> affine -> leaky ReLU -> affine
   * and applies one Adam step to every generator parameter.
   *
   * Inputs:
   *  - dX: Gradient wrt `X`, of shape (N, 784).
   *  - iteration: Current iteration of the training.
   *  - model: List containing the generator weights and biases.
   *  - gradients: List containing the generator Adam state.
   *  - params: List of outputs of the generator layers from the forward pass.
   *
   * Outputs:
   *  - model: List containing the updated generator weights and biases.
   *  - gradients: List containing the updated generator Adam state.
   */
  # Adam hyperparameters
  lr = 0.0002
  beta1 = 0.5
  beta2 = 0.999
  epsilon = 1e-07
  # unpack weights/biases and Adam first/second moment estimates
  W1 = as.matrix(model[1])
  b1 = as.matrix(model[2])
  W2 = as.matrix(model[3])
  b2 = as.matrix(model[4])
  mW1 = as.matrix(gradients[1])
  vW1 = as.matrix(gradients[2])
  mb1 = as.matrix(gradients[3])
  vb1 = as.matrix(gradients[4])
  mW2 = as.matrix(gradients[5])
  vW2 = as.matrix(gradients[6])
  mb2 = as.matrix(gradients[7])
  vb2 = as.matrix(gradients[8])
  # backprop layer 2: tanh -> affine
  dlin2 = tanh::backward(dX, as.matrix(params[4]))
  [dact1, dW2, db2] = affine::backward(dlin2, as.matrix(params[3]), W2, b2)
  # backprop layer 1: leaky ReLU -> affine
  dlin1 = leaky_relu::backward(dact1, as.matrix(params[2]))
  [dnoise, dW1, db1] = affine::backward(dlin1, as.matrix(params[1]), W1, b1)
  # apply one Adam step per parameter and repack the updated lists
  [W1, mW1, vW1] = adam::update(W1, dW1, lr, beta1, beta2, epsilon, iteration, mW1, vW1)
  [b1, mb1, vb1] = adam::update(b1, db1, lr, beta1, beta2, epsilon, iteration, mb1, vb1)
  [W2, mW2, vW2] = adam::update(W2, dW2, lr, beta1, beta2, epsilon, iteration, mW2, vW2)
  [b2, mb2, vb2] = adam::update(b2, db2, lr, beta1, beta2, epsilon, iteration, mb2, vb2)
  model = list(W1, b1, W2, b2)
  gradients = list(mW1, vW1, mb1, vb1, mW2, vW2, mb2, vb2)
}
generate = function(int amount, matrix[double] GW_1, matrix[double] Gb_1, matrix[double] GW_2, matrix[double] Gb_2)
return(matrix[double] images)
{
  /*
   * Generates `amount` images from random noise using the trained generator.
   *
   * Inputs:
   *  - amount: Amount of images to be generated.
   *  - GW_1: Generator 1st layer weights (parameters) matrix, of shape (100, 128).
   *  - Gb_1: Generator 1st layer biases vector, of shape (1, 128).
   *  - GW_2: Generator 2nd layer weights (parameters) matrix, of shape (128, 784).
   *  - Gb_2: Generator 2nd layer biases vector, of shape (1, 784).
   *
   * Outputs:
   *  - images: Matrix of generated images, of shape (amount, D).
   */
  z = rand(rows = amount, cols = 100, min = 0.0, max = 1.0)
  gen_model = list(GW_1, Gb_1, GW_2, Gb_2)
  # forward-pass intermediates are not needed here
  [images, unused_params] = gen_forward(z, gen_model)
}
eval = function(matrix[double] images, matrix[double] DW_1, matrix[double] Db_1, matrix[double] DW_2, matrix[double] Db_2)
return(matrix[double] decision)
{
  /*
   * Predicts if a set of input images is real or fake using the trained
   * discriminator.
   *
   * Inputs:
   *  - images: Matrix of sample images of shape (N, D).
   *  - DW_1: Discriminator 1st layer weights (parameters) matrix, of shape (784, 128).
   *  - Db_1: Discriminator 1st layer biases vector, of shape (1, 128).
   *  - DW_2: Discriminator 2nd layer weights (parameters) matrix, of shape (128, 1).
   *  - Db_2: Discriminator 2nd layer biases vector, of shape (1, 1).
   *
   * Outputs:
   *  - decision: Matrix of realness decisions, of shape (N, 1).
   */
  disc_model = list(DW_1, Db_1, DW_2, Db_2)
  # forward-pass intermediates are not needed here
  [decision, unused_params] = disc_forward(images, disc_model)
}