#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
/*
* Factorization Machines for binary classification.
*/
# Imports
source("nn/optim/adam.dml") as adam
source("nn/layers/fm.dml") as fm
source("nn/layers/log_loss.dml") as log_loss
source("nn/layers/sigmoid.dml") as sigmoid
source("nn/layers/l2_reg.dml") as l2_reg
source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
train = function(matrix[double] X, matrix[double] y, matrix[double] X_val, matrix[double] y_val)
return (matrix[double] w0, matrix[double] W, matrix[double] V, double loss) {
/*
* Trains the FM model for binary classification.
*
* Inputs:
* - X : n training examples with d features, of shape (n, d).
* - y : 0/1 labels for the n training examples, of shape (n, 1).
* - X_val : validation examples, of shape (n_val, d).
* - y_val : 0/1 labels for the validation examples, of shape (n_val, 1).
*
* Outputs:
* - w0, W, V : trained model parameters.
* - loss : training loss of the final batch, computed with log_loss.
*
* Propagation of a batch through the layers:
* forward: fm::init -> adam::init -> fm::forward -> sigmoid::forward -> log_loss::forward
* backward: adam::update <- fm::backward <- sigmoid::backward <- log_loss::backward
*/
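# For reference: fm::forward is assumed to compute the standard 2-way
# Factorization Machine score (Rendle, 2010) for each example x:
#   y_hat(x) = w0 + sum_j W[j]*x[j] + sum_{j<l} dot(V[j,], V[l,]) * x[j]*x[l]
# where w0 is the global bias, W the (d, 1) linear weights, and V the (d, k)
# factor matrix whose row inner products model pairwise feature interactions.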
n = nrow(X);
d = ncol(X);
k = 2; # factorization dimensionality; only k=2 is supported for now.
# 1. Initialize the FM parameters (w0: global bias, W: linear weights, V: factor matrix).
[w0, W, V] = fm::init(d, k);
# 2. Initialize the Adam optimizer.
## Default hyperparameter values
lr = 0.001;      # learning rate
beta1 = 0.9;     # 1st-moment decay rate, in [0, 1)
beta2 = 0.999;   # 2nd-moment decay rate, in [0, 1)
epsilon = 1e-8;  # numerical stability term
t = 0;           # timestep
# [mX, vX] = adam::init(X); # to optimize input.
[mw0, vw0] = adam::init(w0);
[mW, vW] = adam::init(W);
[mV, vV] = adam::init(V);
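# For reference, each adam::update call below is assumed to perform the
# standard Adam step (Kingma & Ba, 2015) on a parameter theta with gradient g:
#   m = beta1*m + (1-beta1)*g;        v = beta2*v + (1-beta2)*g^2
#   m_hat = m / (1 - beta1^(t+1));    v_hat = v / (1 - beta2^(t+1))
#   theta = theta - lr * m_hat / (sqrt(v_hat) + epsilon)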
# Regularization
lambda = 5e-04
# Optimize
print("Starting optimization")
batch_size = 10
epochs = 100; N = n;
iters = ceil(N / batch_size)  # one pass over the N training examples per epoch
for (e in 1:epochs) {
for (i in 1:iters) {
# Get the next batch
beg = ((i-1) * batch_size) %% N + 1
end = min(N, beg + batch_size - 1)
X_batch = X[beg:end,]
y_batch = y[beg:end,]
# 3. Send inputs through fm::forward
y_res = fm::forward(X_batch, w0, W, V);
# 4. Send the above result through sigmoid::forward to obtain probabilities
sfy = sigmoid::forward(y_res);
# 5. Send the above result through log_loss::forward
loss = log_loss::forward(sfy, y_batch);
# Compute loss & accuracy for training & validation data every 100 iterations.
if (i %% 100 == 0) {
# Compute training loss & accuracy
loss_data = loss;  # batch log loss, computed above
loss_reg_w0 = l2_reg::forward(w0, lambda);
loss_reg_W = l2_reg::forward(W, lambda);
loss_reg_V = l2_reg::forward(V, lambda);
accuracy = mean((sfy<0.5) == (y_batch<0.5));
loss = loss_data + loss_reg_w0 + loss_reg_W + loss_reg_V;
# Compute validation loss & accuracy
probs_val = predict(X_val, w0, W, V)
loss_val = log_loss::forward(probs_val, y_val)
accuracy_val = mean((probs_val<0.5) == (y_val<0.5))
# Output results
print("Epoch: " + e + ", Iter: " + i + ", Train Loss: " + loss + ", Train Accuracy: "
+ accuracy + ", Val Loss: " + loss_val + ", Val Accuracy: " + accuracy_val)
}
# 6. Send the sigmoid outputs and the true labels through log_loss::backward
dsfy = log_loss::backward(sfy, y_batch);
# 7. Send the above result through sigmoid::backward
dy = sigmoid::backward(dsfy, y_res);
# 8. Send the above result through fm::backward
[dw0, dW, dV] = fm::backward(dy, X_batch, w0, W, V);
# 9. Update the timestep (number of Adam updates completed so far, starting at 0).
t = (e-1) * iters + i - 1;
# 10. Call adam::update for all parameters.
# In case we want to optimize the inputs (X) as well, as in deep dream:
#[X, mX, vX] = adam::update(X, dX, lr, beta1, beta2, epsilon, t, mX, vX);
[w0, mw0, vw0] = adam::update(w0, dw0, lr, beta1, beta2, epsilon, t, mw0, vw0);
[W, mW, vW] = adam::update(W, dW, lr, beta1, beta2, epsilon, t, mW, vW);
[V, mV, vV] = adam::update(V, dV, lr, beta1, beta2, epsilon, t, mV, vV);
}
}
}
predict = function(matrix[double] X, matrix[double] w0, matrix[double] W, matrix[double] V)
return (matrix[double] out) {
/*
* Computes the predictions for the given inputs.
*
* Inputs:
* - X : n examples with d features, of shape (n, d).
* - w0, W, V : trained model parameters.
*
* Outputs:
* - out : predicted probabilities, of shape (n, 1).
*/
# 1. Send inputs through fm::forward
y = fm::forward(X, w0, W, V);
# 2. Send the above result through sigmoid::forward to obtain probabilities
out = sigmoid::forward(y);
}
eval = function(matrix[double] probs, matrix[double] y)
return (double loss, double accuracy) {
/*
* Computes log loss and accuracy of predicted probabilities
* against the 0/1 labels y.
*
* Inputs:
* - probs : predicted probabilities, of shape (n, 1).
* - y : 0/1 labels, of shape (n, 1).
*
* Outputs:
* - loss : log loss.
* - accuracy : fraction of correct 0/1 class assignments.
*/
# 1. compute log loss
loss = log_loss::forward(probs, y);
# 2. compute accuracy
accuracy = mean((probs<0.5) == (y<0.5));
}
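
# Usage sketch (kept as comments so sourcing this file stays side-effect free).
# The file names and the label-in-last-column layout are assumptions, not part
# of this script:
#
#   data  = read("train.csv", format="csv")
#   X     = data[, 1:ncol(data)-1]
#   y     = data[, ncol(data)]        # 0/1 labels
#   val   = read("val.csv", format="csv")
#   X_val = val[, 1:ncol(val)-1]
#   y_val = val[, ncol(val)]
#
#   [w0, W, V, loss] = train(X, y, X_val, y_val)
#   probs = predict(X_val, w0, W, V)
#   [loss_val, accuracy_val] = eval(probs, y_val)
#   y_pred = probs >= 0.5             # threshold probabilities to class labels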