scripts/builtin/alsDS.dml - systemds - Git at Google

 #-------------------------------------------------------------
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 #
 #-------------------------------------------------------------

 #
 # Alternating-Least-Squares (ALS) algorithm using a direct solve method for
 # individual least squares problems (reg="L2"). This script computes an
 # approximate factorization of a low-rank matrix V into two matrices L and R.
 # Matrices L and R are computed by minimizing a loss function (with regularization).
 #
 # INPUT   PARAMETERS:
 # ---------------------------------------------------------------------------------------------
 # NAME    TYPE     DEFAULT  MEANING
 # ---------------------------------------------------------------------------------------------
 # V       String   ---      Location to read the input matrix V to be factorized
 # L       String   ---      Location to write the factor matrix L
 # R       String   ---      Location to write the factor matrix R
 # rank    Int      10       Rank of the factorization
 # lambda  Double   0.000001 Regularization parameter, no regularization if 0.0
 # maxi    Int      50       Maximum number of iterations
 # check   Boolean  FALSE    Check for convergence after every iteration, i.e., updating L and R once
 # thr     Double   0.0001   Assuming check is set to TRUE, the algorithm stops and convergence is declared
 #                           if the decrease in loss in any two consecutive iterations falls below this threshold;
 #                           if check is FALSE thr is ignored
 # ---------------------------------------------------------------------------------------------
 # OUTPUT:
 # 1- An m x r matrix L, where r is the factorization rank
 # 2- An r x n matrix R
 #

 m_alsDS = function(Matrix[Double] X, Integer rank = 10, Double lambda = 0.000001,
   Integer maxi = 50, Boolean check = FALSE, Double thr = 0.0001, Boolean verbose = TRUE)
   return (Matrix[Double] U, Matrix[Double] V)
 {
   r = rank;
   max_iter = maxi;

   # check the input matrix V, if some rows or columns contain only zeros remove them from V
   X_nonzero_ind = X != 0;
   row_nonzeros = rowSums (X_nonzero_ind);
   col_nonzeros = t (colSums (X_nonzero_ind));
   orig_nonzero_rows_ind = row_nonzeros != 0;
   orig_nonzero_cols_ind = col_nonzeros != 0;
   num_zero_rows = nrow (X) - sum (orig_nonzero_rows_ind);
   num_zero_cols = ncol (X) - sum (orig_nonzero_cols_ind);
   if (num_zero_rows > 0) {
     if( verbose )
       print ("Matrix X contains empty rows! These rows will be removed.");
     X = removeEmpty (target = X, margin = "rows");
   }
   if (num_zero_cols > 0) {
     if( verbose )
       print ("Matrix X contains empty columns! These columns will be removed.");
     X = removeEmpty (target = X, margin = "cols");
   }
   if (num_zero_rows > 0 | num_zero_cols > 0) {
     if( verbose )
       print ("Recomputing nonzero rows and columns!");
     X_nonzero_ind = X != 0;
     row_nonzeros = rowSums (X_nonzero_ind);
     col_nonzeros = t (colSums (X_nonzero_ind));
   }

   ###### MAIN PART ######
   m = nrow (X);
   n = ncol (X);

   # initializing factor matrices
   U = rand (rows = m, cols = r, min = -0.5, max = 0.5);
   V = rand (rows = n, cols = r, min = -0.5, max = 0.5);

   # initializing transformed matrices
   Xt = t(X);

   # check for regularization
   if ( verbose )
     print ("BEGIN ALS SCRIPT WITH NONZERO SQUARED LOSS + L2 WITH LAMBDA - " + lambda);

   loss_init = 0.0; # only used if check is TRUE
   if (check) {
     loss_init = sum (X_nonzero_ind * (X - (U %*% t(V)))^2)
       + lambda * (sum ((U^2) * row_nonzeros) + sum ((V^2) * col_nonzeros));
     if( verbose )
       print ("----- Initial train loss: " + loss_init + " -----");
   }

   lambda_I = diag (matrix (lambda, rows = r, cols = 1));
   it = 0;
   converged = FALSE;
   while ((it < max_iter) & (!converged)) {
     it = it + 1;
     # keep V fixed and update U
     parfor (i in 1:m) {
       V_nonzero_ind = t(X[i,] != 0);
       V_nonzero = removeEmpty (target=V * V_nonzero_ind, margin="rows");
       A1 = (t(V_nonzero) %*% V_nonzero) + (as.scalar(row_nonzeros[i,1]) * lambda_I); # coefficient matrix
       U[i,] = t(solve (A1, t(X[i,] %*% V)));
     }

     # keep U fixed and update V
     parfor (j in 1:n) {
       U_nonzero_ind = t(Xt[j,] != 0)
       U_nonzero = removeEmpty (target=U * U_nonzero_ind, margin="rows");
       A2 = (t(U_nonzero) %*% U_nonzero) + (as.scalar(col_nonzeros[j,1]) * lambda_I); # coefficient matrix
       V[j,] = t(solve (A2, t(Xt[j,] %*% U)));
     }

     # check for convergence
     if (check) {
       loss_cur = sum (X_nonzero_ind * (X - (U %*% t(V)))^2)
         + lambda * (sum ((U^2) * row_nonzeros) + sum ((V^2) * col_nonzeros));
       loss_dec = (loss_init - loss_cur) / loss_init;
       if( verbose )
         print ("Train loss at iteration (X) " + it + ": " + loss_cur + " loss-dec " + loss_dec);
       if (loss_dec >= 0 & loss_dec < thr | loss_init == 0) {
         if( verbose )
           print ("----- ALS converged after " + it + " iterations!");
         converged = TRUE;
       }
       loss_init = loss_cur;
     }
   } # end of while loop

   if(verbose) {
     if(check)
       print ("----- Final train loss: " + loss_init + " -----");
     if(!converged )
       print ("Max iteration achieved but not converged!");
   }

   # inject 0s in U if original X had empty rows
   if (num_zero_rows > 0)
     U = removeEmpty (target = diag (orig_nonzero_rows_ind), margin = "cols") %*% U;
   # inject 0s in V if original X had empty rows
   if (num_zero_cols > 0)
     V = removeEmpty (target = diag (orig_nonzero_cols_ind), margin = "cols") %*% V;
   V = t(V);
 }
	#-------------------------------------------------------------
	#
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	#
	#-------------------------------------------------------------

	#
	# Alternating-Least-Squares (ALS) algorithm using a direct solve method for
	# individual least squares problems (reg="L2"). This script computes an
	# approximate factorization of a low-rank matrix V into two matrices L and R.
	# Matrices L and R are computed by minimizing a loss function (with regularization).
	#
	# INPUT PARAMETERS:
	# ---------------------------------------------------------------------------------------------
	# NAME TYPE DEFAULT MEANING
	# ---------------------------------------------------------------------------------------------
	# V String --- Location to read the input matrix V to be factorized
	# L String --- Location to write the factor matrix L
	# R String --- Location to write the factor matrix R
	# rank Int 10 Rank of the factorization
	# lambda Double 0.000001 Regularization parameter, no regularization if 0.0
	# maxi Int 50 Maximum number of iterations
	# check Boolean FALSE Check for convergence after every iteration, i.e., updating L and R once
	# thr Double 0.0001 Assuming check is set to TRUE, the algorithm stops and convergence is declared
	# if the decrease in loss in any two consecutive iterations falls below this threshold;
	# if check is FALSE thr is ignored
	# ---------------------------------------------------------------------------------------------
	# OUTPUT:
	# 1- An m x r matrix L, where r is the factorization rank
	# 2- An r x n matrix R
	#

	m_alsDS = function(Matrix[Double] X, Integer rank = 10, Double lambda = 0.000001,
	Integer maxi = 50, Boolean check = FALSE, Double thr = 0.0001, Boolean verbose = TRUE)
	return (Matrix[Double] U, Matrix[Double] V)
	{
	r = rank;
	max_iter = maxi;

	# check the input matrix V, if some rows or columns contain only zeros remove them from V
	X_nonzero_ind = X != 0;
	row_nonzeros = rowSums (X_nonzero_ind);
	col_nonzeros = t (colSums (X_nonzero_ind));
	orig_nonzero_rows_ind = row_nonzeros != 0;
	orig_nonzero_cols_ind = col_nonzeros != 0;
	num_zero_rows = nrow (X) - sum (orig_nonzero_rows_ind);
	num_zero_cols = ncol (X) - sum (orig_nonzero_cols_ind);
	if (num_zero_rows > 0) {
	if( verbose )
	print ("Matrix X contains empty rows! These rows will be removed.");
	X = removeEmpty (target = X, margin = "rows");
	}
	if (num_zero_cols > 0) {
	if( verbose )
	print ("Matrix X contains empty columns! These columns will be removed.");
	X = removeEmpty (target = X, margin = "cols");
	}
	if (num_zero_rows > 0 \| num_zero_cols > 0) {
	if( verbose )
	print ("Recomputing nonzero rows and columns!");
	X_nonzero_ind = X != 0;
	row_nonzeros = rowSums (X_nonzero_ind);
	col_nonzeros = t (colSums (X_nonzero_ind));
	}

	###### MAIN PART ######
	m = nrow (X);
	n = ncol (X);

	# initializing factor matrices
	U = rand (rows = m, cols = r, min = -0.5, max = 0.5);
	V = rand (rows = n, cols = r, min = -0.5, max = 0.5);

	# initializing transformed matrices
	Xt = t(X);

	# check for regularization
	if ( verbose )
	print ("BEGIN ALS SCRIPT WITH NONZERO SQUARED LOSS + L2 WITH LAMBDA - " + lambda);

	loss_init = 0.0; # only used if check is TRUE
	if (check) {
	loss_init = sum (X_nonzero_ind * (X - (U %*% t(V)))^2)
	+ lambda * (sum ((U^2) * row_nonzeros) + sum ((V^2) * col_nonzeros));
	if( verbose )
	print ("----- Initial train loss: " + loss_init + " -----");
	}

	lambda_I = diag (matrix (lambda, rows = r, cols = 1));
	it = 0;
	converged = FALSE;
	while ((it < max_iter) & (!converged)) {
	it = it + 1;
	# keep V fixed and update U
	parfor (i in 1:m) {
	V_nonzero_ind = t(X[i,] != 0);
	V_nonzero = removeEmpty (target=V * V_nonzero_ind, margin="rows");
	A1 = (t(V_nonzero) %% V_nonzero) + (as.scalar(row_nonzeros[i,1]) lambda_I); # coefficient matrix
	U[i,] = t(solve (A1, t(X[i,] %*% V)));
	}

	# keep U fixed and update V
	parfor (j in 1:n) {
	U_nonzero_ind = t(Xt[j,] != 0)
	U_nonzero = removeEmpty (target=U * U_nonzero_ind, margin="rows");
	A2 = (t(U_nonzero) %% U_nonzero) + (as.scalar(col_nonzeros[j,1]) lambda_I); # coefficient matrix
	V[j,] = t(solve (A2, t(Xt[j,] %*% U)));
	}

	# check for convergence
	if (check) {
	loss_cur = sum (X_nonzero_ind * (X - (U %*% t(V)))^2)
	+ lambda * (sum ((U^2) * row_nonzeros) + sum ((V^2) * col_nonzeros));
	loss_dec = (loss_init - loss_cur) / loss_init;
	if( verbose )
	print ("Train loss at iteration (X) " + it + ": " + loss_cur + " loss-dec " + loss_dec);
	if (loss_dec >= 0 & loss_dec < thr \| loss_init == 0) {
	if( verbose )
	print ("----- ALS converged after " + it + " iterations!");
	converged = TRUE;
	}
	loss_init = loss_cur;
	}
	} # end of while loop

	if(verbose) {
	if(check)
	print ("----- Final train loss: " + loss_init + " -----");
	if(!converged )
	print ("Max iteration achieved but not converged!");
	}

	# inject 0s in U if original X had empty rows
	if (num_zero_rows > 0)
	U = removeEmpty (target = diag (orig_nonzero_rows_ind), margin = "cols") %*% U;
	# inject 0s in V if original X had empty rows
	if (num_zero_cols > 0)
	V = removeEmpty (target = diag (orig_nonzero_cols_ind), margin = "cols") %*% V;
	V = t(V);
	}