# scripts/builtin/als.dml - systemds - Git at Google

 #-------------------------------------------------------------
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 #
 #-------------------------------------------------------------

 #
 # This script computes an approximate factorization of a low-rank matrix X into two matrices U and V
 # using different implementations of the Alternating-Least-Squares (ALS) algorithm.
 # Matrices U and V are computed by minimizing a loss function (with regularization).
 #
 # INPUT   PARAMETERS:
 # ---------------------------------------------------------------------------------------------
 # NAME    TYPE     DEFAULT  MEANING
 # ---------------------------------------------------------------------------------------------
 # X       Matrix   ---      Input matrix X to be factorized (passed as Matrix[Double])
 # rank    Int      10       Rank of the factorization
 # reg     String   "L2"	    Regularization:
 #                           "L2" = L2 regularization;
 #                              f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
 #                                       + 0.5 * lambda * (sum (U ^ 2) + sum (V ^ 2))
 #                           "wL2" = weighted L2 regularization
 #                              f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
 #                                       + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros)
 #                                       + sum (V ^ 2 * col_nonzeros))
 # lambda  Double   0.000001 Regularization parameter, no regularization if 0.0
 # maxi    Int      50       Maximum number of iterations
 # check   Boolean  TRUE     Check for convergence after every iteration, i.e., updating U and V once
 # thr     Double   0.0001   Assuming check is set to TRUE, the algorithm stops and convergence is declared
 #                           if the decrease in loss in any two consecutive iterations falls below this threshold;
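 #                           if check is FALSE thr is ignored
 # verbose Boolean  TRUE     Flag passed through unchanged to the selected ALS implementation
 #                           (presumably enables progress output; confirm against alsCG/alsDS)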
 # ---------------------------------------------------------------------------------------------
 # OUTPUT:
 # 1- An m x r matrix U, where r is the factorization rank
 # 2- An r x n matrix V

 m_als = function(Matrix[Double] X, Integer rank = 10, String reg = "L2", Double lambda = 0.000001,
   Integer maxi = 50, Boolean check = TRUE, Double thr = 0.0001, Boolean verbose = TRUE)
   return (Matrix[Double] U, Matrix[Double] V)
 {
   # Dispatcher only: selects one of two ALS implementations and forwards
   # the arguments to it; the factors U and V are whatever that call returns.
   #
   # alsDS is chosen only when the regularization is plain "L2" AND neither
   # dimension of X exceeds the size limit below. It is called without `reg`.
   # Every other case (any other `reg` value, or more than 10000 rows or
   # columns) goes to alsCG, the scalable variant for large problems.
   maxDim = 10000;
   if( reg == "L2" & nrow(X) <= maxDim & ncol(X) <= maxDim ) {
     [U, V] = alsDS(X=X, rank=rank, lambda=lambda, maxi=maxi, check=check,
       thr=thr, verbose=verbose);
   }
   else {
     [U, V] = alsCG(X=X, rank=rank, reg=reg, lambda=lambda, maxi=maxi,
       check=check, thr=thr, verbose=verbose);
   }
 }
	#-------------------------------------------------------------
	#
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	#
	#-------------------------------------------------------------

	#
	# This script computes an approximate factorization of a low-rank matrix X into two matrices U and V
	# using different implementations of the Alternating-Least-Squares (ALS) algorithm.
	# Matrices U and V are computed by minimizing a loss function (with regularization).
	#
	# INPUT PARAMETERS:
	# ---------------------------------------------------------------------------------------------
	# NAME TYPE DEFAULT MEANING
	# ---------------------------------------------------------------------------------------------
	# X Matrix --- Input matrix X to be factorized (passed as Matrix[Double])
	# rank Int 10 Rank of the factorization
	# reg String "L2" Regularization:
	# "L2" = L2 regularization;
	# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
	# + 0.5 * lambda * (sum (U ^ 2) + sum (V ^ 2))
	# "wL2" = weighted L2 regularization
	# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
	# + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros)
	# + sum (V ^ 2 * col_nonzeros))
	# lambda Double 0.000001 Regularization parameter, no regularization if 0.0
	# maxi Int 50 Maximum number of iterations
	# check Boolean TRUE Check for convergence after every iteration, i.e., updating U and V once
	# thr Double 0.0001 Assuming check is set to TRUE, the algorithm stops and convergence is declared
	# if the decrease in loss in any two consecutive iterations falls below this threshold;
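	# if check is FALSE thr is ignored
	# verbose Boolean TRUE Flag passed through unchanged to the selected ALS implementation
	# (presumably enables progress output; confirm against alsCG/alsDS)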
	# ---------------------------------------------------------------------------------------------
	# OUTPUT:
	# 1- An m x r matrix U, where r is the factorization rank
	# 2- An r x n matrix V

	# NOTE(review): this definition repeats the m_als defined earlier in this
	# file; it looks like a duplicated paste -- confirm which copy should stay.
	#
	# Entry point for ALS factorization: picks an implementation based on the
	# regularization type and the size of X, then forwards the arguments.
	# Returns the factors U and V produced by the selected implementation.
	m_als = function(Matrix[Double] X, Integer rank = 10, String reg = "L2", Double lambda = 0.000001,
	  Integer maxi = 50, Boolean check = TRUE, Double thr = 0.0001, Boolean verbose = TRUE)
	  return (Matrix[Double] U, Matrix[Double] V)
	{
	  N = 10000; # for large problems, use scalable alsCG
	  # Logical OR is a plain `|`; the previous `\|` was markdown escape
	  # residue and is not a valid operator.
	  if( reg != "L2" | nrow(X) > N | ncol(X) > N ) {
	    # Non-"L2" regularization, or more than N rows/columns: use alsCG.
	    [U, V] = alsCG(X=X, rank=rank, reg=reg, lambda=lambda,
	                   maxi=maxi, check=check, thr=thr, verbose=verbose);
	  }
	  else {
	    # "L2" regularization on a matrix within the size limit: use alsDS,
	    # which is called without a `reg` argument.
	    [U, V] = alsDS(X=X, rank=rank, lambda=lambda, maxi=maxi,
	                   check=check, thr=thr, verbose=verbose);
	  }
	}