blob: e7ab63f06b20421c34c9d2fe4b32e0aee34d3f1f [file] [log] [blame]
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
# This script computes an approximate factorization of a low-rank matrix X into two matrices U and V
# using different implementations of the Alternating-Least-Squares (ALS) algorithm.
# Matrices U and V are computed by minimizing a loss function (with regularization).
#
# INPUT:
# -------------------------------------------------------------------------------------------
# X Location to read the input matrix X to be factorized
# rank Rank of the factorization
# regType Regularization:
# "L2" = L2 regularization;
# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
# + 0.5 * reg * (sum (U ^ 2) + sum (V ^ 2))
# "wL2" = weighted L2 regularization
# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
# + 0.5 * reg * (sum (U ^ 2 * row_nonzeros)
# + sum (V ^ 2 * col_nonzeros))
# reg Regularization parameter, no regularization if 0.0
# maxi Maximum number of iterations
# check Check for convergence after every iteration, i.e., updating U and V once
# thr Assuming check is set to TRUE, the algorithm stops and convergence is declared
# if the decrease in loss in any two consecutive iterations falls below this threshold;
# if check is FALSE thr is ignored
# seed The seed to random parts of the algorithm
# verbose If the algorithm should run verbosely
# -------------------------------------------------------------------------------------------
#
# OUTPUT:
# -------------------------------------------------------------------------------------------
# U An m x r matrix where r is the factorization rank
# V An m x r matrix where r is the factorization rank
# -------------------------------------------------------------------------------------------
m_als = function(Matrix[Double] X, Integer rank = 10, String regType = "L2", Double reg = 0.000001,
Integer maxi = 50, Boolean check = TRUE, Double thr = 0.0001, Integer seed = 1342516, Boolean verbose = TRUE)
return (Matrix[Double] U, Matrix[Double] V)
{
N = 10000; # for large problems, use scalable alsCG
if( reg != "L2" | nrow(X) > N | ncol(X) > N )
[U, V] = alsCG(X=X, rank=rank, regType=regType, reg=reg,
maxi=maxi, check=check, thr=thr, seed = seed, verbose=verbose);
else
[U, V] = alsDS(X=X, rank=rank, reg=reg, maxi=maxi,
check=check, thr=thr, seed =seed, verbose=verbose);
}