blob: de6c9ce17cde6f628886b6ca0d5d4a0ab5943a3c [file] [log] [blame]
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
# This script computes the top-K rating/scores for a given list of userIDs
# using 2 factor matrices L and R. We assume that all users have rated
# at least once and all items have been rated at least once.
#
# INPUT PARAMETERS:
# ---------------------------------------------------------------------------------------------
# NAME TYPE DEFAULT MEANING
# ---------------------------------------------------------------------------------------------
# userIDs Matrix --- Column vector of user-ids (n x 1)
# I Matrix --- Indicator matrix user-id x user-id to exclude from scoring
# L Matrix --- The factor matrix L: user-id x feature-id
# R Matrix --- The factor matrix R: feature-id x item-id
# K Int 5 The number of top-K items
# ---------------------------------------------------------------------------------------------
# OUTPUT:
# TopIxs Matrix --- A matrix containing the top-K item-ids with highest predicted ratings
# for the specified users (rows)
# TopVals Matrix --- A matrix containing the top-K predicted ratings for the specified users (rows)
m_alsTopkPredict = function(Matrix[Double] userIDs, Matrix[Double] I, Matrix[Double] L, Matrix[Double] R, Integer K = 5)
  return (Matrix[Double] TopIxs, Matrix[Double] TopVals)
{
  # Computes, for every requested user, the K items with the highest predicted
  # score (TopIxs) together with those scores (TopVals). The score matrix is
  # obtained from alsPredict; its exact filtering semantics w.r.t. I are
  # defined there.

  # columns of R that are entirely zero cannot yield a prediction, so K is
  # capped at the number of non-empty item columns
  zero_cols_ind = (colSums (R != 0)) == 0;
  K = min (ncol(R) - sum (zero_cols_ind), K);

  # predicted scores, one row per entry in userIDs
  Y = alsPredict(userIDs=userIDs, I=I, L=L, R=R)

  # stores sorted items for selected users
  TopIxs = matrix(0, rows = nrow (userIDs), cols = K);
  TopVals = matrix(0, rows = nrow (userIDs), cols = K);

  # uses rowIndexMax/rowMaxs to extract the k-th best rating for all users
  # (assumes no duplicates)
  # (alternatively, we could sort the scores per user, but likely nrow(userIDs)>>K)
  for (i in 1:K) {
    maxIx = rowIndexMax(Y);
    TopIxs[,i] = maxIx;
    TopVals[,i] = rowMaxs(Y);
    # table(...) builds a one-hot matrix marking each row's current maximum;
    # comparing with 0 inverts it, so the multiplication zeroes out exactly the
    # entry just selected and the next iteration finds the next-best item
    Y = Y * (table(seq(1,nrow(Y)), maxIx, nrow(Y), ncol(Y)) == 0);
  }

  # post-processing to handle edge cases: slots whose score is not positive
  # (e.g., fewer than K scorable items for a user) get item-id 0
  TopIxs = TopIxs * (TopVals > 0);
}