| #------------------------------------------------------------- |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| #------------------------------------------------------------- |
| |
| # THIS SCRIPT COMPUTES THE RATING/SCORE FOR A GIVEN LIST OF PAIRS: (USER-ID, ITEM-ID) USING 2 FACTOR MATRICES L AND R |
| # WE ASSUME THAT ALL USERS HAVE RATED AT LEAST ONCE AND ALL ITEMS HAVE BEEN RATED AT LEAST ONCE. |
| # INPUT PARAMETERS: |
| # --------------------------------------------------------------------------------------------- |
| # NAME TYPE DEFAULT MEANING |
| # --------------------------------------------------------------------------------------------- |
| # X String --- The input user-id/item-id list |
| # Y String --- The output user-id/item-id/score |
| # L String --- Location of the factor matrix L: user-id x feature-id |
| # R String --- Location of the factor matrix R: feature-id x item-id |
| # Vrows Integer --- The number of rows in the original matrix |
| # Vcols Integer --- The number of columns in the original matrix |
| # fmt String "text" The output format of the prediction matrix Y (user-id/item-id/score) |
| # --------------------------------------------------------------------------------------------- |
| # OUTPUT: Matrix Y containing the predicted ratings for users and items specified in input matrix X |
| # |
| # HOW TO INVOKE THIS SCRIPT - EXAMPLE: |
| # hadoop jar SystemDS.jar -f ALS-predict.dml -nvargs Vrows=100000 Vcols=10000 \ |
| # X=INPUT_DIR/X L=OUTPUT_DIR/L R=OUTPUT_DIR/R Y=OUTPUT_DIR/Y fmt=csv |
| |
| # Bind the named command-line arguments; only fmt is optional and defaults to "text". |
| fileX = $X; |
| fileY = $Y; |
| fileL = $L; |
| fileR = $R; |
| Vrows = $Vrows; |
| Vcols = $Vcols; |
| fmtO = ifdef ($fmt, "text"); |
| |
| # Load the (user-id, item-id) request list and the two factor matrices. |
| X = read (fileX); |
| L = read (fileL); |
| R = read (fileR); |
| |
| # we assume that the number of columns in X is equal to 2: user-id and item-id |
| n = nrow (X); |
| m = ncol (X); |
| |
| if (m != 2) { |
|   stop("The input matrix must have 2 columns: user-id and item-id"); |
| } |
| |
| Lrows = nrow (L); |
| Rcols = ncol (R); |
| |
| X_user_min = min (X[,1]); |
| X_item_min = min (X[,2]); |
| X_user_max = max (X[,1]); |
| X_item_max = max (X[,2]); |
| |
| # Ids are used as 1-based row/column positions of the contingency table below, |
| # so report ids smaller than 1 explicitly before building that table. |
| if (X_user_min < 1 | X_item_min < 1) { |
|   stop ("Predictions cannot be provided. User-ids and item-ids must be greater than or equal to 1."); |
| } |
| if (X_user_max > Vrows | X_item_max > Vcols ) { |
|   stop ("Predictions cannot be provided. Maximum user-id (item-id) exceed the number of rows (columns) of V."); |
| } |
| if (Lrows != Vrows | Rcols != Vcols) { |
|   stop ("Predictions cannot be provided. Number of rows of L (columns of R) does not match the number of rows (column) of V."); |
| } |
| # L %*% R needs matching inner dimensions; report a mismatch explicitly. |
| if (ncol (L) != nrow (R)) { |
|   stop ("Predictions cannot be provided. Number of columns of L does not match the number of rows of R."); |
| } |
| |
| # user2item table |
| # table() counts occurrences, so a pair listed k times in X would yield a cell value of k |
| # and scale its predicted score by k. Comparing with 0 turns the counts into a 0/1 mask. |
| UI = (table (X[,1], X[,2], Vrows, Vcols) > 0); |
| |
| # Applying items filter |
| # Keep the predicted score L %*% R only at the requested (user-id, item-id) cells; |
| # all other cells of V_prime are 0. |
| V_prime = UI * (L %*% R); |
| |
| write(V_prime, fileY, format = fmtO); |