| #------------------------------------------------------------- |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| #------------------------------------------------------------- |
| |
| # This script can be used to compute label predictions |
| # Meant for use with a model learnt using l2-svm.dml |
| # |
| # Given ground truth labels, the script will compute an |
| # accuracy (%) for the predictions |
| # |
| # INPUT PARAMETERS: |
| # --------------------------------------------------------------------------------------------- |
| # NAME TYPE DEFAULT MEANING |
| # --------------------------------------------------------------------------------------------- |
| # X String --- Location to read the matrix X of feature vectors |
| # model String --- Location of the existing model generated by l2-svm |
| # fmt String "text" Format used when writing the scores, such as "text" or "csv" |
| # Y String --- [OPTIONAL] Location to read the true label matrix Y. Only needed |
| # for evaluating performance (accuracy, confusion) of the model. |
| # confusion String --- [OPTIONAL] Location to write confusion matrix, valid if Y supplied |
| # accuracy String --- [OPTIONAL] Location to write accuracy matrix, valid if Y supplied |
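| # scores String --- [OPTIONAL] Location to write the model's raw scores (X %*% w + bias) |
| # scoring_only Boolean FALSE [OPTIONAL] If TRUE, only the scores are computed and the evaluation against Y is skipped |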
| # --------------------------------------------------------------------------------------------- |
| # |
| # Example Usage: |
| # hadoop jar SystemDS.jar -f l2-svm-predict.dml -nvargs X=data Y=labels model=model scores=scores accuracy=accuracy confusion=confusion fmt="text" |
| # |
| # Note about inputs: |
| # labels (entries in Y) should either be set to +1/-1 |
| # or be the result of recoding |
| # anything else may prompt an error message from this script |
| |
| # Optional named arguments and their defaults. |
| # A single blank (" ") is the placeholder for "location not supplied"; the |
| # optional outputs below are only written when their location differs from it. |
| |
| # behaviour switches |
| cmdLine_fmt = ifdef($fmt, "text") |
| cmdLine_scoring_only = ifdef($scoring_only, FALSE) |
| |
| # input / output locations |
| cmdLine_scores = ifdef($scores, " ") |
| cmdLine_Y = ifdef($Y, " ") |
| cmdLine_accuracy = ifdef($accuracy, " ") |
| cmdLine_confusion = ifdef($confusion, " ") |
| |
| # Load the feature matrix and the stored model column. |
| X = read($X) |
| w = read($model) |
| |
| # The last four rows of the model hold metadata, read here from the bottom up: |
| #   last row     -> model dimensions (must equal ncol(X)) |
| #   last row - 1 -> intercept flag (1 means a bias value is stored with the weights) |
| #   last row - 2 -> label of the negative class |
| #   last row - 3 -> label of the positive class |
| model_rows = nrow(w) |
| |
| num_features = as.scalar(w[model_rows,1]) |
| if(num_features != ncol(X)){ |
|     stop("Stopping due to invalid input: Model dimensions do not seem to match input data dimensions") |
| } |
| |
| intercept = as.scalar(w[model_rows-1,1]) |
| negative_label = as.scalar(w[model_rows-2,1]) |
| positive_label = as.scalar(w[model_rows-3,1]) |
| |
| # Drop the metadata rows; what remains are the weights, followed by the bias |
| # value when the intercept flag is set. |
| w = w[1:(model_rows-4),] |
| |
| bias = 0.0 |
| if(intercept == 1){ |
|     bias = as.scalar(w[nrow(w),1]) |
| } |
| |
| # Raw score per row of X: feature weights applied to X, shifted by the bias. |
| scores = (X %*% w[1:ncol(X),]) + bias |
| |
| # Persist the scores only when an output location was given. |
| if(cmdLine_scores != " "){ |
|     write(scores, cmdLine_scores, format=cmdLine_fmt) |
| } |
| |
| # Evaluate the predictions against the ground-truth labels. |
| # Skipped entirely when scoring_only is set. Y is an optional argument, so when |
| # no label location was supplied the evaluation is skipped with a message |
| # instead of attempting to read the " " placeholder path. |
| if(!cmdLine_scoring_only){ |
|     if(cmdLine_Y == " "){ |
|         print("No label matrix Y supplied: skipping accuracy and confusion matrix computation") |
|     } |
|     else { |
|         Y = read(cmdLine_Y) |
| |
|         # pred is a 0/1 indicator: 1 where the score is >= 0 (positive class), |
|         # 0 otherwise (negative class). |
|         pred = (scores >= 0) |
| |
|         # Map the indicator to the label values stored in the model so that the |
|         # predictions can be compared directly with the entries of Y. |
|         pred_labels = pred*positive_label + (1-pred)*negative_label |
|         num_correct = sum(pred_labels == Y) |
|         acc = 100*num_correct/nrow(X) |
| |
|         acc_str = "Accuracy (%): " + acc |
|         print(acc_str) |
| |
|         if(cmdLine_accuracy != " "){ |
|             write(acc_str, cmdLine_accuracy) |
|         } |
| |
|         if(cmdLine_confusion != " "){ |
|             # Class membership is decided by comparing Y with the model's negative |
|             # label directly. Rescaling Y to -1/+1 first and then testing for an |
|             # exact -1 is fragile: for arbitrary label pairs the floating-point |
|             # rescaling need not produce exactly -1. |
|             # As before, every entry of Y that is not the negative label is |
|             # counted as belonging to the positive class. |
|             pred_is_plus = pred |
|             pred_is_minus = 1 - pred |
|             y_is_minus = (Y == negative_label) |
|             y_is_plus = 1 - y_is_minus |
| |
|             true_neg = sum(pred_is_minus*y_is_minus) |
|             false_neg = sum(pred_is_minus*y_is_plus) |
|             false_pos = sum(pred_is_plus*y_is_minus) |
|             true_pos = sum(pred_is_plus*y_is_plus) |
| |
|             # Layout: rows = predicted class (negative, positive), |
|             #         columns = true class (negative, positive). |
|             confusion_mat = matrix(0, rows=2, cols=2) |
|             confusion_mat[1,1] = true_neg |
|             confusion_mat[1,2] = false_neg |
|             confusion_mat[2,1] = false_pos |
|             confusion_mat[2,2] = true_pos |
| |
|             # The confusion matrix is always written as csv, independent of fmt. |
|             write(confusion_mat, cmdLine_confusion, format="csv") |
|         } |
|     } |
| } |
| |