scripts/algorithms/l2-svm-predict.dml - systemds - Git at Google

 #-------------------------------------------------------------
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 #
 #-------------------------------------------------------------

 # This script can be used to compute label predictions
 # Meant for use with a model learnt using l2-svm.dml
 #
 # Given ground truth labels, the script will compute an
 # accuracy (%) for the predictions
 #
 # INPUT PARAMETERS:
 # ---------------------------------------------------------------------------------------------
 # NAME      TYPE        DEFAULT     MEANING
 # ---------------------------------------------------------------------------------------------
 # X         String      ---         Location to read the matrix X of feature vectors
 # model     String      ---         Location of the existing model generated by l2-svm
 # fmt       String      "text"      Format used when writing the scores, such as "text" or "csv"
 # Y         String      ---         [OPTIONAL] Location to read the true label matrix Y. Only needed
 #                                   for evaluating performance (accuracy, confusion) of the model.
 # confusion String      ---         [OPTIONAL] Location to write confusion matrix, valid if Y supplied
 # accuracy  String      ---         [OPTIONAL] Location to write the accuracy string ("Accuracy (%): ..."), valid if Y supplied
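 # scores    String      ---         [OPTIONAL] Location to write model predictions
 # scoring_only Boolean  FALSE       [OPTIONAL] If TRUE, only scores are computed; Y is not read and
 #                                   no accuracy or confusion matrix is produced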
 # ---------------------------------------------------------------------------------------------
 #
 # Example Usage:
 # hadoop jar SystemDS.jar -f l2-svm-predict.dml -nvargs X=data Y=labels model=model scores=scores accuracy=accuracy confusion=confusion fmt="text"
 #
 # Note about inputs:
 # labels (entries in Y) should either be set to +1/-1
 # or be the result of recoding
 # anything else may prompt an error message from this script

 # Optional named arguments; a single blank (" ") marks "not supplied".
 cmdLine_Y = ifdef($Y, " ")
 cmdLine_confusion = ifdef($confusion, " ")
 cmdLine_accuracy = ifdef($accuracy, " ")
 cmdLine_scores = ifdef($scores, " ")
 cmdLine_scoring_only = ifdef($scoring_only, FALSE)
 cmdLine_fmt = ifdef($fmt, "text")

 X = read($X)
 w = read($model)

 # The last four rows of the model column hold metadata, read bottom-up:
 # feature count, intercept flag, negative label, positive label.
 dimensions = as.scalar(w[nrow(w),1])
 if(dimensions != ncol(X)){
   stop("Stopping due to invalid input: Model dimensions do not seem to match input data dimensions")
 }

 intercept = as.scalar(w[nrow(w)-1,1])
 negative_label = as.scalar(w[nrow(w)-2,1])
 positive_label = as.scalar(w[nrow(w)-3,1])

 # Strip the metadata rows; what remains are the weights (plus the bias
 # term as the last row when the model was trained with an intercept).
 w = w[1:(nrow(w)-4),]

 b = 0.0
 if(intercept == 1){
   b = as.scalar(w[nrow(w),1])
 }

 # Raw decision values: one score per row of X.
 scores = b + (X %*% w[1:ncol(X),])

 if(cmdLine_scores != " "){
   write(scores, cmdLine_scores, format=cmdLine_fmt)
 }

 # Evaluate against ground truth only when labels were actually supplied.
 # Y is an optional argument, so without this guard a run that omits Y
 # (and does not set scoring_only) would try to read the path " ".
 if((!cmdLine_scoring_only) & (cmdLine_Y != " ")){
   Y = read(cmdLine_Y)

   # pred is a 0/1 indicator of "predicted positive" (score >= 0).
   pred = (scores >= 0)
   pred_labels = pred*positive_label + (1-pred)*negative_label
   num_correct = sum(pred_labels == Y)
   acc = 100*num_correct/nrow(X)

   acc_str = "Accuracy (%): " + acc
   print(acc_str)

   if(cmdLine_accuracy != " "){
     write(acc_str, cmdLine_accuracy)
   }

   if(cmdLine_confusion != " "){

     pred_is_plus = pred
     pred_is_minus = 1 - pred

     # Compare Y with the stored negative label directly. Rescaling Y to
     # +1/-1 with floating-point arithmetic and then testing for -1 can
     # miss by a rounding error for some label pairs, which would count
     # negative rows as positive.
     y_is_minus = (Y == negative_label)
     y_is_plus = 1 - y_is_minus

     check_min_y_minus = sum(pred_is_minus*y_is_minus)
     check_min_y_plus = sum(pred_is_minus*y_is_plus)
     check_max_y_minus = sum(pred_is_plus*y_is_minus)
     check_max_y_plus = sum(pred_is_plus*y_is_plus)

     # Rows: predicted (negative, positive); columns: actual (negative, positive).
     confusion_mat = matrix(0, rows=2, cols=2)
     confusion_mat[1,1] = check_min_y_minus
     confusion_mat[1,2] = check_min_y_plus
     confusion_mat[2,1] = check_max_y_minus
     confusion_mat[2,2] = check_max_y_plus

     write(confusion_mat, cmdLine_confusion, format="csv")
   }
 }
	#-------------------------------------------------------------
	#
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	#
	#-------------------------------------------------------------

	# This script can be used to compute label predictions
	# Meant for use with a model learnt using l2-svm.dml
	#
	# Given ground truth labels, the script will compute an
	# accuracy (%) for the predictions
	#
	# INPUT PARAMETERS:
	# ---------------------------------------------------------------------------------------------
	# NAME TYPE DEFAULT MEANING
	# ---------------------------------------------------------------------------------------------
	# X String --- Location to read the matrix X of feature vectors
	# model String --- Location of the existing model generated by l2-svm
	# fmt String "text" Format used when writing the scores, such as "text" or "csv"
	# Y String --- [OPTIONAL] Location to read the true label matrix Y. Only needed
	# for evaluating performance (accuracy, confusion) of the model.
	# confusion String --- [OPTIONAL] Location to write confusion matrix, valid if Y supplied
	# accuracy String --- [OPTIONAL] Location to write the accuracy string ("Accuracy (%): ..."), valid if Y supplied
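	# scores String --- [OPTIONAL] Location to write model predictions
	# scoring_only Boolean FALSE [OPTIONAL] If TRUE, only scores are computed; Y is not read and
	# no accuracy or confusion matrix is produced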
	# ---------------------------------------------------------------------------------------------
	#
	# Example Usage:
	# hadoop jar SystemDS.jar -f l2-svm-predict.dml -nvargs X=data Y=labels model=model scores=scores accuracy=accuracy confusion=confusion fmt="text"
	#
	# Note about inputs:
	# labels (entries in Y) should either be set to +1/-1
	# or be the result of recoding
	# anything else may prompt an error message from this script

	# Optional named arguments; a single blank (" ") marks "not supplied".
	cmdLine_Y = ifdef($Y, " ")
	cmdLine_confusion = ifdef($confusion, " ")
	cmdLine_accuracy = ifdef($accuracy, " ")
	cmdLine_scores = ifdef($scores, " ")
	cmdLine_scoring_only = ifdef($scoring_only, FALSE)
	cmdLine_fmt = ifdef($fmt, "text")

	X = read($X)
	w = read($model)

	# The last four rows of the model column hold metadata, read bottom-up:
	# feature count, intercept flag, negative label, positive label.
	dimensions = as.scalar(w[nrow(w),1])
	if(dimensions != ncol(X)){
	  stop("Stopping due to invalid input: Model dimensions do not seem to match input data dimensions")
	}

	intercept = as.scalar(w[nrow(w)-1,1])
	negative_label = as.scalar(w[nrow(w)-2,1])
	positive_label = as.scalar(w[nrow(w)-3,1])

	# Strip the metadata rows; what remains are the weights (plus the bias
	# term as the last row when the model was trained with an intercept).
	w = w[1:(nrow(w)-4),]

	b = 0.0
	if(intercept == 1){
	  b = as.scalar(w[nrow(w),1])
	}

	# Raw decision values: one score per row of X.
	scores = b + (X %*% w[1:ncol(X),])

	if(cmdLine_scores != " "){
	  write(scores, cmdLine_scores, format=cmdLine_fmt)
	}

	# Evaluate against ground truth only when labels were actually supplied.
	# Y is an optional argument, so without this guard a run that omits Y
	# (and does not set scoring_only) would try to read the path " ".
	if((!cmdLine_scoring_only) & (cmdLine_Y != " ")){
	  Y = read(cmdLine_Y)

	  # pred is a 0/1 indicator of "predicted positive" (score >= 0).
	  pred = (scores >= 0)
	  # Multiplication operators restored; they were missing in this copy.
	  pred_labels = pred*positive_label + (1-pred)*negative_label
	  num_correct = sum(pred_labels == Y)
	  acc = 100*num_correct/nrow(X)

	  acc_str = "Accuracy (%): " + acc
	  print(acc_str)

	  if(cmdLine_accuracy != " "){
	    write(acc_str, cmdLine_accuracy)
	  }

	  if(cmdLine_confusion != " "){

	    pred_is_plus = pred
	    pred_is_minus = 1 - pred

	    # Compare Y with the stored negative label directly. Rescaling Y to
	    # +1/-1 with floating-point arithmetic and then testing for -1 can
	    # miss by a rounding error for some label pairs, which would count
	    # negative rows as positive.
	    y_is_minus = (Y == negative_label)
	    y_is_plus = 1 - y_is_minus

	    check_min_y_minus = sum(pred_is_minus*y_is_minus)
	    check_min_y_plus = sum(pred_is_minus*y_is_plus)
	    check_max_y_minus = sum(pred_is_plus*y_is_minus)
	    check_max_y_plus = sum(pred_is_plus*y_is_plus)

	    # Rows: predicted (negative, positive); columns: actual (negative, positive).
	    confusion_mat = matrix(0, rows=2, cols=2)
	    confusion_mat[1,1] = check_min_y_minus
	    confusion_mat[1,2] = check_min_y_plus
	    confusion_mat[2,1] = check_max_y_minus
	    confusion_mat[2,2] = check_max_y_plus

	    write(confusion_mat, cmdLine_confusion, format="csv")
	  }
	}