blob: a8e6166ea97f1eba8f16d2dbc5773791a5319c4d [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Implements binary-class SVM with squared slack variables
# Example Usage:
# Assume L2SVM_HOME is set to the home of the dml script
# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
# Assume epsilon = 0.001, lambda = 1, maxiterations = 100
# INPUT PARAMETERS:
# ---------------------------------------------------------------------------------------------
# NAME TYPE DEFAULT MEANING
# ---------------------------------------------------------------------------------------------
# X String --- Location to read the matrix X of feature vectors
# Y String --- Location to read response matrix Y
# icpt Int 0 Intercept presence
# 0 = no intercept
# 1 = add intercept;
# tol Double 0.001 Tolerance (epsilon);
# reg Double 1.0 Regularization parameter (lambda) for L2 regularization
# maxiter Int 100 Maximum number of conjugate gradient iterations
# model String --- Location to write model
# fmt String "text" Format in which the model is written, such as "text" or "csv"
# Log String --- [OPTIONAL] Location to write the log file
# ---------------------------------------------------------------------------------------------
# hadoop jar SystemML.jar -f $L2SVM_HOME/l2-svm.dml -nvargs X=$INPUT_DIR/X Y=$INPUT_DIR/Y \
# icpt=0 tol=0.001 reg=1 maxiter=100 model=$OUTPUT_DIR/w Log=$OUTPUT_DIR/Log fmt="text"
# Note about inputs:
# Assumes that labels (entries in Y) are set to either -1 or +1 or non-negative integers
# Command-line arguments; every optional one falls back to its documented default.
maxiterations = ifdef($maxiter, 100)
lambda = ifdef($reg, 1.0)
epsilon = ifdef($tol, 0.001)
intercept = ifdef($icpt, 0)
fmt = ifdef($fmt, "text")

# Load the feature matrix and the response matrix.
X = read($X)
Y = read($Y)

# Validate inputs and arguments up front; the first violated rule stops the script.
if (nrow(X) < 2) {
    stop("Stopping due to invalid inputs: Not possible to learn a binary class classifier without at least 2 rows")
}
if (!(intercept == 0 | intercept == 1)) {
    stop("Stopping due to invalid argument: Currently supported intercept options are 0 and 1")
}
if (epsilon < 0) {
    stop("Stopping due to invalid argument: Tolerance (tol) must be non-negative")
}
if (lambda < 0) {
    stop("Stopping due to invalid argument: Regularization constant (reg) must be non-negative")
}
if (maxiterations < 1) {
    stop("Stopping due to invalid argument: Maximum iterations should be a positive integer")
}

# Validate the labels: Y must hold exactly two distinct values, which are
# identified by its smallest and largest entries.
y_lo = min(Y)
y_hi = max(Y)
if (y_lo == y_hi) {
    stop("Stopping due to invalid inputs: Y seems to contain exactly one label")
}
# Any entry that is neither the minimum nor the maximum is a third label.
if (sum(Y == y_lo) + sum(Y == y_hi) != nrow(Y)) {
    stop("Stopping due to invalid inputs: Y seems to contain more than 2 labels")
}

# Unless the labels are already -1/+1, map them linearly so that the smaller
# label becomes -1 and the larger label becomes +1.
if (!(y_lo == -1 & y_hi == 1)) {
    Y = 2/(y_hi - y_lo)*Y - (y_lo + y_hi)/(y_hi - y_lo)
}

# Keep the original label values; they are stored alongside the model later.
positive_label = y_hi
negative_label = y_lo
# Problem size. `dimensions` is the feature count of X before any intercept
# column is appended.
num_samples = nrow(X)
dimensions = ncol(X)
num_rows_in_w = dimensions

# With an intercept, append a constant column of ones to X so that the bias is
# learned as one additional weight.
if (intercept == 1) {
    ones = matrix(1, rows=num_samples, cols=1)
    X = cbind(X, ones);
    num_rows_in_w += 1
}

# Start from the all-zero weight vector; Xw caches X %*% w, which is therefore
# all zeros as well.
w = matrix(0, num_rows_in_w, 1)
Xw = matrix(0, rows=nrow(X), cols=1)
# Nonlinear conjugate-gradient minimisation of the primal objective
#   obj(w) = 0.5 * sum(max(0, 1 - Y * (X %*% w))^2) + lambda/2 * sum(w^2)
#
# g_old / g_new hold the NEGATIVE gradient of obj. At w = 0 every sample
# violates the margin, so the negative gradient reduces to t(X) %*% Y.
g_old = t(X) %*% Y
# The first search direction is the steepest-descent direction.
s = g_old

# Header line of the per-iteration log; one "iter,obj" entry is appended per iteration.
debug_str = "# Iter, Obj"
iter = 0
continue = TRUE
while(continue & iter < maxiterations) {
    # minimizing primal obj along direction s
    step_sz = 0
    # Quantities that stay fixed during the line search.
    Xd = X %*% s
    wd = lambda * sum(w * s)
    dd = lambda * sum(s * s)

    # One-dimensional Newton iteration on the step size: g and h are the first
    # and second derivative of obj(w + step_sz * s) with respect to step_sz.
    # NOTE(review): h can be 0 (e.g. s all zeros, or lambda = 0 with no margin
    # violations), which makes g/h undefined -- confirm whether this needs a guard.
    continue1 = TRUE
    while(continue1) {
        tmp_Xw = Xw + step_sz*Xd
        out = 1 - Y * tmp_Xw
        # sv flags the samples that currently violate the margin.
        sv = out > 0
        out = out * sv
        g = wd + step_sz*dd - sum(out * Y * Xd)
        h = dd + sum(Xd * sv * Xd)
        step_sz = step_sz - g/h
        # Repeat until g*g/h falls below 1e-10.
        continue1 = (g*g/h >= 0.0000000001);
    }

    #update weights
    w += step_sz * s
    Xw += step_sz * Xd

    # Recompute the hinge residuals, the objective and the negative gradient
    # at the new iterate.
    out = 1 - Y * Xw
    sv = out > 0
    out = sv * out
    obj = 0.5 * sum(out * out) + lambda/2 * sum(w * w)
    g_new = t(X) %*% (out * Y) - lambda * w

    print("ITER " + iter + ": OBJ=" + obj)
    debug_str = append(debug_str, iter + "," + obj)

    # Must be taken before s and g_old are overwritten below.
    tmp = sum(s * g_old)

    #non-linear CG step
    # Fletcher-Reeves style coefficient: ratio of squared gradient norms.
    be = sum(g_new * g_new)/sum(g_old * g_old)
    s = be * s + g_new
    g_old = g_new

    # Keep iterating while step_sz*tmp is still at least epsilon*obj and the
    # new search direction is not identically zero.
    continue = (step_sz*tmp >= epsilon*obj & sum(s^2) != 0);

    iter = iter + 1
}
# Append four metadata rows below the learned weights so the written model is
# self-describing: positive label, negative label, intercept flag, and the
# number of input features (excluding any intercept column).
extra_model_params = matrix(0, 4, 1)
extra_model_params[1,1] = positive_label
extra_model_params[2,1] = negative_label
extra_model_params[3,1] = intercept
extra_model_params[4,1] = dimensions

w = rbind(w, extra_model_params)
write(w, $model, format=fmt)

# Log is documented as optional, so read it through ifdef: a bare $Log
# reference requires the argument to be supplied. The default is the " "
# sentinel that the test below treats as "no log requested".
logFile = ifdef($Log, " ")
if (logFile != " ") {
    write(debug_str, logFile)
}