blob: f411fb9198d64306997a2f67e7ef5a41473d3c9a [file] [log] [blame]
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
# Builtin function Implements binary-class SVM with squared slack variables
#
# INPUT PARAMETERS:
# ---------------------------------------------------------------------------------------------
# NAME TYPE DEFAULT MEANING
# ---------------------------------------------------------------------------------------------
# X Double --- matrix X of feature vectors
# Y Double --- matrix Y of class labels have to be a single column
# intercept Boolean False No Intercept ( If set to TRUE then a constant bias column is added to X)
# epsilon Double 0.001 Procedure terminates early if the reduction in objective function
# value is less than epsilon (tolerance) times the initial objective function value.
# lambda Double 1.0 Regularization parameter (lambda) for L2 regularization
# maxiterations Int 100 Maximum number of conjugate gradient iterations
# verbose Boolean False Set to true if one wants print statements updating on loss.
# column_id Int -1 The column Id used if one wants to add a ID to the print statement,
# Specificly usefull when L2SVM is used in MSVM.
# ---------------------------------------------------------------------------------------------
#Output(s)
# ---------------------------------------------------------------------------------------------
# NAME TYPE DEFAULT MEANING
# ---------------------------------------------------------------------------------------------
# model Double --- model matrix
m_l2svm = function(Matrix[Double] X, Matrix[Double] Y, Boolean intercept = FALSE,
Double epsilon = 0.001, Double lambda = 1, Integer maxIterations = 100,
Boolean verbose = FALSE, Integer columnId = -1)
return(Matrix[Double] model)
{
#check input parameter assertions
if(nrow(X) < 2)
stop("L2SVM: Stopping due to invalid inputs: Not possible to learn a binary class classifier without at least 2 rows")
if(epsilon < 0)
stop("L2SVM: Stopping due to invalid argument: Tolerance (tol) must be non-negative")
if(lambda < 0)
stop("L2SVM: Stopping due to invalid argument: Regularization constant (reg) must be non-negative")
if(maxIterations < 1)
stop("L2SVM: Stopping due to invalid argument: Maximum iterations should be a positive integer")
if(ncol(Y) < 1)
stop("L2SVM: Stopping due to invalid multiple label columns, maybe use MSVM instead?")
#check input lables and transform into -1/1
check_min = min(Y)
check_max = max(Y)
num_min = sum(Y == check_min)
num_max = sum(Y == check_max)
# TODO make this a stop condition for l2svm instead of just printing.
if(num_min + num_max != nrow(Y))
print("L2SVM: WARNING invalid number of labels in Y: "+num_min+" "+num_max)
# Scale inputs to -1 for negative, and 1 for positive classification
if(check_min != -1 | check_max != +1)
Y = 2/(check_max - check_min)*Y - (check_min + check_max)/(check_max - check_min)
# If column_id is -1 then we assume that the fundamental algorithm is MSVM,
# Therefore don't print message.
if(verbose & columnId == -1)
print('Running L2-SVM ')
num_samples = nrow(X)
num_classes = ncol(Y)
# Add Bias
num_rows_in_w = ncol(X)
if (intercept) {
ones = matrix(1, rows=num_samples, cols=1)
X = cbind(X, ones);
num_rows_in_w += 1
}
w = matrix(0, rows=num_rows_in_w, cols=1)
g_old = t(X) %*% Y
s = g_old
Xw = matrix(0, rows=nrow(X), cols=1)
iter = 0
continue = TRUE
while(continue & iter < maxIterations) {
# minimizing primal obj along direction s
step_sz = 0
Xd = X %*% s
wd = lambda * sum(w * s)
dd = lambda * sum(s * s)
continue1 = TRUE
while(continue1){
tmp_Xw = Xw + step_sz*Xd
out = 1 - Y * (tmp_Xw)
sv = (out > 0)
out = out * sv
g = wd + step_sz*dd - sum(out * Y * Xd)
h = dd + sum(Xd * sv * Xd)
step_sz = step_sz - g/h
continue1 = (g*g/h >= epsilon)
}
#update weights
w = w + step_sz*s
Xw = Xw + step_sz*Xd
out = 1 - Y * Xw
sv = (out > 0)
out = sv * out
obj = 0.5 * sum(out * out) + lambda/2 * sum(w * w)
g_new = t(X) %*% (out * Y) - lambda * w
if(verbose) {
colstr = ifelse(columnId!=-1, ", Col:"+columnId + " ,", " ,")
print("Iter:" + toString(iter) + colstr + " Obj:" + obj)
}
tmp = sum(s * g_old)
continue = (step_sz*tmp >= epsilon*obj & sum(s^2) != 0);
#non-linear CG step
be = sum(g_new * g_new)/sum(g_old * g_old)
s = be * s + g_new
g_old = g_new
iter = iter + 1
}
model = w
}