| #------------------------------------------------------------- |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| #------------------------------------------------------------- |
| |
| |
# Builtin function implementing a binary-class SVM with squared slack variables
| # |
| # INPUT PARAMETERS: |
| # --------------------------------------------------------------------------------------------- |
| # NAME TYPE DEFAULT MEANING |
| # --------------------------------------------------------------------------------------------- |
| # X Double --- matrix X of feature vectors |
# Y               Double   ---       matrix Y of class labels; must be a single column
| # intercept Boolean False No Intercept ( If set to TRUE then a constant bias column is added to X) |
| # epsilon Double 0.001 Procedure terminates early if the reduction in objective function |
| # value is less than epsilon (tolerance) times the initial objective function value. |
| # lambda Double 1.0 Regularization parameter (lambda) for L2 regularization |
# maxIterations   Int      100       Maximum number of conjugate gradient iterations
| # verbose Boolean False Set to true if one wants print statements updating on loss. |
# columnId        Int      -1        The column id used if one wants to add an ID to the print statement,
#                                    specifically useful when L2SVM is used in MSVM.
| # --------------------------------------------------------------------------------------------- |
| |
| |
| #Output(s) |
| # --------------------------------------------------------------------------------------------- |
| # NAME TYPE DEFAULT MEANING |
| # --------------------------------------------------------------------------------------------- |
| # model Double --- model matrix |
| |
| |
m_l2svm = function(Matrix[Double] X, Matrix[Double] Y, Boolean intercept = FALSE,
    Double epsilon = 0.001, Double lambda = 1, Integer maxIterations = 100,
    Boolean verbose = FALSE, Integer columnId = -1)
  return(Matrix[Double] model)
{
  # Trains a binary-class L2-SVM with squared slack variables using a
  # nonlinear conjugate-gradient solver with an exact Newton line search.
  # Inputs:
  #   X             - feature matrix, one example per row
  #   Y             - single-column label matrix; labels are rescaled to -1/+1
  #   intercept     - if TRUE a constant bias column is appended to X
  #   epsilon       - relative convergence tolerance on the objective
  #   lambda        - L2 regularization constant
  #   maxIterations - maximum number of conjugate gradient iterations
  #   verbose       - if TRUE, print per-iteration objective values
  #   columnId      - column id shown in verbose output when called from MSVM; -1 if standalone
  # Output:
  #   model - learned weight vector (ncol(X) rows, plus one if intercept)

  #check input parameter assertions
  if(nrow(X) < 2)
    stop("L2SVM: Stopping due to invalid inputs: Not possible to learn a binary class classifier without at least 2 rows")
  if(epsilon < 0)
    stop("L2SVM: Stopping due to invalid argument: Tolerance (tol) must be non-negative")
  if(lambda < 0)
    stop("L2SVM: Stopping due to invalid argument: Regularization constant (reg) must be non-negative")
  if(maxIterations < 1)
    stop("L2SVM: Stopping due to invalid argument: Maximum iterations should be a positive integer")
  # BUG FIX: was 'ncol(Y) < 1', which can never be true for a matrix and made
  # this check dead code; the intent (per the message) is to reject
  # multi-column label matrices.
  if(ncol(Y) > 1)
    stop("L2SVM: Stopping due to invalid multiple label columns, maybe use MSVM instead?")

  #check input labels and transform into -1/+1 encoding
  check_min = min(Y)
  check_max = max(Y)

  # a single distinct label cannot be rescaled (the scaling below would divide
  # by zero) and no binary classifier can be learned from it
  if(check_min == check_max)
    stop("L2SVM: Stopping due to invalid inputs: Y contains only a single class label")

  num_min = sum(Y == check_min)
  num_max = sum(Y == check_max)

  # TODO make this a stop condition for l2svm instead of just printing.
  if(num_min + num_max != nrow(Y))
    print("L2SVM: WARNING invalid number of labels in Y: "+num_min+" "+num_max)

  # Scale inputs to -1 for negative, and 1 for positive classification
  if(check_min != -1 | check_max != +1)
    Y = 2/(check_max - check_min)*Y - (check_min + check_max)/(check_max - check_min)

  # When columnId != -1 this call comes from MSVM (which reports per-column
  # progress itself), so print the banner only when running standalone.
  if(verbose & columnId == -1)
    print('Running L2-SVM ')

  num_samples = nrow(X)

  # Add bias: append a constant column of ones so the intercept is learned
  # as an extra weight.
  num_rows_in_w = ncol(X)
  if (intercept) {
    ones = matrix(1, rows=num_samples, cols=1)
    X = cbind(X, ones);
    num_rows_in_w += 1
  }

  w = matrix(0, rows=num_rows_in_w, cols=1)

  g_old = t(X) %*% Y  # gradient of the objective at w = 0
  s = g_old           # initial conjugate-gradient search direction

  Xw = matrix(0, rows=nrow(X), cols=1)  # cached X %*% w, updated incrementally

  iter = 0
  continue = TRUE
  while(continue & iter < maxIterations) {
    # minimizing primal obj along direction s via Newton iterations on step_sz
    step_sz = 0
    Xd = X %*% s
    wd = lambda * sum(w * s)
    dd = lambda * sum(s * s)
    continue1 = TRUE
    while(continue1){
      tmp_Xw = Xw + step_sz*Xd
      out = 1 - Y * (tmp_Xw)
      sv = (out > 0)   # indicator of margin violators (active slack terms)
      out = out * sv
      g = wd + step_sz*dd - sum(out * Y * Xd)   # directional derivative
      h = dd + sum(Xd * sv * Xd)                # directional second derivative
      step_sz = step_sz - g/h
      continue1 = (g*g/h >= epsilon)
    }

    #update weights
    w = w + step_sz*s
    Xw = Xw + step_sz*Xd

    # primal objective: 0.5 * sum(squared hinge loss) + (lambda/2) * ||w||^2
    out = 1 - Y * Xw
    sv = (out > 0)
    out = sv * out
    obj = 0.5 * sum(out * out) + lambda/2 * sum(w * w)
    g_new = t(X) %*% (out * Y) - lambda * w

    if(verbose) {
      colstr = ifelse(columnId!=-1, ", Col:"+columnId + " ,", " ,")
      print("Iter:" + toString(iter) + colstr + " Obj:" + obj)
    }

    # stop when the achieved decrease along s falls below epsilon times the
    # objective, or when the search direction has collapsed to zero
    tmp = sum(s * g_old)
    continue = (step_sz*tmp >= epsilon*obj & sum(s^2) != 0);

    #non-linear CG step (Fletcher-Reeves update of the search direction)
    be = sum(g_new * g_new)/sum(g_old * g_old)
    s = be * s + g_new
    g_old = g_new

    iter = iter + 1
  }
  model = w
}