blob: 8edae6ad5c86e635105dd3acb3a328371fdd9db2 [file] [log] [blame]
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
# JUnit test class: dml.test.integration.applications.LinearLogReg.java
# command line invocation assuming $LLR_HOME is set to the home of the R script
# Rscript $LLR_HOME/LinearLogReg.R $LLR_HOME/in/ $LLR_HOME/expected/
args <- commandArgs(TRUE)
library("Matrix")
# Usage: /home/vikas/R-2.10.1/bin/R --vanilla --args Xfile X yfile y Cval 2 tol 0.01 maxiter 100 < linearLogReg.r
# Solves Linear Logistic Regression using Trust Region methods.
# Can be adapted for L2-SVMs and more general unconstrained optimization problems also
# setup optimization parameters (See: Trust Region Newton Method for Logistic Regression, Lin, Weng and Keerthi, JMLR 9 (2008) 627-650)
options(warn=-1)
C = 2;
tol = 0.001
maxiter = 3
maxinneriter = 3
eta0 = 0.0001
eta1 = 0.25
eta2 = 0.75
sigma1 = 0.25
sigma2 = 0.5
sigma3 = 4.0
psi = 0.1
# read (training and test) data files -- should be in matrix market format. see data.mtx
X = readMM(paste(args[1], "X.mtx", sep=""));
Xt = readMM(paste(args[1], "Xt.mtx", sep=""));
N = nrow(X)
D = ncol(X)
Nt = nrow(Xt)
# read (training and test) labels
y = readMM(paste(args[1], "y.mtx", sep=""));
yt = readMM(paste(args[1], "yt.mtx", sep=""));
# initialize w
w = matrix(0,D,1)
o = X %*% w
logistic = 1.0/(1.0 + exp(-y*o))
# VS : change
obj = 0.5 * t(w) %*% w + C*sum(-log(logistic))
grad = w + C*t(X) %*% ((logistic - 1)*y)
logisticD = logistic*(1-logistic)
delta = sqrt(sum(grad*grad))
# number of iterations
iter = 0
# starting point for CG
zeros_D = matrix(0,D,1)
# VS: change
zeros_N = matrix(0,N,1)
# boolean for convergence check
converge = (delta < tol)
norm_r2 = sum(grad*grad)
gnorm = sqrt(norm_r2)
# VS: change
norm_grad = sqrt(norm_r2)
norm_grad_initial = norm_grad
while(!converge) {
norm_grad = sqrt(sum(grad*grad))
print("next iteration..")
print(paste("Iterations : ",iter, "Objective : ", obj[1,1], "Gradient Norm : ", norm_grad))
# SOLVE TRUST REGION SUB-PROBLEM
s = zeros_D
os = zeros_N
r = -grad
d = r
innerconverge = (sqrt(sum(r*r)) <= psi*norm_grad)
inneriter = 0;
while(!innerconverge) {
inneriter = inneriter + 1
norm_r2 = sum(r*r)
od = X %*% d
Hd = d + C*(t(X) %*% (logisticD*od))
alpha_deno = t(d) %*% Hd
alpha = norm_r2/alpha_deno
s = s + alpha[1,1]*d
os = os + alpha[1,1]*od
sts = t(s) %*% s
delta2 = delta*delta
if (sts[1,1] > delta2) {
# VS: change
print("cg reaches trust region boundary")
# VS: change
s = s - alpha[1,1]*d
os = os - alpha[1,1]*od
std = t(s) %*% d
dtd = t(d) %*% d
# VS:change
sts = t(s) %*% s
rad = sqrt(std*std + dtd*(delta2 - sts))
if(std[1,1]>=0) {
tau = (delta2 - sts)/(std + rad)
}
else {
tau = (rad - std)/dtd
}
s = s + tau[1,1]*d
os = os + tau[1,1]*od
r = r - tau[1,1]*Hd
break
}
r = r - alpha[1,1]*Hd
old_norm_r2 = norm_r2
norm_r2 = sum(r*r)
beta = norm_r2/old_norm_r2
d = r + beta*d
innerconverge = (sqrt(norm_r2) <= psi * norm_grad) | (inneriter > maxinneriter) # innerconverge = (sqrt(norm_r2) <= psi*norm_grad)
}
print(paste("Inner CG Iteration = ", inneriter))
# END TRUST REGION SUB-PROBLEM
# compute rho, update w, obtain delta
gs = t(s) %*% grad
qk = -0.5*(gs - (t(s) %*% r))
wnew = w + s
# VS Change X %*% wnew removed
onew = o + os
# VS: change
logisticnew = 1.0/(1.0 + exp(-y*onew))
objnew = 0.5 * t(wnew) %*% wnew + C*sum(-log(logisticnew))
# VS: change
actred = (obj - objnew)
rho = actred/qk
print(paste("Actual :", actred[1,1], "Predicted :", qk[1,1]))
rho = rho[1,1]
snorm = sqrt(sum(s*s))
if(iter==0) {
delta = min(delta, snorm)
}
if (objnew[1,1] - obj[1,1] - gs[1,1] <= 0) {
alpha = sigma3;
}
else {
alpha = max(sigma1, -0.5*gs[1,1]/(objnew[1,1] - obj[1,1] - gs[1,1]))
}
if (rho > eta0) {
w = wnew
o = onew
grad = w + C*t(X) %*% ((logisticnew - 1)*y)
# VS: change
norm_grad = sqrt(sum(grad*grad))
logisticD = logisticnew*(1-logisticnew)
obj = objnew
}
if (rho < eta0)
{delta = min(max(alpha, sigma1)*snorm, sigma2*delta)}
else if (rho < eta1)
{delta = max(sigma1*delta, min(alpha*snorm, sigma2*delta))}
else if (rho < eta2)
{delta = max(sigma1*delta, min(alpha*snorm, sigma3*delta))}
else
{delta = max(delta, min(alpha*snorm, sigma3*delta))}
ot = Xt %*% w
correct = sum((yt*ot)>0)
iter = iter + 1
converge = (norm_grad < tol*norm_grad_initial) | (iter>maxiter)
print(paste("Delta :", delta))
print(paste("Accuracy=", correct*100/Nt))
print(paste("OuterIter=", iter))
print(paste("Converge=", converge))
}
writeMM(as(w,"CsparseMatrix"), paste(args[2],"w", sep=""), format = "text")