blob: 3b7c89d2511f08ec5121906b2ddcd3f5722723c4 [file] [log] [blame]
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
# Solves Linear Logistic Regression using Trust Region methods.
# Can be adapted for L2-SVMs and more general unconstrained optimization problems also
# setup optimization parameters (See: Trust Region Newton Method for Logistic Regression, Lin, Weng and Keerthi, JMLR 9 (2008) 627-650)
# How to invoke this dml script LinearLogReg.dml?
# Assume LLR_HOME is set to the home of the dml script
# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
# Assume rows = 100 and cols = 50 for x, rows_test= 25 and cols_test = 50 for Xt
# hadoop jar SystemDS.jar -f $LLR_HOME/LinearLogReg.dml -args "$INPUT_DIR/X" "$INPUT_DIR/Xt" "$INPUT_DIR/y" "$INPUT_DIR/yt" "$OUTPUT_DIR/w"
C = 2
tol = 0.001
maxiter = 3
maxinneriter = 3
eta0 = 0.0001
eta1 = 0.25
eta2 = 0.75
sigma1 = 0.25
sigma2 = 0.5
sigma3 = 4.0
psi = 0.1
# read (training and test) data files
X = read($1)
Xt = read($2)
N = nrow(X)
D = ncol(X)
Nt = nrow(Xt)
# read (training and test) labels
y = read($3)
yt = read($4)
#initialize w
w = Rand(rows=D, cols=1, min=0.0, max=0.0);
e = Rand(rows=1, cols=1, min=1.0, max=1.0);
o = X %*% w
logistic = 1.0/(1.0 + exp( -y * o))
obj = 0.5 * t(w) %*% w + C*sum(-log(logistic))
grad = w + C*t(X) %*% ((logistic - 1)*y)
logisticD = logistic*(1-logistic)
delta = sqrt(sum(grad*grad))
# number of iterations
iter = 0
# starting point for CG
zeros_D = Rand(rows = D, cols = 1, min = 0.0, max = 0.0);
# VS: change
zeros_N = Rand(rows = N, cols = 1, min = 0.0, max = 0.0);
# boolean for convergence check
converge = (delta < tol) | (iter > maxiter)
norm_r2 = sum(grad*grad)
# VS: change
norm_grad = sqrt(norm_r2)
norm_grad_initial = norm_grad
alpha = t(w) %*% w
alpha2 = alpha
while(!converge) {
norm_grad = sqrt(sum(grad*grad))
print("-- Outer Iteration = " + iter)
objScalar = as.scalar(obj)
print(" Iterations = " + iter + ", Objective = " + objScalar + ", Gradient Norm = " + norm_grad)
# SOLVE TRUST REGION SUB-PROBLEM
s = zeros_D
os = zeros_N
r = -grad
d = r
inneriter = 0
innerconverge = ( sqrt(sum(r*r)) <= psi * norm_grad)
while (!innerconverge) {
inneriter = inneriter + 1
norm_r2 = sum(r*r)
od = X %*% d
Hd = d + C*(t(X) %*% (logisticD*od))
alpha_deno = t(d) %*% Hd
alpha = norm_r2 / alpha_deno
s = s + as.scalar(alpha) * d
os = os + as.scalar(alpha) * od
sts = t(s) %*% s
delta2 = delta*delta
stsScalar = as.scalar(sts)
shouldBreak = FALSE; # to mimic "break" in the following 'if' condition
if (stsScalar > delta2) {
print(" --- cg reaches trust region boundary")
s = s - as.scalar(alpha) * d
os = os - as.scalar(alpha) * od
std = t(s) %*% d
dtd = t(d) %*% d
sts = t(s) %*% s
rad = sqrt(std*std + dtd*(delta2 - sts))
stdScalar = as.scalar(std)
if(stdScalar >= 0) {
tau = (delta2 - sts)/(std + rad)
}
else {
tau = (rad - std)/dtd
}
s = s + as.scalar(tau) * d
os = os + as.scalar(tau) * od
r = r - as.scalar(tau) * Hd
#break
shouldBreak = TRUE;
innerconverge = TRUE;
}
if (!shouldBreak) {
r = r - as.scalar(alpha) * Hd
old_norm_r2 = norm_r2
norm_r2 = sum(r*r)
beta = norm_r2/old_norm_r2
d = r + beta*d
innerconverge = (sqrt(norm_r2) <= psi * norm_grad) | (inneriter > maxinneriter)
}
}
print(" --- Inner CG Iteration = " + inneriter)
# END TRUST REGION SUB-PROBLEM
# compute rho, update w, obtain delta
gs = t(s) %*% grad
qk = -0.5*(gs - (t(s) %*% r))
wnew = w + s
onew = o + os
logisticnew = 1.0/(1.0 + exp(-y * onew ))
objnew = 0.5 * t(wnew) %*% wnew + C * sum(-log(logisticnew))
actred = (obj - objnew)
actredScalar = as.scalar(actred)
rho = actred / qk
qkScalar = as.scalar(qk)
rhoScalar = as.scalar(rho);
snorm = sqrt(sum( s * s ))
print(" Actual = " + actredScalar)
print(" Predicted = " + qkScalar)
if (iter==0) {
delta = min(delta, snorm)
}
alpha2 = objnew - obj - gs
alpha2Scalar = as.scalar(alpha2)
if (alpha2Scalar <= 0) {
alpha = sigma3*e
}
else {
ascalar = max(sigma1, -0.5*as.scalar(gs)/alpha2Scalar)
alpha = ascalar*e
}
if (rhoScalar > eta0) {
w = wnew
o = onew
grad = w + C*t(X) %*% ((logisticnew - 1) * y )
norm_grad = sqrt(sum(grad*grad))
logisticD = logisticnew * (1 - logisticnew)
obj = objnew
}
alphaScalar = as.scalar(alpha)
if (rhoScalar < eta0){
delta = min(max( alphaScalar , sigma1) * snorm, sigma2 * delta )
}
else {
if (rhoScalar < eta1){
delta = max(sigma1 * delta, min( alphaScalar * snorm, sigma2 * delta))
}
else {
if (rhoScalar < eta2) {
delta = max(sigma1 * delta, min( alphaScalar * snorm, sigma3 * delta))
}
else {
delta = max(delta, min( alphaScalar * snorm, sigma3 * delta))
}
}
}
ot = Xt %*% w
ot2 = yt * ot
correct = sum(ot2 > 0)
accuracy = correct*100.0/Nt
iter = iter + 1
converge = (norm_grad < (tol * norm_grad_initial)) | (iter > maxiter)
print(" Delta = " + delta)
print(" Accuracy = " + accuracy)
print(" Correct = " + correct)
print(" OuterIter = " + iter)
print(" Converge = " + converge)
}
write(w, $5, format="text");