| #------------------------------------------------------------- |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| #------------------------------------------------------------- |
| |
# Solves linear logistic regression using trust-region Newton methods.
# Can also be adapted for L2-SVMs and more general unconstrained optimization problems.
# Optimization parameters follow: Trust Region Newton Method for Logistic Regression, Lin, Weng and Keerthi, JMLR 9 (2008) 627-650
| |
# How to invoke this DML script (LinearLogReg.dml):
# Assume LLR_HOME is set to the home of the dml script
# Assume input and output directories are on HDFS as INPUT_DIR and OUTPUT_DIR
# Assume rows = 100 and cols = 50 for X, rows_test = 25 and cols_test = 50 for Xt
# hadoop jar SystemDS.jar -f $LLR_HOME/LinearLogReg.dml -args "$INPUT_DIR/X" "$INPUT_DIR/Xt" "$INPUT_DIR/y" "$INPUT_DIR/yt" "$OUTPUT_DIR/w"
| |
| C = 2 |
| tol = 0.001 |
| maxiter = 3 |
| maxinneriter = 3 |
| |
| eta0 = 0.0001 |
| eta1 = 0.25 |
| eta2 = 0.75 |
| sigma1 = 0.25 |
| sigma2 = 0.5 |
| sigma3 = 4.0 |
| psi = 0.1 |
| |
| # read (training and test) data files |
| X = read($1) |
| Xt = read($2) |
| N = nrow(X) |
| D = ncol(X) |
| Nt = nrow(Xt) |
| |
| # read (training and test) labels |
| y = read($3) |
| yt = read($4) |
| |
| #initialize w |
| w = Rand(rows=D, cols=1, min=0.0, max=0.0); |
| e = Rand(rows=1, cols=1, min=1.0, max=1.0); |
| o = X %*% w |
| logistic = 1.0/(1.0 + exp( -y * o)) |
| |
| obj = 0.5 * t(w) %*% w + C*sum(-log(logistic)) |
| grad = w + C*t(X) %*% ((logistic - 1)*y) |
| logisticD = logistic*(1-logistic) |
| delta = sqrt(sum(grad*grad)) |
| |
| # number of iterations |
| iter = 0 |
| |
| # starting point for CG |
| zeros_D = Rand(rows = D, cols = 1, min = 0.0, max = 0.0); |
| # VS: change |
| zeros_N = Rand(rows = N, cols = 1, min = 0.0, max = 0.0); |
| |
| # boolean for convergence check |
| |
| converge = (delta < tol) | (iter > maxiter) |
| norm_r2 = sum(grad*grad) |
| |
| # VS: change |
| norm_grad = sqrt(norm_r2) |
| norm_grad_initial = norm_grad |
| |
| alpha = t(w) %*% w |
| alpha2 = alpha |
| |
| while(!converge) { |
| |
| norm_grad = sqrt(sum(grad*grad)) |
| |
| print("-- Outer Iteration = " + iter) |
| objScalar = as.scalar(obj) |
| print(" Iterations = " + iter + ", Objective = " + objScalar + ", Gradient Norm = " + norm_grad) |
| |
| # SOLVE TRUST REGION SUB-PROBLEM |
| s = zeros_D |
| os = zeros_N |
| r = -grad |
| d = r |
| inneriter = 0 |
| innerconverge = ( sqrt(sum(r*r)) <= psi * norm_grad) |
| while (!innerconverge) { |
| inneriter = inneriter + 1 |
| norm_r2 = sum(r*r) |
| od = X %*% d |
| Hd = d + C*(t(X) %*% (logisticD*od)) |
| alpha_deno = t(d) %*% Hd |
| alpha = norm_r2 / alpha_deno |
| |
| s = s + as.scalar(alpha) * d |
| os = os + as.scalar(alpha) * od |
| |
| sts = t(s) %*% s |
| delta2 = delta*delta |
| stsScalar = as.scalar(sts) |
| |
| shouldBreak = FALSE; # to mimic "break" in the following 'if' condition |
| if (stsScalar > delta2) { |
| print(" --- cg reaches trust region boundary") |
| s = s - as.scalar(alpha) * d |
| os = os - as.scalar(alpha) * od |
| std = t(s) %*% d |
| dtd = t(d) %*% d |
| sts = t(s) %*% s |
| rad = sqrt(std*std + dtd*(delta2 - sts)) |
| stdScalar = as.scalar(std) |
| if(stdScalar >= 0) { |
| tau = (delta2 - sts)/(std + rad) |
| } |
| else { |
| tau = (rad - std)/dtd |
| } |
| |
| s = s + as.scalar(tau) * d |
| os = os + as.scalar(tau) * od |
| r = r - as.scalar(tau) * Hd |
| |
| #break |
| shouldBreak = TRUE; |
| innerconverge = TRUE; |
| |
| } |
| |
| if (!shouldBreak) { |
| r = r - as.scalar(alpha) * Hd |
| old_norm_r2 = norm_r2 |
| norm_r2 = sum(r*r) |
| beta = norm_r2/old_norm_r2 |
| d = r + beta*d |
| innerconverge = (sqrt(norm_r2) <= psi * norm_grad) | (inneriter > maxinneriter) |
| } |
| } |
| |
| print(" --- Inner CG Iteration = " + inneriter) |
| # END TRUST REGION SUB-PROBLEM |
| # compute rho, update w, obtain delta |
| gs = t(s) %*% grad |
| qk = -0.5*(gs - (t(s) %*% r)) |
| |
| wnew = w + s |
| onew = o + os |
| logisticnew = 1.0/(1.0 + exp(-y * onew )) |
| objnew = 0.5 * t(wnew) %*% wnew + C * sum(-log(logisticnew)) |
| |
| actred = (obj - objnew) |
| actredScalar = as.scalar(actred) |
| rho = actred / qk |
| qkScalar = as.scalar(qk) |
| rhoScalar = as.scalar(rho); |
| snorm = sqrt(sum( s * s )) |
| print(" Actual = " + actredScalar) |
| print(" Predicted = " + qkScalar) |
| |
| if (iter==0) { |
| delta = min(delta, snorm) |
| } |
| alpha2 = objnew - obj - gs |
| alpha2Scalar = as.scalar(alpha2) |
| if (alpha2Scalar <= 0) { |
| alpha = sigma3*e |
| } |
| else { |
| ascalar = max(sigma1, -0.5*as.scalar(gs)/alpha2Scalar) |
| alpha = ascalar*e |
| } |
| |
| if (rhoScalar > eta0) { |
| |
| w = wnew |
| o = onew |
| grad = w + C*t(X) %*% ((logisticnew - 1) * y ) |
| norm_grad = sqrt(sum(grad*grad)) |
| logisticD = logisticnew * (1 - logisticnew) |
| obj = objnew |
| } |
| |
| alphaScalar = as.scalar(alpha) |
| if (rhoScalar < eta0){ |
| delta = min(max( alphaScalar , sigma1) * snorm, sigma2 * delta ) |
| } |
| else { |
| if (rhoScalar < eta1){ |
| delta = max(sigma1 * delta, min( alphaScalar * snorm, sigma2 * delta)) |
| } |
| else { |
| if (rhoScalar < eta2) { |
| delta = max(sigma1 * delta, min( alphaScalar * snorm, sigma3 * delta)) |
| } |
| else { |
| delta = max(delta, min( alphaScalar * snorm, sigma3 * delta)) |
| } |
| } |
| } |
| |
| |
| ot = Xt %*% w |
| ot2 = yt * ot |
| correct = sum(ot2 > 0) |
| accuracy = correct*100.0/Nt |
| iter = iter + 1 |
| converge = (norm_grad < (tol * norm_grad_initial)) | (iter > maxiter) |
| |
| print(" Delta = " + delta) |
| print(" Accuracy = " + accuracy) |
| print(" Correct = " + correct) |
| print(" OuterIter = " + iter) |
| print(" Converge = " + converge) |
| } |
| |
| write(w, $5, format="text"); |