src/test/scripts/applications/linearLogReg/LinearLogReg.dml - systemds - Git at Google

 #-------------------------------------------------------------
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 #
 #-------------------------------------------------------------

 # Solves Linear Logistic Regression using Trust Region methods.
 # Can be adapted for L2-SVMs and more general unconstrained optimization problems also
 # setup optimization parameters (See: Trust Region Newton Method for Logistic Regression, Lin, Weng and Keerthi, JMLR 9 (2008) 627-650)

 # How to invoke this dml script LinearLogReg.dml?
 # Assume LLR_HOME is set to the home of the dml script
 # Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
 # Assume rows = 100 and cols = 50 for x, rows_test= 25 and cols_test = 50 for Xt
 # hadoop jar SystemDS.jar -f $LLR_HOME/LinearLogReg.dml -args "$INPUT_DIR/X" "$INPUT_DIR/Xt" "$INPUT_DIR/y" "$INPUT_DIR/yt" "$OUTPUT_DIR/w"

 C = 2
 tol = 0.001
 maxiter = 3
 maxinneriter = 3

 eta0 = 0.0001
 eta1 = 0.25
 eta2 = 0.75
 sigma1 = 0.25
 sigma2 = 0.5
 sigma3 = 4.0
 psi = 0.1

 # read (training and test) data files
 X = read($1)
 Xt = read($2)
 N = nrow(X)
 D = ncol(X)
 Nt = nrow(Xt)

 # read (training and test) labels
 y = read($3)
 yt = read($4)

 #initialize w
 w = Rand(rows=D, cols=1, min=0.0, max=0.0);
 e = Rand(rows=1, cols=1, min=1.0, max=1.0);
 o = X %*% w
 logistic = 1.0/(1.0 + exp( -y * o))

 obj = 0.5 * t(w) %*% w + C*sum(-log(logistic))
 grad = w + C*t(X) %*% ((logistic - 1)*y)
 logisticD = logistic*(1-logistic)
 delta = sqrt(sum(grad*grad))

 # number of iterations
 iter = 0

 # starting point for CG
 zeros_D = Rand(rows = D, cols = 1, min = 0.0, max = 0.0);
 # VS: change
 zeros_N = Rand(rows = N, cols = 1, min = 0.0, max = 0.0);

 # boolean for convergence check

 converge = (delta < tol) | (iter > maxiter)
 norm_r2 = sum(grad*grad)

 # VS: change
 norm_grad = sqrt(norm_r2)
 norm_grad_initial = norm_grad

 alpha = t(w) %*% w
 alpha2 = alpha

 while(!converge) {

 	norm_grad = sqrt(sum(grad*grad))

 	print("-- Outer Iteration = " + iter)
 	objScalar = as.scalar(obj)
 	print("     Iterations = " + iter + ", Objective = " + objScalar + ", Gradient Norm = " + norm_grad)

 	# SOLVE TRUST REGION SUB-PROBLEM
 	s = zeros_D
 	os = zeros_N
 	r = -grad
 	d = r
 	inneriter = 0
 	innerconverge = ( sqrt(sum(r*r)) <= psi * norm_grad)
 	while (!innerconverge) {
 	    inneriter = inneriter + 1
 		norm_r2 = sum(r*r)
 		od = X %*% d
 		Hd = d + C*(t(X) %*% (logisticD*od))
 		alpha_deno = t(d) %*% Hd
 		alpha = norm_r2 / alpha_deno

 		s = s + as.scalar(alpha) * d
 		os = os + as.scalar(alpha) * od

 		sts = t(s) %*% s
 		delta2 = delta*delta
 		stsScalar = as.scalar(sts)

 		shouldBreak = FALSE;  # to mimic "break" in the following 'if' condition
 		if (stsScalar > delta2) {
 		   	print("      --- cg reaches trust region boundary")
 			s = s - as.scalar(alpha) * d
 			os = os - as.scalar(alpha) * od
 			std = t(s) %*% d
 			dtd = t(d) %*% d
 			sts = t(s) %*% s
 			rad = sqrt(std*std + dtd*(delta2 - sts))
 			stdScalar = as.scalar(std)
 			if(stdScalar >= 0) {
 				tau = (delta2 - sts)/(std + rad)
 			}
 			else {
 				tau = (rad - std)/dtd
 			}

 			s = s + as.scalar(tau) * d
 			os = os + as.scalar(tau) * od
 			r = r - as.scalar(tau) * Hd

 			#break
 			shouldBreak = TRUE;
 			innerconverge = TRUE;

 		}

 		if (!shouldBreak) {
 			r = r - as.scalar(alpha) * Hd
 			old_norm_r2 = norm_r2
 			norm_r2 = sum(r*r)
 			beta = norm_r2/old_norm_r2
 			d = r + beta*d
 			innerconverge = (sqrt(norm_r2) <= psi * norm_grad) | (inneriter > maxinneriter)
 		}
 	}

 	print("      --- Inner CG Iteration =  " + inneriter)
 	# END TRUST REGION SUB-PROBLEM
 	# compute rho, update w, obtain delta
 	gs = t(s) %*% grad
 	qk = -0.5*(gs - (t(s) %*% r))

 	wnew = w + s
 	onew = o + os
 	logisticnew = 1.0/(1.0 + exp(-y * onew ))
 	objnew = 0.5 * t(wnew) %*% wnew + C * sum(-log(logisticnew))

 	actred = (obj - objnew)
 	actredScalar = as.scalar(actred)
 	rho = actred / qk
 	qkScalar = as.scalar(qk)
 	rhoScalar = as.scalar(rho);
 	snorm = sqrt(sum( s * s ))
 	print("     Actual    = " + actredScalar)
 	print("     Predicted = " + qkScalar)

 	if (iter==0) {
 	   delta = min(delta, snorm)
 	}
 	alpha2 = objnew - obj - gs
 	alpha2Scalar = as.scalar(alpha2)
 	if (alpha2Scalar <= 0) {
 	   alpha = sigma3*e
 	}
 	else {
 	   ascalar = max(sigma1, -0.5*as.scalar(gs)/alpha2Scalar)
 	   alpha = ascalar*e
 	}

 	if (rhoScalar > eta0) {

 		w = wnew
 		o = onew
 		grad = w + C*t(X) %*% ((logisticnew - 1) * y )
 		norm_grad = sqrt(sum(grad*grad))
 		logisticD = logisticnew * (1 - logisticnew)
 		obj = objnew
 	}

 	alphaScalar = as.scalar(alpha)
 	if (rhoScalar < eta0){
 		delta = min(max( alphaScalar , sigma1) * snorm, sigma2 * delta )
 	}
 	else {
 		if (rhoScalar < eta1){
 			delta = max(sigma1 * delta, min( alphaScalar  * snorm, sigma2 * delta))
 		}
 		else {
 			if (rhoScalar < eta2) {
 				delta = max(sigma1 * delta, min( alphaScalar * snorm, sigma3 * delta))
 			}
 			else {
 				delta = max(delta, min( alphaScalar * snorm, sigma3 * delta))
 			}
 		}
 	}


 	ot = Xt %*% w
 	ot2 = yt * ot
 	correct = sum(ot2 > 0)
 	accuracy = correct*100.0/Nt
 	iter = iter + 1
 	converge = (norm_grad < (tol * norm_grad_initial)) | (iter > maxiter)

 	print("     Delta =  " + delta)
 	print("     Accuracy =  " +  accuracy)
 	print("     Correct =  " + correct)
 	print("     OuterIter =  " + iter)
 	print("     Converge =  " + converge)
 }

 write(w, $5, format="text");
	#-------------------------------------------------------------
	#
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	#
	#-------------------------------------------------------------

	# Solves Linear Logistic Regression using Trust Region methods.
	# Can be adapted for L2-SVMs and more general unconstrained optimization problems also
	# setup optimization parameters (See: Trust Region Newton Method for Logistic Regression, Lin, Weng and Keerthi, JMLR 9 (2008) 627-650)

	# How to invoke this dml script LinearLogReg.dml?
	# Assume LLR_HOME is set to the home of the dml script
	# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
	# Assume rows = 100 and cols = 50 for x, rows_test= 25 and cols_test = 50 for Xt
	# hadoop jar SystemDS.jar -f $LLR_HOME/LinearLogReg.dml -args "$INPUT_DIR/X" "$INPUT_DIR/Xt" "$INPUT_DIR/y" "$INPUT_DIR/yt" "$OUTPUT_DIR/w"

	C = 2
	tol = 0.001
	maxiter = 3
	maxinneriter = 3

	eta0 = 0.0001
	eta1 = 0.25
	eta2 = 0.75
	sigma1 = 0.25
	sigma2 = 0.5
	sigma3 = 4.0
	psi = 0.1

	# read (training and test) data files
	X = read($1)
	Xt = read($2)
	N = nrow(X)
	D = ncol(X)
	Nt = nrow(Xt)

	# read (training and test) labels
	y = read($3)
	yt = read($4)

	#initialize w
	w = Rand(rows=D, cols=1, min=0.0, max=0.0);
	e = Rand(rows=1, cols=1, min=1.0, max=1.0);
	o = X %*% w
	logistic = 1.0/(1.0 + exp( -y * o))

	obj = 0.5 * t(w) %% w + Csum(-log(logistic))
	grad = w + Ct(X) %% ((logistic - 1)*y)
	logisticD = logistic*(1-logistic)
	delta = sqrt(sum(grad*grad))

	# number of iterations
	iter = 0

	# starting point for CG
	zeros_D = Rand(rows = D, cols = 1, min = 0.0, max = 0.0);
	# VS: change
	zeros_N = Rand(rows = N, cols = 1, min = 0.0, max = 0.0);

	# boolean for convergence check

	converge = (delta < tol) \| (iter > maxiter)
	norm_r2 = sum(grad*grad)

	# VS: change
	norm_grad = sqrt(norm_r2)
	norm_grad_initial = norm_grad

	alpha = t(w) %*% w
	alpha2 = alpha

	while(!converge) {

	norm_grad = sqrt(sum(grad*grad))

	print("-- Outer Iteration = " + iter)
	objScalar = as.scalar(obj)
	print(" Iterations = " + iter + ", Objective = " + objScalar + ", Gradient Norm = " + norm_grad)

	# SOLVE TRUST REGION SUB-PROBLEM
	s = zeros_D
	os = zeros_N
	r = -grad
	d = r
	inneriter = 0
	innerconverge = ( sqrt(sum(rr)) <= psi norm_grad)
	while (!innerconverge) {
	inneriter = inneriter + 1
	norm_r2 = sum(r*r)
	od = X %*% d
	Hd = d + C(t(X) %% (logisticD*od))
	alpha_deno = t(d) %*% Hd
	alpha = norm_r2 / alpha_deno

	s = s + as.scalar(alpha) * d
	os = os + as.scalar(alpha) * od

	sts = t(s) %*% s
	delta2 = delta*delta
	stsScalar = as.scalar(sts)

	shouldBreak = FALSE; # to mimic "break" in the following 'if' condition
	if (stsScalar > delta2) {
	print(" --- cg reaches trust region boundary")
	s = s - as.scalar(alpha) * d
	os = os - as.scalar(alpha) * od
	std = t(s) %*% d
	dtd = t(d) %*% d
	sts = t(s) %*% s
	rad = sqrt(stdstd + dtd(delta2 - sts))
	stdScalar = as.scalar(std)
	if(stdScalar >= 0) {
	tau = (delta2 - sts)/(std + rad)
	}
	else {
	tau = (rad - std)/dtd
	}

	s = s + as.scalar(tau) * d
	os = os + as.scalar(tau) * od
	r = r - as.scalar(tau) * Hd

	#break
	shouldBreak = TRUE;
	innerconverge = TRUE;

	}

	if (!shouldBreak) {
	r = r - as.scalar(alpha) * Hd
	old_norm_r2 = norm_r2
	norm_r2 = sum(r*r)
	beta = norm_r2/old_norm_r2
	d = r + beta*d
	innerconverge = (sqrt(norm_r2) <= psi * norm_grad) \| (inneriter > maxinneriter)
	}
	}

	print(" --- Inner CG Iteration = " + inneriter)
	# END TRUST REGION SUB-PROBLEM
	# compute rho, update w, obtain delta
	gs = t(s) %*% grad
	qk = -0.5(gs - (t(s) %% r))

	wnew = w + s
	onew = o + os
	logisticnew = 1.0/(1.0 + exp(-y * onew ))
	objnew = 0.5 * t(wnew) %% wnew + C sum(-log(logisticnew))

	actred = (obj - objnew)
	actredScalar = as.scalar(actred)
	rho = actred / qk
	qkScalar = as.scalar(qk)
	rhoScalar = as.scalar(rho);
	snorm = sqrt(sum( s * s ))
	print(" Actual = " + actredScalar)
	print(" Predicted = " + qkScalar)

	if (iter==0) {
	delta = min(delta, snorm)
	}
	alpha2 = objnew - obj - gs
	alpha2Scalar = as.scalar(alpha2)
	if (alpha2Scalar <= 0) {
	alpha = sigma3*e
	}
	else {
	ascalar = max(sigma1, -0.5*as.scalar(gs)/alpha2Scalar)
	alpha = ascalar*e
	}

	if (rhoScalar > eta0) {

	w = wnew
	o = onew
	grad = w + Ct(X) %% ((logisticnew - 1) * y )
	norm_grad = sqrt(sum(grad*grad))
	logisticD = logisticnew * (1 - logisticnew)
	obj = objnew
	}

	alphaScalar = as.scalar(alpha)
	if (rhoScalar < eta0){
	delta = min(max( alphaScalar , sigma1) * snorm, sigma2 * delta )
	}
	else {
	if (rhoScalar < eta1){
	delta = max(sigma1 * delta, min( alphaScalar * snorm, sigma2 * delta))
	}
	else {
	if (rhoScalar < eta2) {
	delta = max(sigma1 * delta, min( alphaScalar * snorm, sigma3 * delta))
	}
	else {
	delta = max(delta, min( alphaScalar * snorm, sigma3 * delta))
	}
	}
	}


	ot = Xt %*% w
	ot2 = yt * ot
	correct = sum(ot2 > 0)
	accuracy = correct*100.0/Nt
	iter = iter + 1
	converge = (norm_grad < (tol * norm_grad_initial)) \| (iter > maxiter)

	print(" Delta = " + delta)
	print(" Accuracy = " + accuracy)
	print(" Correct = " + correct)
	print(" OuterIter = " + iter)
	print(" Converge = " + converge)
	}

	write(w, $5, format="text");