# src/test/scripts/functions/async/LineageReuseSpark6.dml - systemds - Git at Google

 #-------------------------------------------------------------
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 #
 #-------------------------------------------------------------
 # Keep a pseudo-random subset of the rows of `lamdas`.
 #
 # lamdas:  candidate values, one per row
 # ratio:   threshold compared against a uniform draw in [min=0, max=1];
 #          a row is kept when its draw is below this threshold
 # samples: the rows of `lamdas` whose draw fell below `ratio`
 #
 # The draw uses a fixed seed (42), so the same number of input rows and the
 # same ratio always select the same row positions.
 randRegSample = function(Matrix[Double] lamdas, Double ratio)
 return (Matrix[Double] samples) {
   numCandidates = nrow(lamdas);
   draws = rand(rows=numCandidates, cols=1, min=0, max=1, seed=42);
   keepMask = draws < ratio;
   samples = removeEmpty(target=lamdas, margin="rows", select=keepMask);
 }

 # Percentage of rows for which the l2svmPredict label equals the given label.
 #
 # X: feature matrix passed to l2svmPredict
 # y: reference labels, one per row
 # B: model weights passed to l2svmPredict as W
 # accuracy: (number of rows where predicted label - y == 0) / nrow(y) * 100
 #
 # NOTE(review): despite the name, no norm is computed here; an earlier
 # squared-loss variant (sum((y - X%*%B)^2)) was left commented out by the author.
 l2norm = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] B)
 return (Double accuracy) {
   # Only the predicted labels are used; the raw scores are ignored.
   [rawScores, predicted] = l2svmPredict(X=X, W=B, verbose=FALSE);
   residual = predicted - y;
   numCorrect = sum(residual == 0);
   accuracy = numCorrect / nrow(y) * 100;
 }

 # ---------------------------------------------------------------------------
 # Driver: repeatedly trains l2svm and multiLogReg models over a shrinking set
 # of regularization values while doubling the iteration budget each round,
 # then writes the sum of the last round's best accuracies to $1.
 # ---------------------------------------------------------------------------
 M = 10000;       # rows of the synthetic dataset
 N = 200;         # feature columns of the synthetic dataset
 sp = 1.0;        # sparsity passed to rand (author's note: 1.0)
 no_bracket = 2;  # number of outer rounds (author's note: 5)

 # Synthetic data: ceil() of draws from [0, 2] yields the labels used below.
 X = rand(rows=M, cols=N, sparsity=sp, seed=42);
 y = rand(rows=M, cols=1, min=0, max=2, seed=42);
 y = ceil(y);

 no_lamda = 25; # starting combinations = 25 * 4 = 100 HPs
 stp = (0.1 - 0.0001)/no_lamda;
 # Only rows 1..no_lamda of HPlamdas are read in the first round.
 HPlamdas = seq(0.0001, 0.1, stp);
 maxIter = 10; # starting iteration count = 100 * 10 = 1k

 for (r in 1:no_bracket) {
   # One row per regularization value; column 1 holds the accuracy and the
   # remaining columns hold the transposed model.
   svmModels = matrix(0, rows=no_lamda, cols=ncol(X)+2);
   mlrModels = matrix(0, rows=no_lamda, cols=ncol(X)+2);
   # Optimize for regularization parameters
   print("#lamda = "+no_lamda+", maxIterations = "+maxIter);
   for (l in 1:no_lamda)
   {
     # Regularization value shared by the four training runs of this iteration.
     regParam = as.scalar(HPlamdas[l,1]);

     # Run L2svm with intercept true
     beta = l2svm(X=X, Y=y, intercept=TRUE, epsilon=1e-12,
       reg=regParam, maxIterations=maxIter, verbose=FALSE);
     svmModels[l,1] = l2norm(X, y, beta); #1st column
     svmModels[l,2:nrow(beta)+1] = t(beta);

     # Run L2svm with intercept false
     # NOTE(review): this writes to the same row as the run above, so the
     # intercept=TRUE accuracy is overwritten - confirm this is intended.
     beta = l2svm(X=X, Y=y, intercept=FALSE, epsilon=1e-12,
       reg=regParam, maxIterations=maxIter, verbose=FALSE);
     svmModels[l,1] = l2norm(X, y, beta); #1st column
     svmModels[l,2:nrow(beta)+1] = t(beta);

     # Run multilogreg with icpt=2
     beta = multiLogReg(X=X, Y=y, icpt=2, tol=1e-6, reg=regParam,
       maxi=maxIter, maxii=20, verbose=FALSE);
     [prob_mlr, Y_mlr, acc] = multiLogRegPredict(X=X, B=beta, Y=y, verbose=FALSE);
     mlrModels[l,1] = acc; #1st column
     mlrModels[l,2:nrow(beta)+1] = t(beta);

     # Run multilogreg with icpt=1
     # NOTE(review): same row as the icpt=2 run above, so that accuracy is
     # overwritten as well - confirm this is intended.
     beta = multiLogReg(X=X, Y=y, icpt=1, tol=1e-6, reg=regParam,
       maxi=maxIter, maxii=20, verbose=FALSE);
     [prob_mlr, Y_mlr, acc] = multiLogRegPredict(X=X, B=beta, Y=y, verbose=FALSE);
     mlrModels[l,1] = acc; #1st column
     mlrModels[l,2:nrow(beta)+1] = t(beta);
   }
   # Sort the models by accuracy, highest first. order() sorts ascending by
   # default, so decreasing=TRUE is required for row 1 to hold the best model.
   svm_order = order(target=svmModels, by=1, decreasing=TRUE);
   bestAccSvm = svm_order[1,1];
   print(toString(bestAccSvm));
   mlr_order = order(target=mlrModels, by=1, decreasing=TRUE);
   bestAccMlr = mlr_order[1,1];
   print(toString(bestAccMlr));

   # Double the iteration count and keep a random subset of the HPs
   maxIter = maxIter * 2;
   HPlamdas = randRegSample(HPlamdas, 0.5);
   #TODO: select the models with highest accuracies
   no_lamda = nrow(HPlamdas);
 }
 # bestAccSvm / bestAccMlr are 1x1 matrices from the last round; sum() turns
 # them into scalars before they are added and written out.
 R = sum(bestAccSvm) + sum(bestAccMlr);
 write(R, $1, format="text");
	#-------------------------------------------------------------
	#
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	#
	#-------------------------------------------------------------
	# Keep a pseudo-random subset of the rows of `lamdas`.
	#
	# lamdas:  candidate values, one per row
	# ratio:   threshold compared against a uniform draw in [min=0, max=1];
	#          a row is kept when its draw is below this threshold
	# samples: the rows of `lamdas` whose draw fell below `ratio`
	#
	# The draw uses a fixed seed (42), so the same number of input rows and the
	# same ratio always select the same row positions.
	randRegSample = function(Matrix[Double] lamdas, Double ratio)
	return (Matrix[Double] samples) {
	  numCandidates = nrow(lamdas);
	  draws = rand(rows=numCandidates, cols=1, min=0, max=1, seed=42);
	  keepMask = draws < ratio;
	  samples = removeEmpty(target=lamdas, margin="rows", select=keepMask);
	}

	# Percentage of rows for which the l2svmPredict label equals the given label.
	#
	# X: feature matrix passed to l2svmPredict
	# y: reference labels, one per row
	# B: model weights passed to l2svmPredict as W
	# accuracy: (number of rows where predicted label - y == 0) / nrow(y) * 100
	#
	# NOTE(review): despite the name, no norm is computed here; an earlier
	# squared-loss variant (sum((y - X%*%B)^2)) was left commented out by the author.
	l2norm = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] B)
	return (Double accuracy) {
	  # Only the predicted labels are used; the raw scores are ignored.
	  [rawScores, predicted] = l2svmPredict(X=X, W=B, verbose=FALSE);
	  residual = predicted - y;
	  numCorrect = sum(residual == 0);
	  accuracy = numCorrect / nrow(y) * 100;
	}

	# ---------------------------------------------------------------------------
	# Driver: repeatedly trains l2svm and multiLogReg models over a shrinking set
	# of regularization values while doubling the iteration budget each round,
	# then writes the sum of the last round's best accuracies to $1.
	# ---------------------------------------------------------------------------
	M = 10000;       # rows of the synthetic dataset
	N = 200;         # feature columns of the synthetic dataset
	sp = 1.0;        # sparsity passed to rand (author's note: 1.0)
	no_bracket = 2;  # number of outer rounds (author's note: 5)

	# Synthetic data: ceil() of draws from [0, 2] yields the labels used below.
	X = rand(rows=M, cols=N, sparsity=sp, seed=42);
	y = rand(rows=M, cols=1, min=0, max=2, seed=42);
	y = ceil(y);

	no_lamda = 25; # starting combinations = 25 * 4 = 100 HPs
	stp = (0.1 - 0.0001)/no_lamda;
	# Only rows 1..no_lamda of HPlamdas are read in the first round.
	HPlamdas = seq(0.0001, 0.1, stp);
	maxIter = 10; # starting iteration count = 100 * 10 = 1k

	for (r in 1:no_bracket) {
	  # One row per regularization value; column 1 holds the accuracy and the
	  # remaining columns hold the transposed model.
	  svmModels = matrix(0, rows=no_lamda, cols=ncol(X)+2);
	  mlrModels = matrix(0, rows=no_lamda, cols=ncol(X)+2);
	  # Optimize for regularization parameters
	  print("#lamda = "+no_lamda+", maxIterations = "+maxIter);
	  for (l in 1:no_lamda)
	  {
	    # Regularization value shared by the four training runs of this iteration.
	    regParam = as.scalar(HPlamdas[l,1]);

	    # Run L2svm with intercept true
	    beta = l2svm(X=X, Y=y, intercept=TRUE, epsilon=1e-12,
	      reg=regParam, maxIterations=maxIter, verbose=FALSE);
	    svmModels[l,1] = l2norm(X, y, beta); #1st column
	    svmModels[l,2:nrow(beta)+1] = t(beta);

	    # Run L2svm with intercept false
	    # NOTE(review): this writes to the same row as the run above, so the
	    # intercept=TRUE accuracy is overwritten - confirm this is intended.
	    beta = l2svm(X=X, Y=y, intercept=FALSE, epsilon=1e-12,
	      reg=regParam, maxIterations=maxIter, verbose=FALSE);
	    svmModels[l,1] = l2norm(X, y, beta); #1st column
	    svmModels[l,2:nrow(beta)+1] = t(beta);

	    # Run multilogreg with icpt=2
	    beta = multiLogReg(X=X, Y=y, icpt=2, tol=1e-6, reg=regParam,
	      maxi=maxIter, maxii=20, verbose=FALSE);
	    [prob_mlr, Y_mlr, acc] = multiLogRegPredict(X=X, B=beta, Y=y, verbose=FALSE);
	    mlrModels[l,1] = acc; #1st column
	    mlrModels[l,2:nrow(beta)+1] = t(beta);

	    # Run multilogreg with icpt=1
	    # NOTE(review): same row as the icpt=2 run above, so that accuracy is
	    # overwritten as well - confirm this is intended.
	    beta = multiLogReg(X=X, Y=y, icpt=1, tol=1e-6, reg=regParam,
	      maxi=maxIter, maxii=20, verbose=FALSE);
	    [prob_mlr, Y_mlr, acc] = multiLogRegPredict(X=X, B=beta, Y=y, verbose=FALSE);
	    mlrModels[l,1] = acc; #1st column
	    mlrModels[l,2:nrow(beta)+1] = t(beta);
	  }
	  # Sort the models by accuracy, highest first. order() sorts ascending by
	  # default, so decreasing=TRUE is required for row 1 to hold the best model.
	  svm_order = order(target=svmModels, by=1, decreasing=TRUE);
	  bestAccSvm = svm_order[1,1];
	  print(toString(bestAccSvm));
	  mlr_order = order(target=mlrModels, by=1, decreasing=TRUE);
	  bestAccMlr = mlr_order[1,1];
	  print(toString(bestAccMlr));

	  # Double the iteration count and keep a random subset of the HPs
	  maxIter = maxIter * 2;
	  HPlamdas = randRegSample(HPlamdas, 0.5);
	  #TODO: select the models with highest accuracies
	  no_lamda = nrow(HPlamdas);
	}
	# bestAccSvm / bestAccMlr are 1x1 matrices from the last round; sum() turns
	# them into scalars before they are added and written out.
	R = sum(bestAccSvm) + sum(bestAccMlr);
	write(R, $1, format="text");