src/test/scripts/functions/lineage/LineageReuseAlg2.dml - systemds - Git at Google

 #-------------------------------------------------------------
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 #
 #-------------------------------------------------------------

 # Sum of squared residuals of the linear model B on (X, y).
 #   X    - feature matrix
 #   y    - response column
 #   B    - model coefficients; when icpt > 0 the last entry multiplies an
 #          appended all-ones column
 #   icpt - intercept mode; any value > 0 appends the ones column to X
 # Returns the loss wrapped as a 1x1 matrix.
 l2norm = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] B, Integer icpt)
 return (Matrix[Double] loss) {
   # Design matrix actually multiplied with B.
   D = X;
   if (icpt > 0) {
     D = cbind(X, matrix(1, nrow(X), 1));
   }
   resid = y - D %*% B;
   loss = as.matrix(sum(resid^2));
 }

 # Randomly selects a subset of the columns of X.
 #   X      - input matrix
 #   seed   - seed for the per-column uniform draws
 #   sample - threshold in [0, 1]; a column is selected when its draw is <= sample
 # Returns Xi, the matrix produced by removeEmpty with the selection mask.
 randColSet = function(Matrix[Double] X, Integer seed, Double sample) return (Matrix[Double] Xi) {
   # One dense uniform draw in [0, 1] per column of X.
   draws = rand(rows=ncol(X), cols=1, min=0, max=1, sparsity=1, seed=seed);
   # 0/1 indicator vector of the selected columns.
   keep = (draws <= sample);
   Xi = removeEmpty(target=X, margin="cols", select=keep);
 }

 # Synthetic regression problem: dense 100x100 features and a dense 100x1 response.
 X = rand(rows=100, cols=100, sparsity=1.0, seed=1);
 y = rand(rows=100, cols=1, sparsity=1.0, seed=1);

 # One result row per (column subset, reg, icpt, tol) combination: 5*5*3*7 = 525.
 # Rbeta rows are zero-initialized; entries beyond a model's length stay 0.
 Rbeta = matrix(0, rows=525, cols=ncol(X));
 Rloss = matrix(0, rows=525, cols=1);
 idx = 1;
 for (i in 1:5) {
   # Column subset for this round: threshold 0.15, seeded by the round number.
   Xi = randColSet(X, i, 0.15);

   #TODO: Use generalized gridsearch builtin after lineage integration with list.
   for (regExp in -4:0) {          # reg = 10^-4 .. 10^0
     for (icpt in 0:2) {           # intercept mode 0, 1, 2
       for (tolExp in -12:-6) {    # tol = 10^-12 .. 10^-6
         beta = lm(X=Xi, y=y, icpt=icpt, reg=10^regExp, tol=10^tolExp, maxi=0, verbose=FALSE);
         # Store the coefficients as a row and the matching training loss.
         Rbeta[idx, 1:nrow(beta)] = t(beta);
         Rloss[idx,] = l2norm(Xi, y, beta, icpt);
         idx = idx + 1;
       }
     }
   }
 }

 # NOTE(review): empty loop kept as-is; presumably a deliberate statement-block
 # boundary for the compiler - confirm before removing.
 while(FALSE) {}
 # Position of the smallest loss, then the coefficient row recorded for it.
 bestIdx = as.scalar(rowIndexMin(t(Rloss)));
 bestModel = Rbeta[bestIdx,];
 write(bestModel, $1, format="text");
	#-------------------------------------------------------------
	#
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	#
	#-------------------------------------------------------------

	# Sum of squared residuals of the linear model B on (X, y).
	#   X    - feature matrix
	#   y    - response column
	#   B    - model coefficients; when icpt > 0 the last entry multiplies an
	#          appended all-ones column
	#   icpt - intercept mode; any value > 0 appends the ones column to X
	# Returns the loss wrapped as a 1x1 matrix.
	l2norm = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] B, Integer icpt)
	return (Matrix[Double] loss) {
		# Design matrix actually multiplied with B.
		D = X;
		if (icpt > 0) {
			D = cbind(X, matrix(1, nrow(X), 1));
		}
		resid = y - D %*% B;
		loss = as.matrix(sum(resid^2));
	}

	# Randomly selects a subset of the columns of X.
	#   X      - input matrix
	#   seed   - seed for the per-column uniform draws
	#   sample - threshold in [0, 1]; a column is selected when its draw is <= sample
	# Returns Xi, the matrix produced by removeEmpty with the selection mask.
	randColSet = function(Matrix[Double] X, Integer seed, Double sample) return (Matrix[Double] Xi) {
		# One dense uniform draw in [0, 1] per column of X.
		draws = rand(rows=ncol(X), cols=1, min=0, max=1, sparsity=1, seed=seed);
		# 0/1 indicator vector of the selected columns.
		keep = (draws <= sample);
		Xi = removeEmpty(target=X, margin="cols", select=keep);
	}

	# Synthetic regression problem: dense 100x100 features and a dense 100x1 response.
	X = rand(rows=100, cols=100, sparsity=1.0, seed=1);
	y = rand(rows=100, cols=1, sparsity=1.0, seed=1);

	# One result row per (column subset, reg, icpt, tol) combination: 5*5*3*7 = 525.
	# Rbeta rows are zero-initialized; entries beyond a model's length stay 0.
	Rbeta = matrix(0, rows=525, cols=ncol(X));
	Rloss = matrix(0, rows=525, cols=1);
	idx = 1;
	for (i in 1:5) {
		# Column subset for this round: threshold 0.15, seeded by the round number.
		Xi = randColSet(X, i, 0.15);

		#TODO: Use generalized gridsearch builtin after lineage integration with list.
		for (regExp in -4:0) {          # reg = 10^-4 .. 10^0
			for (icpt in 0:2) {           # intercept mode 0, 1, 2
				for (tolExp in -12:-6) {    # tol = 10^-12 .. 10^-6
					beta = lm(X=Xi, y=y, icpt=icpt, reg=10^regExp, tol=10^tolExp, maxi=0, verbose=FALSE);
					# Store the coefficients as a row and the matching training loss.
					Rbeta[idx, 1:nrow(beta)] = t(beta);
					Rloss[idx,] = l2norm(Xi, y, beta, icpt);
					idx = idx + 1;
				}
			}
		}
	}

	# NOTE(review): empty loop kept as-is; presumably a deliberate statement-block
	# boundary for the compiler - confirm before removing.
	while(FALSE) {}
	# Position of the smallest loss, then the coefficient row recorded for it.
	bestIdx = as.scalar(rowIndexMin(t(Rloss)));
	bestModel = Rbeta[bestIdx,];
	write(bestModel, $1, format="text");