# blob: d838e5b63bf0e6093b849a289021265f9baa5f5d [file] [log] [blame]
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
# Sum of squared residuals of a linear model, returned as a 1x1 matrix.
#
# X     feature matrix
# y     response column vector
# B     coefficient column vector; when icpt > 0 a column of ones is appended
#       to X before the product, so B must then have ncol(X) + 1 rows
# icpt  intercept flag; any value > 0 triggers the appended ones column
#
# loss  1x1 matrix holding sum((y - X %*% B)^2)
l2norm = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] B, Integer icpt)
  return (Matrix[Double] loss)
{
  if (icpt > 0) {
    # Extend X with a constant column so it lines up with the extra entry of B.
    ones = matrix(1, rows=nrow(X), cols=1);
    X = cbind(X, ones);
  }
  residual = y - X %*% B;
  loss = as.matrix(sum(residual^2));
}
# Picks a random subset of the columns of X.
#
# X       input matrix
# seed    seed passed to rand, making the selection reproducible
# sample  threshold in terms of a uniform [0, 1] draw; a column is selected
#         when its draw is <= sample
#
# Xi      result of removeEmpty on the columns of X with the 0/1 selection
#         vector passed as `select`
randColSet = function(Matrix[Double] X, Integer seed, Double sample) return (Matrix[Double] Xi) {
  # One uniform draw per column of X.
  draws = rand(rows=ncol(X), cols=1, min=0, max=1, sparsity=1, seed=seed);
  keep = draws <= sample;
  Xi = removeEmpty(target=X, margin="cols", select=keep);
}
# Grid search over lm hyper-parameters on random column subsets of a
# synthetic dense dataset; the coefficients of the lowest-loss model are
# written to the path given as the first script argument.
X = rand(rows=100, cols=100, sparsity=1.0, seed=1);
y = rand(rows=100, cols=1, sparsity=1.0, seed=1);

# One row per trained model: 5 column samples * 5 reg * 3 icpt * 7 tol = 525.
Rbeta = matrix(0, rows=525, cols=ncol(X));
Rloss = matrix(0, rows=525, cols=1);
modelIdx = 1;

for (i in 1:5) {
  # Randomly select about 15% of the columns in every iteration; the loop
  # index doubles as the random seed.
  Xsub = randColSet(X, i, 0.15);

  #TODO: Use generalized gridsearch builtin after lineage integration with list.
  for (regExp in -4:0) {          # reg values: 10^-4 to 10^0
    reg = 10^regExp;
    for (icpt in 0:2) {           # icpt values: 0, 1, 2
      for (tolExp in -12:-6) {    # tol values: 10^-12 to 10^-6
        tol = 10^tolExp;
        beta = lm(X=Xsub, y=y, icpt=icpt, reg=reg, tol=tol, maxi=0, verbose=FALSE);

        # beta has fewer entries than Rbeta has columns; only the leading
        # nrow(beta) cells of the row are overwritten, the rest stay 0.
        Rbeta[modelIdx, 1:nrow(beta)] = t(beta);
        Rloss[modelIdx, 1] = l2norm(Xsub, y, beta, icpt);
        modelIdx = modelIdx + 1;
      }
    }
  }
}

# NOTE(review): empty loop kept exactly as in the original; presumably it is
# there to force a statement-block boundary before the selection below - confirm.
while(FALSE) {}

# rowIndexMin works per row, so the loss column is transposed into a single
# row to obtain the (1-based) position of the smallest loss.
leastLoss = rowIndexMin(t(Rloss));
bestRow = as.scalar(leastLoss);
bestModel = Rbeta[bestRow,];
write(bestModel, $1, format="text");