#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
#-------------------------------------------------------------------------------
# X            Input feature matrix
# y            Input label vector (or matrix)
# train        Name ft of the train function to call via ft(trainArgs)
# predict      Name fp of the loss function to call via fp(append(predictArgs, B))
# numB         Maximum number of parameters in model B (pass the maximum
#              because the size of B may vary with parameters like icpt)
# params       List of varied hyper-parameter names
# paramValues  List of matrices providing the parameter values as
#              column vectors for position-aligned hyper-parameters in 'params'
# trainArgs    Named list of arguments to pass to the 'train' function, where
#              gridSearch replaces enumerated hyper-parameters by name;
#              if not provided or an empty list, the lm parameters are used
# predictArgs  List of arguments to pass to the 'predict' function, where
#              gridSearch appends the trained model at the end;
#              if not provided or an empty list, list(X, y) is used instead
# cv           Flag enabling k-fold cross validation; otherwise the training
#              loss is used for scoring
# cvk          If cv=TRUE, specifies the number of folds; otherwise ignored
# verbose      Flag for verbose debug output
#-------------------------------------------------------------------------------
# B    the trained model with minimal loss (by the 'predict' function)
# opt  one-row frame w/ optimal hyper-parameters (by 'params' position)
#-------------------------------------------------------------------------------
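#-------------------------------------------------------------------------------
# Usage sketch (illustrative, not executed; assumes the builtin 'lm' as train
# function and a user-defined loss function 'l2norm(X, y, B)' returning a 1x1
# loss matrix -- see the sketch at the end of this file):
#
#   params = list("icpt", "reg", "tol");
#   paramValues = list(seq(0, 2), matrix("0.1 0.01 0.001", rows=3, cols=1),
#     matrix("1e-6 1e-9", rows=2, cols=1));
#   [B, opt] = gridSearch(X=X, y=y, train="lm", predict="l2norm",
#     numB=ncol(X)+1, params=params, paramValues=paramValues, verbose=TRUE);
#-------------------------------------------------------------------------------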
m_gridSearch = function(Matrix[Double] X, Matrix[Double] y, String train, String predict,
Integer numB=ncol(X), List[String] params, List[Unknown] paramValues,
List[Unknown] trainArgs = list(), List[Unknown] predictArgs = list(),
Boolean cv = FALSE, Integer cvk = 5, Boolean verbose = TRUE)
return (Matrix[Double] B, Frame[Unknown] opt)
{
# Step 0) handling default arguments, which require access to passed data
if( length(trainArgs) == 0 )
trainArgs = list(X=X, y=y, icpt=0, reg=-1, tol=-1, maxi=-1, verbose=FALSE);
if( length(predictArgs) == 0 )
predictArgs = list(X, y);
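  # note: the trainArgs defaults above mirror the named arguments of the
  # builtin lm function, matching the 'trainArgs' fallback described above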
if( cv & cvk <= 1 ) {
print("gridSearch: called with cv=TRUE but cvk="+cvk+", set to default cvk=5.")
cvk = 5;
}
# Step 1) preparation of parameters, lengths, and values in convenient form
numParams = length(params);
paramLens = matrix(0, numParams, 1);
for( j in 1:numParams ) {
vect = as.matrix(paramValues[j,1]);
paramLens[j,1] = nrow(vect);
}
paramVals = matrix(0, numParams, max(paramLens));
for( j in 1:numParams ) {
vect = as.matrix(paramValues[j,1]);
paramVals[j,1:nrow(vect)] = t(vect);
}
cumLens = rev(cumprod(rev(paramLens))/rev(paramLens));
numConfigs = prod(paramLens);
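  # e.g., paramLens = [2, 3] yields numConfigs = 6 and cumLens = [3, 1]:
  # the j-th parameter advances every cumLens[j] configurations, so the decode
  # below is effectively floor((i-1)/cumLens[j]) %% paramLens[j] + 1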
  # Step 2) materialize hyper-parameter combinations
  # (simplifies debugging; materialization cost is negligible compared to compute)
HP = matrix(0, numConfigs, numParams);
parfor( i in 1:nrow(HP) ) {
for( j in 1:numParams )
HP[i,j] = paramVals[j,as.scalar(((i-1)/cumLens[j,1])%%paramLens[j,1]+1)];
}
if( verbose ) {
print("GridSeach: Number of hyper-parameters: \n"+toString(paramLens));
print("GridSeach: Hyper-parameter combinations: \n"+toString(HP));
}
# Step 3) training/scoring of parameter combinations
Rbeta = matrix(0, nrow(HP), numB);
Rloss = matrix(0, nrow(HP), 1);
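  # one row per hyper-parameter configuration; models are left-aligned and
  # zero-padded to numB columns since their size may vary (e.g., with icpt)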
# with cross-validation
if( cv ) {
# a) create folds
foldsX = list(); foldsY = list();
fs = ceil(nrow(X)/cvk);
for( k in 0:(cvk-1) ) {
      foldsX = append(foldsX, X[(k*fs+1):min((k+1)*fs,nrow(X)),]);
      foldsY = append(foldsY, y[(k*fs+1):min((k+1)*fs,nrow(y)),]);
}
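    # note: with fs = ceil(nrow(X)/cvk), the last fold may be smaller
    # whenever nrow(X) is not a multiple of cvk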
parfor( i in 1:nrow(HP) ) {
# a) replace training arguments
ltrainArgs = trainArgs;
lpredictArgs = predictArgs;
for( j in 1:numParams )
ltrainArgs[as.scalar(params[j])] = as.scalar(HP[i,j]);
# b) cross-validated training/scoring and write-back
cvbeta = matrix(0,1,numB);
cvloss = matrix(0,1,1);
for( k in 1:cvk ) {
[tmpX, testX] = remove(foldsX, k);
[tmpy, testy] = remove(foldsY, k);
ltrainArgs['X'] = rbind(tmpX);
ltrainArgs['y'] = rbind(tmpy);
lbeta = t(eval(train, ltrainArgs));
cvbeta[,1:ncol(lbeta)] = cvbeta[,1:ncol(lbeta)] + lbeta;
lpredictArgs[1] = as.matrix(testX);
lpredictArgs[2] = as.matrix(testy);
cvloss += eval(predict, append(lpredictArgs,t(lbeta)));
}
Rbeta[i,] = cvbeta / cvk; # model averaging
Rloss[i,] = cvloss / cvk;
}
}
# without cross-validation
else {
parfor( i in 1:nrow(HP) ) {
# a) replace training arguments
ltrainArgs = trainArgs;
for( j in 1:numParams )
ltrainArgs[as.scalar(params[j])] = as.scalar(HP[i,j]);
# b) core training/scoring and write-back
      lbeta = t(eval(train, ltrainArgs));
Rbeta[i,1:ncol(lbeta)] = lbeta;
Rloss[i,] = eval(predict, append(predictArgs,t(lbeta)));
}
}
# Step 4) select best parameter combination
ix = as.scalar(rowIndexMin(t(Rloss)));
B = t(Rbeta[ix,]); # optimal model
opt = as.frame(HP[ix,]); # optimal hyper-parameters
}
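
# A minimal 'predict' function compatible with the fp(append(predictArgs, B))
# contract above (a sketch; the name 'l2norm' is an assumption, not part of
# this builtin):
#
#   l2norm = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] B)
#     return (Matrix[Double] loss) {
#     # sum of squared errors as a 1x1 matrix, as consumed by gridSearch
#     loss = as.matrix(sum((y - X %*% B)^2));
#   }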