| #------------------------------------------------------------- |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| #------------------------------------------------------------- |
| |
| |
| #------------------------------------------------------------------------------- |
| # X Input feature matrix |
| # y Input label vector (or matrix) |
| # train Name ft of the train function to call via ft(trainArgs) |
| # predict Name fp of the loss function to call via fp((predictArgs,B)) |
| # numB Maximum number of parameters in model B (pass the maximum |
| # because the size of B may vary with parameters like icpt) |
| # params List of varied hyper-parameter names |
| # paramValues List of matrices providing the parameter values as |
| # columnvectors for position-aligned hyper-parameters in 'params' |
| # trainArgs named List of arguments to pass to the 'train' function, where |
| # gridSearch replaces enumerated hyper-parameter by name, if |
| # not provided or an empty list, the lm parameters are used |
| # predictArgs List of arguments to pass to the 'predict' function, where |
| # gridSearch appends the trained models at the end, if |
| # not provided or an empty list, list(X, y) is used instead |
| # cv flag enabling k-fold cross validation, otherwise training loss |
| # cvk if cv=TRUE, specifies the number of folds, otherwise ignored |
| # verbose flag for verbose debug output |
| #------------------------------------------------------------------------------- |
| # B the trained model with minimal loss (by the 'predict' function) |
| # opt one-row frame w/ optimal hyperparameters (by 'params' position) |
| #------------------------------------------------------------------------------- |
| |
| m_gridSearch = function(Matrix[Double] X, Matrix[Double] y, String train, String predict, |
|     Integer numB=ncol(X), List[String] params, List[Unknown] paramValues, |
|     List[Unknown] trainArgs = list(), List[Unknown] predictArgs = list(), |
|     Boolean cv = FALSE, Integer cvk = 5, Boolean verbose = TRUE) |
|   return (Matrix[Double] B, Frame[Unknown] opt) |
| { |
|   # Enumerates all combinations of the given hyper-parameter values, trains one |
|   # model per combination via eval(train, ...), scores it via eval(predict, ...), |
|   # and returns the model and hyper-parameter row with the minimal loss. |
| |
|   # Step 0) handling default arguments, which require access to passed data |
|   if( length(trainArgs) == 0 ) |
|     trainArgs = list(X=X, y=y, icpt=0, reg=-1, tol=-1, maxi=-1, verbose=FALSE); |
|   if( length(predictArgs) == 0 ) |
|     predictArgs = list(X, y); |
|   if( cv & cvk <= 1 ) { |
|     print("gridSearch: called with cv=TRUE but cvk="+cvk+", set to default cvk=5.") |
|     cvk = 5; |
|   } |
| |
|   # Step 1) preparation of parameters, lengths, and values in convenient form |
|   numParams = length(params); |
|   # paramLens[j] = number of candidate values of the j-th hyper-parameter |
|   paramLens = matrix(0, numParams, 1); |
|   for( j in 1:numParams ) { |
|     vect = as.matrix(paramValues[j,1]); |
|     paramLens[j,1] = nrow(vect); |
|   } |
|   # paramVals holds one row per hyper-parameter, zero-padded to the max length |
|   paramVals = matrix(0, numParams, max(paramLens)); |
|   for( j in 1:numParams ) { |
|     vect = as.matrix(paramValues[j,1]); |
|     paramVals[j,1:nrow(vect)] = t(vect); |
|   } |
|   # cumLens[j] = product of the lengths of all hyper-parameters after j, |
|   # i.e., the stride with which the j-th value changes across configurations |
|   cumLens = rev(cumprod(rev(paramLens))/rev(paramLens)); |
|   numConfigs = prod(paramLens); |
| |
|   # Step 2) materialize hyper-parameter combinations |
|   # (simplifies debugging; the cost is negligible compared to training) |
|   HP = matrix(0, numConfigs, numParams); |
|   parfor( i in 1:nrow(HP) ) { |
|     for( j in 1:numParams ) |
|       HP[i,j] = paramVals[j,as.scalar(((i-1)/cumLens[j,1])%%paramLens[j,1]+1)]; |
|   } |
| |
|   if( verbose ) { |
|     print("GridSeach: Number of hyper-parameters: \n"+toString(paramLens)); |
|     print("GridSeach: Hyper-parameter combinations: \n"+toString(HP)); |
|   } |
| |
|   # Step 3) training/scoring of parameter combinations |
|   # (one row of model parameters and one loss value per configuration) |
|   Rbeta = matrix(0, nrow(HP), numB); |
|   Rloss = matrix(0, nrow(HP), 1); |
| |
|   # with cross-validation |
|   if( cv ) { |
|     # a) create folds: fold k (0-based) covers rows k*fs+1 .. min((k+1)*fs, nrow(X)). |
|     # The upper bound must depend on k (not cvk); otherwise every fold extends |
|     # to the last row, folds overlap, and test rows leak into the training data. |
|     foldsX = list(); foldsY = list(); |
|     fs = ceil(nrow(X)/cvk); |
|     for( k in 0:(cvk-1) ) { |
|       foldsX = append(foldsX, X[(k*fs+1):min((k+1)*fs,nrow(X)),]); |
|       foldsY = append(foldsY, y[(k*fs+1):min((k+1)*fs,nrow(y)),]); |
|     } |
|     parfor( i in 1:nrow(HP) ) { |
|       # b) replace training arguments by the i-th hyper-parameter combination |
|       ltrainArgs = trainArgs; |
|       lpredictArgs = predictArgs; |
|       for( j in 1:numParams ) |
|         ltrainArgs[as.scalar(params[j])] = as.scalar(HP[i,j]); |
|       # c) cross-validated training/scoring and write-back |
|       cvbeta = matrix(0,1,numB); |
|       cvloss = matrix(0,1,1); |
|       for( k in 1:cvk ) { |
|         # hold out fold k for scoring, train on the remaining folds |
|         [tmpX, testX] = remove(foldsX, k); |
|         [tmpy, testy] = remove(foldsY, k); |
|         ltrainArgs['X'] = rbind(tmpX); |
|         ltrainArgs['y'] = rbind(tmpy); |
|         lbeta = t(eval(train, ltrainArgs)); |
|         # the model may have fewer than numB parameters; accumulate the prefix |
|         cvbeta[,1:ncol(lbeta)] = cvbeta[,1:ncol(lbeta)] + lbeta; |
|         lpredictArgs[1] = as.matrix(testX); |
|         lpredictArgs[2] = as.matrix(testy); |
|         cvloss += eval(predict, append(lpredictArgs,t(lbeta))); |
|       } |
|       Rbeta[i,] = cvbeta / cvk; # model averaging |
|       Rloss[i,] = cvloss / cvk; |
|     } |
|   } |
|   # without cross-validation |
|   else { |
|     parfor( i in 1:nrow(HP) ) { |
|       # a) replace training arguments by the i-th hyper-parameter combination |
|       ltrainArgs = trainArgs; |
|       for( j in 1:numParams ) |
|         ltrainArgs[as.scalar(params[j])] = as.scalar(HP[i,j]); |
|       # b) core training/scoring and write-back |
|       lbeta = t(eval(train, ltrainArgs)) |
|       Rbeta[i,1:ncol(lbeta)] = lbeta; |
|       Rloss[i,] = eval(predict, append(predictArgs,t(lbeta))); |
|     } |
|   } |
| |
|   # Step 4) select best parameter combination (row index of the minimal loss) |
|   ix = as.scalar(rowIndexMin(t(Rloss))); |
|   B = t(Rbeta[ix,]); # optimal model |
|   opt = as.frame(HP[ix,]); # optimal hyper-parameters |
| } |