| #------------------------------------------------------------- |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| #------------------------------------------------------------- |
| |
# Stratified k-fold cross validation for multinomial logistic regression.
#
# The rows of cbind(y, X) are sorted by label, the contiguous block of every
# class is cut into k consecutive slices, and fold i is the row-wise
# concatenation of the i-th slice of every class. Each fold is held out once:
# multiLogReg is trained on the remaining folds and the hold-out predictions
# are scored with getAccuracy.
#
# INPUT:
#   X           feature matrix, one row per instance
#   y           one-column label matrix with as many rows as X
#               NOTE(review): table() presumably requires positive integer
#               labels, and a label value with zero instances would yield an
#               empty class block - confirm against callers
#   k           number of folds, must be >= 2
#   MLhp        row of multiLogReg hyper-parameters read as
#               [1,1]=icpt, [1,2]=reg, [1,3]=tol, [1,4]=maxi
#   isWeighted  forwarded unchanged to getAccuracy
#
# OUTPUT:
#   accuracyMatrix  k x 1 matrix; row i holds the accuracy on hold-out fold i
crossV = function(Matrix[double] X, Matrix[double] y, Integer k, Matrix[Double] MLhp, Boolean isWeighted)
  return (Matrix[Double] accuracyMatrix)
{
  # with fewer than two folds the training set of the single hold-out would be empty
  if( k < 2 ) {
    stop("crossV: k must be >= 2, but got k="+k)
  }

  accuracyMatrix = matrix(0, k, 1)
  dataList = list()

  # sort by label (column 1) so that every class occupies a contiguous row range
  data = order(target = cbind(y, X), by = 1, decreasing=FALSE, index.return=FALSE)
  classes = table(data[, 1], 1)
  ins_per_fold = classes/k

  # first/last row of every class block in the sorted data; these do not depend
  # on the fold and are therefore computed once instead of once per fold
  class_end = cumsum(classes)
  class_start = class_end - classes + 1

  # per class: [first, last] position of the current fold's slice, relative to
  # the start of the class block
  # NOTE(review): ins_per_fold is fractional whenever a class count is not a
  # multiple of k, so the slice bounds rely on the engine's conversion of
  # double indices to integers - confirm the intended rounding
  fold_idxes = cbind(matrix(1, rows=nrow(ins_per_fold), cols=1), ins_per_fold)

  for(i in 1:k) {
    fold_i = matrix(0, 0, ncol(data))
    for(j in 1:nrow(classes)) {
      class_j = data[as.scalar(class_start[j, 1]):as.scalar(class_end[j, 1]), ]
      start_i = as.scalar(fold_idxes[j, 1])
      end_i = as.scalar(fold_idxes[j, 2])
      fold_i = rbind(fold_i, class_j[start_i:end_i, ])
    }
    dataList = append(dataList, fold_i)
    # advance every class window to the slice of the next fold
    fold_idxes[, 1] = fold_idxes[, 2] + 1
    fold_idxes[, 2] = fold_idxes[, 2] + ins_per_fold
  }

  parfor(i in seq(1,k)) {
    # fold i is the hold-out set, all remaining folds form the training set
    [trainList, hold_out] = remove(dataList, i)
    trainset = rbind(trainList)
    testset = as.matrix(hold_out)
    # column 1 carries the label, the remaining columns the features
    trainX = trainset[, 2:ncol(trainset)]
    trainy = trainset[, 1]
    testX = testset[, 2:ncol(testset)]
    testy = testset[, 1]
    beta = multiLogReg(X=trainX, Y=trainy, icpt=as.scalar(MLhp[1,1]), reg=as.scalar(MLhp[1,2]), tol=as.scalar(MLhp[1,3]),
      maxi=as.scalar(MLhp[1,4]), maxii=50, verbose=FALSE);
    # prob and acc are unused; the fold is scored via getAccuracy so that isWeighted is honored
    [prob, yhat, acc] = multiLogRegPredict(testX, beta, testy, FALSE)
    accuracy = getAccuracy(testy, yhat, isWeighted)
    accuracyMatrix[i, 1] = accuracy
  }
}
| |
# Test driver: runs 3-fold cross validation on random features and random
# labels drawn from {1, 2}, dispatching to crossV by name through eval().
features = rand(rows=100, cols=100)
labels = sample(2, 100, TRUE)
# crossV reads this row as icpt, reg, tol, maxi for multiLogReg
logRegParams = matrix("1 1e-4 1e-6 100", rows=1, cols=4)

# direct-call equivalent:
#   crossV(X=features, y=labels, k=3, MLhp=logRegParams, isWeighted=FALSE)
cvArgs = list(X=features, y=labels, k=3, MLhp=logRegParams, isWeighted=FALSE)
foldAcc = eval("crossV", cvArgs)

avgAcc = mean(foldAcc)
# a non-positive mean indicates an empty result, so fail the test
if( avgAcc <= 0 ) {
  stop("Invalid accuracy: "+avgAcc);
}
print("CV accuracy: "+avgAcc)