| #------------------------------------------------------------- |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| #------------------------------------------------------------- |
| |
# This script implements the k-nearest-neighbor (KNN) algorithm.
#
# INPUT:
# ---------------------------------------------------------------------------
# Train               The input matrix of training features
# Test                The input matrix of records for the nearest-neighbor search
# CL                  The input matrix of target values for the training records
# CL_T                The target type of matrix CL: whether the columns in CL
#                     are continuous ( =1 ), categorical ( =2 ),
#                     or not specified ( =0 )
# trans_continuous    Option flag for transforming continuous features to [-1,1]:
#                     0 = do not transform continuous variables;
#                     1 = transform continuous variables
# k_value             k value for KNN; ignored if select_k is enabled
# select_k            Use the k-selection algorithm to estimate k (1 means yes)
# k_min               Minimum k value ( used if select_k = 1 )
# k_max               Maximum k value ( used if select_k = 1 )
# select_feature      Use the feature-selection algorithm to select features (1 means yes)
# feature_max         Maximum number of features to select
# interval            Block size for the selection races
#                     ( used if select_k = 1 or select_feature = 1 )
# feature_importance  Use the feature-importance algorithm to score each feature
#                     (1 means yes)
# predict_con_tg      Prediction function for continuous targets: mean (=0) or median (=1)
# START_SELECTED      Initial feature-selection bit vector; an empty matrix
#                     means no features are pre-selected
# ---------------------------------------------------------------------------
#
# OUTPUT:
# ---------------------------------------------------------------------------------------------
# NNR_matrix            Indices (into Train) of the k nearest neighbors of each Test record
# CL_matrix             Predicted target value for each Test record
# m_feature_importance  Feature-importance values (empty if feature_importance = 0)
# ---------------------------------------------------------------------------------------------
| |
| m_knn = function( |
| Matrix[Double] Train, |
| Matrix[Double] Test, |
| Matrix[Double] CL, |
| Integer CL_T = 0, |
| Integer trans_continuous = 0, |
| Integer k_value = 5, |
| Integer select_k = 0, |
| Integer k_min = 1, |
| Integer k_max = 100, |
| Integer select_feature = 0, |
| Integer feature_max = 10, |
| Integer interval = 1000, |
| Integer feature_importance = 0, |
| Integer predict_con_tg = 0, |
| Matrix[Double] START_SELECTED = matrix(0, 0, 0) |
| )return( |
| Matrix[Double] NNR_matrix, |
| Matrix[Double] CL_matrix, |
| Matrix[Double] m_feature_importance |
| ){ |
| |
| m_feature_importance = matrix(0, 0, 0); |
| |
  # data preparation: optionally rescale continuous features to [-1,1]
| if( trans_continuous == 1 ){ |
| Train = prepareKNNData( Train); |
| Test = prepareKNNData( Test); |
| } |
| |
| n_records = nrow( Train ); |
| n_features = ncol( Train ); |
| s_selected_k = 5; |
  m_selected_feature = matrix(1,rows=1,cols=n_features); # one flag per feature
| if( select_k == 1 | select_feature==1 ){ |
    # parameter checks and adjustments
| if( select_k==1 ){ |
| if( k_max >= n_records ){ |
| k_max = n_records - 1; |
| print( "k_max should no greater than number of record, change k_max equal " + |
| "( number of record - 1 ) : " + k_max ); |
| } |
| if( k_max >= interval ){ |
| interval = k_max + 1; |
        # k_max must be at most interval - 1 because the query record itself
        # is dropped when searching for its nearest neighbors.
        print( "interval must be greater than k_max; setting interval to : " +
          interval );
| } |
| if( k_max <= 1 ) |
| stop( "uncorrect k_max value" ); |
| if( k_min >= k_max ) |
| stop( "k_min >= k_max" ); |
| } |
| if( select_feature == 1 ){ |
| if( k_value >= n_records ){ |
| k_value = n_records - 1; |
| print( "k_value should be no greater than number of record, change k_value equal " + |
| "( number of record - 1 ) : " + k_value ); |
| } |
| #Select feature only |
| if( nrow(START_SELECTED) == 0 & ncol(START_SELECTED) == 0 ) |
| m_start_selected_feature = matrix( 0,1,n_features ); |
| else |
| m_start_selected_feature = START_SELECTED; |
| } |
| |
| if( select_k == 1 & select_feature == 1){ |
| #Combined k and feature selection |
| print("Start combined k and feature selection ..."); |
| [m_selected_feature,s_selected_k] = |
| getSelectedFeatureAndK( Train,CL,CL_T,m_start_selected_feature, |
| feature_max,k_min,k_max,interval ); |
| } |
| else if( select_k == 1 ){ |
| #Select k only |
| print("Start k select ..."); |
| s_selected_k = getSelectedKBase( Train,CL,CL_T,k_min,k_max,interval ); |
| } |
| else if( select_feature == 1 ){ |
| #Select feature only |
| print("Start feature selection ... "); |
| [m_selected_feature,d_err] = |
| getSelectedFeature( Train,CL,CL_T,m_start_selected_feature, |
| feature_max,k_value,interval ); |
| } |
| } |
| |
| if( feature_importance == 1){ |
| if( k_value >= n_records ){ |
| k_value = n_records - 1; |
| print( "k_value should be no greater than number of record, make k_value equal " + |
| "( number of record - 1 ) : " + k_value ); |
| } |
| [m_feature_importance,m_norm_feature_importance] = |
| getFeatureImportance(Train,CL,CL_T,k_value); |
| } |
| |
  # apply the k / feature selection results to the final search
  # (they are otherwise computed but never used)
  if( select_k == 1 )
    k_value = s_selected_k;
  if( select_feature == 1 ){
    Train = removeEmpty( target=Train, margin="cols", select=m_selected_feature );
    Test = removeEmpty( target=Test, margin="cols", select=m_selected_feature );
  }

  NNR_matrix = naiveKNNsearch(P=Train,Q=Test,K=k_value);
| |
| CL_matrix = matrix( 0,nrow( Test ),1 ); |
| |
| for(i in 1 : nrow(NNR_matrix)) |
| { |
| NNR_tmp_matrix = matrix( 0,k_value,1 ); |
| for( j in 1:k_value ) |
| NNR_tmp_matrix[j,1] = CL[as.scalar( NNR_matrix[i,j] ),1]; |
| |
    if(CL_T == 2) {
      # categorical target: majority vote over the k neighbor labels
      # (labels are assumed to be positive integer category ids)
      m_vote_count = table( NNR_tmp_matrix, matrix( 1,k_value,1 ) );
      t_cl_value = as.scalar( rowIndexMax( t(m_vote_count) ) );
    }
| else { |
| if ( predict_con_tg == 0) |
| t_cl_value = mean( NNR_tmp_matrix ); |
| else if(predict_con_tg == 1) |
| t_cl_value = median( NNR_tmp_matrix ); |
| } |
| |
| CL_matrix[i,1] = t_cl_value; |
| } |
| } |
| |
# naive KNN search implementation
| naiveKNNsearch = function( |
| Matrix[Double] P, |
| Matrix[Double] Q, |
| Integer K |
| )return( |
| Matrix[Double] O |
| ){ |
| num_records = nrow (P); |
| num_features = ncol (P); |
| num_queries = nrow (Q); |
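  # all pairwise squared distances, up to the per-query constant rowSums(Q^2):
  # ||p - q||^2 = ||p||^2 - 2*(p %*% q) + ||q||^2, and dropping the ||q||^2
  # term does not change the neighbor ranking within a column of D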
| Qt = t(Q); |
| PQt = P %*% Qt; |
| P2 = rowSums (P ^ 2); |
| D = -2 * PQt + P2; |
| if (K == 1) { |
| Dt = t(D); |
| O = rowIndexMin (Dt); |
| } else { |
| O = matrix (0, rows = num_queries, cols = K); |
| parfor (i in 1:num_queries) { |
| D_sorted=order(target=D[,i], by=1, decreasing=FALSE, index.return=TRUE); |
| O[i,] = t(D_sorted[1:K,1]); |
| } |
| } |
| } |
| |
# naive KNN search that returns the neighbors' target values (for prediction only)
| #TODO eliminate redundancy |
| naiveKNNsearchForPredict = function( |
| matrix[double] P, |
| matrix[double] Q, |
| matrix[double] L, |
| integer K |
| )return( |
| matrix[double] OL |
| ){ |
| num_records = nrow (P); |
| num_features = ncol (P); |
| num_queries = nrow (Q); |
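  # same reduced-distance computation as in naiveKNNsearch above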
| Qt = t(Q); |
| PQt = P %*% Qt; |
| P2 = rowSums (P ^ 2); |
| D = -2 * PQt + P2; |
| if (K == 1) { |
| Dt = t(D); |
| O = rowIndexMin (Dt); |
| OL = matrix (0, rows = num_queries, cols = 1) |
| parfor( i in 1:num_queries){ |
| OL[i,] = L[as.scalar(O[i,1]),1] |
| } |
| } else { |
| OL = matrix (0, rows = num_queries, cols = K); |
| parfor (i in 1:num_queries) { |
| D_sorted=order(target=cbind(D[,i],L), by=1, decreasing=FALSE, index.return=FALSE); |
| OL[i,] = t(D_sorted[1:K,2]); |
| } |
| } |
| } |
| |
| getErr_k = function ( matrix[double] in_m_neighbor_value, |
| matrix[double] in_m_cl, |
| integer in_i_cl_type , |
| integer in_i_k_min ) |
| return ( matrix[double] out_m_err ) |
| { |
| i_col = ncol( in_m_neighbor_value ); |
| i_row = nrow( in_m_neighbor_value ); |
| |
| out_m_err = matrix( 0,i_row,i_col - in_i_k_min + 1 ); |
| if( in_i_cl_type == 2 ) #category |
| m_correct = in_m_neighbor_value != in_m_cl[1:i_row,]; |
| else #continuous |
    m_correct = (in_m_neighbor_value - in_m_cl[1:i_row,])^2;
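  # for each candidate k, average the per-neighbor errors over the first
  # ( in_i_k_min + i - 1 ) neighbor columns of m_correct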
  parfor( i in 1:(i_col - in_i_k_min + 1), check = 0 ){
| out_m_err[,i] = |
| ( rowSums( m_correct[,1:in_i_k_min + i - 1] ) / ( in_i_k_min + i - 1 ) ); |
| } |
  # returns the error for each record and each k in the range [k_min, i_col]
| } |
| |
| eliminateModel = function ( double s_err_mean, double s_err_vars, integer i_row ) |
| return( boolean out_b_inactived ){ |
  # gamma: error-difference threshold, delta: significance level
  d_gamma = 0.001;
  d_delta = 0.001;
  # one-sided t-test on the accumulated error statistics: the model is
  # eliminated when the t-distribution mass below -gamma falls under delta
  tmp_d_delta = cdf(target = (-d_gamma - s_err_mean)/s_err_vars, dist="t", df=i_row-1);
  out_b_inactived = (tmp_d_delta < d_delta)
| } |
| |
| getErr = function ( matrix[double] in_m_neighbor_value, |
| matrix[double] in_m_cl, |
| integer in_i_cl_type ) |
| return ( matrix[double] out_m_err ) |
| { |
| i_col = ncol( in_m_neighbor_value ); |
| i_row = nrow( in_m_neighbor_value ); |
| if( in_i_cl_type == 2 ) #category |
| m_correct = in_m_neighbor_value != in_m_cl[1:i_row,]; |
| else #continuous |
| m_correct = (in_m_neighbor_value - in_m_cl[1:i_row,])^2; |
| out_m_err = ( rowSums( m_correct[,1:i_col] )/( i_col ) ); |
| } |
| |
| # getSelectedFeatureAndK: |
| # Combine k and feature selection algorithm. |
| # Refer to ADD part "8.Combined k and feature selection" |
| # Argument: |
| # in_m_data input matrix as features |
| # in_m_data_target input matrix as target value |
#   in_i_is_categorical        2 = categorical, otherwise continuous
#   in_m_init_selected         S.user, user-specified initial feature selection
#   in_i_max_select            J, maximum number of features to select
#   k_min        minimum k
#   k_max        maximum k
#   interval     block size for the BRACE algorithm
#
# Return:
| # out_m_selected_feature output matrix for feature selection |
| # out_i_selected_k output k value for k selection |
| |
| getSelectedFeatureAndK = function ( |
| matrix[double] in_m_data, |
| matrix[double] in_m_data_target, |
  integer in_i_is_categorical, # 2 = categorical, otherwise continuous
| matrix[double] in_m_init_selected, |
| integer in_i_max_select, |
| integer k_min, |
| integer k_max, |
| integer interval ) |
| return( |
| matrix[double] out_m_selected_feature, |
| integer out_i_selected_k |
| ) |
| { |
| m_err = matrix( 0,1,k_max-k_min+1 ); |
| m_feature = matrix( 0,k_max-k_min+1,ncol( in_m_data ) ); |
| #Step 1. For each k in [k_min,k_max] ( k_min has default value 1, k_max has default value 100 ) |
| #in parallel select relevant features using FS+BRACE or schemata search described in Section 7. |
| parfor( i in k_min:k_max,check=0 ){ |
| [m_selected_feature,d_err] = |
| getSelectedFeature( in_m_data,in_m_data_target,in_i_is_categorical, |
| in_m_init_selected,in_i_max_select,i,interval ); |
    # store at offset ( i - k_min + 1 ) so indices match the [k_min,k_max] range
    m_err[1,i-k_min+1] = d_err;
    m_feature[i-k_min+1,] = m_selected_feature;
| } |
| #Step 2. Output the combination of features and k with the smallest LOOCV error. |
| i_min_err_index = as.integer( as.scalar( rowIndexMin( m_err ) ) ); |
| out_m_selected_feature = m_feature[i_min_err_index,]; |
| out_i_selected_k = i_min_err_index + k_min - 1; |
| } |
| |
| getFeatureImportance = function ( |
| matrix[double] in_m_data, |
| matrix[double] in_m_data_target, |
  integer in_i_is_categorical, # 2 = categorical, otherwise continuous
| integer k_value) |
| return( |
| matrix[double] out_m_feature_importance, |
| matrix[double] out_m_norm_feature_importance |
| ) |
| { |
| n_feature = ncol(in_m_data) |
| n_record = nrow(in_m_data) |
| if(n_feature <= 1) |
| stop("can't estimate feature importance when ncol = 1") |
| |
| m_err = matrix( 0,n_record,n_feature); |
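  # leave-one-feature-out: re-run KNN with each feature removed in turn and
  # record the per-record error; features whose removal yields a higher error
  # are the more important ones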
| for(i_feature in 1:n_feature){ |
| m_feature_select = matrix(1,1,n_feature) |
| m_feature_select[1,i_feature] = 0; |
| m_in_tmp_data = removeEmpty(target=in_m_data,margin="cols", select= m_feature_select) |
| m_neighbor_value = getKNeighbor( m_in_tmp_data,m_in_tmp_data,in_m_data_target,k_value); |
| m_tmp_err = getErr( m_neighbor_value,in_m_data_target ,in_i_is_categorical ); |
| m_err[,i_feature] = m_tmp_err |
| } |
| out_m_feature_importance = colSums( m_err ); |
| out_m_norm_feature_importance = |
| out_m_feature_importance / as.scalar(rowSums(out_m_feature_importance)) |
| } |
| |
| # prepareKNNData: |
#   Rescale continuous variables to the range [-1,1]
| # Argument: |
| # * in_m_data input matrix as features |
| prepareKNNData = function(matrix[double] in_m_data) |
| return(matrix[double] out_m_data) |
| { |
| m_colmax = colMaxs(in_m_data); |
| m_colmin = colMins(in_m_data); |
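  # note: a constant column (colMaxs == colMins) would cause a division by
  # zero here; inputs are assumed to contain non-constant continuous features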
| out_m_data = 2 * (in_m_data - m_colmin ) / ( m_colmax - m_colmin ) - 1; |
| } |
| |
| getKNeighbor = function(matrix[double] in_m_data, |
| matrix[double] in_m_test_data, |
| matrix[double] in_m_cl, |
| integer in_i_k_max) |
| return (matrix[double] out_m_neighbor_value) |
| { |
  # naive search with in_i_k_max + 1 neighbors: the query records come from
  # the training data, so the nearest match is each record itself and the
  # first neighbor column is dropped below
| m_search_result = naiveKNNsearchForPredict(in_m_data, in_m_test_data, in_m_cl, in_i_k_max + 1) |
  out_m_neighbor_value = m_search_result[ , 2:(in_i_k_max + 1)]
| } |
| |
| # getSelectedKBase: |
| # k selection algorithm with simple KNN algorithm. |
| # Argument: |
| # * in_m_data input matrix as features |
| # * in_m_data_target input matrix as target value |
# * in_i_is_categorical   2 = categorical, otherwise continuous
| # * k_min minimum k |
| # * k_max maximum k |
| # * interval block size |
| # |
| # Return: |
| # * k output k value for k selection |
| getSelectedKBase = function(matrix[double] in_m_data, |
| matrix[double] in_m_data_target, |
                            integer in_i_is_categorical, # 2 = categorical, otherwise continuous
| integer k_min, |
| integer k_max, |
| integer interval) |
| return (integer k) |
| { |
| b_continue_loop = TRUE; |
| i_iter = 1; |
| i_record = nrow(in_m_data); |
| |
| i_active_model_number = k_max - k_min + 1; |
| m_active_flag = matrix(0, 1, i_active_model_number); |
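  # m_active_flag[1,i] = 1 marks candidate k = k_min + i - 1 as eliminated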
| |
| m_iter_err_sum = matrix(0, 1, k_max - k_min + 1); |
| m_iter_err_sum_squared = matrix(0, 1, k_max - k_min + 1); |
| while(b_continue_loop) |
| { |
    # 1. build a k-d tree or use a hashing method (possible future optimization)
    # 2. search the data for the k_max nearest neighbors
| i_process_item = i_iter * interval; |
| if(i_process_item >= i_record) { |
| i_process_item = i_record; |
| b_continue_loop = FALSE; |
| } |
| i_process_begin_item = ((i_iter - 1) * interval) + 1; |
| i_process_end_item = i_process_item; |
| |
| m_neighbor_value = getKNeighbor(in_m_data, in_m_data[i_process_begin_item : i_process_end_item, ], in_m_data_target, k_max); |
| # 3.get matrix of err from k_min to k_max |
| m_err = getErr_k(m_neighbor_value, in_m_data_target[i_process_begin_item : i_process_end_item, ], in_i_is_categorical, k_min); |
| |
    # 4. check this matrix to drop unnecessary k candidates
| m_active_flag_tmp = matrix(0, 1, ncol(m_err)); |
| |
| s_rows_number = i_process_item; |
| |
| m_iter_err_sum = colSums(m_err) + m_iter_err_sum; |
| m_iter_err_sum_squared = colSums(m_err ^ 2) + m_iter_err_sum_squared; |
| |
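    # pairwise racing statistics: m_err_mean[i,j] is the accumulated mean
    # error difference between candidates j and i, and m_err_vars[i,j] the
    # corresponding spread estimate used by the BRACE elimination test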
| m_err_mean = - outer(t(m_iter_err_sum), m_iter_err_sum , "-") / s_rows_number; |
| m_err_vars = ( m_err_mean ^2 * s_rows_number - |
| 2 * m_err_mean * m_iter_err_sum + m_iter_err_sum_squared) / (s_rows_number-1); |
| m_err_vars = sqrt(m_err_vars); |
| |
| parfor(i in 1 : ncol(m_err), check = 0) { |
| parfor(j in 1 : ncol(m_err), check = 0) { |
| b_execute_block = !(j == i |
| | as.scalar(m_active_flag_tmp[1, i]) == 1 # i has dropped, ignore this case |
| | as.scalar(m_active_flag_tmp[1, j]) == 1) # j has dropped, ignore this case |
| if(b_execute_block) { |
| b_flag = eliminateModel(as.scalar(m_err_mean[i, j]), as.scalar(m_err_vars[i, j]), s_rows_number); |
| if(b_flag == TRUE) |
| m_active_flag_tmp[1, i] = 1; |
| } |
| } |
| } |
| |
| m_active_flag = ((m_active_flag + m_active_flag_tmp) >= 1); |
| i_active_model_number = -sum(m_active_flag - 1); |
| |
    # 5. loop-termination check
| if(i_active_model_number <= 1) |
| b_continue_loop = FALSE; |
| |
| i_iter = i_iter + 1; |
| print("i_iter" + i_iter) |
| } |
| |
| k = 0; |
| if(i_active_model_number == 0) { |
| print("All k kick out, use min of range " + k_min); |
| k = k_min; |
| } |
| else if(i_active_model_number == 1) { |
| k = k_min + as.integer(as.scalar(rowIndexMin(m_active_flag))) - 1; |
| print( "Get k, which value is " + k ); |
| } |
| else { |
| m_err_for_order = |
| cbind(t(m_iter_err_sum), matrix(seq(k_min, k_max, 1), k_max - k_min + 1, 1)); |
| m_err_for_order = removeEmpty( |
| target = m_err_for_order * t(m_active_flag == 0), margin = "rows"); |
| for(i in 1 : nrow(m_err_for_order)) { |
| print("k:" + as.scalar(m_err_for_order[i, 2]) + |
| ", err:" + as.scalar(m_err_for_order[i, 1])); |
| } |
| m_err_order = order(target = m_err_for_order, by = 1, decreasing = FALSE, index.return = FALSE); |
| k = as.integer(as.scalar(m_err_order[1, 2])); |
| print("Get minimum LOOCV error, which value is " + k); |
| } |
| } |
| |
| # getSelectedFeature: |
| # feature selection algorithm. |
| # Refer to ADD part "7.1 FS+BRACE" |
| # Argument: |
| # in_m_data input matrix as features |
| # in_m_data_target input matrix as target value |
#   in_i_is_categorical        2 = categorical, otherwise continuous
#   in_m_init_selected         S.user, user-specified initial feature selection
#   in_i_max_select            J, maximum number of features to select
| # k_value k |
| # interval block size for BRACE algorithm |
| # |
| # Return: |
| # out_m_selected_feature output matrix for feature selection |
#   out_d_min_LOOCV            minimum LOOCV error
| |
| getSelectedFeature = function ( |
| matrix[double] in_m_data, |
| matrix[double] in_m_data_target, |
  integer in_i_is_categorical, # 2 = categorical, otherwise continuous
| matrix[double] in_m_init_selected, |
| integer in_i_max_select, |
| integer k_value, |
| integer interval ) |
| return( |
| matrix[double] out_m_selected_feature, |
| double out_d_min_LOOCV |
| ) |
| { |
| i_n_record = nrow( in_m_data ); |
| i_n_column = ncol( in_m_data ); |
| m_main_selected_flag = in_m_init_selected; |
| b_no_feature_selected = TRUE; |
| if( sum( in_m_init_selected ) >= 1 ) |
| b_no_feature_selected = FALSE; |
| |
| d_max_err_value = ( max( in_m_data_target ) - min( in_m_data_target ) ) * 100; |
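  # d_max_err_value is a sentinel error for feature columns that are not
  # evaluated in an iteration, large enough that they never win the
  # LOOCV comparison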
| b_continue_main_loop = TRUE; #level 1 while loop flag |
| d_min_LOOCV = Inf; |
| while( b_continue_main_loop ){ |
| m_feature_selected_flag = m_main_selected_flag; |
| m_this_model_selected_flag = TRUE; |
    i_index_min_LOOCV = -1; # flag for which model wins in the BRACE algorithm
| b_selected_morethan_one = FALSE; |
| b_continue_loop = TRUE; #level 2 while loop flag |
| i_iter = 1; |
| m_iter_err_sum = matrix( 0,1,i_n_column+1 ); |
| m_iter_err_sum_squared = matrix( 0,1,i_n_column+1 ); |
| while( b_continue_loop ){ |
| i_process_item = i_iter*interval; |
| if( i_process_item >= i_n_record ){ |
| i_process_item = i_n_record; |
| b_continue_loop = FALSE; |
| } |
| i_process_begin_item = (i_iter - 1)*interval + 1 |
| i_process_end_item = i_process_item |
| m_err = matrix( d_max_err_value,i_process_end_item - i_process_begin_item + 1,i_n_column+1 ); |
| if( b_no_feature_selected == TRUE ){ |
| parfor( i in 1:i_n_column ,check=0){ |
| if( as.scalar( m_feature_selected_flag[1,i] ) != 1 ){ |
| m_tmp_process_data = in_m_data[,i]; |
| m_neighbor_value = getKNeighbor(m_tmp_process_data, |
| m_tmp_process_data[i_process_begin_item:i_process_end_item,], in_m_data_target,k_value ); |
| m_tmp_err = getErr(m_neighbor_value, |
| in_m_data_target[i_process_begin_item:i_process_end_item,], in_i_is_categorical ); |
| m_err[,i] = m_tmp_err; |
| } |
| } |
| }else{ |
        # Use m_main_selected_flag rather than m_feature_selected_flag:
        #   m_main_selected_flag:    which features are selected so far
        #   m_feature_selected_flag: which features are dropped or already selected
| m_tmp_data = removeEmpty( target=in_m_data ,margin="cols", select = m_main_selected_flag); |
| if( m_this_model_selected_flag == TRUE ){ |
| m_neighbor_value = getKNeighbor( |
| m_tmp_data,m_tmp_data[i_process_begin_item:i_process_end_item,],in_m_data_target, k_value ); |
| m_tmp_err = getErr( m_neighbor_value,in_m_data_target[i_process_begin_item:i_process_end_item,],in_i_is_categorical ); |
| m_err[,i_n_column+1] = m_tmp_err; |
| } |
| parfor( i in 1:i_n_column ,check=0){ |
| if( as.scalar( m_feature_selected_flag[1,i] ) != 1 ){ |
| m_tmp_process_data = cbind( m_tmp_data,in_m_data[,i] ); |
| m_neighbor_value = getKNeighbor( |
| m_tmp_process_data,m_tmp_process_data[i_process_begin_item:i_process_end_item,],in_m_data_target,k_value ); |
| m_tmp_err = getErr( |
| m_neighbor_value,in_m_data_target[i_process_begin_item:i_process_end_item,],in_i_is_categorical ); |
| m_err[,i] = m_tmp_err; |
| } |
| } |
| } |
| if( m_this_model_selected_flag == TRUE ) |
| m_active_flag_tmp = cbind( m_feature_selected_flag,matrix( 0,1,1 ) ); |
| else |
| m_active_flag_tmp = cbind( m_feature_selected_flag,matrix( 1,1,1 ) ); |
| s_rows_number = i_process_item |
| m_iter_err_sum = colSums(m_err) + m_iter_err_sum |
| m_iter_err_sum_squared = colSums(m_err ^ 2) + m_iter_err_sum_squared |
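      # same pairwise mean / spread accumulation as in getSelectedKBase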
| m_err_mean = - outer(t(m_iter_err_sum), m_iter_err_sum , "-") / s_rows_number |
| m_err_vars = ( m_err_mean ^2 * s_rows_number - |
| 2 * m_err_mean * m_iter_err_sum + m_iter_err_sum_squared) / (s_rows_number-1) |
| m_err_vars = sqrt(m_err_vars) |
| parfor( i in 1:ncol( m_err ) ){ |
| parfor( j in 1:ncol( m_err ) ,check=0){ |
| b_execute_block = TRUE; |
| if( j==i ) b_execute_block = FALSE; |
| if( as.scalar( m_active_flag_tmp[1,i] ) == 1 ) b_execute_block = FALSE; |
| #i has dropped, ignore this case |
| if( as.scalar( m_active_flag_tmp[1,j] ) == 1 ) b_execute_block = FALSE; |
| #j has dropped, ignore this case |
| if( b_execute_block ){ |
| b_flag = eliminateModel( as.scalar(m_err_mean[i,j]),as.scalar(m_err_vars[i,j]),s_rows_number); |
| if( b_flag == TRUE ) |
| m_active_flag_tmp[1,i] = 1; |
| } |
| } |
| } |
      # bits are set to 1 for features selected before the current loop and
      # for features dropped within the current loop
| if( sum( m_active_flag_tmp != 1 ) > 1 ) |
| b_selected_morethan_one = TRUE; |
      m_col_sums_err = m_iter_err_sum;
| i_index_min_LOOCV = as.scalar( rowIndexMin( m_col_sums_err ) ); |
| d_min_LOOCV = as.scalar( m_col_sums_err[1,i_index_min_LOOCV] ); |
      # map the appended current-model column ( i_n_column+1 ) to index 0
      i_index_min_LOOCV = i_index_min_LOOCV %% ( i_n_column+1 );
| if( sum( m_active_flag_tmp != 1 ) <= 1 ) |
| b_continue_loop = FALSE; |
| if( as.scalar( m_active_flag_tmp[1,i_n_column+1] ) == 1 ) |
| m_this_model_selected_flag = FALSE; |
| m_feature_selected_flag = m_active_flag_tmp[,1:i_n_column]; |
| i_iter = i_iter + 1; |
| } |
| #select current model, jump out. |
| if( i_index_min_LOOCV == 0 ){ |
| b_continue_main_loop = FALSE; |
| print( "Select Current model" ); |
| }else{ |
| print( "select feature " + i_index_min_LOOCV + ", change bit value to 1" ); |
| m_main_selected_flag[1,i_index_min_LOOCV] = 1; |
| b_no_feature_selected = FALSE; |
| } |
| if( sum( m_main_selected_flag - in_m_init_selected ) >= in_i_max_select ){ |
      # selected the maximum allowed number of features
| b_continue_main_loop = FALSE; |
| } |
| if( sum( m_main_selected_flag ) == i_n_column ){ |
| #all selected |
| b_continue_main_loop = FALSE; |
| } |
| } |
| out_m_selected_feature = m_main_selected_flag; |
| out_d_min_LOOCV = d_min_LOOCV; |
| } |