scripts/staging/knn.dml - systemds - Git at Google

 #-------------------------------------------------------------
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 #
 #-------------------------------------------------------------
 #
 # THIS SCRIPT IMPLEMENTS KNN( K Nearest Neighbor ) ALGORITHM
 #
 # INPUT   PARAMETERS:
 # ---------------------------------------------------------------------------------------------
 # NAME    TYPE     DEFAULT     OPTIONAL     MEANING
 # ---------------------------------------------------------------------------------------------
 # X                     String   ---   N    Location to read the input matrix as features
 # T                     String   ---   N    Location to read the input matrix for nearest neighbor
 #                                           search
 # Y                     String   ---    Y   Location to read the input matrix as target
 # Y_T                   String   ---    Y   Location to read the target type of matrix Y whether
 #                                           columns in Y are continuous ( =1 ) or
 #                                           categorical ( =2 )
 # K                     String   ---    Y   Location to write the result of selected k value
 # PR                    String   ---    Y   Location to write the predicted value result,
 #                                           it's optional when Y is NOT specified.
 # NNR                   String   ---    N   Location to write the nearest neighbor index result for T.
 # trans_continuous      int      0      Y   Option flag for continuous feature transformed to [-1,1]:
 #                                           0 = do not transform continuous variable;
 #                                           1 = transform continuous variable;
 # select_k              int      0      Y   Use k selection algorithm to estimate k
 #                                           ( 1 means yes )
 # k_min                 int      1      Y   Min k value(  available if select_k = 1 )
 # k_max                 int      100    Y   Max k value(  available if select_k = 1 )
 # select_feature        int      0      Y   Use feature selection algorithm to select feature
 #                                           ( 1 means yes )
 # START_SELECTED        String   ---    Y   Location to read feature selection initinal value
 # FEATURE_SELECTED      String   ---    Y   Location to write feature selection result
 # feature_max           int      10     Y   Max feature selection
 # interval              int      1000   Y   Interval value for K selecting (  available if select_k = 1 )
 # feature_importance    int      0      Y   Use feature importance algorithm to estimate each feature
 #                                           ( 1 means yes )
 # FEATURE_IMPORTANCE_VALUE String --- Y Location to write feature importance result
 # k_value               int      5      Y   k value for KNN, ignore if select_k enable
 # predict_con_tg        int      0      Y   Continuous  target predict function: mean(=0) or
 #                                           median(=1)
 # fmt                   String   "text" Y   Matrix output format for MB, ML, usually "text" or "csv"
 # ---------------------------------------------------------------------------------------------
 # OUTPUT: Matrix PR, Matrix NNR, Matrix FEATURE_IMPORTANCE_VALUE
 #
 # HOW TO INVOKE THIS SCRIPT - EXAMPLE:
 # hadoop jar SystemML.jar -f knn.dml -nvargs X=%datapath%/train.csv
 #                       T=%datapath%/test.csv NNR=%tmpdir%/NNR.csv fmt=csv


 print( "start knn algorithm" );
 #input data
 TrainPath = $X;
 TestPath = $T;
 CLPath = ifdef( $Y," " );
 CLTypePath = ifdef( $Y_T," " );
 #output data
 NNRPath = $NNR;
 PRPath = ifdef( $PR," " );
 #parameter
 epsilon = ifdef( $epsilon,0 );
 distance_type = ifdef( $distance_type,1 );
 trans_continuous = ifdef( $trans_continuous,0 );
 predict_con_tg = ifdef($predict_con_tg,0);
 k_value = ifdef( $k_value,5 );
 FMT = ifdef( $fmt,"text" );
 #k select parameter
 select_k = ifdef( $select_k,0 );
 k_min = ifdef( $k_min,1 );
 k_max = ifdef( $k_max,100 );
 #feature select parameter
 select_feature = ifdef( $select_feature,0 );
 feature_max = ifdef( $feature_max,10 );
 #feature importance parameter
 feature_importance = ifdef ($feature_importance,0);


 #Read train data
 Train = read ( TrainPath );
 Test = read ( TestPath );

 #data prepare
 if( trans_continuous == 1 ){
     print( "Do data prepare to transfer continuous variable to [-1,1] ..." );
     Train = prepareKNNData( Train);
     Test  = prepareKNNData( Test);
 }

 n_records = nrow( Train );
 n_features = ncol( Train );
 s_selected_k = 5;
 m_selected_feature = matrix(1,rows=1,cols=n_records);
 if( select_k == 1 | select_feature==1 ){
     default_interval = as.integer( ceil( sqrt( n_records ) ) );
     interval = ifdef( $interval,default_interval );
     print( "interval is " + interval );
     #parameter check
     if(  CLPath == " " | CLTypePath == " "){
         stop( "Y or Y_T must be set for k selection algorithm" );
     }
     #parameter re-define
     if( select_k==1 ){
         if(  k_max >= n_records  ){
             k_max = n_records - 1;
             print( "k_max should no greater than number of record, change k_max equal " +
             "( number of record - 1 ) : " + k_max );
         }
         if(  k_max >= interval  ){
             interval = k_max + 1;
             # k_max should equal interval -1, because we drop self when search nn.
             print( "interval should be no less than k_max, change interval equal : " +
             interval );
         }
         if(  k_max <= 1  ){
             stop( "uncorrect k_max value"  );
         }
         if(  k_min >= k_max ){
             stop( "k_min >= k_max" );
         }
     }
     if( select_feature == 1 ){
         if(  k_value >= n_records  ){
             k_value = n_records - 1;
             print( "k_value should be no greater than number of record, change k_value equal " +
             "( number of record - 1 ) : " + k_value );
         }
         #Select feature only
         StartSelectedPath = ifdef( $START_SELECTED," " );
         SelectedPath = ifdef( $FEATURE_SELECTED," " );
         if( StartSelectedPath == " " ){
             m_start_selected_feature = matrix( 0,1,n_features );
         }else{
             m_start_selected_feature = read( StartSelectedPath );
         }
     }
     CL = read( CLPath );
     CLType = read( CLTypePath );
     type = as.integer( as.scalar( CLType[1,1] ) );
     if( select_k == 1 & select_feature == 1){
         #Combined k and feature selection
         print("Start combined k and feature selection ...");
         [m_selected_feature,s_selected_k] =
             getSelectedFeatureAndK( Train,CL,type,m_start_selected_feature,
                 feature_max,k_min,k_max,interval );
     }else if( select_k == 1 ){
         #Select k only
         print("Start k select ...");
         s_selected_k = getSelectedKBase( Train,CL,type,k_min,k_max,interval );
     }else if( select_feature == 1 ){
         #Select feature only
         print("Start feature selection ... ");
         [m_selected_feature,d_err] =
             getSelectedFeature( Train,CL,type,m_start_selected_feature,
                 feature_max,k_value,interval );
     }
 }
 if( feature_importance == 1){
     if(  k_value >= n_records  ){
         k_value = n_records - 1;
         print( "k_value should be no greater than number of record, make k_value equal " +
         "( number of record - 1 ) : " + k_value );
     }
     CL = read( CLPath );
     CLType = read( CLTypePath );
     type = as.integer( as.scalar( CLType[1,1] ) );
     FeatureImportanceExportPath = ifdef( $FEATURE_IMPORTANCE_VALUE," " );
     if(FeatureImportanceExportPath != " "){
         [m_feature_importance,m_norm_feature_importance] =
             getFeatureImportance(Train,CL,type,k_value);
         #print( "feature importance is "  );
         #tmp = matrixDisplay( m_feature_importance );
         write ( m_feature_importance,FeatureImportanceExportPath,format=FMT );
     }
 }

 # knn search
 print("Start knn search ...")
 # apply k selection result
 if(select_k == 1){
     k_value = s_selected_k
 }
 if(  k_value >= n_records ){
     k_value = n_records - 1;
     print( "k_value should be no greater than number of record, make k_value equal " +
         "( number of record - 1 )" );
 }
 print( "k value is " + k_value );
 # apply feature selection result
 if( select_feature == 1 ){
     if( n_features > sum( m_selected_feature ) ){
         Train = removeEmpty( target = Train,margin="cols",select = m_selected_feature);
         Test = removeEmpty( target = Test,margin="cols",select = m_selected_feature );
     }
 }

 NNR_matrix = naiveKNNsearch(Train,Test,k_value)
 write( NNR_matrix,NNRPath,format=FMT );

 # main part for knn predicted
 if( CLPath != " " ){
     CL = read ( CLPath );
     if(  CLTypePath == " " ){
         stop( "Set Y but Miss Y_T for label type" );
     }
     CLType = read( CLTypePath );
     i_cl_type = as.integer( as.scalar( CLType[1,1] ) );
 }
 CL_matrix = matrix( 0,nrow( Test ),1 );
 if( CLPath != " " ){
     parfor( i in 1:nrow( NNR_matrix ) ,check=0){
         NNR_tmp_matrix = matrix( 0,k_value,1 );
         for( j in 1:k_value ){
             NNR_tmp_matrix[j,1] = CL[as.scalar( NNR_matrix[i,j] ),1] ;
         }
         t_cl_value = 0.0 #test data label value
         if( i_cl_type == 2 ){
             t_cl_value = as.scalar( rowIndexMax( t( table( NNR_tmp_matrix,1 ) ) ) )
         }else{
             if ( predict_con_tg == 0){
                 t_cl_value = mean( NNR_tmp_matrix );
             } else if(predict_con_tg == 1){
                 t_cl_value = median( NNR_tmp_matrix );
             }
         }
         CL_matrix[i,1] = t_cl_value;
     }
 }
 if( PRPath != " " ){
     write( CL_matrix,PRPath,format=FMT );
 }

 #naive knn search implement
 naiveKNNsearch = function(
     matrix[double] P,
     matrix[double] Q,
     integer K
 )return(
     matrix[double] O
 ){
     num_records = nrow (P);
     num_features = ncol (P);
     num_queries = nrow (Q);
     Qt = t(Q);
     PQt = P %*% Qt;
     P2 = rowSums (P ^ 2);
     D = -2 * PQt + P2;
     if (K == 1) {
         Dt = t(D);
         O = rowIndexMin (Dt);
     } else {
         O = matrix (0, rows = num_queries, cols = K);
         parfor (i in 1:num_queries) {
             D_sorted=order(target=D[,i], by=1, decreasing=FALSE, index.return=TRUE);
             O[i,] = t(D_sorted[1:K,1]);
         }
     }
 }
 #naive knn search for predict value only implement
 naiveKNNsearchForPredict = function(
     matrix[double] P,
     matrix[double] Q,
     matrix[double] L,
     integer K
 )return(
     matrix[double] OL
 ){
     num_records = nrow (P);
     num_features = ncol (P);
     num_queries = nrow (Q);
     Qt = t(Q);
     PQt = P %*% Qt;
     P2 = rowSums (P ^ 2);
     D = -2 * PQt + P2;
     if (K == 1) {
         Dt = t(D);
         O = rowIndexMin (Dt);
         OL = matrix (0, rows = num_queries, cols = 1)
         parfor( i in 1:num_queries){
             OL[i,] = L[as.scalar(O[i,1]),1]
         }
     } else {
         OL = matrix (0, rows = num_queries, cols = K);
         parfor (i in 1:num_queries) {
             D_sorted=order(target=append(D[,i],L), by=1, decreasing=FALSE, index.return=FALSE);
             OL[i,] = t(D_sorted[1:K,2]);
         }
     }
 }
 # prepareKNNData:
 #   Do data prepare - [-1,1] transform for continues variable
 # Argument:
 # in_m_data                     input matrix as features
 prepareKNNData = function (
     matrix[double] in_m_data
      )
 return(
     matrix[double] out_m_data
      )
 {
     m_colmax = colMaxs(in_m_data);
     m_colmin = colMins(in_m_data);
     out_m_data = 2*( in_m_data - m_colmin )/( m_colmax - m_colmin ) - 1;
 }

 getKNeighbor = function (  matrix[double] in_m_data,
         matrix[double] in_m_test_data,
         matrix[double] in_m_cl,
         integer in_i_k_max   )
     return (  matrix[double] out_m_neighbor_value  ) {
     #to naive
     m_search_result = naiveKNNsearchForPredict(in_m_data,in_m_test_data,in_m_cl,in_i_k_max + 1)
     out_m_neighbor_value = m_search_result[ ,2:in_i_k_max + 1]
 }

 getErr_k = function (  matrix[double] in_m_neighbor_value,
         matrix[double] in_m_cl,
         integer in_i_cl_type ,
         integer in_i_k_min  )
     return (  matrix[double] out_m_err  ){
     i_col = ncol( in_m_neighbor_value  );
     i_row  = nrow( in_m_neighbor_value  );

     out_m_err = matrix( 0,i_row,i_col - in_i_k_min + 1  );
     if( in_i_cl_type == 2  ){
         #category
         m_correct = ppred( in_m_neighbor_value,in_m_cl[1:i_row,],"!="  );
     }else{
         #continues
         m_correct = (in_m_neighbor_value - in_m_cl[1:i_row,])^2;#ppred( in_m_neighbor_value,in_m_cl,"-"  );
     }
     parfor( i in 1:i_col-in_i_k_min+1 ,check = 0 ){
         out_m_err[,i] =
             ( rowSums( m_correct[,1:in_i_k_min + i - 1]  ) / ( in_i_k_min + i - 1  )  );
     }
     #return err for each record and each k ( belong to range 1~max  );
 }
 /*
 eliminateModel = function (  matrix[double] in_j_err, matrix[double] in_j1_err   )
     return(  boolean out_b_inactived   ){
     out_b_inactived = FALSE;
     i_row  = nrow( in_j_err  );
     m_err_mean = as.scalar( colSums( in_j_err - in_j1_err  ) / i_row  );
     m_err_variance = as.scalar(
         sqrt(  colSums( ( in_j_err - in_j1_err - m_err_mean  ) ^ 2  ) / ( i_row-1  )))

     #alpha, beta, gamma, delta
     d_gamma = 0.001;
     d_delta = 0.001;

     tmp_d_delta = cdf(
         target = ( -d_gamma - m_err_mean  ) / m_err_variance, dist="t",df=i_row-1  );
     #print( "m_err_mean=" + m_err_mean + ",m_err_variance=" +
     #   m_err_variance + ",tmp_d_delta=" + tmp_d_delta  );
     if( tmp_d_delta < d_delta  ){
         out_b_inactived = TRUE;
     }

 }*/
 eliminateModel = function (  double s_err_mean, double s_err_vars, integer i_row  )
     return(  boolean out_b_inactived   ){
     out_b_inactived = FALSE;
     #alpha, beta, gamma, delta
     d_gamma = 0.001;
     d_delta = 0.001;

     tmp_d_delta = cdf(
         target = ( -d_gamma - s_err_mean  ) / s_err_vars, dist="t",df=i_row-1  );
     #print( "m_err_mean=" + m_err_mean + ",m_err_variance=" +
     #   m_err_variance + ",tmp_d_delta=" + tmp_d_delta  );
     if( tmp_d_delta < d_delta  ){
         out_b_inactived = TRUE;
     }

 }
 # getSelectedKBase:
 #   k selection algorithm with simple KNN algorithm.
 #   Refer to ADD part "6.3 Optimized LOOCV + BRACE"
 # Argument:
 # in_m_data                     input matrix as features
 # in_m_data_target              input matrix as target value
 # in_i_is_categorical           1 = category , 0 = continuous
 # k_min                         minimun k
 # k_max                         maximun k
 # interval                      block size for BRACE algorithm
 #
 # Reture:
 # k                             output k value for k selection
 getSelectedKBase = function (
     matrix[double] in_m_data,
     matrix[double] in_m_data_target,
     integer in_i_is_categorical, # 1 = category , 0 = continuous
     integer k_min,
     integer k_max,
     integer interval  )
 return(
     integer k
       )
 {
     b_continue_loop = TRUE;
     i_iter = 1;
     i_record  = nrow( in_m_data  );

     m_active_flag = matrix( 0,1,k_max-k_min+1  );
     i_active_model_number = k_max - k_min + 1;

     m_iter_err_sum = matrix( 0,1,k_max-k_min+1);
     m_iter_err_sum_squared = matrix( 0,1,k_max-k_min+1);
     while( b_continue_loop  ){
         # 1.build k-d tree? , or use hash method
         # 2.search data to get k_max nearest neighbor
         i_process_item = i_iter*interval;
         if(  i_process_item >= i_record  ){
             i_process_item = i_record;
             b_continue_loop = FALSE;
         }
         i_process_begin_item = (i_iter - 1)*interval + 1
 		#i_process_begin_item = 1
         i_process_end_item = i_process_item

         m_neighbor_value = getKNeighbor( in_m_data,in_m_data[i_process_begin_item:i_process_end_item,],in_m_data_target,k_max  );
         # 3.get matrix of err from k_min to k_max
         m_err = getErr_k( m_neighbor_value,in_m_data_target[i_process_begin_item:i_process_end_item,],in_i_is_categorical,k_min  );

         # 4.check this matrix to drop unnessary record
         m_active_flag_tmp = matrix( 0,1,ncol( m_err  )  );

         s_rows_number = i_process_item#nrow(m_err);

         m_iter_err_sum = colSums(m_err) + m_iter_err_sum
         m_iter_err_sum_squared = colSums(m_err ^ 2) + m_iter_err_sum_squared

         m_err_mean = - outer(t(m_iter_err_sum), m_iter_err_sum , "-") / s_rows_number
         m_err_vars = ( m_err_mean ^2 * s_rows_number -
             2 * m_err_mean * m_iter_err_sum  + m_iter_err_sum_squared) / (s_rows_number-1)
         m_err_vars = sqrt(m_err_vars)

         parfor( i in 1:ncol( m_err  )  ){
             #m_err_i = m_err - m_err[,i];
             #m_err_i_mean = colMeans(m_err_i);
             #m_err_i_vars = colVars(m_err_i);
             parfor( j in 1:ncol( m_err  ),check=0  ){
                 b_execute_block = TRUE;
                 if( j==i  ) b_execute_block = FALSE;
                 if( as.scalar( m_active_flag_tmp[1,i]  ) == 1  )  b_execute_block = FALSE;
                 #i has dropped, ignore this case
                 if( as.scalar( m_active_flag_tmp[1,j]  ) == 1  )  b_execute_block = FALSE;
                 #j has dropped, ignore this case
                 if( b_execute_block  ){
                     b_flag = eliminateModel( as.scalar(m_err_mean[i,j]),as.scalar(m_err_vars[i,j]),s_rows_number);
                     if(  b_flag == TRUE  ){
                         m_active_flag_tmp[1,i] = 1;
                     }
                 }
             }
         }

         #m_tmp = ppred( colSums( m_active_flag_tmp  ),1,">="  );
         m_active_flag =  ppred( m_active_flag + m_active_flag_tmp,1,">="  );
         i_active_model_number = -sum( m_active_flag - 1  );

         # 5.break while check
         if( i_active_model_number <= 1  ){
             b_continue_loop = FALSE;
         }
         i_iter = i_iter + 1;
         print("i_iter" + i_iter)
     }

     k = 0;
     if(  i_active_model_number == 0   ){
         print( "All k kick out, use min of range " + k_min  );
         k = k_min;
     }else if( i_active_model_number == 1  ){
         k = k_min + as.integer( as.scalar( rowIndexMin( m_active_flag  )  )  ) - 1;
         print( "Get k, which value is " + k  );
     }else{
         m_err_for_order =
             append( t( m_iter_err_sum ), matrix( seq( k_min,k_max,1  ),k_max-k_min+1,1  )  );
         m_err_for_order = removeEmpty(
             target=m_err_for_order * t( ppred( m_active_flag,0,"=="  )  ),margin="rows"  );
         for( i in 1:nrow( m_err_for_order  )  ){
             print( "k:" + as.scalar( m_err_for_order[i,2]  ) +
                 ",err:" + as.scalar( m_err_for_order[i,1]  )  );
         }
         m_err_order = order( target=m_err_for_order,by=1,decreasing=FALSE,index.return=FALSE  );
         k = as.integer( as.scalar( m_err_order[1,2]  )  );
         print( "Get minimum LOOCV error, which value is " + k  );
     }
 }

 getErr = function (  matrix[double] in_m_neighbor_value,
         matrix[double] in_m_cl,
         integer in_i_cl_type )
     return (  matrix[double] out_m_err ){
     i_col = ncol( in_m_neighbor_value );
     i_row  = nrow( in_m_neighbor_value );

     if( in_i_cl_type == 2 ){
         #category
         m_correct = ppred( in_m_neighbor_value,in_m_cl[1:i_row,],"!=" );
     }else{
         #continues
         m_correct = (in_m_neighbor_value - in_m_cl[1:i_row,])^2;#ppred( in_m_neighbor_value,in_m_cl,"-" );
     }
     out_m_err = ( rowSums( m_correct[,1:i_col] )/( i_col ) );
 }

 # getSelectedFeature:
 #   feature selection algorithm.
 #   Refer to ADD part "7.1 FS+BRACE"
 # Argument:
 # in_m_data                     input matrix as features
 # in_m_data_target              input matrix as target value
 # in_i_is_categorical           1 = category , 0 = continuous
 # in_m_init_selected            S.user, initial selected feature which use specified
 # in_i_max_select               J, max feature selected
 # k_value                       k
 # interval                      block size for BRACE algorithm
 #
 # Reture:
 # out_m_selected_feature        output matrix for feature selection
 # out_d_min_LOOCV               output err

 getSelectedFeature = function (
     matrix[double] in_m_data,
     matrix[double] in_m_data_target,
     integer in_i_is_categorical, # 1 = category , 0 = continuous
     matrix[double] in_m_init_selected,
     integer in_i_max_select,
     integer k_value,
     integer interval )
 return(
     matrix[double] out_m_selected_feature,
     double out_d_min_LOOCV
      )
 {
     i_n_record  = nrow( in_m_data );
     i_n_column = ncol( in_m_data );
     m_main_selected_flag = in_m_init_selected;
     b_no_feature_selected = TRUE;
     if( sum( in_m_init_selected ) >= 1 ){
         b_no_feature_selected = FALSE;
     }
     d_max_err_value = ( max( in_m_data_target ) - min( in_m_data_target ) ) * 100;
     b_continue_main_loop = TRUE;    #level 1 while loop flag
     d_min_LOOCV = 1.0/0.0;
     while( b_continue_main_loop ){
         m_feature_selected_flag = m_main_selected_flag;
         m_this_model_selected_flag = TRUE;
         i_index_min_LOOCV = -1; # flag for which model win in BRACE algorith
         b_selected_morethan_one = FALSE;
         b_continue_loop = TRUE; #level 2 while loop flag
         i_iter = 1;
         m_iter_err_sum = matrix( 0,1,i_n_column+1 );
         m_iter_err_sum_squared = matrix( 0,1,i_n_column+1 );
         while( b_continue_loop ){
             #print( "*************" + i_iter + "*************" );
             i_process_item = i_iter*interval;
             if(  i_process_item >= i_n_record ){
                 i_process_item = i_n_record;
                 b_continue_loop = FALSE;
             }
             i_process_begin_item = (i_iter - 1)*interval + 1
             #i_process_begin_item = 1
             i_process_end_item = i_process_item
             m_err = matrix( d_max_err_value,i_process_end_item - i_process_begin_item + 1,i_n_column+1 );
             if( b_no_feature_selected == TRUE ){
                 parfor( i in 1:i_n_column ,check=0){
                     if( as.scalar( m_feature_selected_flag[1,i] ) != 1 ){
                         m_tmp_process_data = in_m_data[,i];
                         m_neighbor_value = getKNeighbor(
                             m_tmp_process_data,m_tmp_process_data[i_process_begin_item:i_process_end_item,],
                             in_m_data_target,k_value );
                         m_tmp_err = getErr(
                             m_neighbor_value,in_m_data_target[i_process_begin_item:i_process_end_item,],in_i_is_categorical );
                         m_err[,i] = m_tmp_err;
                     }
                 }
             }else{
                 #Use m_main_selected_flag but not m_feature_selected_flag,
                 # m_main_selected_flag: which feature are init selected
                 # m_feature_selected_flag: which feature are dropped & init selected
                 m_tmp_data = removeEmpty( target=in_m_data ,margin="cols", select = m_main_selected_flag);
                 if( m_this_model_selected_flag == TRUE ){
                     m_neighbor_value = getKNeighbor(
                         m_tmp_data,m_tmp_data[i_process_begin_item:i_process_end_item,],in_m_data_target, k_value );

                     m_tmp_err = getErr( m_neighbor_value,in_m_data_target[i_process_begin_item:i_process_end_item,],in_i_is_categorical );
                     m_err[,i_n_column+1] = m_tmp_err;
                 }
                 parfor( i in 1:i_n_column ,check=0){
                     if( as.scalar( m_feature_selected_flag[1,i] ) != 1 ){
                         m_tmp_process_data = append( m_tmp_data,in_m_data[,i] );
                         m_neighbor_value = getKNeighbor(
                             m_tmp_process_data,m_tmp_process_data[i_process_begin_item:i_process_end_item,],in_m_data_target,k_value );
                         m_tmp_err = getErr(
                             m_neighbor_value,in_m_data_target[i_process_begin_item:i_process_end_item,],in_i_is_categorical );
                         m_err[,i] = m_tmp_err;
                     }
                 }
             }
             if( m_this_model_selected_flag == TRUE ){
                 m_active_flag_tmp = append( m_feature_selected_flag,matrix( 0,1,1 ) );
             }else{
                 m_active_flag_tmp = append( m_feature_selected_flag,matrix( 1,1,1 ) );
             }

             s_rows_number = i_process_item#nrow(m_err);

             m_iter_err_sum = colSums(m_err) + m_iter_err_sum
             m_iter_err_sum_squared = colSums(m_err ^ 2) + m_iter_err_sum_squared

             m_err_mean = - outer(t(m_iter_err_sum), m_iter_err_sum , "-") / s_rows_number
             m_err_vars = ( m_err_mean ^2 * s_rows_number -
                     2 * m_err_mean * m_iter_err_sum  + m_iter_err_sum_squared) / (s_rows_number-1)
             m_err_vars = sqrt(m_err_vars)

             parfor( i in 1:ncol( m_err ) ){
                 #m_err_i_mean = (m_iter_err_sum - m_iter_err_sum[,i])/s_rows_number
                 #m_err_i_vars = (m_iter_err_sum_squared + m_err_i_mean ^2 * s_rows_number -
                 #   2 * m_iter_err_sum * m_err_i_mean) / (s_rows_number-1)
                 #m_err_i_vars = sqrt(m_err_i_vars)
                 #m_err_i_mean = m_err_mean[i,]
                 #m_err_i_vars = m_err_vars[i,]
                 parfor( j in 1:ncol( m_err ) ,check=0){
                     b_execute_block = TRUE;
                     if( j==i ) b_execute_block = FALSE;
                     if( as.scalar( m_active_flag_tmp[1,i] ) == 1 )  b_execute_block = FALSE;
                     #i has dropped, ignore this case
                     if( as.scalar( m_active_flag_tmp[1,j] ) == 1 )  b_execute_block = FALSE;
                     #j has dropped, ignore this case
                     if( b_execute_block ){
                         #print( "i=" + i + ",j=" + j );
                         b_flag = eliminateModel( as.scalar(m_err_mean[i,j]),as.scalar(m_err_vars[i,j]),s_rows_number);
                         if(  b_flag == TRUE ){
                             m_active_flag_tmp[1,i] = 1;
                         }
                     }
                 }
             }
             #We mark bit to 1 for selected feature before current loop,
             #and mark bit to 1 also for dropped feature in current loop

             if( sum( ppred( m_active_flag_tmp,1,"!=" ) ) > 1 ){
                 b_selected_morethan_one = TRUE;
             }
             m_col_sums_err = m_iter_err_sum #colSums( m_err );
             i_index_min_LOOCV = as.scalar( rowIndexMin( m_col_sums_err ) );
             d_min_LOOCV = as.scalar( m_col_sums_err[1,i_index_min_LOOCV] );
             i_index_min_LOOCV = i_index_min_LOOCV%% ( i_n_column+1 )

             if( sum( ppred( m_active_flag_tmp,1,"!=" ) ) <= 1 ){
                 b_continue_loop = FALSE;
             }
             if( as.scalar( m_active_flag_tmp[1,i_n_column+1] ) == 1 ){
                 m_this_model_selected_flag = FALSE;
             }
             m_feature_selected_flag = m_active_flag_tmp[,1:i_n_column];
             i_iter = i_iter + 1;
         }
         #select current model, jump out.
         if( i_index_min_LOOCV == 0 ){
             b_continue_main_loop = FALSE;
             print( "Select Current model" );
         }else{
             print( "select feature " + i_index_min_LOOCV + ", change bit value to 1" );
             m_main_selected_flag[1,i_index_min_LOOCV] = 1;
             b_no_feature_selected = FALSE;
         }
         if( sum( m_main_selected_flag - in_m_init_selected ) >= in_i_max_select ){
             #select more than 10
             b_continue_main_loop = FALSE;
         }
         if( sum( m_main_selected_flag ) == i_n_column ){
             #all selected
             b_continue_main_loop = FALSE;
         }
     }
     out_m_selected_feature = m_main_selected_flag;
     out_d_min_LOOCV = d_min_LOOCV;
 }

 # getSelectedFeatureAndK:
 #   Combine k and feature selection algorithm.
 #   Refer to ADD part "8.Combined k and feature selection"
 # Argument:
 # in_m_data                     input matrix as features
 # in_m_data_target              input matrix as target value
 # in_i_is_categorical           1 = category , 0 = continuous
 # in_m_init_selected            S.user, initial selected feature which use specified
 # in_i_max_select               J, max feature selected
 # k_min                         minimun k
 # k_max                         maximun k
 # interval                      block size for BRACE algorithm
 #
 # Reture:
 # out_m_selected_feature        output matrix for feature selection
 # out_i_selected_k              output k value for k selection
 getSelectedFeatureAndK = function (
     matrix[double] in_m_data,
     matrix[double] in_m_data_target,
     integer in_i_is_categorical, # 1 = category , 0 = continuous
     matrix[double] in_m_init_selected,
     integer in_i_max_select,
     integer k_min,
     integer k_max,
     integer interval )
 return(
     matrix[double] out_m_selected_feature,
     integer out_i_selected_k
      )
 {
     m_err = matrix( 0,1,k_max-k_min+1 );
     m_feature = matrix( 0,k_max-k_min+1,ncol( in_m_data ) );

     #Step 1. For each k in [k_min,k_max] ( k_min has default value 1, k_max has default value 100 )
     #in parallel select relevant features using FS+BRACE or schemata search described in Section 7.
     parfor( i in k_min:k_max,check=0 ){
         #print( " ************** " + i  + " ****************" );
         [m_selected_feature,d_err] =
             getSelectedFeature( in_m_data,in_m_data_target,in_i_is_categorical
             ,in_m_init_selected,in_i_max_select,i,interval );

         m_err[1,i] = d_err;
         m_feature[i,] = m_selected_feature;
         #print( "err:" + d_err );
     }

     #Step 2. Output the combination of features and k with the smallest LOOCV error.
     i_min_err_index = as.integer( as.scalar( rowIndexMin( m_err ) ) );
     out_m_selected_feature = m_feature[i_min_err_index,];
     out_i_selected_k = i_min_err_index + k_min - 1;
     #print( "min err k is " + out_i_selected_k );
 }

 getFeatureImportance = function (
     matrix[double] in_m_data,
     matrix[double] in_m_data_target,
     integer in_i_is_categorical, # 1 = category , 0 = continuous
     integer k_value)
 return(
     matrix[double] out_m_feature_importance,
     matrix[double] out_m_norm_feature_importance
      )
 {
     n_feature = ncol(in_m_data)
     n_record = nrow(in_m_data)
     if(n_feature <= 1){
         stop("can't estimate feature importance when ncol = 1")
     }
     m_err = matrix( 0,n_record,n_feature);
     parfor(i_feature in 1:n_feature, check=0){
         m_feature_select = matrix(1,1,n_feature)
         m_feature_select[1,i_feature] = 0;
         m_in_tmp_data = removeEmpty(target=in_m_data,margin="cols", select= m_feature_select)
         m_neighbor_value = getKNeighbor( m_in_tmp_data,m_in_tmp_data,in_m_data_target,k_value);
         m_tmp_err = getErr( m_neighbor_value,in_m_data_target ,in_i_is_categorical );
         m_err[,i_feature] = m_tmp_err
     }
     out_m_feature_importance = colSums( m_err );
     out_m_norm_feature_importance =
         out_m_feature_importance / as.scalar(rowSums(out_m_feature_importance))
 }