[DOC] Documentation for builtin mice function Closes #953.

commit: f4780d332de1aba8b05c78f9e6dd50ddc61e28b6 [log] [tgz]
author: Parul Damalu <poco7p@gmail.com> Mon Jun 08 14:49:10 2020 +0530
committer: Janardhan Pulivarthi <j143@protonmail.com> Mon Jun 08 14:52:14 2020 +0530
tree: 3c8f3223a8e1e05710b4b04dd9b1144685892a5e
parent: 8aed1e98f616b3e65ffea4f2d1b54fce6c2ab3e5 [diff]
diff --git a/dev/docs/builtins-reference.md b/dev/docs/builtins-reference.md
index ecbdf13..afc501f 100644
--- a/dev/docs/builtins-reference.md
+++ b/dev/docs/builtins-reference.md

@@ -30,6 +30,7 @@
     * [`lmDS`-Function](#lmds-function)
     * [`lmCG`-Function](#lmcg-function)
     * [`lmpredict`-Function](#lmpredict-function)
+    * [`mice`-Function](#mice-function)
     * [`scale`-Function](#scale-function)
     * [`sigmoid`-Function](#sigmoid-function)
     * [`steplm`-Function](#steplm-function)
@@ -353,6 +354,37 @@
 yp = lmpredict(X, w)
 ```
 
+## `mice`-Function
+
+The `mice`-function implements Multiple Imputation using Chained Equations (MICE) for nominal data.
+
+### Usage
+```r
+mice(F, cMask, iter, complete, verbose)
+```
+
+### Arguments
+| Name     | Type           | Default  | Description |
+| :------- | :------------- | -------- | :---------- |
+| F        | Frame[String]  | required | Data Frame with one-dimensional row matrix with N columns where N>1. |
+| cMask    | Matrix[Double] | required | 0/1 row vector for identifying numeric (0) and categorical features (1) with one-dimensional row matrix with column = ncol(F). |
+| iter     | Integer        | `3`      | Number of iterations for multiple imputations. |
+| complete | Integer        | `3`      | A complete dataset generated through a specific iteration. |
+| verbose  | Boolean        | `FALSE`  | Boolean value. |
+
+### Returns
+| Type           | Description |
+| :------------- | :---------- |
+| Frame[String]  | imputed dataset. |
+| Frame[String]  | A complete dataset generated through a specific iteration. |
+
+### Example
+```r
+F = as.frame(matrix("4 3 2 8 7 8 5", rows=1, cols=7))
+cMask = round(rand(rows=1,cols=ncol(F),min=0,max=1))
+[dataset, singleSet] = mice(F, cMask, iter = 3, complete = 3, verbose = FALSE)
+```
+
 ## `scale`-Function
 
 The scale function is a generic function whose default method centers or scales the column of a numeric matrix.

diff --git a/scripts/builtin/mice.dml b/scripts/builtin/mice.dml
index b00d542..a06a357 100644
--- a/scripts/builtin/mice.dml
+++ b/scripts/builtin/mice.dml

@@ -40,13 +40,13 @@
 # singleSet             Double   ---        A complete dataset generated though a specific iteration
 
 # Assumption missing value are represented with empty string i.e ",," in csv file  
-# variables with suffix n are storing continous/numeric data and variables with suffix c are storing categorical data
+# variables with suffix n are storing continuous/numeric data and variables with suffix c are storing categorical data
 s_mice= function(Frame[String] F, Matrix[Double] cMask, Integer iter = 3, Integer complete = 3, Boolean verbose = FALSE)
 return(Frame[String] dataset, Frame[String] singleSet)
 {
 
   if(ncol(F) == 1)
-    stop("invalid aregument: can not apply mice on single column")
+    stop("invalid argument: can not apply mice on single column")
     
   if(complete > iter)
     complete = iter
@@ -78,7 +78,7 @@
   
   XO = replace(target=X, pattern=NaN, replacement=0);
 
-  # remove categorical features and impute continous features with mean
+  # remove categorical features and impute continuous features with mean
   eX_n = removeEmpty(target=X, margin="cols", select=(cMask==0))
   col_n = ncol(eX_n);
   # storing the mask/address of missing values
@@ -150,7 +150,7 @@
   {
     Mask_Filled_n = Mask_n;
     Mask_Filled_c = Mask_c
-    in_n = 1; in_c = 1; i=1; j=1; # varibales for index selection
+    in_n = 1; in_c = 1; i=1; j=1; # variables for index selection
     while(i <= ncol(dX))
     {
       if(as.scalar(dXMask[1,i]) == 0)
@@ -197,7 +197,7 @@
         test_X =  removeEmpty(target = slice2, margin = "cols", select = selX);
         test_Y = slice2a[,in_c]
        
-        # train clasification model
+        # train classification model
         beta = multiLogReg(X=train_X, Y=train_Y, icpt = 1, tol = 0.00000001, reg = 0.001, maxi = 100, maxii=0, verbose=FALSE)
         # predicting missing values 
         [prob,pred,acc] = multiLogRegPredict(X=test_X, B=beta, Y = test_Y)
commit	f4780d332de1aba8b05c78f9e6dd50ddc61e28b6	[log] [tgz]
author	Parul Damalu <poco7p@gmail.com>	Mon Jun 08 14:49:10 2020 +0530
committer	Janardhan Pulivarthi <j143@protonmail.com>	Mon Jun 08 14:52:14 2020 +0530
tree	3c8f3223a8e1e05710b4b04dd9b1144685892a5e
parent	8aed1e98f616b3e65ffea4f2d1b54fce6c2ab3e5 [diff]