src/ports/postgres/modules/regress/robust.sql_in - madlib - Git at Google

 /* ----------------------------------------------------------------------- *//**
  *
  * @file robust.sql_in
  *
  * @brief SQL functions for robust variance linear and logistic regression
  * @date 2012
  *
  * @sa For a brief introduction to robust variance, see \ref grp_robust
  *
  *//* ----------------------------------------------------------------------- */

 m4_include(`SQLCommon.m4')


 /**
 @addtogroup grp_robust

 <div class="toc"><b>Contents</b>
 <ul>
 <li class="level1"><a href="#train_linregr">Robust Linear Regression Training Function</a></li>
 <li class="level1"><a href="#train_logregr">Robust Logistic Regression Training Function</a></li>
 <li class="level1"><a href="#train_mlogregr">Robust Multinomial Logistic Regression Training Function</a></li>
 <li class="level1"><a href="#robust_variance_coxph">Robust Variance Function For Cox Proportional Hazards</a></li>
 <li class="level1"><a href="#examples">Examples</a></li>
 <li class="level1"><a href="#background">Technical Background</a></li>
 <li class="level1"><a href="#literature">Literature</a></li>
 <li class="level1"><a href="#related">Related Topics</a></li>
 </ul>
 </div>

 @brief Calculates Huber-White variance estimates for linear, logistic, and multinomial regression models, and for Cox proportional hazards models.

 The functions in this module calculate robust variance (Huber-White estimates)
 for linear regression, logistic regression, multinomial logistic regression,
 and Cox proportional hazards.
 They are useful in calculating variances in a dataset with potentially noisy
 outliers. The Huber-White implemented here is identical to the "HC0" sandwich
 operator in the R module "sandwich".

 The interfaces for robust linear, logistic, and multinomial logistic
 regression are similar. Each regression type has its own training function.
 The regression results are saved in an output table with small differences,
 depending on the regression type.

 @warning
 Please note that the interface for Cox proportional hazards, unlike the
 interface of other regression methods, accepts an output
 model table produced by \ref coxph_train() function.

 @anchor train_linregr
 @par Robust Linear Regression Training Function

 The \ref robust_variance_linregr() function has the following syntax:
 <pre class="syntax">
 robust_variance_linregr( source_table,
                          out_table,
                          dependent_varname,
                          independent_varname,
                          grouping_cols
                        )
 </pre>
 <dl class="arglist">
   <dt>source_table</dt>
   <dd>VARCHAR. The name of the table containing the training data.</dd>
   <dt>out_table</dt>
   <dd>VARCHAR. Name of the generated table containing the output model. The output table contains the following columns.
     <table class="output">
       <tr>
         <th>coef</th>
         <td>DOUBLE PRECISION[]. Vector of the coefficients of the regression.</td>
       </tr>
       <tr>
         <th>std_err</th>
         <td>DOUBLE PRECISION[]. Vector of the standard error of the coefficients.</td>
       </tr>
       <tr>
         <th>t_stats</th>
         <td>DOUBLE PRECISION[]. Vector of the t-stats of the coefficients.</td>
       </tr>
       <tr>
         <th>p_values</th>
         <td>DOUBLE PRECISION[]. Vector of the p-values of the coefficients.</td>
       </tr>
     </table>

     A summary table named \<out_table\>_summary is also created, which is the same as the summary table created by linregr_train function. Please refer to the documentation for linear regression for details.
   </dd>
   <dt>dependent_varname</dt>
   <dd>VARCHAR. The name of the column containing the dependent variable.</dd>
   <dt>independent_varname</dt>
   <dd>VARCHAR. Expression list to evaluate for the independent variables.
     An intercept variable is not assumed. It is common to provide an explicit
     intercept term by including a single constant 1 term in the independent variable list.
   </dd>
   <dt>grouping_cols (optional)</dt>
   <dd>VARCHAR, default: NULL. An expression list used to group the input dataset into discrete groups,
     running one regression per group. Similar to the SQL "GROUP BY" clause. When
     this value is NULL, no grouping is used and a single result model is
     generated. Default value: NULL.
   </dd>
 </dl>

 @anchor train_logregr
 @par Robust Logistic Regression Training Function

 The \ref robust_variance_logregr() function has the following syntax:
 <pre class="syntax">
 robust_variance_logregr( source_table,
                          out_table,
                          dependent_varname,
                          independent_varname,
                          grouping_cols,
                          max_iter,
                          optimizer,
                          tolerance,
                          verbose_mode
                        )
 </pre>
 <dl class="arglist">
 <dt>source_table</dt>
 <dd>VARCHAR. The name of the table containing the training data.</dd>
 <dt>out_table</dt>
 <dd>VARCHAR. Name of the generated table containing the output model. The output table has the following columns:
   <table class="output">
     <tr>
       <th>coef</th>
       <td>Vector of the coefficients of the regression.</td>
     </tr>
     <tr>
       <th>std_err</th>
       <td>Vector of the standard error of the coefficients.</td>
     </tr>
     <tr>
       <th>z_stats</th>
       <td>Vector of the z-stats of the coefficients.</td>
     </tr>
     <tr>
       <th>p_values</th>
       <td>Vector of the p-values of the coefficients.</td>
     </tr>
   </table>

   A summary table named \<out_table\>_summary is also created, which is the same as the summary table created by logregr_train function. Please refer to the documentation for logistic regression for details.
 </dd>
 <dt>dependent_varname</dt>
 <dd>VARCHAR. The name of the column containing the independent variable.</dd>
 <dt>independent_varname</dt>
 <dd>VARCHAR. Expression list to evaluate for the independent variables.
 An intercept variable is not assumed. It is common to provide an explicit
 intercept term by including a single constant 1 term in the independent variable
 list.</dd>
 <dt>grouping_cols (optional)</dt>
 <dd>VARCHAR, default: NULL. An expression list used to group the input dataset into discrete groups,
 running one regression per group. Similar to the SQL "GROUP BY" clause. When
 this value is NULL, no grouping is used and a single result model is
 generated. </dd>
 <dt>max_iter (optional)</dt>
 <dd>INTEGER, default: 20. The maximum number of iterations that are allowed.</dd>
 <dt>optimizer</dt>
 <dd>VARCHAR, default: 'fista'. Name of optimizer, either 'fista' or 'igd'.</dd>
 <dt>tolerance (optional)</dt>
 <dd>DOUBLE PRECISION, default: 1e-6. The criteria to end iterations. Both the 'fista' and 'igd'
 optimizers compute the average difference between the coefficients of two
 consecutive iterations, and when the difference is smaller than tolerance or the
 iteration number is larger than max_iter, the computation stops.
 </dd>
 <dt>verbose_mode (optional)</dt>
 <dd>BOOLEAN, default: FALSE. Whether the regression fit should print any warning messages. </dd>
 </dl>

 @anchor train_mlogregr
 @par Robust Multinomial Logistic Regression Function

 The \ref robust_variance_mlogregr() function has the following syntax:
 <pre class="syntax">
 robust_variance_mlogregr( source_table,
                           out_table,
                           dependent_varname,
                           independent_varname,
                           ref_category,
                           grouping_cols,
                           optimizer_params,
                           verbose_mode
                         )
 </pre>
 <dl class="arglist">
 <dt>source_table</dt>
 <dd>VARCHAR. The name of the table containing training data, properly qualified.</dd>
 <dt>out_table</dt>
 <dd>VARCHAR. The name of the table where the regression model will be stored.
 The output table has the following columns:
     <table class="output">
       <tr>
         <th>category</th>
         <td>The category.</td>
       </tr>
       <tr>
         <th>ref_category</th>
         <td>The refererence category used for modeling.</td>
       </tr>
       <tr>
         <th>coef</th>
         <td>Vector of the coefficients of the regression.</td>
       </tr>
       <tr>
         <th>std_err</th>
         <td>Vector of the standard error of the coefficients.</td>
       </tr>
       <tr>
         <th>z_stats</th>
         <td>Vector of the z-stats of the coefficients.</td>
       </tr>
       <tr>
         <th>p_values</th>
         <td>Vector of the p-values of the coefficients.</td>
       </tr>
     </table>

     A summary table named \<out_table\>_summary is also created, which is the same as the summary table created by mlogregr_train function. Please refer to the documentation for multinomial logistic regression for details.
 </dd>
 <dt>dependent_varname</dt>
 <dd>VARCHAR. The name of the column containing the dependent variable.</dd>
 <dt>independent_varname</dt>
 <dd>VARCHAR. Expression list to evaluate for the independent variables.
 An intercept variable is not assumed. It is common to provide an explicit
 intercept term by including a single constant 1 term in the independent variable
 list. The <em>independent_varname</em> can be the name of a column that contains an
 array of numeric values. It can also be a string with the format 'ARRAY[1, x1, x2, x3]',
 where <em>x1</em>, <em>x2</em> and <em>x3</em> are each column names.</dd>
 <dt>ref_category (optional)</dt>
 <dd>INTEGER, default: 0. The reference category.</dd>
 <dt>grouping_cols (optional)</dt>
 <dd>VARCHAR, default: NULL. <em>Not currently implemented. Any non-NULL value is ignored.</em> An expression list used to group the input dataset into discrete groups,
 running one regression per group. Similar to the SQL "GROUP BY" clause. When
 this value is NULL, no grouping is used and a single result model is
 generated.</dd>
 <dt>optimizer_params (optional)</dt>
 <dd>TEXT, default: NULL, which uses the default values of optimizer parameters: max_iter=20, optimizer='newton', tolerance=1e-4. It should be a string that contains pairs of 'key=value' separated by commas.</dd>
 <dt>verbose_mode (optional)</dt>
 <dd>BOOLEAN, default FALSE. <em>Not currently implemented.</em> TRUE if the regression fit should print warning messages.</dd>
 </dl>

 @anchor robust_variance_coxph
 @par Robust Variance Function For Cox Proportional Hazards

 The \ref robust_variance_coxph() function has the following syntax:
 <pre class="syntax">
 robust_variance_coxph(model_table, output_table)
 </pre>

 \b Arguments
 <dl class="arglist">
     <dt>model_table</dt>
     <dd>TEXT. The name of the model table, which is exactaly the same as the 'output_table' parameter of coxph_train() function.</dd>
     <dt>output_table</dt>
     <dd>TEXT. The name of the table where the output is saved. It has the following columns:
         <table class="output">
             <tr>
                 <th>coef</th>
                 <td>FLOAT8[]. Vector of the coefficients.</td>
             </tr>
             <tr>
                 <th>loglikelihood</th>
                 <td>FLOAT8. Log-likelihood value of the MLE estimate.</td>
             </tr>
             <tr>
                 <th>std_err</th>
                 <td>FLOAT8[]. Vector of the standard error of the coefficients.</td>
             </tr>
             <tr>
                 <th>robust_se</th>
                 <td>FLOAT8[]. Vector of the robust standard errors of the coefficients.</td>
             </tr>
             <tr>
                 <th>robust_z</th>
                 <td>FLOAT8[]. Vector of the robust z-stats of the coefficients.</td>
             </tr>
             <tr>
                 <th>robust_p</th>
                 <td>FLOAT8[]. Vector of the robust p-values of the coefficients.</td>
             </tr>
             <tr>
                 <th>hessian</th>
                 <td>FLOAT8[]. The Hessian matrix.</td>
             </tr>
         </table>
     </dd>
 </dl>

 @anchor examples
 @examp

 <b> Logistic Regression Example </b>
 -# View online help for the logistic regression training function.
 <pre class="example">
 SELECT madlib.robust_variance_logregr();
 </pre>
 -#  Create the training data table.
 <pre class="example">
 DROP TABLE IF EXISTS patients;
 CREATE TABLE patients (id INTEGER NOT NULL, second_attack INTEGER,
     treatment INTEGER, trait_anxiety INTEGER);
 COPY patients FROM STDIN WITH DELIMITER '|';
   1 |             1 |         1 |            70
   3 |             1 |         1 |            50
   5 |             1 |         0 |            40
   7 |             1 |         0 |            75
   9 |             1 |         0 |            70
  11 |             0 |         1 |            65
  13 |             0 |         1 |            45
  15 |             0 |         1 |            40
  17 |             0 |         0 |            55
  19 |             0 |         0 |            50
   2 |             1 |         1 |            80
   4 |             1 |         0 |            60
   6 |             1 |         0 |            65
   8 |             1 |         0 |            80
  10 |             1 |         0 |            60
  12 |             0 |         1 |            50
  14 |             0 |         1 |            35
  16 |             0 |         1 |            50
  18 |             0 |         0 |            45
  20 |             0 |         0 |            60
 \\.
 </pre>
 -# Run the logistic regression training function and compute the robust logistic variance of the regression:
 <pre class="example">
 DROP TABLE IF EXISTS patients_logregr;
 SELECT madlib.robust_variance_logregr( 'patients',
                                        'patients_logregr',
                                        'second_attack',
                                        'ARRAY[1, treatment, trait_anxiety]'
                                      );
 </pre>
 -# View the regression results.
 <pre class="example">
 \\x on
 Expanded display is on.
 SELECT * FROM patients_logregr;
 </pre>
 Result:
 <pre class="result">
 &nbsp;-[ RECORD 1 ]-------------------------------------------------------
  coef     | {-6.36346994178179,-1.02410605239327,0.119044916668605}
  std_err  | {3.45872062333648,1.1716192578234,0.0534328864185018}
  z_stats  | {-1.83983346294192,-0.874094587943036,2.22793348156809}
  p_values | {0.0657926909738889,0.382066744585541,0.0258849510757339}
 </pre>
 Alternatively, unnest the arrays in the results for easier reading of output.
 <pre class="example">
 \\x off
 SELECT unnest(array['intercept', 'treatment', 'trait_anxiety' ]) as attribute,
        unnest(coef) as coefficient,
        unnest(std_err) as standard_error,
        unnest(z_stats) as z_stat,
        unnest(p_values) as pvalue
 FROM patients_logregr;
 </pre>

 <b> Cox Proportional Hazards Example </b>
 -# View online help for the robust Cox Proportional hazards training method.
 <pre class="example">
 SELECT madlib.robust_variance_coxph();
 </pre>
 -# Create an input data set.
 <pre class="example">
 DROP TABLE IF EXISTS sample_data;
 CREATE TABLE sample_data (
     id INTEGER NOT NULL,
     grp DOUBLE PRECISION,
     wbc DOUBLE PRECISION,
     timedeath INTEGER,
     status BOOLEAN
 );
 COPY sample_data FROM STDIN DELIMITER '|';
   0 |   0 | 1.45 |        35 | t
   1 |   0 | 1.47 |        34 | t
   3 |   0 |  2.2 |        32 | t
   4 |   0 | 1.78 |        25 | t
   5 |   0 | 2.57 |        23 | t
   6 |   0 | 2.32 |        22 | t
   7 |   0 | 2.01 |        20 | t
   8 |   0 | 2.05 |        19 | t
   9 |   0 | 2.16 |        17 | t
  10 |   0 |  3.6 |        16 | t
  11 |   1 |  2.3 |        15 | t
  12 |   0 | 2.88 |        13 | t
  13 |   1 |  1.5 |        12 | t
  14 |   0 |  2.6 |        11 | t
  15 |   0 |  2.7 |        10 | t
  16 |   0 |  2.8 |         9 | t
  17 |   1 | 2.32 |         8 | t
  18 |   0 | 4.43 |         7 | t
  19 |   0 | 2.31 |         6 | t
  20 |   1 | 3.49 |         5 | t
  21 |   1 | 2.42 |         4 | t
  22 |   1 | 4.01 |         3 | t
  23 |   1 | 4.91 |         2 | t
  24 |   1 |    5 |         1 | t
 \\.
 </pre>
 -# Run the Cox regression function.
 <pre class="example">
 SELECT madlib.coxph_train( 'sample_data',
                            'sample_cox',
                            'timedeath',
                            'ARRAY[grp,wbc]',
                            'status'
                          );
 </pre>
 -# Run the Robust Cox regression function.
 <pre class="example">
 SELECT madlib.robust_variance_coxph( 'sample_cox',
                            'sample_robust_cox'
                          );
 </pre>
 -# View the results of the robust Cox regression.
 <pre class="example">
 \\x on
 SELECT * FROM sample_robust_cox;
 </pre>
 Results:
 <pre class="result">
 -[ RECORD 1 ]-+----------------------------------------------------------------------------
 coef          | {2.54407073265105,1.67172094780081}
 loglikelihood | -37.8532498733452
 std_err       | {0.677180599295459,0.387195514577754}
 robust_se     | {0.621095581073685,0.274773521439328}
 robust_z      | {4.09610180811965,6.08399579058399}
 robust_p      | {4.2016521208424e-05,1.17223683104729e-09}
 hessian       | {{2.78043065745405,-2.25848560642669},{-2.25848560642669,8.50472838284265}}
 </pre>


 @anchor background
 @par Technical Background

 When doing regression analysis, we are sometimes interested in the variance of
 the computed coefficients \f$ \boldsymbol  c \f$.  While the built-in regression
 functions provide variance estimates, we may prefer a <i>robust</i> variance
 estimate.

 The robust variance calculation can be expressed in a sandwich formation, which is the form
 \f[
     S( \boldsymbol c) = B( \boldsymbol c) M( \boldsymbol c) B( \boldsymbol c)
 \f]
 where \f$ B( \boldsymbol c)\f$ and \f$ M( \boldsymbol c)\f$ are matrices.  The \f$ B( \boldsymbol c) \f$ matrix,
 also known as the bread, is relatively straight forward, and can be computed as
 \f[
 B( \boldsymbol c) = n\left(\sum_i^n -H(y_i, x_i, \boldsymbol  c) \right)^{-1}
 \f]
 where \f$ H \f$ is the hessian matrix.

 The \f$ M( \boldsymbol c)\f$ matrix has several variations, each with different robustness properties.
 The form implemented here is the Huber-White sandwich operator, which takes the form
 \f[
 M_{H} =\frac{1}{n} \sum_i^n \psi(y_i,x_i,  \boldsymbol c)^T  \psi(y_i,x_i,  \boldsymbol c).
 \f]

 The above method for calculating robust variance (Huber-White estimates) is
 implemented for linear regression, logistic regression, and multinomial
 logistic regression. It is useful in calculating variances in a dataset with
 potentially noisy outliers.  The Huber-White implemented here is identical to
 the "HC0" sandwich operator in the R module "sandwich".

 When multinomial logistic regression is computed before the multinomial robust
 regression, it uses a default reference category of zero and the regression
 coefficients are included in the output table.  The regression coefficients in
 the output are in the same order as the multinomial logistic regression function,
 which is described below. For a problem with \f$ K \f$ dependent variables \f$
 (1, ..., K) \f$ and \f$ J \f$ categories \f$ (0, ..., J-1) \f$, let \f$
 {m_{k,j}} \f$ denote the coefficient for dependent variable \f$ k \f$ and
 category \f$ j \f$ . The output is \f$ {m_{k_1, j_0}, m_{k_1, j_1} \ldots
 m_{k_1, j_{J-1}}, m_{k_2, j_0}, m_{k_2, j_1} \ldots m_{k_K, j_{J-1}}} \f$. The
 order is NOT CONSISTENT with the multinomial regression marginal effect
 calculation with function <em>marginal_mlogregr</em>.  This is deliberate
 because the interfaces of all multinomial regressions (robust, clustered, ...)
 will be moved to match that used in marginal.

 The robust variance of Cox proportional hazards is more complex because
 coeeficients are trained by maximizing a partial log-likelihood.
 Therefore, one cannot directly use the formula for \f$ M( \boldsymbol c) \f$
 as in Huber-White robust estimator. Extra terms are needed. See [4] for
 details.


 @anchor literature
 @literature

 [1] vce(cluster) function in STATA: http://www.stata.com/help.cgi?vce_option

 [2] clustered estimators in R: http://people.su.se/~ma/clustering.pdf

 [3] Achim Zeileis: Object-oriented Computation of Sandwich Estimators.
     Research Report Series / Department of Statistics and Mathematics, 37.
     Department of Statistics and Mathematics, WU Vienna University of Economics
   and Business, Vienna.
     http://cran.r-project.org/web/packages/sandwich/vignettes/sandwich-OOP.pdf

 [4] D. Y. Lin and L . J. Wei, _The Robust Inference for the Cox Proportional Hazards Model_, Journal of
 the American Statistical Association, Vol. 84, No. 408, p.1074 (1989).

 @anchor related
 @par Related Topics
 File robust.sql_in documenting the SQL functions
 File robust_variance_coxph.sql_in documenting more the SQL functions

 @internal
 @sa Namespace \ref madlib::modules::regress
     documenting the implementation in C++
 @sa Namespace robust_coxph
     \ref madlib::modules::stats documenting the implementation in C++
 @endinternal
 */

 --------------------------- Robust Linear Regression ----------------------------------
 DROP TYPE IF EXISTS MADLIB_SCHEMA.robust_linregr_result CASCADE;
 CREATE TYPE MADLIB_SCHEMA.robust_linregr_result AS (
     coef      DOUBLE PRECISION[],
     std_err   DOUBLE PRECISION[],
     t_stats   DOUBLE PRECISION[],
     p_values  DOUBLE PRECISION[]
 );
 ---------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_linregr_transition(
     state   MADLIB_SCHEMA.bytea8,
     y       DOUBLE PRECISION,
     x       DOUBLE PRECISION[],
     coef    DOUBLE PRECISION[])
 RETURNS MADLIB_SCHEMA.bytea8
 AS 'MODULE_PATHNAME'
 LANGUAGE C
 IMMUTABLE STRICT
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');

 ---------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_linregr_merge_states(
     state1 MADLIB_SCHEMA.bytea8,
     state2 MADLIB_SCHEMA.bytea8)
 RETURNS MADLIB_SCHEMA.bytea8
 AS 'MODULE_PATHNAME'
 LANGUAGE C
 IMMUTABLE STRICT
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');

 ---------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_linregr_final(
     state MADLIB_SCHEMA.bytea8)
 RETURNS MADLIB_SCHEMA.robust_linregr_result
 AS 'MODULE_PATHNAME'
 LANGUAGE C IMMUTABLE STRICT
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');

 ---------------------------------

 CREATE AGGREGATE MADLIB_SCHEMA.robust_linregr(
     /*+ "dependentVariable" */ DOUBLE PRECISION,
     /*+ "independentVariables" */ DOUBLE PRECISION[],
     /*+ "coef" */ DOUBLE PRECISION[]) (
     SFUNC=MADLIB_SCHEMA.robust_linregr_transition,
     STYPE=MADLIB_SCHEMA.bytea8,
     FINALFUNC=MADLIB_SCHEMA.robust_linregr_final,
     m4_ifdef(`__POSTGRESQL__', `', `prefunc=MADLIB_SCHEMA.robust_linregr_merge_states,')
     INITCOND=''
 );

 --------------------------------------------------------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_linregr(
     message VARCHAR
 )
 RETURNS VARCHAR AS $$
 PythonFunction(regress, robust_linear, robust_linregr_help)
 $$ LANGUAGE plpythonu IMMUTABLE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');

 ---------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_linregr()
 RETURNS VARCHAR AS $$
     SELECT MADLIB_SCHEMA.robust_variance_linregr('');
 $$ LANGUAGE sql IMMUTABLE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');

 --------------------------------------------------------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_linregr(
      source_table               VARCHAR       -- name of input  table
    , out_table                  VARCHAR       -- name of output table
    , dependent_varname          VARCHAR       -- name of dependent variable
    , independent_varname        VARCHAR       -- name of independent variable
    , grouping_cols              VARCHAR       -- grouping columns
    , verbose_mode               BOOLEAN       -- printing warning message or not
   )
 RETURNS VOID AS $$
     PythonFunction(regress, robust_linear, robust_variance_linregr)
 $$ LANGUAGE plpythonu VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 ---------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_linregr(
      source_table           VARCHAR
    , out_table              VARCHAR
    , dependent_varname      VARCHAR
    , independent_varname    VARCHAR
    , grouping_cols          VARCHAR
   )
 RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.robust_variance_linregr($1, $2, $3, $4, $5, False);
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 ---------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_linregr(
      source_table           VARCHAR
    , out_table              VARCHAR
    , dependent_varname      VARCHAR
    , independent_varname    VARCHAR
   )
 RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.robust_variance_linregr($1, $2, $3, $4, NULL, False);
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 --------------------------- ROBUST LOGISTIC REGRESSION -------------------------

 DROP TYPE IF EXISTS MADLIB_SCHEMA.robust_logregr_result CASCADE;
 CREATE TYPE MADLIB_SCHEMA.robust_logregr_result AS (
     coef DOUBLE PRECISION[],
     std_err DOUBLE PRECISION[],
     z_stats DOUBLE PRECISION[],
     p_values DOUBLE PRECISION[]
 );

 ---------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_logregr_step_transition(
     state DOUBLE PRECISION[],
     y BOOLEAN,
     x DOUBLE PRECISION[],
     coef DOUBLE PRECISION[])
 RETURNS DOUBLE PRECISION[]
 AS 'MODULE_PATHNAME'
 LANGUAGE C IMMUTABLE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');

 ---------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_logregr_step_merge_states(
     state1 DOUBLE PRECISION[],
     state2 DOUBLE PRECISION[])
 RETURNS DOUBLE PRECISION[]
 AS 'MODULE_PATHNAME'
 LANGUAGE C IMMUTABLE STRICT
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');

 ---------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_logregr_step_final(
     state DOUBLE PRECISION[])
 RETURNS MADLIB_SCHEMA.robust_logregr_result
 AS 'MODULE_PATHNAME'
 LANGUAGE C IMMUTABLE STRICT
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');

 ---------------------------------

 CREATE AGGREGATE MADLIB_SCHEMA.robust_logregr(
       /*+ "dependentVariable" */ BOOLEAN,
       /*+ "independentVariables" */  DOUBLE PRECISION[],
     /*+ "coef" */ DOUBLE PRECISION[] ) (

     STYPE=DOUBLE PRECISION[],
     SFUNC=MADLIB_SCHEMA.robust_logregr_step_transition,
     m4_ifdef(`__POSTGRESQL__', `', `prefunc=MADLIB_SCHEMA.robust_logregr_step_merge_states,')
     FINALFUNC=MADLIB_SCHEMA.robust_logregr_step_final,
     INITCOND='{0,0,0,0,0.0}'
 );

 --------------------------------------------------------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_logregr(
     message VARCHAR
 )
 RETURNS VARCHAR AS $$
 PythonFunction(regress, robust_logistic, robust_logregr_help)
 $$ LANGUAGE plpythonu VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');

 ---------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_logregr()
 RETURNS VARCHAR AS $$
 BEGIN
   RETURN MADLIB_SCHEMA.robust_variance_logregr('');
 END;
 $$ LANGUAGE plpgsql VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
 /**
  *@brief The robust logistic regression function.
  *
  * @param source_table String identifying the input table
  * @param out_table String identifying the output table to be created
  * @param dependent_varname Column containing the dependent variable
  * @param independent_varname Column containing the array of independent variables
  * @param grouping_cols Columns to group by.
  * @param max_iter Integer identifying the maximum iterations used by the logistic regression solver.  Default is 20.
  * @param optimizer String identifying the optimizer used in the logistic regression.  See the documentation in the logistic regression for the available options.  Default is irls.
  * @param tolerance Float identifying the tolerance of the logistic regression optimizer. Default is 1e-4.
  * @param verbose_mode Boolean specifying if the regression fit should print any warning messages.  Default is false.
  * @par
  * To include an intercept in the model, set one coordinate in the
  * <tt>independent_varname</tt> array to 1.
  *
  * @return A composite value:
  *  - <tt>std_err FLOAT8[]</tt> - Array of huber-white standard errors,
  *    \f$ \mathit{se}(c_1), \dots, \mathit{se}(c_k) \f$
  *  - <tt>t_stats FLOAT8[]</tt> - Array of t-statistics, \f$ \boldsymbol t \f$
  *  - <tt>p_values FLOAT8[]</tt> - Array of p-values, \f$ \boldsymbol p \f$
  *
  * @usage
  * For function summary information. Run
  * sql> select robust_variance_logregr('help');
  * OR
  * sql> select robust_variance_logregr();
  * OR
  * sql> select robust_variance_logregr('?');
  * For function usage information. Run
  * sql> select robust_variance_logregr('usage');
  *  - Compute the coefficients, and the get the robust diagnostic statistics:
  *  <pre>
  *  select robust_variance_logregr(source_table, out_table, regression_type, dependentVariable, independentVariables, NULL );
  * </pre>
  *
  *  - If the coefficients are already known, they can be provided directly
  *    <pre>select robust_variance_logregr(source_table, out_table, regression_type, dependentVariable, independentVariables, coef );
  </pre>
  */

 --------------------------------------------------------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_logregr(
     source_table               VARCHAR,   -- name of input  table
     out_table                  VARCHAR,   -- name of output table
     dependent_varname          VARCHAR,   -- name of dependent variable
     independent_varname        VARCHAR,   -- name of independent variable
     grouping_cols              VARCHAR,   -- grouping columns
     max_iter                   INTEGER,   -- maximum of iterations in the optimizer
     optimizer                  VARCHAR,   -- the optimizer used
     tolerance                  DOUBLE PRECISION,
     verbose_mode               BOOLEAN
   )
 RETURNS VOID AS $$
     PythonFunction(regress, robust_logistic, robust_variance_logregr)
 $$ LANGUAGE plpythonu VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 ---------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_logregr(
     source_table            VARCHAR,
     out_table               VARCHAR,
     dependent_variable      VARCHAR,
     independent_variable    VARCHAR,
     grouping_cols           VARCHAR,
     max_iter                INTEGER,
     optimizer               VARCHAR,
     tolerance               DOUBLE PRECISION
   )
 RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.robust_variance_logregr(
         $1, $2, $3, $4, $5, $6, $7, $8, FALSE);
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 ---------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_logregr(
     source_table            VARCHAR,
     out_table               VARCHAR,
     dependent_variable      VARCHAR,
     independent_variable    VARCHAR,
     grouping_cols           VARCHAR,
     max_iter                INTEGER,
     optimizer               VARCHAR
   )
 RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.robust_variance_logregr(
         $1, $2, $3, $4, $5, $6, $7, 1e-4, FALSE);
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 ---------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_logregr(
     source_table            VARCHAR,
     out_table               VARCHAR,
     dependent_variable      VARCHAR,
     independent_variable    VARCHAR,
     grouping_cols           VARCHAR,
     max_iter                INTEGER
   )
 RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.robust_variance_logregr(
         $1, $2, $3, $4, $5, $6, 'irls', 1e-4, FALSE);
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 ---------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_logregr(
     source_table            VARCHAR,
     out_table               VARCHAR,
     dependent_variable      VARCHAR,
     independent_variable    VARCHAR,
     grouping_cols           VARCHAR
   )
 RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.robust_variance_logregr(
         $1, $2, $3, $4, $5, 20, 'irls', 1e-4, FALSE);
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 ---------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_logregr(
     source_table            VARCHAR,
     out_table               VARCHAR,
     dependent_variable      VARCHAR,
     independent_variable    VARCHAR
   )
 RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.robust_variance_logregr(
         $1, $2, $3, $4, NULL, 20, 'irls', 1e-4, FALSE);
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 ---------------- ROBUST MULTINOMIAL LOGISTIC REGRESSION ------------------------

 DROP TYPE IF EXISTS MADLIB_SCHEMA.robust_mlogregr_result CASCADE;
 CREATE TYPE MADLIB_SCHEMA.robust_mlogregr_result AS (
     ref_category    INTEGER,
     coef            DOUBLE PRECISION[],
     std_err         DOUBLE PRECISION[],
     z_stats         DOUBLE PRECISION[],
     p_values        DOUBLE PRECISION[]
 );

 ---------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlogregr_robust_step_transition
 (
     state           DOUBLE PRECISION[],
     y               INTEGER,
     numCategories   INTEGER,
     ref_category    INTEGER,
     x               DOUBLE PRECISION[],
     coef            DOUBLE PRECISION[]
 )
 RETURNS DOUBLE PRECISION[]
 AS 'MODULE_PATHNAME'
 LANGUAGE C IMMUTABLE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');

 ---------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlogregr_robust_step_merge_states
 (
     state1  DOUBLE PRECISION[],
     state2  DOUBLE PRECISION[]
 )
 RETURNS DOUBLE PRECISION[]
 AS 'MODULE_PATHNAME'
 LANGUAGE C IMMUTABLE STRICT
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');

 ---------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlogregr_robust_step_final
 (
     state DOUBLE PRECISION[]
 )
 RETURNS MADLIB_SCHEMA.robust_mlogregr_result
 AS 'MODULE_PATHNAME'
 LANGUAGE C IMMUTABLE STRICT
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');

 ---------------------------------

 CREATE AGGREGATE MADLIB_SCHEMA.robust_mlogregr(
     /*+ "dependentVariable" */      INTEGER,
      /*+ "numCategories" */         INTEGER,
     /*+ "ref_category" */           INTEGER,
     /*+ "independentVariables" */   DOUBLE PRECISION[],
    /*+ "coef" */                    DOUBLE PRECISION[]
 )
 (
     STYPE=DOUBLE PRECISION[],
     SFUNC=MADLIB_SCHEMA.mlogregr_robust_step_transition,
     m4_ifdef(`__POSTGRESQL__', `', `prefunc=MADLIB_SCHEMA.mlogregr_robust_step_merge_states,')
     FINALFUNC=MADLIB_SCHEMA.mlogregr_robust_step_final,
     INITCOND='{0,0,0,0,0}'
 );

 --------------------------------------------------------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr(
      message VARCHAR
 )
 RETURNS VARCHAR AS $$
 PythonFunction(regress, robust_mlogistic, robust_variance_mlogregr_help)
 $$ LANGUAGE plpythonu IMMUTABLE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');

 ---------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr()
 RETURNS VARCHAR AS $$
     SELECT MADLIB_SCHEMA.robust_variance_mlogregr('');
 $$ LANGUAGE sql IMMUTABLE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');

 -------------------- Robust Multinomial Logistic Regression --------------------
 /**
 -- NOTICE ---------------------------------------------------------------
 -- All functions calling other modeling functions should be in the format:
 --      accept a model table created by the underlying modeling function
 --      and extract all parameters from the model table and summary table
 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr(
      model_table    VARCHAR       -- name of the mlogregr model table
    , out_table      VARCHAR       -- name of output table
   )
 RETURNS VOID AS $$
 PythonFunction(regress, robust_mlogistic, robust_variance_mlogregr_new)
 $$ LANGUAGE plpythonu;
 -- END OF NOTICE ---------------------------------------------------------------
 */
 --------------------------------------------------------------------------------

 --------------------------------------------------------------------------------

 /**
  * @brief Compute robust regression diagnostic statistics for multinomial logistic regression.
  *
  * @param source_table name of input table, VARCHAR
  * @param out_table  name of output table, VARCHAR
  * @param dependent_varname dependent variable, VARCHAR
  * @param independent_varname independent variables, VARCHAR
  * @param ref_category Integer specifying the reference category. Default is 0.
  * @param grouping_cols  grouping variables, VARCHAR. Default is NULL. Currently a placeholder.
  * @param max_iter  Integer identifying the maximum iterations used by the logistic regression solver.  Default is 20.
  * @param optimizer  String identifying the optimizer used in the multinomial logistic regression.  See the documentation in the multinomial logistic regression for the available options.  Default is 'irls'.
  * @param tolerance Float identifying the tolerance of the multinomial logistic regression optimizer. Default is 1e-4.
  * @param verbose_mode Boolean specifying if the multinomial logistic regression solver should print any warnings.  Currently a placeholder.
  *
  *
  * @par
  * To include an intercept in the model, set one coordinate in the
  * <tt>independentVariables</tt> array to 1.
  *
  *  @usage
  * <pre>
  * SELECT  madlib.robust_variance_mlogregr(
  *     <em>'source_table'</em>,        -- name of input table, VARCHAR
  *     <em>'out_table'</em>,           -- name of output table, VARCHAR
  *     <em>'dependent_varname'</em>,   -- dependent variable, VARCHAR
  *     <em>'independent_varname'</em>, -- independent variables, VARCHAR
  *     <em>ref_category</em>,        -- [OPTIONAL] Integer specifying the reference category. Default is 0.
  *     <em>'grouping_cols'</em>,        -- [OPTIONAL] grouping variables, VARCHAR. Default is NULL.
  *     <em>max_iter</em>,           -- [OPTIONAL] Integer identifying the maximum iterations used by the logistic regression solver.  Default is 20.
  *     <em>'optimizer'</em>,            -- [OPTIONAL] String identifying the optimizer used in the multinomial logistic regression.  See the documentation in the multinomial logistic regression for the available options.  Default is irls.
  *  <em>tolerance</em>,         -- [OPTIONAL] Float identifying the tolerance of the multinomial logistic regression optimizer. Default is 1e-4.
  *  <em>verbose_mode</em>        -- [OPTIONAL] Boolean specifying if the regression fit should print any warning messages.  Default is false.
  * );
  * </pre>
  *
  * @return A composite value:
  *  - <tt>ref_category INTEGER</tt> - The reference category
  *  - <tt>coef FLOAT8[] </tt> - The coefficients for the regression
  *  - <tt>std_err FLOAT8[]</tt> - Array of huber-white standard errors,
  *  - <tt>z_stats FLOAT8[]</tt> - Array of Wald z-statistics,
  *  - <tt>p_values FLOAT8[]</tt> - Array of p-values,
  *
  */
  CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr(
   source_table        VARCHAR,            -- name of input table
   out_table           VARCHAR,            -- name of output table
   dependent_varname   VARCHAR,            -- name of dependent variable
   independent_varname VARCHAR,            -- name of independent variable
   ref_category        INTEGER,            -- reference category (default 0)
   grouping_cols       VARCHAR,            -- grouping columns (PLACEHOLDER) (default NULL)
   optimizer_params    VARCHAR,            -- a comma-separated string with optimizer parameters
   verbose_mode        BOOLEAN             -- PLACEHOLDER (default False)
 )
 RETURNS VOID AS $$
 PythonFunction(regress, robust_mlogistic, robust_variance_mlogregr)
 $$ LANGUAGE plpythonu VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 ---------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr(
     source_table        VARCHAR,
     out_table           VARCHAR,
     dependent_varname   VARCHAR,
     independent_varname VARCHAR,
     ref_category        INTEGER,
     grouping_cols       VARCHAR,
     optimizer_params    VARCHAR
 )
 RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.robust_variance_mlogregr(
         $1, $2, $3, $4, $5, $6, $7, FALSE);
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 ---------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr(
     source_table        VARCHAR,
     out_table           VARCHAR,
     dependent_varname   VARCHAR,
     independent_varname VARCHAR,
     ref_category        INTEGER,
     grouping_cols       VARCHAR
 )
 RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.robust_variance_mlogregr($1, $2, $3, $4, $5, $6,
          'max_iter=20, optimizer=irls, tolerance=1e-4', FALSE);
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 ---------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr(
     source_table        VARCHAR,
     out_table           VARCHAR,
     dependent_varname   VARCHAR,
     independent_varname VARCHAR,
     ref_category        INTEGER
 )
 RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.robust_variance_mlogregr($1, $2, $3, $4, $5, NULL,
                 'max_iter=20, optimizer=irls, tolerance=1e-4', FALSE);
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 ---------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr(
     source_table        VARCHAR,
     out_table           VARCHAR,
     dependent_varname   VARCHAR,
     independent_varname VARCHAR
 )
 RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.robust_variance_mlogregr($1, $2, $3, $4, 0, NULL,
                 'max_iter=20, optimizer=irls, tolerance=1e-4', FALSE);
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 --------------------------------------------------------------------------------

 -- DEPRECATED NOTICE -----------------------------------------------------------
 -- The below functions has been deprecated and should be removed in next major
 --    version update

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr(
   source_table        VARCHAR,            -- name of input table
   out_table           VARCHAR,            -- name of output table
   dependent_varname   VARCHAR,            -- name of dependent variable
   independent_varname VARCHAR,            -- name of independent variable
   ref_category        INTEGER,            -- reference category (default 0)
   grouping_cols       VARCHAR,            -- grouping columns (PLACEHOLDER) (default NULL)
   max_iter            INTEGER,            -- max number of itertions (default 20)
   optimizer           VARCHAR,            -- optimizer ['irls', 'newton'] (default 'irls')
   tolerance           DOUBLE PRECISION,   -- tolerance (default 1e-4)
   verbose_mode        BOOLEAN             -- printing warning message or not (default False)
 )
 RETURNS VOID AS $$
 BEGIN
     RAISE WARNING $sql$This function has been deprecated.
         Please run "SELECT MADLIB_SCHEMA.robust_variance_mlogregr('usage');"
         to get the latest function usage.$sql$;
     PERFORM MADLIB_SCHEMA.robust_variance_mlogregr(
         $1, $2, $3, $4, $5, $6, 'max_iter=' || cast($7 as text) ||
                                 ', optimizer=' || $8 ||
                                 ', tolerance=' || cast($9 as text),
         $10);
 END;
 $$ LANGUAGE PLPGSQL VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 ---------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr(
     source_table        VARCHAR,
     out_table           VARCHAR,
     dependent_varname   VARCHAR,
     independent_varname VARCHAR,
     ref_category        INTEGER,
     grouping_cols       VARCHAR,
     max_iter            INTEGER,
     optimizer           VARCHAR,
     tolerance           DOUBLE PRECISION
 )
 RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.robust_variance_mlogregr($1, $2, $3, $4, $5, $6, $7,
     $8, $9, FALSE);
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 ---------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr(
     source_table        VARCHAR,
     out_table           VARCHAR,
     dependent_varname   VARCHAR,
     independent_varname VARCHAR,
     ref_category        INTEGER,
     grouping_cols       VARCHAR,
     max_iter            INTEGER,
     optimizer           VARCHAR
 )
 RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.robust_variance_mlogregr($1, $2, $3, $4, $5, $6, $7,
     $8, 1e-4, FALSE);
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 ---------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr(
     source_table        VARCHAR,
     out_table           VARCHAR,
     dependent_varname   VARCHAR,
     independent_varname VARCHAR,
     ref_category        INTEGER,
     grouping_cols       VARCHAR,
     max_iter            INTEGER
 )
 RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.robust_variance_mlogregr($1, $2, $3, $4, $5, $6, $7,
     'irls', 1e-4, FALSE);
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
 -- END OF DEPRECATED NOTICE -----------------------------------------------------------