| /* ----------------------------------------------------------------------- *//** |
| * |
| * @file robust.sql_in |
| * |
| * @brief SQL functions for robust variance linear and logistic regression |
| * @date 2012 |
| * |
| * @sa For a brief introduction to robust variance, see \ref grp_robust |
| * |
| *//* ----------------------------------------------------------------------- */ |
| |
| m4_include(`SQLCommon.m4') |
| |
| |
| /** |
| @addtogroup grp_robust |
| |
| <div class="toc"><b>Contents</b> |
| <ul> |
| <li class="level1"><a href="#train_linregr">Robust Linear Regression Training Function</a></li> |
| <li class="level1"><a href="#train_logregr">Robust Logistic Regression Training Function</a></li> |
| <li class="level1"><a href="#train_mlogregr">Robust Multinomial Logistic Regression Training Function</a></li> |
| <li class="level1"><a href="#robust_variance_coxph">Robust Variance Function For Cox Proportional Hazards</a></li> |
| <li class="level1"><a href="#examples">Examples</a></li> |
| <li class="level1"><a href="#background">Technical Background</a></li> |
| <li class="level1"><a href="#literature">Literature</a></li> |
| <li class="level1"><a href="#related">Related Topics</a></li> |
| </ul> |
| </div> |
| |
| @brief Calculates Huber-White variance estimates for linear, logistic, and multinomial regression models, and for Cox proportional hazards models. |
| |
| The functions in this module calculate robust variance (Huber-White estimates) |
| for linear regression, logistic regression, multinomial logistic regression, |
| and Cox proportional hazards. |
| They are useful in calculating variances in a dataset with potentially noisy |
| outliers. The Huber-White implemented here is identical to the "HC0" sandwich |
| operator in the R module "sandwich". |
| |
| The interfaces for robust linear, logistic, and multinomial logistic |
| regression are similar. Each regression type has its own training function. |
| The regression results are saved in an output table with small differences, |
| depending on the regression type. |
| |
| @warning |
| Please note that the interface for Cox proportional hazards, unlike the |
| interface of other regression methods, accepts an output |
| model table produced by \ref coxph_train() function. |
| |
| @anchor train_linregr |
| @par Robust Linear Regression Training Function |
| |
| The \ref robust_variance_linregr() function has the following syntax: |
| <pre class="syntax"> |
| robust_variance_linregr( source_table, |
| out_table, |
| dependent_varname, |
| independent_varname, |
| grouping_cols |
| ) |
| </pre> |
| <dl class="arglist"> |
| <dt>source_table</dt> |
| <dd>VARCHAR. The name of the table containing the training data.</dd> |
| <dt>out_table</dt> |
| <dd>VARCHAR. Name of the generated table containing the output model. The output table contains the following columns. |
| <table class="output"> |
| <tr> |
| <th>coef</th> |
| <td>DOUBLE PRECISION[]. Vector of the coefficients of the regression.</td> |
| </tr> |
| <tr> |
| <th>std_err</th> |
| <td>DOUBLE PRECISION[]. Vector of the standard error of the coefficients.</td> |
| </tr> |
| <tr> |
| <th>t_stats</th> |
| <td>DOUBLE PRECISION[]. Vector of the t-stats of the coefficients.</td> |
| </tr> |
| <tr> |
| <th>p_values</th> |
| <td>DOUBLE PRECISION[]. Vector of the p-values of the coefficients.</td> |
| </tr> |
| </table> |
| |
| A summary table named \<out_table\>_summary is also created, which is the same as the summary table created by linregr_train function. Please refer to the documentation for linear regression for details. |
| </dd> |
| <dt>dependent_varname</dt> |
| <dd>VARCHAR. The name of the column containing the dependent variable.</dd> |
| <dt>independent_varname</dt> |
| <dd>VARCHAR. Expression list to evaluate for the independent variables. |
| An intercept variable is not assumed. It is common to provide an explicit |
| intercept term by including a single constant 1 term in the independent variable list. |
| </dd> |
| <dt>grouping_cols (optional)</dt> |
| <dd>VARCHAR, default: NULL. An expression list used to group the input dataset into discrete groups, |
| running one regression per group. Similar to the SQL "GROUP BY" clause. When |
| this value is NULL, no grouping is used and a single result model is |
| generated. Default value: NULL. |
| </dd> |
| </dl> |
| |
| @anchor train_logregr |
| @par Robust Logistic Regression Training Function |
| |
| The \ref robust_variance_logregr() function has the following syntax: |
| <pre class="syntax"> |
| robust_variance_logregr( source_table, |
| out_table, |
| dependent_varname, |
| independent_varname, |
| grouping_cols, |
| max_iter, |
| optimizer, |
| tolerance, |
| verbose_mode |
| ) |
| </pre> |
| <dl class="arglist"> |
| <dt>source_table</dt> |
| <dd>VARCHAR. The name of the table containing the training data.</dd> |
| <dt>out_table</dt> |
| <dd>VARCHAR. Name of the generated table containing the output model. The output table has the following columns: |
| <table class="output"> |
| <tr> |
| <th>coef</th> |
| <td>Vector of the coefficients of the regression.</td> |
| </tr> |
| <tr> |
| <th>std_err</th> |
| <td>Vector of the standard error of the coefficients.</td> |
| </tr> |
| <tr> |
| <th>z_stats</th> |
| <td>Vector of the z-stats of the coefficients.</td> |
| </tr> |
| <tr> |
| <th>p_values</th> |
| <td>Vector of the p-values of the coefficients.</td> |
| </tr> |
| </table> |
| |
| A summary table named \<out_table\>_summary is also created, which is the same as the summary table created by logregr_train function. Please refer to the documentation for logistic regression for details. |
| </dd> |
| <dt>dependent_varname</dt> |
| <dd>VARCHAR. The name of the column containing the dependent variable.</dd> |
| <dt>independent_varname</dt> |
| <dd>VARCHAR. Expression list to evaluate for the independent variables. |
| An intercept variable is not assumed. It is common to provide an explicit |
| intercept term by including a single constant 1 term in the independent variable |
| list.</dd> |
| <dt>grouping_cols (optional)</dt> |
| <dd>VARCHAR, default: NULL. An expression list used to group the input dataset into discrete groups, |
| running one regression per group. Similar to the SQL "GROUP BY" clause. When |
| this value is NULL, no grouping is used and a single result model is |
| generated. </dd> |
| <dt>max_iter (optional)</dt> |
| <dd>INTEGER, default: 20. The maximum number of iterations that are allowed.</dd> |
| <dt>optimizer (optional)</dt> |
| <dd>VARCHAR, default: 'irls'. Name of the optimizer used by the underlying logistic regression. See the logistic regression documentation for the available options.</dd> |
| <dt>tolerance (optional)</dt> |
| <dd>DOUBLE PRECISION, default: 1e-4. The criterion to end iterations. The computation |
| stops when the change between two consecutive iterations is smaller than tolerance or the |
| iteration number is larger than max_iter. See the logistic regression documentation |
| for the exact convergence measure used by each optimizer. |
| </dd> |
| <dt>verbose_mode (optional)</dt> |
| <dd>BOOLEAN, default: FALSE. Whether the regression fit should print any warning messages. </dd> |
| </dl> |
| |
| @anchor train_mlogregr |
| @par Robust Multinomial Logistic Regression Training Function |
| |
| The \ref robust_variance_mlogregr() function has the following syntax: |
| <pre class="syntax"> |
| robust_variance_mlogregr( source_table, |
| out_table, |
| dependent_varname, |
| independent_varname, |
| ref_category, |
| grouping_cols, |
| optimizer_params, |
| verbose_mode |
| ) |
| </pre> |
| <dl class="arglist"> |
| <dt>source_table</dt> |
| <dd>VARCHAR. The name of the table containing training data, properly qualified.</dd> |
| <dt>out_table</dt> |
| <dd>VARCHAR. The name of the table where the regression model will be stored. |
| The output table has the following columns: |
| <table class="output"> |
| <tr> |
| <th>category</th> |
| <td>The category.</td> |
| </tr> |
| <tr> |
| <th>ref_category</th> |
| <td>The reference category used for modeling.</td> |
| </tr> |
| <tr> |
| <th>coef</th> |
| <td>Vector of the coefficients of the regression.</td> |
| </tr> |
| <tr> |
| <th>std_err</th> |
| <td>Vector of the standard error of the coefficients.</td> |
| </tr> |
| <tr> |
| <th>z_stats</th> |
| <td>Vector of the z-stats of the coefficients.</td> |
| </tr> |
| <tr> |
| <th>p_values</th> |
| <td>Vector of the p-values of the coefficients.</td> |
| </tr> |
| </table> |
| |
| A summary table named \<out_table\>_summary is also created, which is the same as the summary table created by mlogregr_train function. Please refer to the documentation for multinomial logistic regression for details. |
| </dd> |
| <dt>dependent_varname</dt> |
| <dd>VARCHAR. The name of the column containing the dependent variable.</dd> |
| <dt>independent_varname</dt> |
| <dd>VARCHAR. Expression list to evaluate for the independent variables. |
| An intercept variable is not assumed. It is common to provide an explicit |
| intercept term by including a single constant 1 term in the independent variable |
| list. The <em>independent_varname</em> can be the name of a column that contains an |
| array of numeric values. It can also be a string with the format 'ARRAY[1, x1, x2, x3]', |
| where <em>x1</em>, <em>x2</em> and <em>x3</em> are each column names.</dd> |
| <dt>ref_category (optional)</dt> |
| <dd>INTEGER, default: 0. The reference category.</dd> |
| <dt>grouping_cols (optional)</dt> |
| <dd>VARCHAR, default: NULL. <em>Not currently implemented. Any non-NULL value is ignored.</em> An expression list used to group the input dataset into discrete groups, |
| running one regression per group. Similar to the SQL "GROUP BY" clause. When |
| this value is NULL, no grouping is used and a single result model is |
| generated.</dd> |
| <dt>optimizer_params (optional)</dt> |
| <dd>TEXT, default: NULL, which uses the default values of optimizer parameters: max_iter=20, optimizer='newton', tolerance=1e-4. It should be a string that contains pairs of 'key=value' separated by commas.</dd> |
| <dt>verbose_mode (optional)</dt> |
| <dd>BOOLEAN, default FALSE. <em>Not currently implemented.</em> TRUE if the regression fit should print warning messages.</dd> |
| </dl> |
| |
| @anchor robust_variance_coxph |
| @par Robust Variance Function For Cox Proportional Hazards |
| |
| The \ref robust_variance_coxph() function has the following syntax: |
| <pre class="syntax"> |
| robust_variance_coxph(model_table, output_table) |
| </pre> |
| |
| \b Arguments |
| <dl class="arglist"> |
| <dt>model_table</dt> |
| <dd>TEXT. The name of the model table, which is exactly the same as the 'output_table' parameter of the coxph_train() function.</dd> |
| <dt>output_table</dt> |
| <dd>TEXT. The name of the table where the output is saved. It has the following columns: |
| <table class="output"> |
| <tr> |
| <th>coef</th> |
| <td>FLOAT8[]. Vector of the coefficients.</td> |
| </tr> |
| <tr> |
| <th>loglikelihood</th> |
| <td>FLOAT8. Log-likelihood value of the MLE estimate.</td> |
| </tr> |
| <tr> |
| <th>std_err</th> |
| <td>FLOAT8[]. Vector of the standard error of the coefficients.</td> |
| </tr> |
| <tr> |
| <th>robust_se</th> |
| <td>FLOAT8[]. Vector of the robust standard errors of the coefficients.</td> |
| </tr> |
| <tr> |
| <th>robust_z</th> |
| <td>FLOAT8[]. Vector of the robust z-stats of the coefficients.</td> |
| </tr> |
| <tr> |
| <th>robust_p</th> |
| <td>FLOAT8[]. Vector of the robust p-values of the coefficients.</td> |
| </tr> |
| <tr> |
| <th>hessian</th> |
| <td>FLOAT8[]. The Hessian matrix.</td> |
| </tr> |
| </table> |
| </dd> |
| </dl> |
| |
| @anchor examples |
| @examp |
| |
| <b> Logistic Regression Example </b> |
| -# View online help for the logistic regression training function. |
| <pre class="example"> |
| SELECT madlib.robust_variance_logregr(); |
| </pre> |
| -# Create the training data table. |
| <pre class="example"> |
| DROP TABLE IF EXISTS patients; |
| CREATE TABLE patients (id INTEGER NOT NULL, second_attack INTEGER, |
| treatment INTEGER, trait_anxiety INTEGER); |
| COPY patients FROM STDIN WITH DELIMITER '|'; |
| 1 | 1 | 1 | 70 |
| 3 | 1 | 1 | 50 |
| 5 | 1 | 0 | 40 |
| 7 | 1 | 0 | 75 |
| 9 | 1 | 0 | 70 |
| 11 | 0 | 1 | 65 |
| 13 | 0 | 1 | 45 |
| 15 | 0 | 1 | 40 |
| 17 | 0 | 0 | 55 |
| 19 | 0 | 0 | 50 |
| 2 | 1 | 1 | 80 |
| 4 | 1 | 0 | 60 |
| 6 | 1 | 0 | 65 |
| 8 | 1 | 0 | 80 |
| 10 | 1 | 0 | 60 |
| 12 | 0 | 1 | 50 |
| 14 | 0 | 1 | 35 |
| 16 | 0 | 1 | 50 |
| 18 | 0 | 0 | 45 |
| 20 | 0 | 0 | 60 |
| \\. |
| </pre> |
| -# Run the logistic regression training function and compute the robust logistic variance of the regression: |
| <pre class="example"> |
| DROP TABLE IF EXISTS patients_logregr; |
| SELECT madlib.robust_variance_logregr( 'patients', |
| 'patients_logregr', |
| 'second_attack', |
| 'ARRAY[1, treatment, trait_anxiety]' |
| ); |
| </pre> |
| -# View the regression results. |
| <pre class="example"> |
| \\x on |
| Expanded display is on. |
| SELECT * FROM patients_logregr; |
| </pre> |
| Result: |
| <pre class="result"> |
| -[ RECORD 1 ]------------------------------------------------------- |
| coef | {-6.36346994178179,-1.02410605239327,0.119044916668605} |
| std_err | {3.45872062333648,1.1716192578234,0.0534328864185018} |
| z_stats | {-1.83983346294192,-0.874094587943036,2.22793348156809} |
| p_values | {0.0657926909738889,0.382066744585541,0.0258849510757339} |
| </pre> |
| Alternatively, unnest the arrays in the results for easier reading of output. |
| <pre class="example"> |
| \\x off |
| SELECT unnest(array['intercept', 'treatment', 'trait_anxiety' ]) as attribute, |
| unnest(coef) as coefficient, |
| unnest(std_err) as standard_error, |
| unnest(z_stats) as z_stat, |
| unnest(p_values) as pvalue |
| FROM patients_logregr; |
| </pre> |
| |
| <b> Cox Proportional Hazards Example </b> |
| -# View online help for the robust Cox Proportional hazards training method. |
| <pre class="example"> |
| SELECT madlib.robust_variance_coxph(); |
| </pre> |
| -# Create an input data set. |
| <pre class="example"> |
| DROP TABLE IF EXISTS sample_data; |
| CREATE TABLE sample_data ( |
| id INTEGER NOT NULL, |
| grp DOUBLE PRECISION, |
| wbc DOUBLE PRECISION, |
| timedeath INTEGER, |
| status BOOLEAN |
| ); |
| COPY sample_data FROM STDIN DELIMITER '|'; |
| 0 | 0 | 1.45 | 35 | t |
| 1 | 0 | 1.47 | 34 | t |
| 3 | 0 | 2.2 | 32 | t |
| 4 | 0 | 1.78 | 25 | t |
| 5 | 0 | 2.57 | 23 | t |
| 6 | 0 | 2.32 | 22 | t |
| 7 | 0 | 2.01 | 20 | t |
| 8 | 0 | 2.05 | 19 | t |
| 9 | 0 | 2.16 | 17 | t |
| 10 | 0 | 3.6 | 16 | t |
| 11 | 1 | 2.3 | 15 | t |
| 12 | 0 | 2.88 | 13 | t |
| 13 | 1 | 1.5 | 12 | t |
| 14 | 0 | 2.6 | 11 | t |
| 15 | 0 | 2.7 | 10 | t |
| 16 | 0 | 2.8 | 9 | t |
| 17 | 1 | 2.32 | 8 | t |
| 18 | 0 | 4.43 | 7 | t |
| 19 | 0 | 2.31 | 6 | t |
| 20 | 1 | 3.49 | 5 | t |
| 21 | 1 | 2.42 | 4 | t |
| 22 | 1 | 4.01 | 3 | t |
| 23 | 1 | 4.91 | 2 | t |
| 24 | 1 | 5 | 1 | t |
| \\. |
| </pre> |
| -# Run the Cox regression function. |
| <pre class="example"> |
| SELECT madlib.coxph_train( 'sample_data', |
| 'sample_cox', |
| 'timedeath', |
| 'ARRAY[grp,wbc]', |
| 'status' |
| ); |
| </pre> |
| -# Run the Robust Cox regression function. |
| <pre class="example"> |
| SELECT madlib.robust_variance_coxph( 'sample_cox', |
| 'sample_robust_cox' |
| ); |
| </pre> |
| -# View the results of the robust Cox regression. |
| <pre class="example"> |
| \\x on |
| SELECT * FROM sample_robust_cox; |
| </pre> |
| Results: |
| <pre class="result"> |
| -[ RECORD 1 ]-+---------------------------------------------------------------------------- |
| coef | {2.54407073265105,1.67172094780081} |
| loglikelihood | -37.8532498733452 |
| std_err | {0.677180599295459,0.387195514577754} |
| robust_se | {0.621095581073685,0.274773521439328} |
| robust_z | {4.09610180811965,6.08399579058399} |
| robust_p | {4.2016521208424e-05,1.17223683104729e-09} |
| hessian | {{2.78043065745405,-2.25848560642669},{-2.25848560642669,8.50472838284265}} |
| </pre> |
| |
| |
| @anchor background |
| @par Technical Background |
| |
| When doing regression analysis, we are sometimes interested in the variance of |
| the computed coefficients \f$ \boldsymbol c \f$. While the built-in regression |
| functions provide variance estimates, we may prefer a <i>robust</i> variance |
| estimate. |
| |
| The robust variance calculation can be expressed in a sandwich formation, which is the form |
| \f[ |
| S( \boldsymbol c) = B( \boldsymbol c) M( \boldsymbol c) B( \boldsymbol c) |
| \f] |
| where \f$ B( \boldsymbol c)\f$ and \f$ M( \boldsymbol c)\f$ are matrices. The \f$ B( \boldsymbol c) \f$ matrix, |
| also known as the bread, is relatively straightforward, and can be computed as |
| \f[ |
| B( \boldsymbol c) = n\left(\sum_i^n -H(y_i, x_i, \boldsymbol c) \right)^{-1} |
| \f] |
| where \f$ H \f$ is the hessian matrix. |
| |
| The \f$ M( \boldsymbol c)\f$ matrix has several variations, each with different robustness properties. |
| The form implemented here is the Huber-White sandwich operator, which takes the form |
| \f[ |
| M_{H} =\frac{1}{n} \sum_i^n \psi(y_i,x_i, \boldsymbol c)^T \psi(y_i,x_i, \boldsymbol c). |
| \f] |
| |
| The above method for calculating robust variance (Huber-White estimates) is |
| implemented for linear regression, logistic regression, and multinomial |
| logistic regression. It is useful in calculating variances in a dataset with |
| potentially noisy outliers. The Huber-White implemented here is identical to |
| the "HC0" sandwich operator in the R module "sandwich". |
| |
| When multinomial logistic regression is computed before the multinomial robust |
| regression, it uses a default reference category of zero and the regression |
| coefficients are included in the output table. The regression coefficients in |
| the output are in the same order as the multinomial logistic regression function, |
| which is described below. For a problem with \f$ K \f$ independent variables \f$ |
| (1, ..., K) \f$ and \f$ J \f$ categories \f$ (0, ..., J-1) \f$, let \f$ |
| {m_{k,j}} \f$ denote the coefficient for independent variable \f$ k \f$ and |
| category \f$ j \f$ . The output is \f$ {m_{k_1, j_0}, m_{k_1, j_1} \ldots |
| m_{k_1, j_{J-1}}, m_{k_2, j_0}, m_{k_2, j_1} \ldots m_{k_K, j_{J-1}}} \f$. The |
| order is NOT CONSISTENT with the multinomial regression marginal effect |
| calculation with function <em>marginal_mlogregr</em>. This is deliberate |
| because the interfaces of all multinomial regressions (robust, clustered, ...) |
| will be moved to match that used in marginal. |
| |
| The robust variance of Cox proportional hazards is more complex because |
| coefficients are trained by maximizing a partial log-likelihood. |
| Therefore, one cannot directly use the formula for \f$ M( \boldsymbol c) \f$ |
| as in Huber-White robust estimator. Extra terms are needed. See [4] for |
| details. |
| |
| |
| @anchor literature |
| @literature |
| |
| [1] vce(cluster) function in STATA: http://www.stata.com/help.cgi?vce_option |
| |
| [2] clustered estimators in R: http://people.su.se/~ma/clustering.pdf |
| |
| [3] Achim Zeileis: Object-oriented Computation of Sandwich Estimators. |
| Research Report Series / Department of Statistics and Mathematics, 37. |
| Department of Statistics and Mathematics, WU Vienna University of Economics |
| and Business, Vienna. |
| http://cran.r-project.org/web/packages/sandwich/vignettes/sandwich-OOP.pdf |
| |
| [4] D. Y. Lin and L. J. Wei, _The Robust Inference for the Cox Proportional Hazards Model_, Journal of |
| the American Statistical Association, Vol. 84, No. 408, p.1074 (1989). |
| |
| @anchor related |
| @par Related Topics |
| File robust.sql_in documenting the SQL functions |
| File robust_variance_coxph.sql_in documenting more of the SQL functions |
| |
| @internal |
| @sa Namespace \ref madlib::modules::regress |
| documenting the implementation in C++ |
| @sa Namespace robust_coxph |
| \ref madlib::modules::stats documenting the implementation in C++ |
| @endinternal |
| */ |
| |
| --------------------------- Robust Linear Regression ---------------------------------- |
| -- Result record for robust linear regression; it is the return type of |
| -- robust_linregr_final. The type is dropped first (with CASCADE) so that |
| -- the definition below can always be created afresh. |
| DROP TYPE IF EXISTS MADLIB_SCHEMA.robust_linregr_result CASCADE; |
| CREATE TYPE MADLIB_SCHEMA.robust_linregr_result AS ( |
|     coef     DOUBLE PRECISION[] |
|   , std_err  DOUBLE PRECISION[] |
|   , t_stats  DOUBLE PRECISION[] |
|   , p_values DOUBLE PRECISION[] |
| ); |
| --------------------------------- |
| |
| -- Transition step of the robust_linregr aggregate: receives the running |
| -- state together with one row (y, x, coef) and returns the updated state. |
| -- The body is provided by the compiled C module. Declared STRICT, so it is |
| -- never invoked with a NULL argument. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_linregr_transition( |
|     state MADLIB_SCHEMA.bytea8 |
|   , y     DOUBLE PRECISION |
|   , x     DOUBLE PRECISION[] |
|   , coef  DOUBLE PRECISION[] |
| ) |
| RETURNS MADLIB_SCHEMA.bytea8 |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --------------------------------- |
| |
| -- Combines two partial states of the robust_linregr aggregate into one. |
| -- The aggregate registers it as prefunc on platforms other than PostgreSQL. |
| -- The body is provided by the compiled C module. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_linregr_merge_states( |
|     state1 MADLIB_SCHEMA.bytea8 |
|   , state2 MADLIB_SCHEMA.bytea8 |
| ) |
| RETURNS MADLIB_SCHEMA.bytea8 |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --------------------------------- |
| |
| -- Final step of the robust_linregr aggregate: converts the accumulated |
| -- state into a robust_linregr_result record (coef, std_err, t_stats, |
| -- p_values). The body is provided by the compiled C module. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_linregr_final( |
|     state MADLIB_SCHEMA.bytea8 |
| ) |
| RETURNS MADLIB_SCHEMA.robust_linregr_result |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C |
| IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --------------------------------- |
| |
| -- Aggregate producing the robust linear regression result record. |
| -- Inputs per row: the dependent variable, the independent variables and |
| -- the supplied coefficient vector. The state starts out empty. On |
| -- platforms other than PostgreSQL the merge function is additionally |
| -- registered as prefunc. |
| CREATE AGGREGATE MADLIB_SCHEMA.robust_linregr( |
|     /*+ "dependentVariable" */ DOUBLE PRECISION, |
|     /*+ "independentVariables" */ DOUBLE PRECISION[], |
|     /*+ "coef" */ DOUBLE PRECISION[] |
| ) ( |
|     SFUNC = MADLIB_SCHEMA.robust_linregr_transition, |
|     STYPE = MADLIB_SCHEMA.bytea8, |
|     FINALFUNC = MADLIB_SCHEMA.robust_linregr_final, |
|     m4_ifdef(`__POSTGRESQL__', `', `prefunc=MADLIB_SCHEMA.robust_linregr_merge_states,') |
|     INITCOND = '' |
| ); |
| |
| -------------------------------------------------------------------------------- |
| |
| -- Online help for robust linear regression. The message argument is |
| -- handed to robust_linregr_help in the robust_linear Python module of the |
| -- regress package, and the text it produces is returned. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_linregr(message VARCHAR) |
| RETURNS VARCHAR |
| AS $$ |
| PythonFunction(regress, robust_linear, robust_linregr_help) |
| $$ |
| LANGUAGE plpythonu IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| --------------------------------- |
| |
| -- Zero-argument help shortcut: equivalent to calling the one-argument |
| -- help overload with an empty message. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_linregr() |
| RETURNS VARCHAR |
| AS $$ |
|     SELECT MADLIB_SCHEMA.robust_variance_linregr(''); |
| $$ |
| LANGUAGE sql IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| -------------------------------------------------------------------------------- |
| |
| -- Full-signature entry point for robust linear regression variance. |
| -- The work is done by robust_variance_linregr in the robust_linear Python |
| -- module of the regress package. Nothing is returned to the caller; the |
| -- module documentation above describes the table created under out_table. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_linregr( |
|     source_table        VARCHAR,  -- name of input table |
|     out_table           VARCHAR,  -- name of output table |
|     dependent_varname   VARCHAR,  -- name of dependent variable |
|     independent_varname VARCHAR,  -- name of independent variable |
|     grouping_cols       VARCHAR,  -- grouping columns |
|     verbose_mode        BOOLEAN   -- printing warning message or not |
| ) |
| RETURNS VOID |
| AS $$ |
| PythonFunction(regress, robust_linear, robust_variance_linregr) |
| $$ |
| LANGUAGE plpythonu VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| --------------------------------- |
| |
| -- Five-argument convenience overload: forwards to the full signature |
| -- with verbose_mode set to FALSE. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_linregr( |
|     source_table        VARCHAR, |
|     out_table           VARCHAR, |
|     dependent_varname   VARCHAR, |
|     independent_varname VARCHAR, |
|     grouping_cols       VARCHAR |
| ) |
| RETURNS VOID |
| AS $$ |
|     SELECT MADLIB_SCHEMA.robust_variance_linregr($1, $2, $3, $4, $5, FALSE); |
| $$ |
| LANGUAGE sql VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| --------------------------------- |
| |
| -- Four-argument convenience overload: forwards to the full signature |
| -- with no grouping (NULL) and verbose_mode set to FALSE. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_linregr( |
|     source_table        VARCHAR, |
|     out_table           VARCHAR, |
|     dependent_varname   VARCHAR, |
|     independent_varname VARCHAR |
| ) |
| RETURNS VOID |
| AS $$ |
|     SELECT MADLIB_SCHEMA.robust_variance_linregr($1, $2, $3, $4, NULL, FALSE); |
| $$ |
| LANGUAGE sql VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| --------------------------- ROBUST LOGISTIC REGRESSION ------------------------- |
| |
| -- Result record for robust logistic regression; it is the return type of |
| -- robust_logregr_step_final. Same layout as the linear result except that |
| -- z_stats takes the place of t_stats. The type is dropped first (with |
| -- CASCADE) so that the definition below can always be created afresh. |
| DROP TYPE IF EXISTS MADLIB_SCHEMA.robust_logregr_result CASCADE; |
| CREATE TYPE MADLIB_SCHEMA.robust_logregr_result AS ( |
|     coef     DOUBLE PRECISION[] |
|   , std_err  DOUBLE PRECISION[] |
|   , z_stats  DOUBLE PRECISION[] |
|   , p_values DOUBLE PRECISION[] |
| ); |
| |
| --------------------------------- |
| |
| -- Transition step of the robust_logregr aggregate: receives the running |
| -- state together with one row (y, x, coef) and returns the updated state. |
| -- The body is provided by the compiled C module. |
| -- NOTE(review): unlike the linear transition function this one is not |
| -- declared STRICT, so NULL arguments reach the C code. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_logregr_step_transition( |
|     state DOUBLE PRECISION[] |
|   , y     BOOLEAN |
|   , x     DOUBLE PRECISION[] |
|   , coef  DOUBLE PRECISION[] |
| ) |
| RETURNS DOUBLE PRECISION[] |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C |
| IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --------------------------------- |
| |
| -- Combines two partial states of the robust_logregr aggregate into one. |
| -- The aggregate registers it as prefunc on platforms other than PostgreSQL. |
| -- The body is provided by the compiled C module. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_logregr_step_merge_states( |
|     state1 DOUBLE PRECISION[] |
|   , state2 DOUBLE PRECISION[] |
| ) |
| RETURNS DOUBLE PRECISION[] |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C |
| IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --------------------------------- |
| |
| -- Final step of the robust_logregr aggregate: converts the accumulated |
| -- state into a robust_logregr_result record (coef, std_err, z_stats, |
| -- p_values). The body is provided by the compiled C module. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_logregr_step_final( |
|     state DOUBLE PRECISION[] |
| ) |
| RETURNS MADLIB_SCHEMA.robust_logregr_result |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C |
| IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --------------------------------- |
| |
| -- Aggregate producing the robust logistic regression result record. |
| -- Inputs per row: the boolean dependent variable, the independent |
| -- variables and the supplied coefficient vector. The state starts as a |
| -- five-element array of zeros; the meaning of each slot is defined by the |
| -- C implementation and is not visible in this file. On platforms other |
| -- than PostgreSQL the merge function is additionally registered as prefunc. |
| CREATE AGGREGATE MADLIB_SCHEMA.robust_logregr( |
|     /*+ "dependentVariable" */ BOOLEAN, |
|     /*+ "independentVariables" */ DOUBLE PRECISION[], |
|     /*+ "coef" */ DOUBLE PRECISION[] |
| ) ( |
|     STYPE = DOUBLE PRECISION[], |
|     SFUNC = MADLIB_SCHEMA.robust_logregr_step_transition, |
|     m4_ifdef(`__POSTGRESQL__', `', `prefunc=MADLIB_SCHEMA.robust_logregr_step_merge_states,') |
|     FINALFUNC = MADLIB_SCHEMA.robust_logregr_step_final, |
|     INITCOND = '{0,0,0,0,0.0}' |
| ); |
| |
| -------------------------------------------------------------------------------- |
| |
| -- Online help for robust logistic regression. The message argument is |
| -- handed to robust_logregr_help in the robust_logistic Python module of |
| -- the regress package, and the text it produces is returned. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_logregr(message VARCHAR) |
| RETURNS VARCHAR |
| AS $$ |
| PythonFunction(regress, robust_logistic, robust_logregr_help) |
| $$ |
| LANGUAGE plpythonu VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| --------------------------------- |
| |
| -- Zero-argument help shortcut: returns whatever the one-argument help |
| -- overload returns for an empty message. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_logregr() |
| RETURNS VARCHAR |
| AS $$ |
| BEGIN |
|     RETURN MADLIB_SCHEMA.robust_variance_logregr(''); |
| END; |
| $$ |
| LANGUAGE plpgsql VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| /** |
| *@brief The robust logistic regression function. |
| * |
| * @param source_table String identifying the input table |
| * @param out_table String identifying the output table to be created |
| * @param dependent_varname Column containing the dependent variable |
| * @param independent_varname Column containing the array of independent variables |
| * @param grouping_cols Columns to group by. |
| * @param max_iter Integer identifying the maximum iterations used by the logistic regression solver. Default is 20. |
| * @param optimizer String identifying the optimizer used in the logistic regression. See the documentation in the logistic regression for the available options. Default is irls. |
| * @param tolerance Float identifying the tolerance of the logistic regression optimizer. Default is 1e-4. |
| * @param verbose_mode Boolean specifying if the regression fit should print any warning messages. Default is false. |
| * @par |
| * To include an intercept in the model, set one coordinate in the |
| * <tt>independent_varname</tt> array to 1. |
| * |
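| * @return VOID. The results are written to the table named by <tt>out_table</tt>, |
| * which has the following columns: |
| * - <tt>coef FLOAT8[]</tt> - Array of coefficients, \f$ \boldsymbol c \f$ |
| * - <tt>std_err FLOAT8[]</tt> - Array of huber-white standard errors, |
| * \f$ \mathit{se}(c_1), \dots, \mathit{se}(c_k) \f$ |
| * - <tt>z_stats FLOAT8[]</tt> - Array of z-statistics, \f$ \boldsymbol z \f$ |
| * - <tt>p_values FLOAT8[]</tt> - Array of p-values, \f$ \boldsymbol p \f$ |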
| * |
| * @usage |
| * For function summary information. Run |
| * sql> select robust_variance_logregr('help'); |
| * OR |
| * sql> select robust_variance_logregr(); |
| * OR |
| * sql> select robust_variance_logregr('?'); |
| * For function usage information. Run |
| * sql> select robust_variance_logregr('usage'); |
| * - Compute the coefficients, and then get the robust diagnostic statistics: |
| * <pre> |
| * select robust_variance_logregr(source_table, out_table, dependent_varname, independent_varname); |
| * </pre> |
| * |
| * - The optional arguments can be supplied to control grouping and the optimizer: |
| * <pre>select robust_variance_logregr(source_table, out_table, dependent_varname, independent_varname, grouping_cols, max_iter, optimizer, tolerance, verbose_mode); |
| </pre> |
| */ |
| |
| -------------------------------------------------------------------------------- |
| |
| -- Full-signature entry point for robust logistic regression variance. |
| -- The work is done by robust_variance_logregr in the robust_logistic |
| -- Python module of the regress package. Nothing is returned to the |
| -- caller. The shorter overloads in this file fill in max_iter = 20, |
| -- optimizer = irls, tolerance = 1e-4 and verbose_mode = FALSE. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_logregr( |
|     source_table        VARCHAR            -- name of input table |
|   , out_table           VARCHAR            -- name of output table |
|   , dependent_varname   VARCHAR            -- name of dependent variable |
|   , independent_varname VARCHAR            -- name of independent variable |
|   , grouping_cols       VARCHAR            -- grouping columns |
|   , max_iter            INTEGER            -- maximum of iterations in the optimizer |
|   , optimizer           VARCHAR            -- the optimizer used |
|   , tolerance           DOUBLE PRECISION   -- tolerance passed to the optimizer |
|   , verbose_mode        BOOLEAN            -- printing warning message or not |
| ) |
| RETURNS VOID |
| AS $$ |
| PythonFunction(regress, robust_logistic, robust_variance_logregr) |
| $$ |
| LANGUAGE plpythonu VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| --------------------------------- |
| |
| -- Eight-argument convenience overload: forwards to the full signature |
| -- with verbose_mode set to FALSE. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_logregr( |
|     source_table         VARCHAR |
|   , out_table            VARCHAR |
|   , dependent_variable   VARCHAR |
|   , independent_variable VARCHAR |
|   , grouping_cols        VARCHAR |
|   , max_iter             INTEGER |
|   , optimizer            VARCHAR |
|   , tolerance            DOUBLE PRECISION |
| ) |
| RETURNS VOID |
| AS $$ |
|     SELECT MADLIB_SCHEMA.robust_variance_logregr($1, $2, $3, $4, $5, $6, $7, $8, FALSE); |
| $$ |
| LANGUAGE sql VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| --------------------------------- |
| |
| -- Seven-argument convenience overload: forwards to the full signature |
| -- with tolerance 1e-4 and verbose_mode FALSE. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_logregr( |
|     source_table         VARCHAR |
|   , out_table            VARCHAR |
|   , dependent_variable   VARCHAR |
|   , independent_variable VARCHAR |
|   , grouping_cols        VARCHAR |
|   , max_iter             INTEGER |
|   , optimizer            VARCHAR |
| ) |
| RETURNS VOID |
| AS $$ |
|     SELECT MADLIB_SCHEMA.robust_variance_logregr($1, $2, $3, $4, $5, $6, $7, 1e-4, FALSE); |
| $$ |
| LANGUAGE sql VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| --------------------------------- |
| |
| -- Six-argument convenience overload: forwards to the full signature |
| -- with optimizer irls, tolerance 1e-4 and verbose_mode FALSE. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_logregr( |
|     source_table         VARCHAR |
|   , out_table            VARCHAR |
|   , dependent_variable   VARCHAR |
|   , independent_variable VARCHAR |
|   , grouping_cols        VARCHAR |
|   , max_iter             INTEGER |
| ) |
| RETURNS VOID |
| AS $$ |
|     SELECT MADLIB_SCHEMA.robust_variance_logregr($1, $2, $3, $4, $5, $6, 'irls', 1e-4, FALSE); |
| $$ |
| LANGUAGE sql VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| --------------------------------- |
| |
| -- Five-argument convenience overload: forwards to the full signature |
| -- with max_iter 20, optimizer irls, tolerance 1e-4 and verbose_mode FALSE. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_logregr( |
|     source_table         VARCHAR |
|   , out_table            VARCHAR |
|   , dependent_variable   VARCHAR |
|   , independent_variable VARCHAR |
|   , grouping_cols        VARCHAR |
| ) |
| RETURNS VOID |
| AS $$ |
|     SELECT MADLIB_SCHEMA.robust_variance_logregr($1, $2, $3, $4, $5, 20, 'irls', 1e-4, FALSE); |
| $$ |
| LANGUAGE sql VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| --------------------------------- |
| |
| -- Four-argument convenience overload: forwards to the full signature |
| -- with no grouping (NULL), max_iter 20, optimizer irls, tolerance 1e-4 |
| -- and verbose_mode FALSE. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_logregr( |
|     source_table         VARCHAR |
|   , out_table            VARCHAR |
|   , dependent_variable   VARCHAR |
|   , independent_variable VARCHAR |
| ) |
| RETURNS VOID |
| AS $$ |
|     SELECT MADLIB_SCHEMA.robust_variance_logregr($1, $2, $3, $4, NULL, 20, 'irls', 1e-4, FALSE); |
| $$ |
| LANGUAGE sql VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| ---------------- ROBUST MULTINOMIAL LOGISTIC REGRESSION ------------------------ |
| |
| -- Composite type returned by mlogregr_robust_step_final, the final function |
| -- of the robust_mlogregr aggregate. |
| -- The type is dropped with CASCADE before being re-created, which also drops |
| -- any objects that depend on it. |
| DROP TYPE IF EXISTS MADLIB_SCHEMA.robust_mlogregr_result CASCADE; |
| CREATE TYPE MADLIB_SCHEMA.robust_mlogregr_result AS ( |
|     ref_category INTEGER,            -- the reference category |
|     coef         DOUBLE PRECISION[], -- coefficients for the regression |
|     std_err      DOUBLE PRECISION[], -- Huber-White standard errors |
|     z_stats      DOUBLE PRECISION[], -- Wald z-statistics |
|     p_values     DOUBLE PRECISION[]  -- p-values |
| ); |
| |
| --------------------------------- |
| |
| -- Transition function of the robust_mlogregr aggregate, implemented in C. |
| -- Receives the running state followed by the five per-row aggregate inputs |
| -- and returns the updated state. |
| -- Unlike the merge and final functions it is not declared STRICT. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlogregr_robust_step_transition( |
|     state DOUBLE PRECISION[],   -- running aggregate state |
|     y INTEGER,                  -- dependent variable |
|     numCategories INTEGER,      -- number of categories |
|     ref_category INTEGER,       -- reference category |
|     x DOUBLE PRECISION[],       -- independent variables |
|     coef DOUBLE PRECISION[]     -- coefficients |
| ) |
| RETURNS DOUBLE PRECISION[] |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C |
| IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --------------------------------- |
| |
| -- Merge function for the robust_mlogregr aggregate, implemented in C. |
| -- Takes two transition states and returns a single state array. |
| -- STRICT: the function is not called when either state is NULL. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlogregr_robust_step_merge_states( |
|     state1 DOUBLE PRECISION[], |
|     state2 DOUBLE PRECISION[] |
| ) |
| RETURNS DOUBLE PRECISION[] |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C |
| IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --------------------------------- |
| |
| -- Final function of the robust_mlogregr aggregate, implemented in C. |
| -- Converts the accumulated state array into a robust_mlogregr_result value. |
| -- STRICT: the function is not called when the state is NULL. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlogregr_robust_step_final( |
|     state DOUBLE PRECISION[] |
| ) |
| RETURNS MADLIB_SCHEMA.robust_mlogregr_result |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C |
| IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --------------------------------- |
| |
| -- Aggregate producing a robust_mlogregr_result from per-row inputs. |
| -- The state is a DOUBLE PRECISION array that starts as five zeros, is updated |
| -- by mlogregr_robust_step_transition and is converted to the result type by |
| -- mlogregr_robust_step_final. The merge function is registered as prefunc |
| -- only when the PostgreSQL build flag is not defined. |
| CREATE AGGREGATE MADLIB_SCHEMA.robust_mlogregr( |
|     /*+ "dependentVariable" */ INTEGER, |
|     /*+ "numCategories" */ INTEGER, |
|     /*+ "ref_category" */ INTEGER, |
|     /*+ "independentVariables" */ DOUBLE PRECISION[], |
|     /*+ "coef" */ DOUBLE PRECISION[] |
| ) |
| ( |
|     STYPE = DOUBLE PRECISION[], |
|     SFUNC = MADLIB_SCHEMA.mlogregr_robust_step_transition, |
|     m4_ifdef(`__POSTGRESQL__', `', `prefunc=MADLIB_SCHEMA.mlogregr_robust_step_merge_states,') |
|     FINALFUNC = MADLIB_SCHEMA.mlogregr_robust_step_final, |
|     INITCOND = '{0,0,0,0,0}' |
| ); |
| |
| -------------------------------------------------------------------------------- |
| |
| -- Single-argument overload of robust_variance_mlogregr returning text. |
| -- The body is generated by an m4 macro that names the routine |
| -- robust_variance_mlogregr_help in module robust_mlogistic (presumably the |
| -- online help; the deprecation warning elsewhere in this file points users |
| -- at the message value usage). |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr( |
|     message VARCHAR   -- help topic requested by the caller |
| ) |
| RETURNS VARCHAR AS $$ |
|     PythonFunction(regress, robust_mlogistic, robust_variance_mlogregr_help) |
| $$ LANGUAGE plpythonu |
| IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| --------------------------------- |
| |
| -- Zero-argument overload of robust_variance_mlogregr. |
| -- Equivalent to calling the single-argument overload with an empty message. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr() |
| RETURNS VARCHAR AS $$ |
|     SELECT MADLIB_SCHEMA.robust_variance_mlogregr( |
|         ''   -- empty message |
|     ); |
| $$ LANGUAGE sql |
| IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| -------------------- Robust Multinomial Logistic Regression -------------------- |
| /** |
| -- NOTICE --------------------------------------------------------------- |
| -- All functions calling other modeling functions should be in the format: |
| -- accept a model table created by the underlying modeling function |
| -- and extract all parameters from the model table and summary table |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr( |
| model_table VARCHAR -- name of the mlogregr model table |
| , out_table VARCHAR -- name of output table |
| ) |
| RETURNS VOID AS $$ |
| PythonFunction(regress, robust_mlogistic, robust_variance_mlogregr_new) |
| $$ LANGUAGE plpythonu; |
| -- END OF NOTICE --------------------------------------------------------------- |
| */ |
| -------------------------------------------------------------------------------- |
| |
| -------------------------------------------------------------------------------- |
| |
| /** |
| * @brief Compute robust regression diagnostic statistics for multinomial logistic regression. |
| * |
| * @param source_table name of input table, VARCHAR |
| * @param out_table name of output table, VARCHAR |
| * @param dependent_varname dependent variable, VARCHAR |
| * @param independent_varname independent variables, VARCHAR |
| * @param ref_category Integer specifying the reference category. Default is 0. |
| * @param grouping_cols grouping variables, VARCHAR. Default is NULL. Currently a placeholder. |
| * @param optimizer_params VARCHAR, a comma-separated string with optimizer parameters in <em>name=value</em> form. The overloads that omit this argument pass 'max_iter=20, optimizer=irls, tolerance=1e-4'. See the documentation in the multinomial logistic regression for the available options. |
| * @param verbose_mode Boolean specifying if the multinomial logistic regression solver should print any warnings. Default is false. Currently a placeholder. |
| * |
| * |
| * @par |
| * To include an intercept in the model, set one coordinate in the |
| * <tt>independentVariables</tt> array to 1. |
| * |
| * @usage |
| * <pre> |
| * SELECT madlib.robust_variance_mlogregr( |
| * <em>'source_table'</em>, -- name of input table, VARCHAR |
| * <em>'out_table'</em>, -- name of output table, VARCHAR |
| * <em>'dependent_varname'</em>, -- dependent variable, VARCHAR |
| * <em>'independent_varname'</em>, -- independent variables, VARCHAR |
| * <em>ref_category</em>, -- [OPTIONAL] Integer specifying the reference category. Default is 0. |
| * <em>'grouping_cols'</em>, -- [OPTIONAL] grouping variables, VARCHAR. Default is NULL. |
| * <em>'optimizer_params'</em>, -- [OPTIONAL] Comma-separated optimizer parameters, VARCHAR. Default is 'max_iter=20, optimizer=irls, tolerance=1e-4'. |
| * <em>verbose_mode</em> -- [OPTIONAL] Boolean specifying if the regression fit should print any warning messages. Default is false. |
| * ); |
| * </pre> |
| * |
| * @return VOID. The regression results are saved in <em>out_table</em>. |
| * NOTE(review): the fields below are those of the robust_mlogregr_result type; |
| * confirm the exact output table columns against the Python implementation. |
| * - <tt>ref_category INTEGER</tt> - The reference category |
| * - <tt>coef FLOAT8[] </tt> - The coefficients for the regression |
| * - <tt>std_err FLOAT8[]</tt> - Array of huber-white standard errors, |
| * - <tt>z_stats FLOAT8[]</tt> - Array of Wald z-statistics, |
| * - <tt>p_values FLOAT8[]</tt> - Array of p-values, |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr( |
| source_table VARCHAR, -- name of input table |
| out_table VARCHAR, -- name of output table |
| dependent_varname VARCHAR, -- name of dependent variable |
| independent_varname VARCHAR, -- name of independent variable |
| ref_category INTEGER, -- reference category (default 0) |
| grouping_cols VARCHAR, -- grouping columns (PLACEHOLDER) (default NULL) |
| optimizer_params VARCHAR, -- a comma-separated string with optimizer parameters |
| verbose_mode BOOLEAN -- PLACEHOLDER (default False) |
| ) |
| RETURNS VOID AS $$ |
| PythonFunction(regress, robust_mlogistic, robust_variance_mlogregr) |
| $$ LANGUAGE plpythonu VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| --------------------------------- |
| |
| -- Seven-argument overload of robust_variance_mlogregr (optimizer_params form). |
| -- Delegates to the eight-argument overload with verbose_mode = FALSE. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr( |
|     source_table VARCHAR, |
|     out_table VARCHAR, |
|     dependent_varname VARCHAR, |
|     independent_varname VARCHAR, |
|     ref_category INTEGER, |
|     grouping_cols VARCHAR, |
|     optimizer_params VARCHAR |
| ) |
| RETURNS VOID AS $$ |
|     SELECT MADLIB_SCHEMA.robust_variance_mlogregr( |
|         $1, $2,   -- source_table, out_table |
|         $3, $4,   -- dependent_varname, independent_varname |
|         $5,       -- ref_category |
|         $6,       -- grouping_cols |
|         $7,       -- optimizer_params |
|         FALSE     -- default verbose_mode |
|     ); |
| $$ LANGUAGE sql VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| --------------------------------- |
| |
| -- Six-argument overload of robust_variance_mlogregr. |
| -- Delegates to the eight-argument overload with the default optimizer |
| -- parameter string and verbose_mode = FALSE. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr( |
|     source_table VARCHAR, |
|     out_table VARCHAR, |
|     dependent_varname VARCHAR, |
|     independent_varname VARCHAR, |
|     ref_category INTEGER, |
|     grouping_cols VARCHAR |
| ) |
| RETURNS VOID AS $$ |
|     SELECT MADLIB_SCHEMA.robust_variance_mlogregr( |
|         $1, $2,   -- source_table, out_table |
|         $3, $4,   -- dependent_varname, independent_varname |
|         $5,       -- ref_category |
|         $6,       -- grouping_cols |
|         'max_iter=20, optimizer=irls, tolerance=1e-4', |
|         FALSE     -- default verbose_mode |
|     ); |
| $$ LANGUAGE sql VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| --------------------------------- |
| |
| -- Five-argument overload of robust_variance_mlogregr. |
| -- Delegates to the eight-argument overload with no grouping columns (NULL), |
| -- the default optimizer parameter string and verbose_mode = FALSE. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr( |
|     source_table VARCHAR, |
|     out_table VARCHAR, |
|     dependent_varname VARCHAR, |
|     independent_varname VARCHAR, |
|     ref_category INTEGER |
| ) |
| RETURNS VOID AS $$ |
|     SELECT MADLIB_SCHEMA.robust_variance_mlogregr( |
|         $1, $2,   -- source_table, out_table |
|         $3, $4,   -- dependent_varname, independent_varname |
|         $5,       -- ref_category |
|         NULL,     -- default grouping_cols |
|         'max_iter=20, optimizer=irls, tolerance=1e-4', |
|         FALSE     -- default verbose_mode |
|     ); |
| $$ LANGUAGE sql VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| --------------------------------- |
| |
| -- Four-argument overload of robust_variance_mlogregr. |
| -- Delegates to the eight-argument overload with reference category 0, |
| -- no grouping columns (NULL), the default optimizer parameter string and |
| -- verbose_mode = FALSE. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr( |
|     source_table VARCHAR, |
|     out_table VARCHAR, |
|     dependent_varname VARCHAR, |
|     independent_varname VARCHAR |
| ) |
| RETURNS VOID AS $$ |
|     SELECT MADLIB_SCHEMA.robust_variance_mlogregr( |
|         $1, $2,   -- source_table, out_table |
|         $3, $4,   -- dependent_varname, independent_varname |
|         0,        -- default ref_category |
|         NULL,     -- default grouping_cols |
|         'max_iter=20, optimizer=irls, tolerance=1e-4', |
|         FALSE     -- default verbose_mode |
|     ); |
| $$ LANGUAGE sql VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| -------------------------------------------------------------------------------- |
| |
| -- DEPRECATED NOTICE ----------------------------------------------------------- |
| -- The functions below have been deprecated and should be removed in the next |
| -- major version update |
| |
| -- DEPRECATED ten-argument form of robust_variance_mlogregr. |
| -- Raises a deprecation warning, then packs max_iter, optimizer and tolerance |
| -- into one optimizer_params string and forwards to the eight-argument |
| -- overload. |
| -- A NULL max_iter, optimizer or tolerance falls back to the default used by |
| -- the shorter overloads (max_iter=20, optimizer=irls, tolerance=1e-4). |
| -- Without the COALESCE calls a single NULL would make the whole concatenated |
| -- string NULL and silently discard the settings that were supplied. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr( |
|     source_table VARCHAR, -- name of input table |
|     out_table VARCHAR, -- name of output table |
|     dependent_varname VARCHAR, -- name of dependent variable |
|     independent_varname VARCHAR, -- name of independent variable |
|     ref_category INTEGER, -- reference category (default 0) |
|     grouping_cols VARCHAR, -- grouping columns (PLACEHOLDER) (default NULL) |
|     max_iter INTEGER, -- max number of iterations (default 20) |
|     optimizer VARCHAR, -- optimizer ['irls', 'newton'] (default 'irls') |
|     tolerance DOUBLE PRECISION, -- tolerance (default 1e-4) |
|     verbose_mode BOOLEAN -- printing warning message or not (default False) |
| ) |
| RETURNS VOID AS $$ |
| BEGIN |
|     RAISE WARNING $sql$This function has been deprecated. |
| Please run "SELECT MADLIB_SCHEMA.robust_variance_mlogregr('usage');" |
| to get the latest function usage.$sql$; |
|     PERFORM MADLIB_SCHEMA.robust_variance_mlogregr( |
|         $1, $2, $3, $4, $5, $6, |
|         'max_iter=' || COALESCE(CAST($7 AS TEXT), '20') || |
|         ', optimizer=' || COALESCE($8, 'irls') || |
|         ', tolerance=' || COALESCE(CAST($9 AS TEXT), '1e-4'), |
|         $10); |
| END; |
| $$ LANGUAGE PLPGSQL VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| --------------------------------- |
| |
| -- DEPRECATED nine-argument form of robust_variance_mlogregr. |
| -- Delegates to the deprecated ten-argument form with verbose_mode = FALSE. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr( |
|     source_table VARCHAR, |
|     out_table VARCHAR, |
|     dependent_varname VARCHAR, |
|     independent_varname VARCHAR, |
|     ref_category INTEGER, |
|     grouping_cols VARCHAR, |
|     max_iter INTEGER, |
|     optimizer VARCHAR, |
|     tolerance DOUBLE PRECISION |
| ) |
| RETURNS VOID AS $$ |
|     SELECT MADLIB_SCHEMA.robust_variance_mlogregr( |
|         $1, $2,   -- source_table, out_table |
|         $3, $4,   -- dependent_varname, independent_varname |
|         $5,       -- ref_category |
|         $6,       -- grouping_cols |
|         $7,       -- max_iter |
|         $8,       -- optimizer |
|         $9,       -- tolerance |
|         FALSE     -- default verbose_mode |
|     ); |
| $$ LANGUAGE sql VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| --------------------------------- |
| |
| -- DEPRECATED eight-argument form of robust_variance_mlogregr taking separate |
| -- max_iter and optimizer arguments. |
| -- Delegates to the deprecated ten-argument form with tolerance 1e-4 and |
| -- verbose_mode = FALSE. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr( |
|     source_table VARCHAR, |
|     out_table VARCHAR, |
|     dependent_varname VARCHAR, |
|     independent_varname VARCHAR, |
|     ref_category INTEGER, |
|     grouping_cols VARCHAR, |
|     max_iter INTEGER, |
|     optimizer VARCHAR |
| ) |
| RETURNS VOID AS $$ |
|     SELECT MADLIB_SCHEMA.robust_variance_mlogregr( |
|         $1, $2,   -- source_table, out_table |
|         $3, $4,   -- dependent_varname, independent_varname |
|         $5,       -- ref_category |
|         $6,       -- grouping_cols |
|         $7,       -- max_iter |
|         $8,       -- optimizer |
|         1e-4,     -- default tolerance |
|         FALSE     -- default verbose_mode |
|     ); |
| $$ LANGUAGE sql VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| --------------------------------- |
| |
| -- DEPRECATED seven-argument form of robust_variance_mlogregr taking an |
| -- integer max_iter as its last argument. |
| -- Delegates to the deprecated ten-argument form with the optimizer set to |
| -- irls, tolerance 1e-4 and verbose_mode = FALSE. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr( |
|     source_table VARCHAR, |
|     out_table VARCHAR, |
|     dependent_varname VARCHAR, |
|     independent_varname VARCHAR, |
|     ref_category INTEGER, |
|     grouping_cols VARCHAR, |
|     max_iter INTEGER |
| ) |
| RETURNS VOID AS $$ |
|     SELECT MADLIB_SCHEMA.robust_variance_mlogregr( |
|         $1, $2,   -- source_table, out_table |
|         $3, $4,   -- dependent_varname, independent_varname |
|         $5,       -- ref_category |
|         $6,       -- grouping_cols |
|         $7,       -- max_iter |
|         'irls',   -- default optimizer |
|         1e-4,     -- default tolerance |
|         FALSE     -- default verbose_mode |
|     ); |
| $$ LANGUAGE sql VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| -- END OF DEPRECATED NOTICE ----------------------------------------------------------- |