| /* ----------------------------------------------------------------------- *//** |
| * |
| * @file robust.sql_in |
| * |
| * @brief SQL functions for linear regression |
| * @date January 2011 |
| * |
| * @sa Calculates robust statistics for various regression models. |
| * |
| *//* ----------------------------------------------------------------------- */ |
| |
| m4_include(`SQLCommon.m4') |
| |
| |
| /** |
| @addtogroup grp_robust |
| |
| @about |
| When doing regression analysis, we are sometimes interested in the variance of the computed coefficients \f$ \boldsymbol c \f$. While the built-in regression functions provide variance estimates, we may prefer a <i> robust </i> variance estimate. |
| |
| The robust variance calculation can be expressed in a sandwich formation, which is the form |
| \f[ |
| S( \boldsymbol c) = B( \boldsymbol c) M( \boldsymbol c) B( \boldsymbol c) |
| \f] |
| where \f$ B( \boldsymbol c)\f$ and \f$ M( \boldsymbol c)\f$ are matrices. The \f$ B( \boldsymbol c) \f$ matrix, |
| also known as the bread, is relatively straight forward, and can be computed as |
| \f[ |
| B( \boldsymbol c) = n\left(\sum_i^n -H(y_i, x_i, \boldsymbol c) \right)^{-1} |
| \f] |
| where \f$ H \f$ is the hessian matrix. |
| |
| The \f$ M( \boldsymbol c)\f$ matrix has several variation, each with different robustness properties. |
| The form implemented here is the Huber-White sandwich operator, which takes the form |
| \f[ |
| M_{H} =\frac{1}{n} \sum_i^n \psi(y_i,x_i, \boldsymbol c)^T \psi(y_i,x_i, \boldsymbol c). |
| \f] |
| |
| |
| The above method for calculating robust variance (Huber-White estimates) is implemented for linear regression, logistic regression, and multinomial logistic regression. It is useful in calculating variances in a dataset with potentially noisy outliers. The Huber-White implemented here is identical to the "HC0" sandwich operator in the R module "sandwich". |
| |
| |
| The interface for robust linear, logistic, and multinomial logistic regression are similar, differing only in the optimal parameters. Calling the help and usage functions is identical across all three robust regressions. |
| |
| When multinomial logistic regression is computed before the multinomial robust |
| regression, it uses a default reference category of zero and the regression coefficients are included in the output table. The regression |
| coefficients in the output are in the same order as multinomial logistic |
| regression function, which is described below. |
| For a problem with |
| \f$ K \f$ dependent variables \f$ (1, ..., K) \f$ and \f$ J \f$ categories \f$ (0, ..., J-1) |
| \f$, let \f$ {m_{k,j}} \f$ denote the coefficient for dependent variable \f$ k |
| \f$ and category \f$ j \f$ . The output is \f$ {m_{k_1, j_0}, m_{k_1, j_1} |
| \ldots m_{k_1, j_{J-1}}, m_{k_2, j_0}, m_{k_2, j_1} \ldots m_{k_K, j_{J-1}}} \f$. |
| The order is NOT CONSISTENT with the multinomial regression marginal effect |
| calculation with function <em>marginal_mlogregr</em>. This is deliberate |
| because the interfaces of all multinomial regressions (robust, clustered, ...) |
| will be moved to match that used in marginal. |
| |
| |
| @input |
| |
| The training data is expected to be of the following form: |
| <pre>{TABLE|VIEW} <em>sourceName</em> ( |
| <em>outputTable</em> VARCHAR, |
| <em>regressionType </em> VARCHAR, |
| <em>dependentVariable</em> VARCHAR, |
| <em>independentVariable</em> VARCHAR |
| )</pre> |
| |
| @usage |
| |
| |
| <b> The Full Interface</b> |
| |
| \warning The \b 'groupingCol' and \b 'print_warnings' input parameter for \e robust_variance_mlogregr |
| is a placeholder in the Madlib V1.0. These input parameters will be implemented in a |
| future release. |
| |
| <pre> |
| SELECT \ref madlib.robust_variance_linregr( |
| <em>'source_table'</em>, -- name of input table, VARCHAR |
| <em>'out_table'</em>, -- name of output table, VARCHAR |
| <em>'dependent_varname'</em>, -- dependent variable, VARCHAR |
| <em>'independent_varname'</em>, -- independent variables, VARCHAR |
| <em>'grouping_cols'</em> -- [OPTIONAL] grouping variables, VARCHAR |
| ); |
| </pre> |
| OR |
| <pre> |
| SELECT \ref madlib.robust_variance_logregr( |
| <em>'source_table'</em>, -- name of input table, VARCHAR |
| <em>'out_table'</em>, -- name of output table, VARCHAR |
| <em>'dependent_varname'</em>, -- dependent variable, VARCHAR |
| <em>'independent_varname'</em>, -- independent variables, VARCHAR |
| <em>'grouping_cols'</em>, -- [OPTIONAL] grouping variables, VARCHAR |
| <em>max_iter</em>, -- [OPTIONAL] Integer identifying the maximum iterations used by the logistic regression solver. Default is 20, Integer |
| <em>'optimizer'</em>, -- [OPTIONAL] String identifying the optimizer used in the logistic regression. See the documentation in the logistic regression for the available options. Default is irls. VARCHAR |
| <em>tolerance</em>, -- [OPTIONAL] Float identifying the tolerance of the logistic regression optimizer. Default is 0.0001. DOUBLE PRECISION |
| <em>print_warnings</em> -- [OPTIONAL] Boolean specifying if the regression fit should print any warning messages. Default is false. BOOLEAN |
| ); |
| </pre> |
| OR |
| <pre> |
| SELECT \ref madlib.robust_variance_mlogregr( |
| <em>'source_table'</em>, -- name of input table, VARCHAR |
| <em>'out_table'</em>, -- name of output table, VARCHAR |
| <em>'dependent_varname'</em>, -- dependent variable, VARCHAR |
| <em>'independent_varname'</em>, -- independent variables, VARCHAR |
| <em>ref_category</em>, -- [OPTIONAL] Integer specifying the reference category. Default is 0. |
| <em>'grouping_cols'</em>, -- [OPTIONAL] grouping variables, VARCHAR. Default is NULL. |
| <em>max_iter</em>, -- [OPTIONAL] Integer identifying the maximum iterations used by the logistic regression solver. Default is 20. |
| <em>'optimizer'</em>, -- [OPTIONAL] String identifying the optimizer used in the multinomial logistic regression. See the documentation in the multinomial logistic regression for the available options. Default is irls. |
| <em>tolerance</em>, -- [OPTIONAL] Float identifying the tolerance of the multinomial logistic regression optimizer. Default is 0.0001. |
| <em>print_warnings</em> -- [OPTIONAL] Boolean specifying if the regression fit should print any warning messages. Default is false. |
| ); |
| </pre> |
| Here the <em>'independent_varname'</em> can be the name of a column, which contains |
| array of numeric values. It can also have a format of string 'array[1, x1, x2, x3]', |
| where <em>x1</em>, <em>x2</em> and <em>x3</em> are all column names. |
| |
| Output is stored in the <em>out_table</em>: |
| <pre> |
| [ coef | std_err | (z/t)-stats | p_values | |
| +------+---------+-------------+----------+ |
| </pre> |
| |
| |
| @examp |
| |
| |
| -# For function summary information. Run |
| @verbatim |
| sql> select robust_variance_{linregr OR logregr OR mlogregr}('help'); |
| OR |
| sql> select robust_variance_{linregr OR logregr OR mlogregr}(); |
| OR |
| sql> select robust_variance_{linregr OR logregr OR mlogregr}('?'); |
| @endverbatim |
| |
| -# For function usage information. |
| @verbatim |
| sql> select robust_variance_{linregr OR logregr OR mlogregr}('usage'); |
| @endverbatim |
| -# Create the sample data set: |
| @verbatim |
| sql> SELECT * FROM data; |
| id | second_attack | treatment | trait_anxiety |
| ----+---------------+-----------+--------------- |
| 1 | 1 | 1 | 70 |
| 3 | 1 | 1 | 50 |
| 5 | 1 | 0 | 40 |
| 7 | 1 | 0 | 75 |
| 9 | 1 | 0 | 70 |
| 11 | 0 | 1 | 65 |
| 13 | 0 | 1 | 45 |
| 15 | 0 | 1 | 40 |
| 17 | 0 | 0 | 55 |
| ... |
| @endverbatim |
| -# Run the logistic regression function and then compute the robust logistic variance of the regression: |
| @verbatim |
| sql> select robust_variance_logregr('patients', 'newTable', 'second_attack', 'ARRAY[1, treatment, trait_anxiety]'); |
| sql> select * from newTable; |
| coef | {11.962748350258,1.37269168529894,0.00285507335100035} |
| std_err | {3.45872062333141,1.17161925782182,0.053432886418388} |
| z_stats | {-1.839833462942,-0.874094587942144,2.22793348156965} |
| p_values | {0.0657926909738772,0.382066744586027,0.0258849510756295} |
| @endverbatim |
| |
| |
| @literature |
| |
| |
| [1] vce(cluster) function in STATA: http://www.stata.com/help.cgi?vce_option |
| |
| [2] clustered estimators in R: http://people.su.se/~ma/clustering.pdf |
| |
| [3] Achim Zeileis: Object-oriented Computation of Sandwich Estimators. |
| Research Report Series / Department of Statistics and Mathematics, 37. |
| Department of Statistics and Mathematics, WU Vienna University of Economics and Business, Vienna. |
| http://cran.r-project.org/web/packages/sandwich/vignettes/sandwich-OOP.pdf |
| |
| @sa File robust.sql_in documenting the SQL functions. |
| |
| @internal |
| @sa Namespace \ref madlib::modules::regress |
| documenting the implementation in C++ |
| @endinternal |
| */ |
| |
| /** |
| * @internal |
| * @bDoes error checking on the input for the robust variance regressions for the mandatory inputs. |
| * |
| */ |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_input_checking( |
| source_table VARCHAR -- name of input table |
| , out_table VARCHAR -- name of output table |
| , dependent_varname VARCHAR -- name of dependent variable |
| , independent_varname VARCHAR -- name of independent variable |
| ) |
| RETURNS VOID AS $$ |
| BEGIN |
| IF (source_table IS NULL OR source_table = '') THEN |
| RAISE EXCEPTION 'Invalid input table name given.'; |
| END IF; |
| IF (NOT MADLIB_SCHEMA.__table_exists(source_table)) THEN |
| RAISE EXCEPTION 'Input table name does not exist.'; |
| END IF; |
| IF (out_table IS NULL OR out_table = '') THEN |
| RAISE EXCEPTION 'Invalid output table name given.'; |
| END IF; |
| IF (MADLIB_SCHEMA.__table_exists(out_table)) THEN |
| RAISE EXCEPTION 'Output table name already exists. Drop the table before calling the function.'; |
| END IF; |
| IF (independent_varname IS NULL OR independent_varname = '') THEN |
| RAISE EXCEPTION 'Invalid independent variable name given.'; |
| END IF; |
| IF (dependent_varname IS NULL OR dependent_varname = '') THEN |
| RAISE EXCEPTION 'Invalid dependent variable name given.'; |
| END IF; |
| END; |
| $$ LANGUAGE plpgsql VOLATILE; |
| |
| --------------------------- Robust Linear Regression ---------------------------------- |
| |
| CREATE TYPE MADLIB_SCHEMA.robust_linregr_result AS ( |
| coef DOUBLE PRECISION[], |
| std_err DOUBLE PRECISION[], |
| t_stats DOUBLE PRECISION[], |
| p_values DOUBLE PRECISION[] |
| ); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_linregr_transition( |
| state MADLIB_SCHEMA.bytea8, |
| y DOUBLE PRECISION, |
| x DOUBLE PRECISION[], |
| coef DOUBLE PRECISION[]) |
| RETURNS MADLIB_SCHEMA.bytea8 |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C |
| IMMUTABLE STRICT; |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_linregr_merge_states( |
| state1 MADLIB_SCHEMA.bytea8, |
| state2 MADLIB_SCHEMA.bytea8) |
| RETURNS MADLIB_SCHEMA.bytea8 |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C |
| IMMUTABLE STRICT; |
| |
| -- Final functions |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_linregr_final( |
| state MADLIB_SCHEMA.bytea8) |
| RETURNS MADLIB_SCHEMA.robust_linregr_result |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C IMMUTABLE STRICT; |
| |
| /** |
| * @brief Compute robust regression diagnostic statistics for linear regression. |
| * |
| * @param dependentVariable Column containing the dependent variable |
| * @param independentVariables Column containing the array of independent variables |
| * @param coef Column containing the array of the OLS coefficients (as obtained by linregr) |
| * |
| * @par |
| * To include an intercept in the model, set one coordinate in the |
| * <tt>independentVariables</tt> array to 1. |
| * |
| * @return A composite value: |
| * - <tt>std_err FLOAT8[]</tt> - Array of huber-white standard errors, |
| * \f$ \mathit{se}(c_1), \dots, \mathit{se}(c_k) \f$ |
| * - <tt>t_stats FLOAT8[]</tt> - Array of t-statistics, \f$ \boldsymbol t \f$ |
| * - <tt>p_values FLOAT8[]</tt> - Array of p-values, \f$ \boldsymbol p \f$ |
| * |
| * @usage |
| * - Get all the diagnostic statistics:\n |
| * |
| * <pre> SELECT (robust_linregr(<em>dependentVariable</em>, |
| * <em>independentVariables</em>, coef)).* |
| * FROM ( |
| * SELECT linregr(<em>dependentVariable</em>, <em>independentVariables</em>).coef |
| * ) AS ols_coef, <em>sourceName</em> as src; |
| * </pre> |
| * - Get a subset of the output columns, e.g., only the condition number |
| * and the array of p-values \f$ \boldsymbol p \f$: |
| * <pre>SELECT (lr).robust_condition_no, (lr).robust_p_values |
| *FROM ( |
| * </pre> |
| * <pre> SELECT (robust_linregr(<em>dependentVariable</em>, |
| * <em>independentVariables</em>, coef)).* |
| * FROM ( |
| * SELECT linregr(<em>dependentVariable</em>, <em>independentVariables</em>).coef |
| * ) AS ols_coef, <em>sourceName</em> as src |
| *) AS subq;</pre> |
| */ |
| |
| CREATE AGGREGATE MADLIB_SCHEMA.robust_linregr( |
| /*+ "dependentVariable" */ DOUBLE PRECISION, |
| /*+ "independentVariables" */ DOUBLE PRECISION[], |
| /*+ "coef" */ DOUBLE PRECISION[]) ( |
| SFUNC=MADLIB_SCHEMA.robust_linregr_transition, |
| STYPE=MADLIB_SCHEMA.bytea8, |
| FINALFUNC=MADLIB_SCHEMA.robust_linregr_final, |
| m4_ifdef(`__GREENPLUM__',`prefunc=MADLIB_SCHEMA.robust_linregr_merge_states,') |
| INITCOND='' |
| ); |
| |
| --------------------------- INTERNAL --------------------------------------- |
| /** |
| * @brief Return robust linear regression estimates given a set of coefficients |
| **/ |
| CREATE FUNCTION MADLIB_SCHEMA.__internal_get_robust_linregr_result( |
| source_table VARCHAR -- name of input table |
| , dependent_varname VARCHAR -- name of dependent variable |
| , independent_varname VARCHAR -- name of independent variable |
| , linregr_coeffs DOUBLE PRECISION[] -- coeffs from linear regression |
| ) |
| RETURNS MADLIB_SCHEMA.robust_linregr_result AS $$ |
| DECLARE |
| robust_value MADLIB_SCHEMA.robust_linregr_result; |
| BEGIN |
| EXECUTE ' |
| SELECT (MADLIB_SCHEMA.robust_linregr(' |
| || dependent_varname || ' , ' |
| || independent_varname || ' , ' |
| || 'ARRAY[' || array_to_string(linregr_coeffs, ',') || ']) |
| ).* FROM ' || source_table |
| INTO robust_value; |
| RETURN robust_value; |
| END |
| $$ LANGUAGE plpgsql VOLATILE; |
| |
| /** |
| * @brief Return insert string for robust linear regression |
| **/ |
| |
| CREATE FUNCTION MADLIB_SCHEMA.__internal_get_robust_linregr_insert_string( |
| robust_lin_rst MADLIB_SCHEMA.robust_linregr_result |
| , linregr_coeffs DOUBLE PRECISION[] -- coeffs from linear regression |
| , out_table TEXT |
| ) |
| RETURNS VARCHAR AS $$ |
| DECLARE |
| insert_string VARCHAR; |
| BEGIN |
| insert_string := 'INSERT INTO ' || out_table || ' VALUES ('; |
| insert_string := insert_string || |
| CASE |
| WHEN linregr_coeffs is NULL |
| THEN '''{}'',' |
| ELSE 'ARRAY[' || array_to_string(linregr_coeffs, ',') || '], ' |
| END || |
| CASE |
| WHEN (robust_lin_rst).std_err is NULL |
| THEN '''{}'',' |
| ELSE 'ARRAY[' || array_to_string((robust_lin_rst).std_err, ',') || '], ' |
| END || |
| CASE |
| WHEN (robust_lin_rst).t_stats is NULL |
| THEN '''{}'',' |
| ELSE 'ARRAY[' || array_to_string((robust_lin_rst).t_stats, ',') || '], ' |
| END || |
| CASE |
| WHEN (robust_lin_rst).p_values is NULL |
| THEN '''{}'',' |
| ELSE 'ARRAY[' || array_to_string((robust_lin_rst).p_values, ',') || ']' |
| END; |
| RETURN insert_string; |
| END; |
| $$ LANGUAGE plpgsql VOLATILE; |
| |
| --------------------------- Interface ---------------------------------- |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_linregr( |
| usage_string VARCHAR -- usage string |
| ) |
| RETURNS VARCHAR AS $$ |
| DECLARE |
| insert_string VARCHAR; |
| BEGIN |
| IF (usage_string = '' OR usage_string = 'help' OR usage_string = '?') THEN |
| insert_string := '' || |
| E'Summary \n' || |
| E'-----------------------------------------------------------------------------------------\n' || |
| E' Functionality: Calculate Huber-White robust statistics for linear regression\n' || |
| E' The function first runs the regression to calculate the \n' || |
| E' coefficients and uses them to calculate the robust statistics \n' || |
| E' The functions robust_variance_linregr() and robust_variance_logregr() have nearly identical interfaces\n' || |
| E' SELECT {schema_madlib}.robust_variance_linregr(''source_table'' \n' || |
| E' ,''output_table'' \n' || |
| E' ,''dependent_variable'' \n' || |
| E' ,''independent_variable'' \n' || |
| E' ,''group_cols'' \n' || |
| E' );\n' || |
| E'For more details on function usage: \n' || |
| E'SELECT {schema_madlib}.robust_variance_linregr(''usage'') \n'; |
| ElSIF (usage_string = 'usage') THEN |
| insert_string := '' || |
| E'Usage\n' || |
| E'-----------------------------------------------------------------------------------------\n' || |
| E' To use this function \n' || |
| E' SELECT {schema_madlib}.robust_variance_linregr( \n' || |
| E' ''source_table'', -- Name of data table \n' || |
| E' ''output_table'', -- Name of result table \n' || |
| E' ''dependent_variable'', -- Name of column for dependent variables\n' || |
| E' ''independent_variable'', -- Name of column for independent variables\n' || |
| E' (can be any SQL expression that evaluates to an array) \n' || |
| E' ''group_cols'', -- [OPTIONAL] Comma separated string with columns to group by. Default is NULL. \n' || |
| E' );\n' || |
| E'\n' || |
| E'Output:\n' || |
| E'-----------------------------------------------------------------------------------------\n' || |
| E' The output table (''output_table'' above) has the following columns\n' || |
| E' ''coef'' DOUBLE PRECISION[], -- Coefficients of regression \n' || |
| E' ''std_err'' DOUBLE PRECISION[], -- Huber-White standard errors\n' || |
| E' ''stats'' DOUBLE PRECISION[], -- T-stats of the standard errors\n' || |
| E' ''p_values'' DOUBLE PRECISION[], -- p-values of the standard errors\n' || |
| E'\n' || |
| E''; |
| ELSE |
| insert_string := 'No such option. Run SELECT {schema_madlib}.robust_variance_linregr()'; |
| END IF; |
| RETURN insert_string; |
| END; |
| $$ LANGUAGE plpgsql VOLATILE; |
| |
| |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_linregr() |
| RETURNS VARCHAR AS $$ |
| BEGIN |
| RETURN MADLIB_SCHEMA.robust_variance_linregr(''); |
| END; |
| $$ LANGUAGE plpgsql VOLATILE; |
| |
| /** |
| * @brief Robust linear regression with default fit regression behaviour & no grouping |
| **/ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_linregr( |
| source_table VARCHAR -- name of input table |
| , out_table VARCHAR -- name of output table |
| , dependent_variable VARCHAR -- name of dependent variable |
| , independent_variable VARCHAR -- name of independent variable |
| ) |
| RETURNS VOID AS $$ |
| BEGIN |
| PERFORM MADLIB_SCHEMA.robust_variance_linregr( |
| source_table, |
| out_table, |
| dependent_variable, |
| independent_variable, |
| NULL); |
| END; |
| $$ LANGUAGE plpgsql VOLATILE; |
| |
| --------------------------- Robust Linear Regression ---------------------------------- |
| |
| /** |
| * @brief Robust linear regression function subcall |
| **/ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_linregr( |
| source_table VARCHAR -- name of input table |
| , out_table VARCHAR -- name of output table |
| , dependent_varname VARCHAR -- name of dependent variable |
| , input_independent_varname VARCHAR -- name of independent variable |
| , input_group_cols VARCHAR -- grouping columns |
| ) |
| RETURNS VOID AS $$ |
| DECLARE |
| insert_string VARCHAR; |
| group_cols VARCHAR[]; |
| regr_coef DOUBLE PRECISION[]; |
| robust_lin_rst MADLIB_SCHEMA.robust_linregr_result; |
| table_creation_string VARCHAR; |
| group_string VARCHAR; |
| where_group_string VARCHAR; |
| actual_table_name VARCHAR; |
| input_table_name VARCHAR[]; |
| group_array_length INTEGER; |
| each_group INTEGER; |
| robust_linregr_fitting_rst VARCHAR; |
| temp_table VARCHAR; |
| col_data_type VARCHAR; |
| schema_name VARCHAR; |
| sql VARCHAR; |
| old_msg_level TEXT; |
| independent_varname VARCHAR; |
| BEGIN |
| EXECUTE 'SELECT setting FROM pg_settings WHERE name=''client_min_messages''' INTO old_msg_level; |
| EXECUTE 'SET client_min_messages TO warning'; |
| |
| PERFORM MADLIB_SCHEMA.robust_input_checking(source_table, out_table, dependent_varname, input_independent_varname); |
| independent_varname := input_independent_varname; |
| |
| --This code should be added back in should '*' support be implemented |
| /*IF (input_independent_varname = '*') THEN |
| independent_varname := MADLIB_SCHEMA.__internal_get_col_names_except_dep_variable(source_table, input_independent_varname); |
| ELSE |
| independent_varname := input_independent_varname; |
| END IF;*/ |
| |
| |
| IF (input_group_cols IS NULL) |
| THEN |
| ------------------------------------------------------------------------- |
| -- No grouping columns: Run regular robust variance estimates |
| ------------------------------------------------------------------------- |
| -- Step 1: Create output table with appropriate column names |
| EXECUTE ' |
| CREATE TABLE ' || out_table || ' ( |
| coef DOUBLE PRECISION[], |
| std_err DOUBLE PRECISION[], |
| t_stats DOUBLE PRECISION[], |
| p_values DOUBLE PRECISION[])'; |
| -- Step 2: Run the regression if the coefficients are not provided |
| EXECUTE ' |
| SELECT (MADLIB_SCHEMA.linregr(' |
| || dependent_varname || ' , ' |
| || independent_varname || ') |
| ).coef FROM ' || source_table |
| INTO regr_coef; |
| |
| -- Step 3: Perform the robust linear regression |
| robust_lin_rst := MADLIB_SCHEMA.__internal_get_robust_linregr_result( |
| source_table, dependent_varname, independent_varname, regr_coef); |
| |
| -- Step 4: Insert into output table & cast infinity and nan |
| insert_string := MADLIB_SCHEMA.__internal_get_robust_linregr_insert_string( |
| robust_lin_rst, regr_coef, out_table); |
| insert_string := REGEXP_REPLACE(insert_string, 'Infinity', |
| '''Infinity''::double precision', 'gi'); |
| insert_string := REGEXP_REPLACE(insert_string, 'NaN', |
| '''NaN''::double precision', 'gi'); |
| EXECUTE insert_string || ')'; |
| |
| ELSE |
| ------------------------------------------------------------------------- |
| -- Grouping columns: Run robust variance estimates for each group |
| ------------------------------------------------------------------------- |
| |
| -- Step 1: Prepare the group columns |
| ------------------------------------------------------------------------- |
| group_cols = MADLIB_SCHEMA._string_to_array(input_group_cols); |
| group_array_length = array_upper(group_cols, 1); |
| |
| input_table_name = regexp_split_to_array(source_table, E'\\.'); |
| IF array_upper(input_table_name, 1) = 1 THEN |
| actual_table_name = input_table_name[1]; |
| schema_name := current_schema(); |
| ELSIF array_upper(input_table_name, 1) = 2 THEN |
| actual_table_name = input_table_name[2]; |
| schema_name = input_table_name[1]; |
| ELSE |
| RAISE EXCEPTION 'Incorrect input source table name provided'; |
| END IF; |
| |
| -- Check that each grouping column exists |
| FOR each_group in 1 .. group_array_length |
| LOOP |
| if NOT MADLIB_SCHEMA.check_if_col_exists(source_table, |
| group_cols[each_group]) THEN |
| RAISE EXCEPTION 'Grouping column % does not exist', |
| group_cols[each_group]; |
| END IF; |
| END LOOP; |
| |
| table_creation_string := 'CREATE TABLE ' || out_table || '('; |
| FOR each_group in 1 .. group_array_length |
| LOOP |
| -- create a string that makes list of |
| EXECUTE 'SELECT data_type FROM information_schema.columns |
| WHERE |
| table_schema = ''' || schema_name || ''' |
| AND table_name = ''' || actual_table_name || ''' |
| AND column_name= ''' || group_cols[each_group] || '''' |
| INTO col_data_type; |
| |
| table_creation_string := table_creation_string |
| || group_cols[each_group] |
| || ' ' || col_data_type || ','; |
| END LOOP; |
| |
| -- finish creating the output table |
| EXECUTE table_creation_string || ' |
| coef DOUBLE PRECISION[], |
| std_err DOUBLE PRECISION[], |
| t_stats DOUBLE PRECISION[], |
| p_values DOUBLE PRECISION[])'; |
| |
| -- Step 2: Perform the linear regression on groups |
| ------------------------------------------------------------------------- |
| temp_table := MADLIB_SCHEMA.__unique_string(); |
| PERFORM MADLIB_SCHEMA.linregr_train(source_table, |
| temp_table, |
| dependent_varname, |
| independent_varname, |
| input_group_cols); |
| |
| -- Note: We need to alter the names on the output tables for linregr_train |
| -- so that they don't clash with the robust_variance_linregr output tables |
| -- This name changing is required to construct queries when the regressor |
| -- is also being grouped by. I don't know why anyone would do that but |
| -- it should be a mathematically valid operation. |
| FOR each_group in 1 .. group_array_length |
| LOOP |
| EXECUTE 'ALTER TABLE ' || temp_table || ' RENAME COLUMN ' || |
| group_cols[each_group] || ' TO lin_regr_' |
| || group_cols[each_group]; |
| END LOOP; |
| |
| -- Step 3: Prepare the grouping strings |
| group_string := ''; |
| FOR each_group in 1 .. (group_array_length-1) |
| LOOP |
| group_string := group_string || |
| actual_table_name || '.' || |
| group_cols[each_group] || ','; |
| END LOOP; |
| group_string := group_string || |
| actual_table_name || '.' || |
| group_cols[group_array_length]; |
| |
| -- Where clause string |
| where_group_string := ''; |
| FOR each_group in 1 .. (group_array_length-1) |
| LOOP |
| where_group_string := where_group_string || '(' || |
| temp_table || '.lin_regr_' || |
| group_cols[each_group] || ' = ' || |
| actual_table_name || '.' || |
| group_cols[each_group] || ') AND '; |
| END LOOP; |
| where_group_string := where_group_string || '(' || |
| temp_table || '.lin_regr_' || |
| group_cols[group_array_length] || ' = ' || |
| actual_table_name || '.' || |
| group_cols[group_array_length] || ')'; |
| |
| -- Step 4: Put robust statistics into the out table |
| EXECUTE |
| 'INSERT INTO ' || out_table || ' |
| SELECT ' || group_string || ', (result).coef, (result).std_err, |
| (result).t_stats, (result).p_values |
| FROM ( |
| SELECT |
| '|| group_string ||', |
| MADLIB_SCHEMA.robust_linregr('|| |
| dependent_varname ||','|| |
| independent_varname || ', coef) AS result' || |
| ' FROM '|| source_table || ',' || temp_table || |
| ' WHERE '|| where_group_string || |
| ' GROUP BY '|| group_string || ')' || actual_table_name; |
| EXECUTE 'DROP TABLE IF EXISTS ' || temp_table; |
| END IF; |
| --Restore message settings |
| EXECUTE 'SET client_min_messages TO '|| old_msg_level; |
| END; |
| $$ LANGUAGE plpgsql VOLATILE; |
| |
| |
| --------------------------- ROBUST LOGISTIC REGRESSION --------------------------------------- |
| |
| /** |
| * @internal |
| * @bCompute the results of the Huber-White sandwich estimator |
| * |
| */ |
| |
| --DROP TYPE IF EXISTS MADLIB_SCHEMA.robust_logregr_result; |
| CREATE TYPE MADLIB_SCHEMA.robust_logregr_result AS ( |
| coef DOUBLE PRECISION[], |
| std_err DOUBLE PRECISION[], |
| z_stats DOUBLE PRECISION[], |
| p_values DOUBLE PRECISION[] |
| ); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_logregr_step_transition( |
| state DOUBLE PRECISION[], |
| y BOOLEAN, |
| x DOUBLE PRECISION[], |
| coef DOUBLE PRECISION[]) |
| RETURNS DOUBLE PRECISION[] |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C IMMUTABLE; |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_logregr_step_merge_states( |
| state1 DOUBLE PRECISION[], |
| state2 DOUBLE PRECISION[]) |
| RETURNS DOUBLE PRECISION[] |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C IMMUTABLE STRICT; |
| |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_logregr_step_final( |
| state DOUBLE PRECISION[]) |
| RETURNS MADLIB_SCHEMA.robust_logregr_result |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C IMMUTABLE STRICT; |
| |
| |
| /** |
| * @brief Compute robust regression diagnostic statistics for logistic regression. |
| * |
| * @param dependentVariable Column containing the dependent variable |
| * @param independentVariables Column containing the array of independent variables |
| * @param coef Column containing the array of the coefficients (as obtained by logregr) |
| * |
| * @par |
| * To include an intercept in the model, set one coordinate in the |
| * <tt>independentVariables</tt> array to 1. |
| * |
| * @return A composite value: |
| * - <tt>coef FLOAT8[] </tt> - The coefficients for the regression |
| * - <tt>std_err FLOAT8[]</tt> - Array of huber-white standard errors, |
| * \f$ \mathit{se}(c_1), \dots, \mathit{se}(c_k) \f$ |
| * - <tt>z_stats FLOAT8[]</tt> - Array of Wald z-statistics, \f$ \boldsymbol t \f$ |
| * - <tt>p_values FLOAT8[]</tt> - Array of p-values, \f$ \boldsymbol p \f$ |
| * |
| * @usage |
| * - Get all the diagnostic statistics:\n |
| * |
| * <pre> SELECT robust_logregr(<em>dependentVariable</em>, |
| * <em>independentVariables</em>, coef) |
| * FROM <em>dataTable</em>; |
| * </pre> |
| */ |
| |
| CREATE AGGREGATE MADLIB_SCHEMA.robust_logregr( |
| /*+ "dependentVariable" */ BOOLEAN, |
| /*+ "independentVariables" */ DOUBLE PRECISION[], |
| /*+ "coef" */ DOUBLE PRECISION[] ) ( |
| |
| STYPE=DOUBLE PRECISION[], |
| SFUNC=MADLIB_SCHEMA.robust_logregr_step_transition, |
| m4_ifdef(`GREENPLUM',`prefunc=MADLIB_SCHEMA.robust_logregr_step_merge_states,') |
| FINALFUNC=MADLIB_SCHEMA.robust_logregr_step_final, |
| INITCOND='{0,0,0,0,0.0}' |
| ); |
| |
| --------------------------- INTERNAL --------------------------------------- |
| /** |
| * @brief Return robust logistic regression estimates given a set of coefficients |
| **/ |
| CREATE FUNCTION MADLIB_SCHEMA.__internal_get_robust_logregr_result( |
| source_table VARCHAR -- name of input table |
| , dependent_varname VARCHAR -- name of dependent variable |
| , independent_varname VARCHAR -- name of independent variable |
| , logregr_coeffs DOUBLE PRECISION[] -- coeffs from logear regression |
| ) |
| RETURNS MADLIB_SCHEMA.robust_logregr_result AS $$ |
| DECLARE |
| robust_value MADLIB_SCHEMA.robust_logregr_result; |
| BEGIN |
| EXECUTE 'SELECT (MADLIB_SCHEMA.robust_logregr(('|| dependent_varname || ' )::BOOLEAN, '|| independent_varname || ' , '|| 'ARRAY[' || array_to_string(logregr_coeffs, ',') || '])).* FROM ' || source_table |
| INTO robust_value; |
| RETURN robust_value; |
| END |
| $$ LANGUAGE plpgsql VOLATILE; |
| |
| /** |
| * @brief Return insert string for robust logistic regression |
| **/ |
| |
| CREATE FUNCTION MADLIB_SCHEMA.__internal_get_robust_logregr_insert_string( |
| robust_log_rst MADLIB_SCHEMA.robust_logregr_result, |
| out_table TEXT |
| ) |
| RETURNS VARCHAR AS $$ |
| DECLARE |
| insert_string VARCHAR; |
| BEGIN |
| insert_string := 'INSERT INTO ' || out_table || ' VALUES ('; |
| insert_string := insert_string || |
| CASE |
| WHEN (robust_log_rst).coef is NULL |
| THEN '''{}'',' |
| ELSE 'ARRAY[' || array_to_string((robust_log_rst).coef, ',') || '], ' |
| END || |
| CASE |
| WHEN (robust_log_rst).std_err is NULL |
| THEN '''{}'',' |
| ELSE 'ARRAY[' || array_to_string((robust_log_rst).std_err, ',') || '], ' |
| END || |
| CASE |
| WHEN (robust_log_rst).z_stats is NULL |
| THEN '''{}'',' |
| ELSE 'ARRAY[' || array_to_string((robust_log_rst).z_stats, ',') || '], ' |
| END || |
| CASE |
| WHEN (robust_log_rst).p_values is NULL |
| THEN '''{}'',' |
| ELSE 'ARRAY[' || array_to_string((robust_log_rst).p_values, ',') || '] ' |
| END; |
| RETURN insert_string; |
| END; |
| $$ LANGUAGE plpgsql VOLATILE; |
| |
| --------------------------- Interface ---------------------------------- |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_logregr( |
| usage_string VARCHAR -- usage string |
| ) |
| RETURNS VARCHAR AS $$ |
| DECLARE |
| insert_string VARCHAR; |
| BEGIN |
| IF (usage_string = '' OR usage_string = 'help' OR usage_string = '?') THEN |
| insert_string := '' || |
| E'Summary \n' || |
| E'-----------------------------------------------------------------------------------------\n' || |
| E' Functionality: Calculate Huber-White robust statistics for logistic regression\n' || |
| E' The function first runs the regression to calculate the \n' || |
| E' coefficients and uses them to calculate the robust statistics \n' || |
| E' The functions robust_variance_linregr() and robust_variance_logregr() have nearly identical interfaces\n' || |
| E' SELECT {schema_madlib}.robust_variance_logregr(''source_table'' \n' || |
| E' ,''output_table'' \n' || |
| E' ,''dependent_variable'' \n' || |
| E' ,''independent_variable'' \n' || |
| E' ,''group_cols'' \n' || |
| E' ,''max_iter'' \n' || |
| E' ,''optimizer'' \n' || |
| E' ,''tolerance'' \n' || |
| E' ,''print_warnings'' \n' || |
| E' );\n' || |
| E'For more details on function usage: \n' || |
| E'SELECT {schema_madlib}.robust_variance_logregr(''usage'') \n'; |
| ElSIF (usage_string = 'usage') THEN |
| insert_string := '' || |
| E'Usage\n' || |
| E'-----------------------------------------------------------------------------------------\n' || |
| E' To use this function \n' || |
| E' SELECT {schema_madlib}.robust_variance_logregr( \n' || |
| E' ''source_table'', -- Name of data table \n' || |
| E' ''output_table'', -- Name of result table \n' || |
| E' ''dependent_variable'', -- Name of column for dependent variables\n' || |
| E' ''independent_variable'', -- Name of column for independent variables\n' || |
| E' (can be any SQL expression that evaluates to an array) \n' || |
| E' ''group_cols'', -- [OPTIONAL] Comma separated string with columns to group by. Default is NULL. \n' || |
| E' ''max_iter'', -- [OPTIONAL] The number of iterations used by the logistic regression solver. Default is 20. \n' || |
| E' ''optimizer'', -- [OPTIONAL] Name of the optimizer used in the logistic regression. Default is irls. \n' || |
| E' ''tolerance'', -- [OPTIONAL] The tolerance of the logistic regression optimizer. Default is 0.0001. \n' || |
| E' ''print_warnings'', -- [OPTIONAL] Should the optimizer print warning messages to the screen. Default is FALSE. \n' || |
| |
| E' );\n' || |
| E'\n' || |
| E'Output:\n' || |
| E'-----------------------------------------------------------------------------------------\n' || |
| E' The output table (''output_table'' above) has the following columns\n' || |
| E' ''coef'' DOUBLE PRECISION[], -- Coefficients of regression \n' || |
| E' ''std_err'' DOUBLE PRECISION[], -- Huber-White standard errors\n' || |
| E' ''stats'' DOUBLE PRECISION[], -- Z-stats of the standard errors\n' || |
| E' ''p_values'' DOUBLE PRECISION[], -- p-values of the standard errors\n' || |
| E'\n' || |
| E''; |
| ELSE |
| insert_string := 'No such option. Run SELECT {schema_madlib}.robust_variance_logregr()'; |
| END IF; |
| RETURN insert_string; |
| END; |
| $$ LANGUAGE plpgsql VOLATILE; |
| |
| |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_logregr() |
| RETURNS VARCHAR AS $$ |
| BEGIN |
| RETURN MADLIB_SCHEMA.robust_variance_logregr(''); |
| END; |
| $$ LANGUAGE plpgsql VOLATILE; |
| /** |
| *@brief The robust logistic regression function. |
| * |
| * @param source_table String identifying the input table |
| * @param out_table String identifying the output table to be created |
| * @param dependent_varname Column containing the dependent variable |
| * @param independent_varname Column containing the array of independent variables |
| * @param input_group_cols Columns to group by. |
| * @param max_iter Integer identifying the maximum iterations used by the logistic regression solver. Default is 20. |
| * @param optimizer String identifying the optimizer used in the logistic regression. See the documentation in the logistic regression for the available options. Default is irls. |
| * @param tolerance Float identifying the tolerance of the logistic regression optimizer. Default is 0.0001. |
| * @param print_warnings Boolean specifying if the regression fit should print any warning messages. Default is false. |
| * @par |
| * To include an intercept in the model, set one coordinate in the |
| * <tt>independent_varname</tt> array to 1. |
| * |
| * @return A composite value: |
| * - <tt>std_err FLOAT8[]</tt> - Array of huber-white standard errors, |
| * \f$ \mathit{se}(c_1), \dots, \mathit{se}(c_k) \f$ |
| * - <tt>t_stats FLOAT8[]</tt> - Array of t-statistics, \f$ \boldsymbol t \f$ |
| * - <tt>p_values FLOAT8[]</tt> - Array of p-values, \f$ \boldsymbol p \f$ |
| * |
| * @usage |
| * For function summary information. Run |
| * sql> select robust_variance_logregr('help'); |
| * OR |
| * sql> select robust_variance_logregr(); |
| * OR |
| * sql> select robust_variance_logregr('?'); |
| * For function usage information. Run |
| * sql> select robust_variance_logregr('usage'); |
| * - Compute the coefficients, and the get the robust diagnostic statistics: |
| * <pre> |
| * select robust_variance_logregr(source_table, out_table, regression_type, dependentVariable, independentVariables, NULL ); |
| * </pre> |
| * |
| * - If the coefficients are already known, they can be provided directly |
| * <pre>select robust_variance_logregr(source_table, out_table, regression_type, dependentVariable, independentVariables, coef ); |
| </pre> |
| */ |
| |
| |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_logregr( |
| source_table VARCHAR, -- name of input table |
| out_table VARCHAR, -- name of output table |
| dependent_varname VARCHAR, -- name of dependent variable |
| input_independent_varname VARCHAR, -- name of independent variable |
| input_group_cols VARCHAR, -- grouping columns |
| max_iter INTEGER, -- maximum of iterations in the optimizer |
| optimizer VARCHAR, -- the optimizer used |
| tolerance DOUBLE PRECISION, |
| print_warnings BOOLEAN |
| ) |
| RETURNS VOID AS $$ |
| DECLARE |
| insert_string VARCHAR; |
| group_cols VARCHAR[]; |
| regr_coef DOUBLE PRECISION[]; |
| table_creation_string VARCHAR; |
| group_string VARCHAR; |
| where_group_string VARCHAR; |
| actual_table_name VARCHAR; |
| input_table_name VARCHAR[]; |
| group_array_length INTEGER; |
| each_group INTEGER; |
| robust_logregr_fitting_rst VARCHAR; |
| temp_table VARCHAR; |
| col_data_type VARCHAR; |
| schema_name VARCHAR; |
| sql VARCHAR; |
| tempTableName VARCHAR; |
| robust_log_rst MADLIB_SCHEMA.robust_logregr_result; |
| old_msg_level TEXT; |
| independent_varname VARCHAR; |
| print_warningsString VARCHAR; |
| BEGIN |
| EXECUTE 'SELECT setting FROM pg_settings WHERE name=''client_min_messages''' INTO old_msg_level; |
| EXECUTE 'SET client_min_messages TO warning'; |
| |
| PERFORM MADLIB_SCHEMA.robust_input_checking(source_table, out_table, dependent_varname, input_independent_varname); |
| |
| independent_varname := input_independent_varname; |
| |
| --This code should be added back in should '*' support be implemented |
| --IF (input_independent_varname = '*') THEN |
| -- independent_varname := MADLIB_SCHEMA.__internal_get_col_names_except_dep_variable(source_table, input_independent_varname); |
| -- ELSE |
| -- independent_varname := input_independent_varname; |
| -- END IF; |
| IF (input_group_cols IS NULL) |
| THEN |
| -- create output table with appropriate column names |
| EXECUTE ' |
| CREATE TABLE ' || out_table || ' ( |
| coef DOUBLE PRECISION[], |
| std_err DOUBLE PRECISION[], |
| z_stats DOUBLE PRECISION[], |
| p_values DOUBLE PRECISION[])'; |
| -- Run the regression if the coefficients are not provided |
| print_warningsString := 'FALSE'; |
| IF(print_warnings IS TRUE) |
| THEN |
| print_warningsString := 'TRUE'; |
| END IF; |
| tempTableName := MADLIB_SCHEMA.__unique_string(); |
| EXECUTE ' |
| SELECT MADLIB_SCHEMA.logregr_train(' |
| || ' '''||source_table||''' ' || ',' |
| || ' '''||tempTableName||''' ' || ',' |
| || ' '''||dependent_varname ||''' ' || ' , ' |
| || ' '''||independent_varname||''' '||' , ' |
| || 'NULL'|| ' , ' |
| ||max_iter || ' , ' |
| || ' '''||optimizer|| ''' '|| ' , ' |
| || tolerance||' , ' |
| || print_warningsString|| |
| ')'; |
| EXECUTE 'SELECT coef from ' || tempTableName INTO regr_coef; |
| |
| EXECUTE 'DROP TABLE IF EXISTS ' || tempTableName; |
| |
| |
| -- compute robust variance calculation |
| robust_log_rst := MADLIB_SCHEMA.__internal_get_robust_logregr_result( |
| source_table, dependent_varname, independent_varname, regr_coef); |
| insert_string := MADLIB_SCHEMA.__internal_get_robust_logregr_insert_string( |
| robust_log_rst, out_table); |
| -- Ensure Infinity and NaN are cast properly |
| insert_string := REGEXP_REPLACE(insert_string, 'Infinity', |
| '''Infinity''::double precision', 'gi'); |
| insert_string := REGEXP_REPLACE(insert_string, 'NaN', |
| '''NaN''::double precision', 'gi'); |
| -- complete the sql string and execute |
| EXECUTE insert_string || ')'; |
| |
| ELSE |
| ------------------------------------------------------------------------- |
| -- Grouping columns: Run robust variance estimates for each group |
| ------------------------------------------------------------------------- |
| |
| -- Step 1: Prepare the group columns |
| ------------------------------------------------------------------------- |
| group_cols = MADLIB_SCHEMA._string_to_array(input_group_cols); |
| group_array_length = array_upper(group_cols, 1); |
| |
| input_table_name = regexp_split_to_array(source_table, E'\\.'); |
| IF array_upper(input_table_name, 1) = 1 THEN |
| actual_table_name = input_table_name[1]; |
| schema_name := current_schema(); |
| ELSIF array_upper(input_table_name, 1) = 2 THEN |
| actual_table_name = input_table_name[2]; |
| schema_name = input_table_name[1]; |
| ELSE |
| RAISE EXCEPTION 'Incorrect input source table name provided'; |
| END IF; |
| |
| -- Check that each grouping column exists |
| FOR each_group in 1 .. group_array_length |
| LOOP |
| if NOT MADLIB_SCHEMA.check_if_col_exists(source_table, |
| group_cols[each_group]) THEN |
| RAISE EXCEPTION 'Grouping column % does not exist', |
| group_cols[each_group]; |
| END IF; |
| END LOOP; |
| |
| table_creation_string := 'CREATE TABLE ' || out_table || '('; |
| FOR each_group in 1 .. group_array_length |
| LOOP |
| -- create a string that makes list of |
| EXECUTE 'SELECT data_type FROM information_schema.columns |
| WHERE |
| table_schema = ''' || schema_name || ''' |
| AND table_name = ''' || actual_table_name || ''' |
| AND column_name= ''' || group_cols[each_group] || '''' |
| INTO col_data_type; |
| |
| table_creation_string := table_creation_string |
| || group_cols[each_group] |
| || ' ' || col_data_type || ','; |
| END LOOP; |
| |
| -- finish creating the output table |
| EXECUTE table_creation_string || ' |
| coef DOUBLE PRECISION[], |
| std_err DOUBLE PRECISION[], |
| z_stats DOUBLE PRECISION[], |
| p_values DOUBLE PRECISION[])'; |
| |
| -- Step 2: Perform the regression on groups |
| ------------------------------------------------------------------------- |
| temp_table := MADLIB_SCHEMA.__unique_string(); |
| PERFORM MADLIB_SCHEMA.logregr_train(source_table, |
| temp_table, |
| dependent_varname, |
| independent_varname, |
| input_group_cols, |
| max_iter, |
| optimizer, |
| tolerance, |
| print_warnings); |
| |
| -- Note: We need to alter the names on the output tables for logregr_train |
| -- so that they don't clash with the robust_variance_logregr output tables |
| -- This name changing is required to construct queries when the regressor |
| -- is also being grouped by. I don't know why anyone would do that but |
| -- it should be a mathematically valid operation. |
| FOR each_group in 1 .. group_array_length |
| LOOP |
| EXECUTE 'ALTER TABLE ' || temp_table || ' RENAME COLUMN ' || |
| group_cols[each_group] || ' TO log_regr_' |
| || group_cols[each_group]; |
| END LOOP; |
| |
| -- Step 3: Prepare the grouping strings |
| group_string := ''; |
| FOR each_group in 1 .. (group_array_length-1) |
| LOOP |
| group_string := group_string || |
| actual_table_name || '.' || |
| group_cols[each_group] || ','; |
| END LOOP; |
| group_string := group_string || |
| actual_table_name || '.' || |
| group_cols[group_array_length]; |
| |
| -- Where clause string |
| where_group_string := ''; |
| FOR each_group in 1 .. (group_array_length-1) |
| LOOP |
| where_group_string := where_group_string || '(' || |
| temp_table || '.log_regr_' || |
| group_cols[each_group] || ' = ' || |
| actual_table_name || '.' || |
| group_cols[each_group] || ') AND '; |
| END LOOP; |
| where_group_string := where_group_string || '(' || |
| temp_table || '.log_regr_' || |
| group_cols[group_array_length] || ' = ' || |
| actual_table_name || '.' || |
| group_cols[group_array_length] || ')'; |
| |
| -- Step 4: Robust statistics into the out table |
| EXECUTE |
| 'INSERT INTO ' || out_table || ' |
| SELECT ' || group_string || ', (result).coef, (result).std_err, |
| (result).z_stats, (result).p_values |
| FROM ( |
| SELECT |
| '|| group_string ||', |
| MADLIB_SCHEMA.robust_logregr('|| |
| dependent_varname ||','|| |
| independent_varname || ', coef) AS result' || |
| ' FROM '|| source_table || ',' || temp_table || |
| ' WHERE '|| where_group_string || |
| ' GROUP BY '|| group_string || ')' || actual_table_name; |
| EXECUTE 'DROP TABLE IF EXISTS ' || temp_table; |
| END IF; |
| --Restore message settings |
| EXECUTE 'SET client_min_messages TO '|| old_msg_level; |
| |
| END; |
| $$ LANGUAGE plpgsql VOLATILE; |
| |
| /** |
| * @brief Robust logistic function subcall |
| **/ |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_logregr( |
| source_table VARCHAR, -- name of input table |
| out_table VARCHAR, -- name of output table |
| dependent_variable VARCHAR, -- name of dependent variable |
| independent_variable VARCHAR, -- name of independent variable |
| input_group_cols VARCHAR -- grouping columns |
| ) |
| RETURNS VOID AS $$ |
| BEGIN |
| PERFORM MADLIB_SCHEMA.robust_variance_logregr( |
| source_table, |
| out_table, |
| dependent_variable, |
| independent_variable, |
| input_group_cols, |
| 20, |
| 'irls', |
| 0.0001, |
| FALSE); |
| END; |
| $$ LANGUAGE plpgsql VOLATILE; |
| /** |
| * @brief Robust logistic regression with default fit regression behavior, and no grouping, |
| **/ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_logregr( |
| source_table VARCHAR -- name of input table |
| ,out_table VARCHAR -- name of output table |
| ,dependent_variable VARCHAR -- name of dependent variable |
| ,independent_variable VARCHAR -- name of independent variable |
| ) |
| RETURNS VOID AS $$ |
| BEGIN |
| PERFORM MADLIB_SCHEMA.robust_variance_logregr( |
| source_table, |
| out_table, |
| dependent_variable, |
| independent_variable, |
| NULL, |
| 20, |
| 'irls', |
| 0.0001, |
| FALSE); |
| END; |
| $$ LANGUAGE plpgsql VOLATILE; |
| |
| |
| |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_logregr( |
| source_table VARCHAR -- name of input table |
| , out_table VARCHAR -- name of output table |
| , dependent_variable VARCHAR -- name of dependent variable |
| , independent_variable VARCHAR -- name of independent variable |
| , input_group_cols VARCHAR -- grouping columns |
| , max_iter INTEGER -- max number of iterations |
| ) |
| RETURNS VOID AS $$ |
| BEGIN |
| PERFORM MADLIB_SCHEMA.robust_variance_logregr( |
| source_table, |
| out_table, |
| dependent_variable, |
| independent_variable, |
| input_group_cols, |
| max_iter, |
| 'irls', |
| 0.0001, |
| FALSE); |
| END; |
| $$ LANGUAGE plpgsql VOLATILE; |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_logregr( |
| source_table VARCHAR -- name of input table |
| , out_table VARCHAR -- name of output table |
| , dependent_variable VARCHAR -- name of dependent variable |
| , independent_variable VARCHAR -- name of independent variable |
| , input_group_cols VARCHAR -- grouping columns |
| , max_iter INTEGER -- max number of iterations |
| , optimizer VARCHAR -- The optimizer used in the robust regression |
| ) |
| RETURNS VOID AS $$ |
| BEGIN |
| PERFORM MADLIB_SCHEMA.robust_variance_logregr( |
| source_table, |
| out_table, |
| dependent_variable, |
| independent_variable, |
| input_group_cols, |
| max_iter, |
| optimizer, |
| 0.0001, |
| FALSE); |
| END; |
| $$ LANGUAGE plpgsql VOLATILE; |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_logregr( |
| source_table VARCHAR -- name of input table |
| , out_table VARCHAR -- name of output table |
| , dependent_variable VARCHAR -- name of dependent variable |
| , independent_variable VARCHAR -- name of independent variable |
| , input_group_cols VARCHAR -- grouping columns |
| , max_iter INTEGER -- max number of iterations |
| , optimizer VARCHAR -- The optimizer used in the robust regression |
| , tolerance DOUBLE PRECISION -- The tolerance of the optimizer |
| ) |
| RETURNS VOID AS $$ |
| BEGIN |
| PERFORM MADLIB_SCHEMA.robust_variance_logregr( |
| source_table, |
| out_table, |
| dependent_variable, |
| independent_variable, |
| input_group_cols, |
| max_iter, |
| optimizer, |
| tolerance, |
| FALSE); |
| END; |
| $$ LANGUAGE plpgsql VOLATILE; |
| |
| |
| |
| |
| |
| |
| --------------------------- ROBUST MULTINOMIAL LOGISTIC REGRESSION ------------------------- |
| |
| CREATE TYPE MADLIB_SCHEMA.robust_mlogregr_result AS ( |
| ref_category INTEGER, |
| coef DOUBLE PRECISION[], |
| std_err DOUBLE PRECISION[], |
| z_stats DOUBLE PRECISION[], |
| p_values DOUBLE PRECISION[] |
| ); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlogregr_robust_step_transition |
| ( |
| state DOUBLE PRECISION[], |
| y INTEGER, |
| numCategories INTEGER, |
| ref_category INTEGER, |
| x DOUBLE PRECISION[], |
| coef DOUBLE PRECISION[] |
| ) |
| RETURNS DOUBLE PRECISION[] |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C IMMUTABLE; |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlogregr_robust_step_merge_states |
| ( |
| state1 DOUBLE PRECISION[], |
| state2 DOUBLE PRECISION[] |
| ) |
| RETURNS DOUBLE PRECISION[] |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C IMMUTABLE STRICT; |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlogregr_robust_step_final |
| ( |
| state DOUBLE PRECISION[] |
| ) |
| RETURNS MADLIB_SCHEMA.robust_mlogregr_result |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C IMMUTABLE STRICT; |
| |
| |
| /** |
| * @brief Compute robust regression diagnostic statistics for multinomial logistic regression. |
| * |
| * @param source_table name of input table, VARCHAR |
| * @param out_table name of output table, VARCHAR |
| * @param dependent_varname dependent variable, VARCHAR |
| * @param independent_varname independent variables, VARCHAR |
| * @param ref_category Integer specifying the reference category. Default is 0. |
| * @param grouping_cols grouping variables, VARCHAR. Default is NULL. Currently a placeholder. |
| * @param max_iter Integer identifying the maximum iterations used by the logistic regression solver. Default is 20. |
| * @param optimizer String identifying the optimizer used in the multinomial logistic regression. See the documentation in the multinomial logistic regression for the available options. Default is 'irls'. |
| * @param tolerance Float identifying the tolerance of the multinomial logistic regression optimizer. Default is 0.0001. |
| * @param print_warnings Boolean specifying if the multinomial logistic regression solver should print any warnings. Currently a placeholder. |
| * |
| * |
| * @par |
| * To include an intercept in the model, set one coordinate in the |
| * <tt>independentVariables</tt> array to 1. |
| * |
| * @usage |
| * <pre> |
| * SELECT madlib.robust_variance_mlogregr( |
| * <em>'source_table'</em>, -- name of input table, VARCHAR |
| * <em>'out_table'</em>, -- name of output table, VARCHAR |
| * <em>'dependent_varname'</em>, -- dependent variable, VARCHAR |
| * <em>'independent_varname'</em>, -- independent variables, VARCHAR |
| * <em>ref_category</em>, -- [OPTIONAL] Integer specifying the reference category. Default is 0. |
| * <em>'grouping_cols'</em>, -- [OPTIONAL] grouping variables, VARCHAR. Default is NULL. |
| * <em>max_iter</em>, -- [OPTIONAL] Integer identifying the maximum iterations used by the logistic regression solver. Default is 20. |
| * <em>'optimizer'</em>, -- [OPTIONAL] String identifying the optimizer used in the multinomial logistic regression. See the documentation in the multinomial logistic regression for the available options. Default is irls. |
| * <em>tolerance</em>, -- [OPTIONAL] Float identifying the tolerance of the multinomial logistic regression optimizer. Default is 0.0001. |
| * <em>print_warnings</em> -- [OPTIONAL] Boolean specifying if the regression fit should print any warning messages. Default is false. |
| * ); |
| * </pre> |
| * |
| * @return A composite value: |
| * - <tt>ref_category INTEGER</tt> - The reference category |
| * - <tt>coef FLOAT8[] </tt> - The coefficients for the regression |
| * - <tt>std_err FLOAT8[]</tt> - Array of huber-white standard errors, |
| * - <tt>z_stats FLOAT8[]</tt> - Array of Wald z-statistics, |
| * - <tt>p_values FLOAT8[]</tt> - Array of p-values, |
| * |
| */ |
| |
| |
| |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr( |
| source_table VARCHAR, -- name of input table |
| out_table VARCHAR, -- name of output table |
| dependent_varname VARCHAR, -- name of dependent variable |
| independent_varname VARCHAR, -- name of independent variable |
| ref_category INTEGER, -- reference category (default 0) |
| input_group_cols VARCHAR, -- grouping columns (PLACEHOLDER) (default NULL) |
| max_iter INTEGER, -- max number of itertions (default 20) |
| optimizer VARCHAR, -- optimizer ['irls', 'newton'] (default 'irls') |
| tolerance DOUBLE PRECISION, -- tolerance (default 0.0001) |
| print_warnings BOOLEAN -- PLACEHOLDER (default False) |
| ) |
| RETURNS VOID AS $$ |
| DECLARE |
| num_category INTEGER; |
| regr_coef DOUBLE PRECISION[]; |
| robust_value MADLIB_SCHEMA.robust_mlogregr_result; |
| insert_string VARCHAR; |
| old_msg_level VARCHAR; |
| BEGIN |
| EXECUTE 'SELECT setting FROM pg_settings WHERE name=''client_min_messages''' INTO old_msg_level; |
| EXECUTE 'SET client_min_messages TO warning'; |
| |
| PERFORM MADLIB_SCHEMA.robust_input_checking(source_table, out_table, dependent_varname, independent_varname); |
| -- Run the regression if the coefficients are not provided |
| -- Will do further parameter checking in the regression function |
| EXECUTE |
| $sql$SELECT coef FROM MADLIB_SCHEMA.mlogregr('$sql$ |
| || source_table || $sql$', '$sql$ |
| || dependent_varname || $sql$', '$sql$ |
| || independent_varname || $sql$', $sql$ |
| || max_iter || $sql$, '$sql$ |
| || optimizer || $sql$', $sql$ |
| || tolerance || ',' |
| || ref_category || ')' |
| INTO regr_coef; |
| |
| EXECUTE |
| 'SELECT count(DISTINCT ' || dependent_varname|| ') FROM ' || |
| textin(regclassout(source_table)) |
| INTO num_category; |
| |
| -- compute robust variance calculation |
| --robust_log_rst := MADLIB_SCHEMA.__internal_get_robust_mlogregr_result( |
| -- source_table, dependent_varname, num_category, ref_category, |
| -- independent_varname, regr_coef); |
| EXECUTE |
| 'SELECT (MADLIB_SCHEMA.robust_mlogregr(' |
| || dependent_varname || ', ' |
| || num_category || ', ' |
| || ref_category || ', ' |
| || independent_varname || ', ' |
| || 'ARRAY[' || array_to_string(regr_coef, ',') || |
| '])).* FROM ' || source_table |
| INTO robust_value; |
| insert_string := |
| MADLIB_SCHEMA.__internal_get_robust_mlogregr_insert_string( |
| robust_value, out_table); |
| |
| -- Ensure Infinity and NaN are cast properly |
| insert_string := REGEXP_REPLACE( |
| insert_string, 'Infinity', '''Infinity''::double precision', 'gi'); |
| insert_string := REGEXP_REPLACE( |
| insert_string, 'NaN', '''NaN''::double precision', 'gi'); |
| |
| -- create output table with appropriate column names |
| EXECUTE |
| 'CREATE TABLE ' || out_table || ' ( |
| ref_category INTEGER, |
| coef DOUBLE PRECISION[], |
| std_err DOUBLE PRECISION[], |
| z_stats DOUBLE PRECISION[], |
| p_values DOUBLE PRECISION[])'; |
| |
| -- complete the sql string and execute |
| EXECUTE insert_string || ')'; |
| |
| --Restore message settings |
| EXECUTE 'SET client_min_messages TO '|| old_msg_level; |
| END; |
| $$ LANGUAGE plpgsql VOLATILE; |
| |
| |
| /** |
| * @brief Robust multinomial logistic function subcall |
| * @param dependentVariable Column containing the dependent variable |
| * @param numCategories Number of categories in the dependent variable |
| * @param ref_category Reference category for modeling |
| * @param independentVariables Column containing the array of independent variables |
| * @param coef Column containing the array of the coefficients (as obtained by mlogregr) |
| * @usage |
| * <pre> |
| * SELECT robust_mlogregr( |
| * <em>dependentVariable</em>, |
| * <em>numCategories</em>, |
| * <em>ref_category</em>, |
| * <em>independentVariables</em>, |
| * <em>coef</em>) |
| * FROM <em>dataTable</em>; |
| * </pre> |
| * @return A composite value: |
| * - <tt>ref_category INTEGER</tt> - The reference category |
| * - <tt>coef FLOAT8[] </tt> - The coefficients for the regression |
| * - <tt>std_err FLOAT8[]</tt> - Array of huber-white standard errors, |
| * \f$ \mathit{se}(c_1), \dots, \mathit{se}(c_k) \f$ |
| * - <tt>z_stats FLOAT8[]</tt> - Array of Wald z-statistics, \f$ \boldsymbol t \f$ |
| * - <tt>p_values FLOAT8[]</tt> - Array of p-values, \f$ \boldsymbol p \f$ |
| * |
| */ |
| |
| CREATE AGGREGATE MADLIB_SCHEMA.robust_mlogregr( |
| /*+ "dependentVariable" */ INTEGER, |
| /*+ "numCategories" */ INTEGER, |
| /*+ "ref_category" */ INTEGER, |
| /*+ "independentVariables" */ DOUBLE PRECISION[], |
| /*+ "coef" */ DOUBLE PRECISION[] |
| ) |
| ( |
| STYPE=DOUBLE PRECISION[], |
| SFUNC=MADLIB_SCHEMA.mlogregr_robust_step_transition, |
| m4_ifdef(`GREENPLUM',`prefunc=MADLIB_SCHEMA.mlogregr_robust_step_merge_states,') |
| FINALFUNC=MADLIB_SCHEMA.mlogregr_robust_step_final, |
| INITCOND='{0,0,0,0,0}' |
| ); |
| |
| |
| |
| |
| --------------------------- INTERNAL --------------------------------------- |
| /** |
| * @brief Return insert string for robust multinomial logistic regression |
| **/ |
| CREATE FUNCTION MADLIB_SCHEMA.__internal_get_robust_mlogregr_insert_string( |
| robust_mlog_rst MADLIB_SCHEMA.robust_mlogregr_result, |
| out_table TEXT |
| ) |
| RETURNS VARCHAR AS $$ |
| DECLARE |
| insert_string VARCHAR; |
| BEGIN |
| insert_string := 'INSERT INTO ' || out_table || ' VALUES ('; |
| insert_string := insert_string || |
| (robust_mlog_rst).ref_category || ', ' || |
| CASE |
| WHEN (robust_mlog_rst).coef is NULL |
| THEN '''{}'',' |
| ELSE 'ARRAY[' || array_to_string((robust_mlog_rst).coef, ',') || '], ' |
| END || |
| CASE |
| WHEN (robust_mlog_rst).std_err is NULL |
| THEN '''{}'',' |
| ELSE 'ARRAY[' || array_to_string((robust_mlog_rst).std_err, ',') || '], ' |
| END || |
| CASE |
| WHEN (robust_mlog_rst).z_stats is NULL |
| THEN '''{}'',' |
| ELSE 'ARRAY[' || array_to_string((robust_mlog_rst).z_stats, ',') || '], ' |
| END || |
| CASE |
| WHEN (robust_mlog_rst).p_values is NULL |
| THEN '''{}'',' |
| ELSE 'ARRAY[' || array_to_string((robust_mlog_rst).p_values, ',') || '] ' |
| END; |
| RETURN insert_string; |
| END; |
| $$ LANGUAGE plpgsql VOLATILE; |
| |
| --------------------------- Interface ---------------------------------- |
| |
| |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr( |
| usage_string VARCHAR -- usage string |
| ) |
| RETURNS VARCHAR AS $$ |
| DECLARE |
| insert_string VARCHAR; |
| BEGIN |
| IF (usage_string = '' OR usage_string = 'help' OR usage_string = '?') THEN |
| insert_string := '' || |
| E'Summary \n' || |
| E'-----------------------------------------------------------------------------------------\n' || |
| E' Functionality: Calculate Huber-White robust statistics for multinomial logistic regression\n' || |
| E' The funciton first runs the regression to calculate the \n' || |
| E' coefficients and uses them to calculate the robust statistics \n' || |
| E' SELECT {schema_madlib}.robust_variance_mlogregr(''source_table'' \n' || |
| E' ,''output_table'' \n' || |
| E' ,''dependent_variable'' \n' || |
| E' ,''independent_variable'' \n' || |
| E' ,''reference_category'' \n' || |
| E' ,''group_cols'' \n' || |
| E' ,''max_iter'' \n' || |
| E' ,''optimizer'' \n' || |
| E' ,''tolerance'' \n' || |
| E' ,''verbose'' \n' || |
| E' );\n' || |
| E'For more details on function usage: \n' || |
| E'SELECT {schema_madlib}.robust_variance_mlogregr(''usage'') \n'; |
| ElSIF (usage_string = 'usage') THEN |
| insert_string := '' || |
| E'Usage\n' || |
| E'-----------------------------------------------------------------------------------------\n' || |
| E' To use this function \n' || |
| E' SELECT {schema_madlib}.robust_variance_mlogregr( \n' || |
| E' ''source_table'', -- Name of data table\n' || |
| E' ''output_table'', -- Name of result table\n' || |
| E' ''dependent_variable'', -- Name of column for dependent variables\n' || |
| E' ''independent_variable'', -- Name of column for independent variables\n' || |
| E' (can be any SQL expression that evaluates to an array) \n' || |
| E' ''reference_category'', -- [OPTIONAL] Reference category. Default is 0. \n' || |
| E' ''group_cols'', -- [OPTIONAL] Comma separated string with columns to group by. Default is NULL. \n' || |
| E' ''max_iter'', -- [OPTIONAL] The number of iterations used by the logistic regression solver. Default is 20. \n' || |
| E' ''optimizer'', -- [OPTIONAL] Name of the optimizer used in the logistic regression. Default is irls. \n' || |
| E' ''tolerance'', -- [OPTIONAL] The tolerance of the logistic regression optimizer. Default is 0.0001. \n' || |
| E' ''verbose'' -- [OPTIONAL] Should the optimizer print warning messages to the screen. Default is FALSE. \n' || |
| E' );\n' || |
| E'\n' || |
| E'Output:\n' || |
| E'-----------------------------------------------------------------------------------------\n' || |
| E' The output table (''output_table'' above) has the following columns\n' || |
| E' ''ref_category" INTEGER, -- Reference category\n' || |
| E' ''coef'' DOUBLE PRECISION[], -- Coefficients of regression \n' || |
| E' ''std_err'' DOUBLE PRECISION[], -- Huber-White standard errors\n' || |
| E' ''z_stats'' DOUBLE PRECISION[], -- Z-stats of the standard errors\n' || |
| E' ''p_values'' DOUBLE PRECISION[], -- p-values of the standard errors\n' || |
| E'\n' || |
| E''; |
| ELSE |
| insert_string := 'No such option. Run SELECT {schema_madlib}.robust_variance_mlogregr()'; |
| END IF; |
| RETURN insert_string; |
| END; |
| $$ LANGUAGE plpgsql VOLATILE; |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr() |
| RETURNS VARCHAR AS $$ |
| BEGIN |
| RETURN MADLIB_SCHEMA.robust_variance_mlogregr(''); |
| END; |
| $$ LANGUAGE plpgsql VOLATILE; |
| |
| --------------------------- Robust Multinomial Logistic Regression ---------------------------------- |
| |
| |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr( |
| source_table VARCHAR, |
| out_table VARCHAR, |
| dependent_varname VARCHAR, |
| independent_varname VARCHAR, |
| ref_category INTEGER, |
| input_group_cols VARCHAR, |
| max_iter INTEGER, |
| optimizer VARCHAR, |
| tolerance DOUBLE PRECISION |
| ) |
| RETURNS VOID AS $$ |
| SELECT MADLIB_SCHEMA.robust_variance_mlogregr($1, $2, $3, $4, $5, $6, $7, |
| $8, $9, FALSE); |
| $$ LANGUAGE sql; |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr( |
| source_table VARCHAR, |
| out_table VARCHAR, |
| dependent_varname VARCHAR, |
| independent_varname VARCHAR, |
| ref_category INTEGER, |
| input_group_cols VARCHAR, |
| max_iter INTEGER, |
| optimizer VARCHAR |
| ) |
| RETURNS VOID AS $$ |
| SELECT MADLIB_SCHEMA.robust_variance_mlogregr($1, $2, $3, $4, $5, $6, $7, |
| $8, 0.0001, FALSE); |
| $$ LANGUAGE sql; |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr( |
| source_table VARCHAR, |
| out_table VARCHAR, |
| dependent_varname VARCHAR, |
| independent_varname VARCHAR, |
| ref_category INTEGER, |
| input_group_cols VARCHAR, |
| max_iter INTEGER |
| ) |
| RETURNS VOID AS $$ |
| SELECT MADLIB_SCHEMA.robust_variance_mlogregr($1, $2, $3, $4, $5, $6, $7, |
| 'irls', 0.0001, FALSE); |
| $$ LANGUAGE sql; |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr( |
| source_table VARCHAR, |
| out_table VARCHAR, |
| dependent_varname VARCHAR, |
| independent_varname VARCHAR, |
| ref_category INTEGER, |
| input_group_cols VARCHAR |
| ) |
| RETURNS VOID AS $$ |
| SELECT MADLIB_SCHEMA.robust_variance_mlogregr($1, $2, $3, $4, $5, $6, 20, |
| 'irls', 0.0001, FALSE); |
| $$ LANGUAGE sql; |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr( |
| source_table VARCHAR, |
| out_table VARCHAR, |
| dependent_varname VARCHAR, |
| independent_varname VARCHAR, |
| ref_category INTEGER |
| ) |
| RETURNS VOID AS $$ |
| SELECT MADLIB_SCHEMA.robust_variance_mlogregr($1, $2, $3, $4, $5, NULL, 20, |
| 'irls', 0.0001, FALSE); |
| $$ LANGUAGE sql; |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.robust_variance_mlogregr( |
| source_table VARCHAR, |
| out_table VARCHAR, |
| dependent_varname VARCHAR, |
| independent_varname VARCHAR |
| ) |
| RETURNS VOID AS $$ |
| SELECT MADLIB_SCHEMA.robust_variance_mlogregr($1, $2, $3, $4, 0, NULL, 20, |
| 'irls', 0.0001, FALSE); |
| $$ LANGUAGE sql; |