| # coding=utf-8 |
| |
| """ |
| @file marginal.py_in |
| |
| @brief Marginal Effects: Common functions |
| |
| @namespace marginal |
| """ |
| import plpy |
| from utilities.utilities import unique_string |
| from utilities.utilities import _string_to_array |
| from utilities.utilities import extract_keyvalue_params |
| from utilities.utilities import _assert |
| from utilities.utilities import py_list_to_sql_string |
| |
| from utilities.validate_args import table_exists |
| from utilities.validate_args import columns_exist_in_table |
| from utilities.validate_args import table_is_empty |
| |
| # use mad_vec to process arrays passed as strings in GPDB < 4.1 and PG < 9.0 |
| from utilities.utilities import __mad_version |
| version_wrapper = __mad_version() |
| string_to_array = version_wrapper.select_vecfunc() |
| array_to_string = version_wrapper.select_vec_return() |
| |
| # ======================================================================== |
| |
| |
| def _margins_common_validate_args(schema_madlib, source_table, out_table, |
| dependent_varname, independent_varname, |
| grouping_cols, marginal_vars, |
| max_iter, tolerance, **kwargs): |
| _assert(source_table and |
| source_table.strip().lower() not in ('null', ''), |
| "Margins error: Invalid data table name!") |
| _assert(table_exists(source_table), |
| "Margins error: Data table does not exist!") |
| _assert(not table_is_empty(source_table), |
| "Margins error: Data table is empty!") |
| _assert(out_table and |
| out_table.strip().lower() not in ('null', ''), |
| "Margins error: Invalid output table name!") |
| _assert(not table_exists(out_table, only_first_schema=True), |
| "Margins error: Output table already exists!") |
| _assert(not table_exists(out_table + '_summary', only_first_schema=True), |
| "Margins error: Output summary table already exists!") |
| _assert(dependent_varname and |
| dependent_varname.strip().lower() not in ('null', ''), |
| "Margins error: Invalid dependent column name!") |
| _assert(independent_varname and |
| independent_varname.strip().lower() not in ('null', ''), |
| "Margins error: Invalid independent column name!") |
| if grouping_cols: |
| _assert(grouping_cols.strip().lower() not in ('null', ''), |
| "Margins error: Invalid grouping columns name!") |
| _assert(columns_exist_in_table( |
| source_table, _string_to_array(grouping_cols), schema_madlib), |
| "Margins error: Grouping column does not exist!") |
| |
| if marginal_vars: |
| vec = string_to_array(marginal_vars, text=False) |
| _assert(min(vec) is not None, |
| "Margins error: NULL value found in marginal_vars!") |
| lower_dim = plpy.execute("select array_lower({0},1) as d from {1}".format( |
| independent_varname, source_table))[0]['d'] |
| upper_dim = plpy.execute("select array_upper({0},1) as d from {1}".format( |
| independent_varname, source_table))[0]['d'] |
| n = upper_dim - lower_dim + 1 |
| _assert(min(vec) >= 1, |
| "Margins error: All indices in the marginal coefficients " |
| "array must be >= 1.") |
| _assert(max(vec) <= n, |
| "Margins error: All indices in the marginal coefficients " |
| "array must be <= the number of independent variables.") |
| |
| _assert(max_iter is not None, |
| "Margins error: The max_iter should not be NULL!") |
| _assert(max_iter > 0, |
| "Margins error: Maximum number of iterations must be positive!") |
| _assert(tolerance is not None, |
| "Margins error: The tolerance should not be NULL!") |
| _assert(tolerance >= 0, |
| "Margins error: The tolerance cannot be negative!") |
| # ------------------------------------------------------------------------- |
| |
| |
| def _margins_logregr_validate_args(optimizer): |
| _assert(optimizer is not None, |
| "Margins error: Optimizer should not be NULL") |
| _assert(optimizer in ("irls", "cg", "igd"), |
| "Margins error: Optimizer does not exist. Must be 'newton'/'irls', 'cg', or 'igd'.") |
| |
| |
| # Main function call for marginal logisitc regression |
| def margins_logregr(schema_madlib, source_table, out_table, |
| dependent_varname, independent_varname, |
| grouping_cols, marginal_vars, max_iter, |
| optimizer, tolerance, verbose_mode, **kwargs): |
| """ |
| @brief A wrapper function for the various marginal regression analyzes. |
| |
| @param source_table String identifying the input table |
| @param out_table String identifying the output table to be created |
| @param dependent_varname Column containing the dependent variable |
| @param independent_varname Column containing the array of independent variables |
| @param grouping_cols Set of columns to group by. |
| @param marginal_vars Subset of independent variables to calculate marginal effects for. |
| @param max_iter Maximum number of iterations |
| @param optimzer Optimizer to be used (newton/irls, cg or idg) |
| @param tolerance Resiual tolerance |
| |
| @par |
| To include an intercept in the model, set one coordinate in the |
| <tt>independentVariables</tt> array to 1. |
| |
| @return void |
| |
| @usage |
| For function summary information. Run |
| sql> select margins_logregr('help'); |
| OR |
| sql> select margins_logregr(); |
| OR |
| sql> select margins_logregr('?'); |
| For function usage information. Run |
| sql> select margins_logregr('usage'); |
| """ |
| plpy.warning("This function has been deprecated and replaced by 'margins'") |
| |
| # Reset the message level to avoid random messages |
| old_msg_level = plpy.execute(""" |
| SELECT setting |
| FROM pg_settings |
| WHERE name='client_min_messages' |
| """)[0]['setting'] |
| plpy.execute('SET client_min_messages TO warning') |
| |
| if optimizer is not None and optimizer.lower() == 'newton': |
| optimizer = 'irls' |
| # Validate arguments |
| _margins_common_validate_args(schema_madlib, source_table, out_table, |
| dependent_varname, independent_varname, |
| grouping_cols, marginal_vars, max_iter, |
| tolerance) |
| _margins_logregr_validate_args(optimizer) |
| |
| group_col_str = 'NULL' if grouping_cols is None else "'" + grouping_cols + "'" |
| optimizer_str = 'NULL' if optimizer is None else "'" + optimizer + "'" |
| maxiter_str = 'NULL' if max_iter is None else max_iter |
| tolerance_str = 'NULL' if tolerance is None else tolerance |
| logr_out_table = "pg_temp." + unique_string() |
| |
| # No grouping |
| if not grouping_cols: |
| # Run logistic regression |
| plpy.execute(""" |
| SELECT {schema_madlib}.logregr_train( |
| '{source_table}', '{logr_out_table}', |
| '{dependent_varname}', '{independent_varname}', {group_col_str}, |
| {maxiter_str}, {optimizer_str}, {tolerance_str}, {verbose}) |
| """.format(schema_madlib=schema_madlib, source_table=source_table, |
| logr_out_table=logr_out_table, dependent_varname=dependent_varname, |
| independent_varname=independent_varname, group_col_str=group_col_str, |
| maxiter_str=maxiter_str, optimizer_str=optimizer_str, |
| tolerance_str=tolerance_str, verbose=verbose_mode)) |
| |
| # Rename the output summary table |
| plpy.execute("""CREATE TABLE {out_table}_summary AS |
| SELECT * FROM {logr_out_table}_summary""".format(**locals())) |
| |
| plpy.execute("""UPDATE {out_table}_summary SET out_table = '{out_table}' |
| """.format(out_table=out_table)) |
| |
| coef = plpy.execute("select coef from {0}".format(logr_out_table))[0]['coef'] |
| if coef is None: |
| plpy.error("Margins error: No fitting coefficients were computed!") |
| |
| num_depvars = plpy.execute(""" |
| SELECT array_upper(coef, 1) num_depvars from {0} |
| """.format(logr_out_table))[0]['num_depvars'] |
| if num_depvars is not None: |
| # If marginal variables are none. Then chose all variables |
| if not marginal_vars: |
| marginal_vars = range(1, num_depvars + 1) |
| else: |
| marginal_vars = map(int, string_to_array(marginal_vars)) |
| index_array = py_list_to_sql_string(marginal_vars, array_type="integer") |
| else: |
| index_array = 'NULL' |
| |
| # Run marginal effects for logistic regression |
| plpy.execute(""" |
| CREATE TABLE {out_table} AS |
| SELECT |
| {schema_madlib}.__sub_array((res).margins, {index_array}) AS margins, |
| {schema_madlib}.__sub_array((res).std_err, {index_array}) AS std_err, |
| {schema_madlib}.__sub_array((res).z_stats, {index_array}) AS z_stats, |
| {schema_madlib}.__sub_array((res).p_values, {index_array}) AS p_values |
| FROM |
| ( |
| SELECT |
| {schema_madlib}.marginal_logregr( |
| ({dependent_varname})::BOOLEAN, |
| {independent_varname}, |
| (SELECT coef FROM {logr_out_table})) AS res |
| FROM |
| {source_table} |
| ) t1 |
| """.format(schema_madlib=schema_madlib, |
| source_table=source_table, out_table=out_table, |
| dependent_varname=dependent_varname, |
| independent_varname=independent_varname, |
| logr_out_table=logr_out_table, index_array=index_array)) |
| |
| # Drop the output table of logregr_train() |
| plpy.execute('DROP TABLE IF EXISTS ' + logr_out_table) |
| plpy.execute("SET client_min_messages TO %s" % old_msg_level) |
| # ------------------------------------------------------------------------- |
| |
| |
| def margins_logregr_help(schema_madlib, message, **kwargs): |
| plpy.warning("This function has been deprecated and replaced by 'margins'") |
| |
| if not message: |
| help_string = """ |
| ----------------------------------------------------------------------- |
| SUMMARY |
| ----------------------------------------------------------------------- |
| Functionality: Calculate marginal effects for logistic regression |
| |
| For more details on function usage: |
| SELECT {schema_madlib}.margins_logregr('usage') |
| """ |
| elif message in ['usage', 'help', '?']: |
| help_string = """ |
| ----------------------------------------------------------------------- |
| USAGE |
| ----------------------------------------------------------------------- |
| SELECT {schema_madlib}.margins_logregr( |
| 'source_table', -- Name of data table |
| 'output_table', -- Name of result table |
| 'dependent_variable', -- Name of column for dependent variables |
| 'independent_variable', -- Name of column for independent variables |
| 'grouping_cols', -- An expression list used to group the input dataset into discrete groups. |
| -- (Optional, DEFAUT=NULL. |
| -- Not currently implemented. Any non-NULL value is ignored.) |
| marginal_vars, -- Indices of variables to calculate marginal effects on |
| -- (Optional, DEFAULT: All independent variables) |
| 'max_iter', -- [OPTIONAL] The number of iterations used by the logistic regression solver. Default is 20. |
| 'optimizer', -- [OPTIONAL] Name of the optimizer used in the logistic regression. Default is irls. |
| 'tolerance', -- [OPTIONAL] The tolerance of the logistic regression optimizer. Default is 0.0001. |
| 'verbose_mode' -- [OPTIONAL] Should the optimizer print warning messages to the screen. Default is FALSE. |
| ); |
| |
| ----------------------------------------------------------------------- |
| OUTUPT |
| ----------------------------------------------------------------------- |
| The output table ('output_table' above) has the following columns |
| margins DOUBLE PRECISION[], -- Marginal effects |
| std_err DOUBLE PRECISION[], -- Standard errors using delta method |
| z_stats DOUBLE PRECISION[], -- z-stats of the standard errors |
| p_values DOUBLE PRECISION[], -- p-values of the standard errors |
| |
| The output summary table is the same as logregr_train(), see also: |
| SELECT logregr_train('usage'); |
| """ |
| else: |
| help_string = "No such option. Use {schema_madlib}.margins_mlogregr()" |
| |
| return help_string.format(schema_madlib=schema_madlib) |
| |
| |
| # ======================================================================== |
| # ----------------------------------------------------------------------- |
| # Marginal Effects for multinomial logistic regression |
| # ----------------------------------------------------------------------- |
| |
| # ======================================================================== |
| # Input handling for multinomial logistic regression |
| |
| |
| def _margins_mlogregr_validate_args(schema_madlib, source_table, |
| dependent_varname, independent_varname, |
| ref_category, optimizer, **kwargs): |
| """ |
| Validate the arguments |
| """ |
| _assert(ref_category is not None, |
| "Margins: Reference category cannot be null!") |
| _assert(ref_category >= 0, |
| "Margins error: Reference category cannot be negative!") |
| |
| _assert(optimizer is not None, |
| "Margins error: Optimizer cannot be NULL!") |
| _assert(optimizer.lower() in ("irls"), |
| "Margins error: Optimizer does not exist. Must be 'newton'/'irls'.") |
| |
| result_w_null = plpy.execute(""" |
| SELECT DISTINCT {dep} AS cat |
| FROM {source} |
| WHERE {dep} is not NULL |
| """.format(source=source_table, |
| indep=independent_varname, |
| dep=dependent_varname)) |
| result_wo_null = plpy.execute(""" |
| SELECT DISTINCT {dep} AS cat |
| FROM {source} |
| WHERE {dep} is not NULL |
| AND NOT {madlib}.array_contains_null({indep}) |
| """.format(madlib=schema_madlib, source=source_table, |
| indep=independent_varname, |
| dep=dependent_varname)) |
| |
| categories_wo_null = set(i["cat"] for i in result_wo_null) |
| categories_w_null = set(i["cat"] for i in result_w_null) |
| |
| _assert(categories_wo_null == categories_w_null, |
| "Margins error: All observations of category set {0} contain " |
| "NULL values. These rows should be removed from the dataset " |
| "before proceeding.". |
| format(list(categories_w_null - categories_wo_null))) |
| |
| # ======================================================================== |
| |
| |
| # Main function call for marginal multinomial logistic regression |
| def margins_mlogregr_main(schema_madlib, source_table, |
| out_table, dependent_varname, |
| independent_varname, ref_category, |
| marginal_vars, grouping_cols, |
| optimizer_params, regr_coef=None, **kwargs): |
| """ |
| @brief A wrapper function for the marginal_logregr. |
| |
| @param source_table string, name of the input table |
| @param out_table string, name of the output table to be created |
| @param dependent_varname: string, Column containing the dependent variable |
| @param independent_varname string, Column containing the array of independent variables |
| @param ref_category int, Reference category for multinomial logistic regression |
| @param grouping_cols string, Set of columns to group by. |
| @param marginal_vars string, Subset of independent variables to calculate marginal effects for. |
| @param optimizer_params: string, Comma-separated string of optimizer parameters |
| Supported parameters: |
| max_iter Maximum number of iterations |
| optimzer Optimizer to be used (newton/irls, cg or idg) |
| tolerance Resiual tolerance |
| |
| To include an intercept in the model, set one coordinate in the |
| <tt>independentVariables</tt> array to 1. |
| |
| Returns: |
| None |
| """ |
| |
| allowed_param_types = {'max_iter': int, 'max_num_iterations': int, |
| 'optimizer': str, |
| 'tolerance': float} |
| default_optimizer_values = {'max_iter': 20, |
| 'optimizer': 'irls', |
| 'tolerance': 0.0001} |
| optimizer_param_dict = extract_keyvalue_params(optimizer_params, |
| allowed_param_types, |
| default_optimizer_values) |
| optimizer_param_dict.update(kwargs) |
| margins_mlogregr(schema_madlib, source_table, out_table, |
| dependent_varname, independent_varname, |
| ref_category, marginal_vars, |
| grouping_cols=grouping_cols, |
| regr_coef=regr_coef, |
| **optimizer_param_dict) |
| # ======================================================================== |
| |
| |
| def margins_mlogregr_new(schema_madlib, |
| model_table, |
| out_table, |
| marginal_vars=None, |
| **kwargs): |
| """ |
| Updated function interface to Margins for multinomial logistic regression. |
| |
| Input to this function is the model table obtained from mlogregr_train. All |
| necessary parameters are obtained from the model table. |
| |
| Args: |
| @param model_table: string, Name of the table returned by mlogregr_train |
| @param out_table: string, name of the output table to be created |
| @param marginal_vars: list, Integer array of indices of |
| """ |
| _assert(model_table is not None and |
| model_table.strip().lower() not in ('null', ''), |
| "Margins error: Invalid model table name") |
| _assert(table_exists(model_table), |
| "Margins error: Model table {0} does not exist". |
| format(model_table)) |
| _assert(table_exists(model_table + "_summary"), |
| "Margins error: Model Summary table {0} does not exist". |
| format(model_table + "_summary")) |
| |
| _assert(out_table is not None and |
| out_table.strip().lower() not in ('null', ''), |
| "Margins error: Invalid output table name") |
| |
| _assert(not table_exists(out_table, only_first_schema=True), |
| "Margins error: Output table {0}" |
| " already exists".format(str(out_table))) |
| |
| _assert(columns_exist_in_table(model_table, |
| ['coef']), |
| "Margins error: Invalid model data table" |
| " - column coef missing") |
| |
| _assert(columns_exist_in_table(model_table + '_summary', |
| ['source_table', 'dependent_varname', 'independent_varname', |
| 'optimizer_params', 'ref_category']), |
| "Margins error: Invalid model summary table" |
| " - some required columns missing") |
| |
| # info is a dict that contains source_table, ind_var, dep_var, |
| info = plpy.execute("SELECT * FROM {0}_summary".format(model_table))[0] |
| coef = string_to_array(plpy.execute("SELECT coef from {0}". |
| format(model_table))[0]['coef']) |
| info['out_table'] = out_table |
| margins_mlogregr_main(schema_madlib=schema_madlib, regr_coef=coef, |
| marginal_vars=marginal_vars, |
| grouping_cols=None, **info) |
| # ======================================================================== |
| |
| |
| def margins_mlogregr(schema_madlib, source_table, out_table, |
| dependent_varname, independent_varname, |
| ref_category, marginal_vars, |
| max_iter, optimizer, tolerance, |
| grouping_cols=None, regr_coef=None, |
| **kwargs): |
| """ |
| @brief A wrapper function for the various marginal regression analyzes. |
| |
| @param source_table String identifying the input table |
| @param out_table String identifying the output table to be created |
| @param dependent_varname Column containing the dependent variable |
| @param independent_varname Column containing the array of independent variables |
| @param ref_category Reference category for multinomial logistic regression |
| @param grouping_cols Set of columns to group by. |
| @param marginal_vars Subset of indices of independent variables to calculate marginal effects for. |
| @param max_iter Maximum number of iterations |
| @param optimzer Optimizer to be used (newton/irls, cg or idg) |
| @param tolerance Resiual tolerance |
| |
| @par |
| To include an intercept in the model, set one coordinate in the |
| <tt>independentVariables</tt> array to 1. |
| |
| @return void |
| |
| @usage |
| For function summary information. Run |
| sql> select margins_mlogregr('help'); |
| OR |
| sql> select margins_mlogregr(); |
| OR |
| sql> select margins_mlogregr('?'); |
| """ |
| # Reset the message level to avoid random messages |
| old_msg_level = plpy.execute(""" |
| SELECT setting |
| FROM pg_settings |
| WHERE name='client_min_messages' |
| """)[0]['setting'] |
| |
| if 'verbose_mode' in kwargs and kwargs['verbose_mode']: |
| plpy.execute('SET client_min_messages TO warning') |
| else: |
| plpy.execute('SET client_min_messages TO error') |
| |
| if optimizer.lower() == 'newton': |
| optimizer = 'irls' |
| |
| # Validate arguments |
| _margins_common_validate_args(schema_madlib, source_table, out_table, |
| dependent_varname, independent_varname, |
| grouping_cols, marginal_vars, |
| max_iter, tolerance) |
| |
| all_arguments = {'schema_madlib': schema_madlib, |
| 'source_table': source_table, |
| 'out_table': out_table, |
| 'dependent_varname': dependent_varname, |
| 'independent_varname': independent_varname, |
| 'ref_category': ref_category, |
| 'grouping_cols': grouping_cols, |
| 'marginal_vars': marginal_vars, |
| 'max_iter': max_iter, |
| 'optimizer': optimizer, |
| 'tolerance': tolerance} |
| _margins_mlogregr_validate_args(**all_arguments) |
| # NOTICE: * support was removed because other modules did not have it. |
| # Uncomment the following code if you want to re-add '*' support |
| |
| # Check for '*' in indepdendent variables |
| #if independent_varname == "*": |
| # all_arguments['independent_varname'] = \ |
| # _internal_return_all_except_dep_vars(schema_madlib, |
| # source_table, |
| # dependent_varname |
| # ) |
| |
| # No grouping |
| if not grouping_cols: |
| mlog_out_table = "pg_temp." + unique_string() |
| all_arguments['mlog_out_table'] = mlog_out_table |
| |
| # Run regression |
| plpy.execute(""" |
| SELECT {schema_madlib}.mlogregr_train( |
| '{source_table}', '{mlog_out_table}', |
| '{dependent_varname}', '{independent_varname}', {ref_category}, |
| 'max_iter={max_iter}, optimizer={optimizer}, tolerance={tolerance}') |
| """.format(**all_arguments)) |
| |
| # Rename the output summary table |
| plpy.execute("""CREATE TABLE {out_table}_summary AS |
| SELECT * FROM {mlog_out_table}_summary""".format(**locals())) |
| |
| plpy.execute("UPDATE {out_table}_summary SET out_table = '{out_table}'". |
| format(out_table=out_table)) |
| |
| num_categories = plpy.execute( |
| "SELECT count(DISTINCT {0}) as n_cat FROM {1}". |
| format(dependent_varname, source_table))[0]['n_cat'] |
| all_arguments['num_categories'] = num_categories |
| num_indepvars = plpy.execute(""" |
| SELECT array_upper(coef, 1) AS num_indepvars FROM {mlog_out_table} |
| """.format(mlog_out_table=mlog_out_table))[0]['num_indepvars'] |
| |
| if num_indepvars is not None: |
| # Note: The marginal_vars is a base 1 array |
| # If marginal variables are none. Then chose all variables |
| if not marginal_vars: |
| num_features = num_indepvars |
| marginal_vars = range(1, num_indepvars * (num_categories - 1) + 1) |
| else: |
| # For each independent var, get all the indices |
| num_features = len(marginal_vars) |
| marginal_vars_only = map(int, string_to_array(marginal_vars)) |
| marginal_vars = [] |
| for j in range(num_categories - 1): |
| for m in marginal_vars_only: |
| marginal_vars.append((m - 1) * (num_categories - 1) + j + 1) |
| |
| all_arguments['index_array'] = py_list_to_sql_string(marginal_vars, |
| "integer") |
| all_arguments['num_features'] = num_features |
| else: |
| all_arguments['index_array'] = 'NULL' |
| all_arguments['num_features'] = 'NULL' |
| |
| # Run Robust Variance |
| plpy.execute(""" |
| CREATE TABLE {out_table} AS |
| SELECT |
| ({schema_madlib}.__mlogregr_format( |
| margins, {num_features}, |
| {num_categories}, {ref_category}) |
| ).category AS category, |
| {ref_category} as ref_category, |
| ({schema_madlib}.__mlogregr_format( |
| margins, {num_features}, |
| {num_categories}, {ref_category}) |
| ).coef AS margins, |
| ({schema_madlib}.__mlogregr_format( |
| std_err, {num_features}, |
| {num_categories}, {ref_category}) |
| ).coef AS std_err, |
| ({schema_madlib}.__mlogregr_format( |
| z_stats, {num_features}, |
| {num_categories}, {ref_category}) |
| ).coef AS z_stats, |
| ({schema_madlib}.__mlogregr_format( |
| p_values, {num_features}, |
| {num_categories}, {ref_category}) |
| ).coef AS p_values |
| FROM |
| ( |
| SELECT |
| {schema_madlib}.__sub_array((res).margins, {index_array}) AS margins, |
| {schema_madlib}.__sub_array((res).std_err, {index_array}) AS std_err, |
| {schema_madlib}.__sub_array((res).z_stats, {index_array}) AS z_stats, |
| {schema_madlib}.__sub_array((res).p_values, {index_array}) AS p_values |
| FROM |
| ( |
| SELECT |
| {schema_madlib}.marginal_mlogregr( |
| ({dependent_varname})::INTEGER, |
| {num_categories}, |
| {ref_category}, |
| {independent_varname}, |
| (SELECT |
| {schema_madlib}.matrix_agg(coef ORDER BY category) |
| FROM {mlog_out_table}) |
| ) AS res |
| FROM {source_table} |
| ) t1 |
| ) t2 |
| """.format(**all_arguments)) |
| |
| # Drop the output table of mlogregr_train() |
| plpy.execute("DROP TABLE IF EXISTS " + mlog_out_table) |
| |
| plpy.execute("SET client_min_messages TO %s" % old_msg_level) |
| # ======================================================================== |
| |
| |
| def margins_mlogregr_help(schema_madlib, message, **kwargs): |
| """ |
| Help function for marginal_mlogregr |
| |
| Args: |
| @param schema_madlib |
| @param message: string, Help message string |
| @param kwargs |
| |
| Returns: |
| String. Help/usage information |
| """ |
| if not message: |
| help_string = """ |
| ----------------------------------------------------------------------- |
| SUMMARY |
| ----------------------------------------------------------------------- |
| Functionality: Calculate marginal effects for multinomial logistic regression |
| |
| For more details on function usage: |
| SELECT {schema_madlib}.margins_mlogregr('usage') |
| """ |
| elif message in ['usage', 'help', '?']: |
| help_string = """ |
| ----------------------------------------------------------------------- |
| USAGE |
| ----------------------------------------------------------------------- |
| SELECT {schema_madlib}.margins_mlogregr( |
| 'source_table', -- Name of data table |
| 'output_table', -- Name of result table |
| 'dependent_variable', -- Name of column for dependent variables |
| 'independent_variable', -- Name of column for independent variables |
| ref_category, -- Reference category for the multinomial logistic regression |
| -- (Optional, DEFAULT=0) |
| 'grouping_cols', -- An expression list used to group the input dataset into discrete groups. |
| -- (Optional, DEFAUT=NULL. |
| -- Not currently implemented. Any non-NULL value is ignored.) |
| marginal_vars, -- Indices of variables to calculate marginal effects on |
| -- (Optional, DEFAULT: All independent variables) |
| 'optimizer_params', -- The optimizer parameters as a comma-separated string |
| -- (Optional, DEFAULT: max_iter=20, optimizer=irls, tolerance=0.0001) |
| verbose_mode -- When TRUE, provides verbose output of the results of training. |
| (Optional, DEFAUT=FALSE) |
| ); |
| |
| ----------------------------------------------------------------------- |
| OUTUPT |
| ----------------------------------------------------------------------- |
| The output table ('output_table' above) has the following columns |
| margins DOUBLE PRECISION[], -- Marginal effects |
| std_err DOUBLE PRECISION[], -- Standard errors using delta method |
| z_stats DOUBLE PRECISION[], -- z-stats of the standard errors |
| p_values DOUBLE PRECISION[], -- p-values of the standard errors |
| |
| The output summary table is the same as mlogregr_train(), see also: |
| SELECT mlogregr_train('usage'); |
| """ |
| else: |
| help_string = "No such option. Use {schema_madlib}.margins_mlogregr()" |
| |
| return help_string.format(schema_madlib=schema_madlib) |
| |
| |
| |