src/ports/postgres/modules/regress/margins.py_in - madlib - Git at Google

 """
 @file margins.py_in

 @brief Marginal Effects with Interactions: Contains the main interface function
 and other functions that are common to the various methods and related to
 database constructs.

 @namespace marginal
 """
 import plpy
 from utilities.utilities import _assert
 from utilities.validate_args import table_exists
 from utilities.validate_args import columns_exist_in_table
 from utilities.validate_args import table_is_empty

 from utilities.utilities import py_list_to_sql_string
 from utilities.utilities import add_postfix
 from utilities.control import MinWarning

 from margins_builder import MarginalEffectsBuilder
 from margins_builder import TermBase

 import re
 #------------------------------------------------------------------------------


 def margins(schema_madlib, model_table, out_table, x_design=None,
             source_table=None, marginal_vars=None, *args, **kwargs):
     """
     Call the appropriate margins functions depending on the regression type
     found in the model table.

     Args:
         @param schema_madlib
         @param model_table
         @param out_table
         @param x_design
         @param source_table
         @param marginal_vars
         @param args
         @param kwargs

     Returns:
         None
     """
     with MinWarning('warning'):
         # 1) validate all arguments
         margins_validate_model_table(model_table)
         if model_table:
             model_summary_table = add_postfix(model_table, "_summary")
         else:
             raise ValueError("Margins error: Invalid regression model table name!")

         if not source_table:
             source_table = plpy.execute("SELECT source_table FROM {0}".
                                         format(model_summary_table))[0]['source_table']

         margins_validate_args(out_table, source_table, x_design)

         # 2) get the regression type ...
         reg_type = plpy.execute(
             "SELECT method from {0}".format(model_summary_table))[0]['method']
         margins_method = None
         if not reg_type:
             plpy.error("Margins error: Regression type cannot be obtained "
                        "from the model table. 'margins()' currently only "
                        "supported for linregr, logregr, mlogregr, or coxph")
         elif reg_type in ("linregr", "linear", "linear_regression",
                           "linear regression"):
             margins_method = margins_linregr
         elif reg_type in ("logregr", "logistic", "logistic_regression",
                           "logistic regression"):
             margins_method = margins_logregr
         elif reg_type in ("mlogregr", "multilogistic",
                           "multilogistic_regression",
                           "multinomial logistic regression"):
             margins_method = margins_mlogregr
         elif reg_type in ("coxph", "cox_proportional_hazards",
                           "cox prop", "cox proportional hazards"):
             margins_method = margins_coxph
         else:
             plpy.error("""
                 Margins not supported for this model table.
                 Re-run training {linregr, logregr, mlogregr, coxph}_train
                 before using margins().""")

         # 3) ... and call the appropriate sub-function
         margins_method(schema_madlib, model_table, out_table, source_table,
                        x_design, marginal_vars, *args, **kwargs)
 #------------------------------------------------------------------------------


 def margins_help(schema_madlib, message, **kwargs):
     """
     Help function for margins

     Args:
         @param schema_madlib
         @param message: string, Help message string
         @param kwargs

     Returns:
         String. Help/usage information
     """
     if not message:
         help_string = """
 -----------------------------------------------------------------------
                             SUMMARY
 -----------------------------------------------------------------------
 Functionality: Calculate marginal effects for binomial/multinomial logistic
 regression with interaction terms.
 A marginal effect (ME) or partial effect measures the effect on the
 conditional mean of a response (dependent variable) for a change in one of the
 regressors (independent variable).

 We currently only support margins for linear, logistic,
 multinomial logistic regression, and cox proportional hazard.

 For more details on function usage:
     SELECT {schema_madlib}.margins('usage')
             """
     elif message in ['usage', 'help', '?']:
         help_string = """
 -----------------------------------------------------------------------
                             USAGE
 -----------------------------------------------------------------------
  SELECT {schema_madlib}.margins(
     'model_table',              -- Name of table containing regression model
     'output_table',             -- Name of result table
     'x_design',                 -- Design of the independent variables
                                 --  (Optional, if not provided or NULL then independent variable list
                                          is assumed to have no interaction terms)
     'source_table',             -- Source table to apply marginal effects on
                                 --  (Optional, if not provided or NULL then assume training table as the source)
     'marginal_vars'             -- Variable identifiers (strings same as those in x_design) to
                                 --  calculate marginal effects on
                                 --  (Optional, if not provided or NULL then marginal effects for all
                                 --     basis variables will be returned)
     );

 -----------------------------------------------------------------------
                             OUTPUT
 -----------------------------------------------------------------------
 The output table ('output_table' above) has the following columns
     variables       INTEGER[],            -- Indices of the basis variables,
                                           --   will be same as marginal vars if provided
     margins         DOUBLE PRECISION[],   -- Marginal effects
     std_err         DOUBLE PRECISION[],   -- Standard errors using delta method
     z_stats         DOUBLE PRECISION[],   -- z-stats of the standard errors
     p_values        DOUBLE PRECISION[],   -- p-values of the standard errors
         """
     else:
         help_string = "No such option. Use {schema_madlib}.margins()"

     return help_string.format(schema_madlib=schema_madlib)
 # ---------------------------------------------------------------------


 def margins_validate_model_table(model_table):
     """
     Args:
         @param schema_madlib
         @param model_table

     Returns:

     """
     _assert(model_table and
             model_table.strip().lower() not in ('null', ''),
             "Margins error: Invalid regression model table name!")
     _assert(table_exists(model_table),
             "Margins error: Specified Model table ({0}) is missing! "
             "Rerun underlying regression".format(model_table))

     model_summary_table = add_postfix(model_table, '_summary')
     _assert(table_exists(model_summary_table),
             "Margins error: Summary for model table ({0}) is missing! "
             "Rerun underlying regression".format(model_summary_table))

     _assert(columns_exist_in_table(model_summary_table,
             ['method', 'source_table', 'out_table', 'dependent_varname',
              'independent_varname']),
             "Margins error: Invalid model summary table ({0})"
             " - some required columns missing".format(model_summary_table))
     return True
 #------------------------------------------------------------------------------


 def margins_validate_args(out_table, source_table, x_design=None,
                           **kwargs):
     _assert(out_table and
             out_table.strip().lower() not in ('null', ''),
             "Margins error: Invalid output table name!")
     _assert(not table_exists(out_table, only_first_schema=True),
             "Margins error: Output table already exists!")

     _assert(source_table and source_table.strip().lower() not in ('null', ''),
             "Margins error: Invalid data table name!")
     _assert(table_exists(source_table),
             "Margins error: Data table ({0}) is missing!".format(source_table))
     _assert(not table_is_empty(source_table),
             "Margins error: Data table ({0}) is empty!".format(source_table))
 # -------------------------------------------------------------------------


 def _get_categorical_set_unset_strings(x_design_parser, indep_name='x', shortened_output=False):
     """
     Return strings that represent the discrete difference strings (set and unset)
     string, along with the indices for all the categorical variables.
     """
     subset_indicators = x_design_parser.get_subset_indicator_terms()
     # subset_cat_indices represents the indicator variables
     # that are present in x_design_parser.subset_basis
     indicators_set_string = []
     indicators_unset_string = []
     create_indicator_string = lambda x: (py_list_to_sql_string(x,
                                          array_type="double precision"))
     for each_term in subset_indicators:
         set_list, unset_list = x_design_parser.get_discrete_diff_arrays(
             each_term, indep_name, shortened_output=shortened_output)
         set_list[:] = [i for i in set_list if i is not None]
         unset_list[:] = [i for i in unset_list if i is not None]
         # create two matrices - 1 with all set expressions,
         #                       1 with all unset expressions
         indicators_set_string.append(create_indicator_string(set_list))
         indicators_unset_string.append(create_indicator_string(unset_list))

     # create strings that represent the categorical discrete differences
     if subset_indicators:
         return [py_list_to_sql_string(i, array_type="double precision")
                 for i in (indicators_set_string, indicators_unset_string)]
     else:
         return ("NULL::double precision[]", "NULL::double precision[]")
 #------------------------------------------------------------------------------


 def _parse_marginal_vars(marginal_var_str):
     """
     Marginal vars is supposed to be a list of identifiers separated by ','.
     It's possible that the user inputs it as an array string
     eg: ARRAY[1, 2, 3, 4] or '{2, 4, 5}'. We strip out the 'ARRAY' if present in
     front and also strip square/curly brackets.
     """
     if str.lower(marginal_var_str.strip()[:6]) == "array[":
         marginal_var_str = marginal_var_str[5:]
     marginal_var_str = marginal_var_str.strip(" {}[]")
     marginal_var_list = []
     # reg = re.compile('".*?"|[^\,]+')
     # all_elem = reg.findall(marginal_var_str.strip())
     comma_reg = re.compile(r'((?:[^,"]|"[^"]*")+)')
     all_elem = [i.strip() for i in
                     comma_reg.split(marginal_var_str.strip())[1::2]]
     for each_str in all_elem:
         each_str = each_str.strip()
         if not (each_str.startswith('"') and each_str.endswith('"')):
             if not re.match(r'^\w+$', each_str):
                 raise ValueError("Invalid identifier '{0}' in marginal_var. "
                                  "Quote an identifier with non-alphanumeric "
                                  "characters using double quotes.".
                                  format(each_str))
             each_str = each_str.lower()
             # if each_str is not quoted then always keep a lower-case copy
             # (i.e. each_str is case-insensitive)
         # else:
         #     # if each_str is quoted then we keep the case intact, but
         #     # strip the double-quotes.
         #     each_str = each_str.strip('"')
         marginal_var_list.append(each_str)
     return marginal_var_list
 #------------------------------------------------------------------------------


 def _get_regression_arguments(schema_madlib, model_table):
     """
     Return all necessary argument values from the model and summary table of
     underlying regression

     Args:
         @params model_table: str, Name of the model table containing regression
                                 model
     Returns:
         Tuple:
             coef: list, Coefficient values from regression.
             independent_varname: str, The string passed as independent_varname
                                     in underlying regression.
             grouping_col: str, If grouping was used in the underlying regression,
                                 then this argument gives the grouping columns
                                 as a comma-separated list. This value is None
                                 if no grouping was used.
     """
     if model_table:
         model_summary_table = add_postfix(model_table, "_summary")
     else:
         raise ValueError("Margins error: Invalid regression model table name!")

     coef = plpy.execute("SELECT coef as x FROM {0}".format(model_table))[0]['x']
     independent_varname = plpy.execute("SELECT independent_varname as x FROM {0}".
                                        format(model_summary_table))[0]['x']
     if columns_exist_in_table(model_summary_table, ['grouping_col'], schema_madlib):
         grouping_col = plpy.execute("SELECT grouping_col AS x FROM {0}".
                                     format(model_summary_table))[0]['x']
     else:
         grouping_col = None
     if any(i is None for i in (coef, independent_varname)):
         plpy.error("Margins error: Model table ({0}) is missing important parameters. "
                    "Rerun logistic regression.".format(model_table))
     return coef, independent_varname, grouping_col
 #------------------------------------------------------------------------------


 def _get_parser(coef, x_design, marginal_vars=None):
     """
     Args:
         @param x_design: str, String representation of the design elements
                             (see MarginalEffectsBuilder for example)
         @param marginal_vars: str, comma-separated list of integers - indexes
                             of independent variables to output.
                             If None, then all variables are output.

     Returns:
         Tuple.
             x_design_parser: Object representing a marginal effects parser
             marginal_var_list: List of marginal variables

     """
     if marginal_vars:
         marginal_vars = marginal_vars.strip()

     n_indep = len(coef)
     if not x_design:
         x_design = ','.join(str(i+1) for i in range(n_indep))

     x_design_parser = MarginalEffectsBuilder(x_design)

     _assert(x_design_parser.n_total_terms == n_indep,
             "Margins error: Invalid x_design. Number of terms in x_design ({0}) "
             "not same as number of independent variables ({1})".
             format(x_design_parser.n_total_terms, n_indep))

     if marginal_vars:
         # basis terms are not necessarily continguous
         # example: basis_terms = [1, 2, 3, 5, 8, 11]
         # If all variables are needed in output then
         #   marginal_var_list would be [1, 2, 3, 4, 5, 6] (max value would be len(basis_terms))
         marginal_var_list = _parse_marginal_vars(marginal_vars)
         _assert(all(TermBase(x) in x_design_parser.inverse_basis_terms
                     for x in marginal_var_list),
                 "Margins error: marginal_vars argument contains values not "
                 "present as a basis variable in x_design")
     else:
         # marginal_var_list
         marginal_var_list = x_design_parser.get_sorted_basis_identifiers()

     # do not return any answer for reference terms
     ref_terms_removed = []
     if x_design_parser.reference_terms:
         for each_ref in x_design_parser.reference_terms.values():
             if each_ref.identifier in marginal_var_list:
                 marginal_var_list.remove(each_ref.identifier)
                 ref_terms_removed.append(each_ref.identifier)

     x_design_parser.subset_basis = marginal_var_list
     if ref_terms_removed:
         plpy.warning("""
             Warning: Reference terms (Variable(s): {ref_terms}) are present in output variables. "
             No marginal effects (and statistics) will be provided for these terms.
             """.format(ref_terms=', '.join(str(i) for i in ref_terms_removed)))

     return x_design_parser
 #------------------------------------------------------------------------------


 #------------------------------------------------------------------------------
 # Linear regression marginal effects
 #------------------------------------------------------------------------------
 def margins_linregr(schema_madlib, model_table, out_table, source_table,
                     x_design=None, marginal_vars=None, *args, **kwargs):
     """
     Args:
         @param schema_madlib
         @param model_table
         @param out_table
         @param x_design
         @param source_table
         @param marginal_vars

     Returns:
         None

     Raises:

     """
     # 1) Get all arguments and validate them
     coef, independent_varname, grouping_col = _get_regression_arguments(schema_madlib, model_table)

     # 2) Parse x_design and marginal_vars argument
     x_design_parser = _get_parser(coef, x_design, marginal_vars)
     # subset_basis contains indices for a 1-base array
     if not x_design_parser.subset_basis:
         raise ValueError("Invalid marginal variables selected.")

     # 3) Create a matrix representing the 2nd partial derivative
     #  (1st order wrt x and 2nd order wrt \beta). For categorical, this is
     # the discrete difference.
     indep_name = 'x'
     derivative_list = x_design_parser.create_2nd_derivative_matrix(indep_name)
     derivative_str = py_list_to_sql_string(
         [py_list_to_sql_string(i, array_type="double precision")
          for i in derivative_list],
         array_type="double precision")
     # grouping arguments
     if not grouping_col:
         grouping_str1 = ""
         grouping_str2 = ""
         using_str = ""
         join_str = ","
     else:
         grouping_str1 = grouping_col + ","
         grouping_str2 = "GROUP BY " + grouping_col
         using_str = "USING (" + grouping_col + ")"
         join_str = "LEFT OUTER JOIN "

     execute_string = """
         CREATE TABLE {out_table} AS
             SELECT
                 {grouping_str1}
                 {index_array} as variables,
                 (res).margins as margins,
                 (res).std_err as std_err,
                 (res).z_stats as z_stats,
                 (res).p_values as p_values
             FROM (
                 SELECT
                     {grouping_str1}
                     {schema_madlib}.__margins_int_linregr_agg(
                         {indep_name}
                         , coef
                         , variance_covariance
                         , {derivative_str})
                         as res
                 FROM (
                     SELECT
                         {grouping_str1}
                         {independent_varname} as {indep_name}
                     FROM {source_table} as s
                     ) q1 {join_str} {model_table} as m {using_str}
                 {grouping_str2}
                 ) q2
         """.format(schema_madlib=schema_madlib,
                    source_table=source_table,
                    out_table=out_table,
                    independent_varname=independent_varname,
                    indep_name=indep_name,
                    index_array=py_list_to_sql_string(
                        [x_design_parser.basis_terms[i]
                         for i in x_design_parser.subset_basis],
                        array_type="text"),
                    model_table=model_table,
                    derivative_str=derivative_str,
                    grouping_str1=grouping_str1,
                    grouping_str2=grouping_str2,
                    using_str=using_str,
                    join_str=join_str)
     plpy.execute(execute_string)


 #------------------------------------------------------------------------------
 # Logistic regression marginal effects
 #------------------------------------------------------------------------------
 def margins_logregr(schema_madlib, model_table, out_table, source_table,
                     x_design=None, marginal_vars=None,
                     *args, **kwargs):
     """
     Args:
         @param schema_madlib
         @param model_table
         @param out_table
         @param x_design
         @param source_table
         @param marginal_vars

     Returns:
         None

     Raises:
         ValueError
     """
     # 1) Get all arguments and validate them
     coef, independent_varname, grouping_col = _get_regression_arguments(schema_madlib, model_table)

     # 2) Get the x_design and marginal_vars arguments
     x_design_parser = _get_parser(coef, x_design, marginal_vars)
     # subset_basis contains indices for a 1-base array
     if not x_design_parser.subset_basis:
         raise ValueError("Invalid marginal variables selected.")

     # 3) Create a matrix representing the 2nd partial derivative
     #       (1st order wrt x and 2nd order wrt \beta)
     indep_name = 'x'
     if x_design_parser.contains_interaction():
         derivative_list = x_design_parser.create_2nd_derivative_matrix(indep_name)
         derivative_str = py_list_to_sql_string(
             [py_list_to_sql_string(i, array_type="double precision")
              for i in derivative_list],
             array_type="double precision")
     else:
         derivative_str = "NULL::double precision[]"

     # 4) Build the margins for indicator variables as discrete differences
     set_string, unset_string = _get_categorical_set_unset_strings(
         x_design_parser, indep_name, shortened_output=True)

     if x_design_parser.contains_indicators():
         zero_base_categorical_indices = py_list_to_sql_string(
             [i - 1 for i in x_design_parser.get_all_indicator_indices()],
             array_type="double precision")
     else:
         zero_base_categorical_indices = "NULL::double precision[]"

     # grouping arguments
     if not grouping_col:
         grouping_str1 = ""
         grouping_str2 = ""
         using_str = ""
         join_str = ","
     else:
         grouping_str1 = grouping_col + ","
         grouping_str2 = "GROUP BY " + grouping_col
         using_str = "USING (" + grouping_col + ")"
         join_str = "LEFT OUTER JOIN "

     execute_string = """
         CREATE TABLE {out_table} AS
             SELECT
                 {grouping_str1}
                 {index_array} as variables,
                 (res).margins as margins,
                 (res).std_err as std_err,
                 (res).z_stats as z_stats,
                 (res).p_values as p_values
             FROM (
                 SELECT
                     {grouping_str1}
                     {schema_madlib}.__margins_int_logregr_agg(
                         {indep_name}
                         , coef
                         , variance_covariance
                         , {zero_base_subset_indices}
                         , {derivative_str}
                         , {zero_base_categorical_indices}
                         , {set_string}
                         , {unset_string})
                         as res
                 FROM (
                     SELECT
                         {grouping_str1}
                         {independent_varname} as {indep_name}
                     FROM {source_table} as s
                 ) q1 {join_str} {model_table} as m {using_str}
                 {grouping_str2}
             ) q2
         """.format(schema_madlib=schema_madlib,
                    source_table=source_table,
                    out_table=out_table,
                    independent_varname=independent_varname,
                    indep_name=indep_name,
                    index_array=py_list_to_sql_string(
                         [x_design_parser.basis_terms[i]
                          for i in x_design_parser.subset_basis],
                         array_type="text"),
                    zero_base_categorical_indices=zero_base_categorical_indices,
                    model_table=model_table,
                    derivative_str=derivative_str,
                    set_string=set_string,
                    unset_string=unset_string,
                    grouping_str1=grouping_str1,
                    grouping_str2=grouping_str2,
                    using_str=using_str,
                    join_str=join_str,
                    zero_base_subset_indices=py_list_to_sql_string(
                        [i - 1 for i in x_design_parser.subset_basis],
                        array_type="double precision"))
     plpy.execute(execute_string)


 #------------------------------------------------------------------------------
 # Multinomial Logistic regression marginal effects
 #------------------------------------------------------------------------------
 def margins_mlogregr(schema_madlib, model_table, out_table, source_table,
                      x_design=None, marginal_vars=None,
                      *args, **kwargs):
     """
     Args:
         @param schema_madlib
         @param model_table
         @param out_table
         @param x_design
         @param source_table
         @param marginal_vars

     Returns:
         None

     Raises:

     """
     # 1) Get all arguments and validate them
     coef, independent_varname, grouping_col = _get_regression_arguments(schema_madlib, model_table)
     summary_table = add_postfix(model_table, "_summary")

     # 2) Parse x_design and marginal_vars argument
     x_design_parser = _get_parser(coef, x_design, marginal_vars)
     if not x_design_parser.subset_basis:
         raise ValueError("Invalid marginal variables selected.")

     # 3) Create a matrix representing the 2nd partial derivative
     #       (1st order wrt x and 2nd order wrt \beta)
     indep_name = 'x'
     if x_design_parser.contains_interaction():
         derivative_list = x_design_parser.create_2nd_derivative_matrix(indep_name)
         derivative_str = py_list_to_sql_string(
             [py_list_to_sql_string(i, array_type="double precision")
              for i in derivative_list],
             array_type="double precision")
     else:
         derivative_str = "NULL::double precision[]"

     # 4) Build the margins for indicator variables as discrete differences
     set_string, unset_string = _get_categorical_set_unset_strings(
         x_design_parser, indep_name, shortened_output=True)

     if x_design_parser.contains_indicators():
         zero_base_categorical_indices = py_list_to_sql_string(
             [i - 1 for i in x_design_parser.get_all_indicator_indices()],
             array_type="double precision")
     else:
         zero_base_categorical_indices = "NULL::double precision[]"

     execute_string = """
         CREATE TABLE {out_table} AS
             SELECT
                 category,
                 ref_category,
                 {index_array} as variables,
                 margins,
                 std_err,
                 z_stats,
                 p_values
             FROM
             (
                 SELECT
                     ref_category,
                     ({schema_madlib}.__mlogregr_format(
                         (res).margins, {num_basis}, num_categories, ref_category)
                     ).category,
                     ({schema_madlib}.__mlogregr_format(
                         (res).margins, {num_basis}, num_categories, ref_category)
                     ).coef as margins,
                     ({schema_madlib}.__mlogregr_format(
                         (res).std_err, {num_basis}, num_categories, ref_category)
                     ).coef as std_err,
                     ({schema_madlib}.__mlogregr_format(
                         (res).z_stats, {num_basis}, num_categories, ref_category)
                     ).coef as z_stats,
                     ({schema_madlib}.__mlogregr_format(
                         (res).p_values, {num_basis}, num_categories, ref_category)
                     ).coef as p_values
                 FROM
                 (
                     SELECT
                         num_categories,
                         ref_category,
                         {schema_madlib}.__margins_int_mlogregr_agg(
                             {indep_name}
                             , coef
                             , variance_covariance
                             , {zero_base_subset_indices}
                             , {derivative_str}
                             , {zero_base_categorical_indices}
                             , {set_string}
                             , {unset_string})
                             as res
                     FROM
                     (
                         SELECT
                             {independent_varname} as {indep_name}
                         FROM {source_table} as s
                     ) q1, {summary_table} as m
                     GROUP BY num_categories, ref_category
                 ) q2
             ) q3
         """.format(schema_madlib=schema_madlib,
                    source_table=source_table,
                    out_table=out_table,
                    independent_varname=independent_varname,
                    indep_name=indep_name,
                    index_array=py_list_to_sql_string(
                         [x_design_parser.basis_terms[i]
                          for i in x_design_parser.subset_basis],
                         array_type="text"),
                    zero_base_subset_indices=py_list_to_sql_string(
                        [i - 1 for i in x_design_parser.subset_basis],
                        array_type="double precision"),
                    zero_base_categorical_indices=zero_base_categorical_indices,
                    summary_table=summary_table,
                    derivative_str=derivative_str,
                    set_string=set_string,
                    unset_string=unset_string,
                    num_basis=len(x_design_parser.subset_basis))
     plpy.execute(execute_string)
 # -------------------------------------------------------------------------


 #------------------------------------------------------------------------------
 # Cox Proportional Hazards marginal effects
 #------------------------------------------------------------------------------
 def margins_coxph(schema_madlib, model_table, out_table, source_table,
                   x_design=None, marginal_vars=None,
                   *args, **kwargs):
     """
     Args:
         @param schema_madlib
         @param model_table
         @param out_table
         @param x_design
         @param source_table
         @param marginal_vars

     Returns:
         None

     Raises:

     """
     # 1) Get all arguments and validate them
     coef, independent_varname, grouping_col = _get_regression_arguments(schema_madlib, model_table)
     summary_table = add_postfix(model_table, "_summary")

     strata_cols = plpy.execute("SELECT strata from " + summary_table)[0]["strata"]

     if not strata_cols:
         strata_col_str = ""
         strata_cols = ""
     else:
         strata_col_str = "GROUP BY " + strata_cols
         strata_cols += ","

     # 2) Parse x_design and marginal_vars argument
     x_design_parser = _get_parser(coef, x_design, marginal_vars)
     if not x_design_parser.subset_basis:
         raise ValueError("Invalid marginal variables selected.")

     # 3) Create a matrix representing the 2nd partial derivative
     #       (1st order wrt x and 2nd order wrt \beta)
     indep_name = 'x'
     if x_design_parser.contains_interaction():
         derivative_list = x_design_parser.create_2nd_derivative_matrix(indep_name)
         derivative_str = py_list_to_sql_string(
             [py_list_to_sql_string(i, array_type="double precision")
              for i in derivative_list],
             array_type="double precision")
     else:
         derivative_str = "NULL::double precision[]"

     # 4) Build the margins for indicator variables as discrete differences
     set_string, unset_string = _get_categorical_set_unset_strings(
         x_design_parser, indep_name, shortened_output=True)

     if x_design_parser.contains_indicators():
         zero_base_categorical_indices = py_list_to_sql_string(
             [i - 1 for i in x_design_parser.get_all_indicator_indices()],
             array_type="double precision")
     else:
         zero_base_categorical_indices = "NULL::double precision[]"

     execute_string = """
         CREATE TABLE {out_table} AS
             SELECT
                 {index_array} as variables,
                 (res).margins as margins,
                 (res).std_err as std_err,
                 (res).z_stats as z_stats,
                 (res).p_values as p_values
             FROM (
                 SELECT
                     {schema_madlib}.__margins_int_coxph_agg(
                         {indep_name}
                         , coef
                         , hessian
                         , {zero_base_subset_indices}
                         , {derivative_str}
                         , {zero_base_categorical_indices}
                         , {set_string}
                         , {unset_string})
                         as res
                 FROM (
                     SELECT
                         {independent_varname} as {indep_name}
                     FROM {source_table} as s
                 ) q1, {model_table} as m
             ) q2
         """.format(schema_madlib=schema_madlib,
                    source_table=source_table,
                    out_table=out_table,
                    independent_varname=independent_varname,
                    indep_name=indep_name,
                    index_array=py_list_to_sql_string(
                         [x_design_parser.basis_terms[i]
                          for i in x_design_parser.subset_basis],
                         array_type="text"),
                    strata_col_str=strata_col_str,
                    strata_cols=strata_cols,
                    model_table=model_table,
                    zero_base_subset_indices=py_list_to_sql_string(
                        [i - 1 for i in x_design_parser.subset_basis],
                        array_type="double precision"),
                    zero_base_categorical_indices=zero_base_categorical_indices,
                    derivative_str=derivative_str,
                    set_string=set_string,
                    unset_string=unset_string,
                    num_basis=len(x_design_parser.subset_basis))
     plpy.execute(execute_string)
 # -------------------------------------------------------------------------