src/ports/postgres/modules/regress/marginal.py_in - madlib - Git at Google

 # coding=utf-8

 """
 @file marginal.py_in

 @brief Marginal Effects: Common functions

 @namespace marginal

 @brief Marginal effects: Common functions
 """
 import plpy
 import re
 from utilities.utilities import __unique_string
 from utilities.utilities import _string_to_array
 from utilities.validate_args import table_exists
 from utilities.validate_args import columns_exist_in_table
 from utilities.validate_args import table_is_empty
 from utilities.validate_args import scalar_col_has_no_null
 from utilities.validate_args import get_cols

 # use mad_vec to process arrays passed as strings in GPDB < 4.1 and PG < 9.0
 from utilities.utilities import __mad_version
 version_wrapper = __mad_version()
 string_to_array = version_wrapper.select_vecfunc()
 array_to_string = version_wrapper.select_vec_return()

 # ========================================================================

 def _margins_logregr_validate_args(
     schema_madlib,
     source_table,
     out_table,
     dependent_varname,
     independent_varname,
     input_group_cols,
     marginal_vars,
     max_iter,
     optimizer,
     tolerance):
     """
     Validate the arguments
     """
     if not source_table or source_table in ('null', '') or \
             (not table_exists(source_table)):
         plpy.error("Margins error: Data table does not exist!")

     if table_is_empty(source_table):
         plpy.error("Margins error: Data table is empty!")

     if out_table.lower() in ('null', ''):
         plpy.error("Margins error: Invalid output table name!")

     if table_exists(out_table):
         plpy.error("Margins error: Output table already exists!")

     if not dependent_varname or dependent_varname.lower() in ('null', ''):
         plpy.error("Margins error: Invalid dependent column name!")

     if not scalar_col_has_no_null(source_table, dependent_varname):
         plpy.error("Margins error: Dependent variable has Null values! \
                     Please filter out Null values before using this function!")

     if not independent_varname or independent_varname.lower() in ('null', ''):
         plpy.error("Margins error: Invalid independent column name!")

     if input_group_cols and input_group_cols.lower() in ('null', ''):
         plpy.error("Margins error: Invalid grouping columns name!")

     if max_iter <= 0:
         plpy.error("Margins error: Maximum number of iterations must be positive!")

     if tolerance < 0:
         plpy.error("Margins error: The tolerance cannot be negative!")

     if optimizer is None:
         optimizer = "irls"
     elif optimizer not in ("irls", "cg", "igd"):
         plpy.error(""" Margins error: Optimizer does not exist.
                    Must be 'newton'/'irls', 'cg', or 'igd'.
                    """)

     if optimizer == "newton":
         optimizer = "irls"
     elif optimizer not in ("irls", "cg", "igd"):
         plpy.error(""" Margins error: Optimizer %s does not exist.
                    Must be 'newton'/'irls', 'cg', or 'igd'.
                    """ % optimizer)
     if input_group_cols:
         if not columns_exist_in_table(source_table,
                                _string_to_array(input_group_cols), schema_madlib):
             plpy.error("Margins error: Grouping column does not exist!")


 # ========================================================================
 # Returns relevant ouputs
 # Note: The marginal_vars index list is 1 base
 def _internal_get_margins_from_array(marginal_coef, marginal_vars):

     if min(marginal_vars) < 1:
         plpy.error('''Margins error: All indices in the marginal coefficients array must be >= 1.''')
     if max(marginal_vars) > len(marginal_coef):
         plpy.error('''Margins error: All indices in the marginal coefficients array must be <= the number of independent variables.''')

     return [marginal_coef[i-1] for i in marginal_vars]


 # ========================================================================
 # Convert numeric array to SQL string
 def _internal_py_array_to_sql_string(array):
     return "ARRAY[%s]" % ','.join(map(str, array))


 # ========================================================================
 # Convert numeric array to SQL string
 def _internal_return_all_except_dep_vars(schema_madlib,
                                           source_table,
                                           dependent_varname):
     cols = get_cols(source_table, schema_madlib)
     outstr_array = []
     for i in range(len(cols)):
         if cols[i] != dependent_varname:
             outstr_array.append(cols[i])

     return "ARRAY[%s]" % ','.join(outstr_array)

 # ========================================================================
 # Main function call for marginal logisitc regression
 def margins_logregr(schema_madlib, source_table, out_table
                     , dependent_varname, input_independent_varname
                     , input_group_cols, marginal_vars
                     , max_iter, optimizer, tolerance , **kwargs):
     """
     @brief A wrapper function for the various marginal regression analyzes.

     @param source_table String identifying the input table
     @param out_table String identifying the output table to be created
     @param dependent_varname Column containing the dependent variable
     @param independent_varname Column containing the array of independent variables
     @param grouping_cols Set of columns to group by.
     @param marginal_vars Subset of independent variables to calculate marginal effects for.
     @param max_iter Maximum number of iterations
     @param optimzer Optimizer to be used (newton/irls, cg or idg)
     @param tolerance Resiual tolerance

     @par
     To include an intercept in the model, set one coordinate in the
     <tt>independentVariables</tt> array to 1.

     @return void

     @usage
     For function summary information. Run
     sql> select margins_logregr('help');
     OR
     sql> select margins_logregr();
     OR
     sql> select margins_logregr('?');
     For function usage information. Run
     sql> select margins_logregr('usage');
     """
     # Reset the message level to avoid random messages
     old_msg_level = plpy.execute("""
                                   SELECT setting
                                   FROM pg_settings
                                   WHERE name='client_min_messages'
                                   """)[0]['setting']
     plpy.execute('SET client_min_messages TO warning')

     # Validate arguments
     _margins_logregr_validate_args(schema_madlib,
                                 source_table,
                                 out_table,
                                 dependent_varname,
                                 input_independent_varname,
                                 input_group_cols,
                                 marginal_vars,
                                 max_iter,
                                 optimizer,
                                 tolerance);

     # NOTICE: * support was removed because other modules did not have it.
     # Uncomment the following code if you want to re-add '*' support

     # Check for '*' in indepdendent variables
     #if input_independent_varname == "*":
     #  independent_varname = _internal_return_all_except_dep_vars(
     #                      schema_madlib,
     #                      source_table,
     #                      dependent_varname
     #                      )
     #else:

     independent_varname = input_independent_varname

     # No grouping
     if not input_group_cols:
         # Step 1: Create output table with appropriate column names
         plpy.execute("""
           CREATE TABLE %s (
           margins DOUBLE PRECISION[],
           std_err          DOUBLE PRECISION[],
           t_stats          DOUBLE PRECISION[],
           p_values         DOUBLE PRECISION[])""" % out_table
         )

         # Step 2: Run the regression if the coefficients are not provided
         temp_table = __unique_string();
         plpy.execute("""
         SELECT %s.logregr_train('%s',
                                '%s',
                                '%s',
                                '%s',
                                NULL,
                                %s,
                                '%s',
                                %s)
         """ % (schema_madlib,
                source_table,
                temp_table,
                dependent_varname,
                independent_varname,
                max_iter,
                optimizer,
                tolerance))

         regr_coef = plpy.execute('SELECT coef from %s' % temp_table)
         regr_coef = string_to_array(regr_coef[0]['coef'])

         # If marginal variables are none. Then chose all variables
         if not marginal_vars:
           marginal_vars = range(1,len(regr_coef)+1)
         else:
           marginal_vars = map(int, string_to_array(marginal_vars))

         plpy.execute('DROP TABLE IF EXISTS %s' % temp_table)

         # Step 3: Compute the marginal effects
         marginal_log_rst = plpy.execute("""
                   SELECT (%s.marginal_logregr(
                               (%s)::BOOLEAN,
                                %s,
                                %s)).* FROM %s
                 """ % (
                       schema_madlib,
                       dependent_varname,
                       independent_varname,
                       _internal_py_array_to_sql_string(regr_coef),
                       source_table))

         # convert SQL array to a python list
         #  (taking into account GP4.1 exception behavior)
         margins = string_to_array(marginal_log_rst[0]["margins"])
         std_err = string_to_array(marginal_log_rst[0]["std_err"])
         p_values = string_to_array(marginal_log_rst[0]["p_values"])
         t_stats = string_to_array(marginal_log_rst[0]["t_stats"])

         # get subset of indices that need to be returned as solution
         margins  = _internal_get_margins_from_array(margins, marginal_vars)
         std_err = _internal_get_margins_from_array(std_err, marginal_vars)
         t_stats = _internal_get_margins_from_array(t_stats, marginal_vars)
         p_values = _internal_get_margins_from_array(p_values, marginal_vars)

         # Step 4:
         insert_string = """
                     INSERT INTO %s VALUES (%s,  %s, %s, %s)
                     """ % (out_table,
                            _internal_py_array_to_sql_string(margins),
                            _internal_py_array_to_sql_string(std_err),
                            _internal_py_array_to_sql_string(t_stats),
                            _internal_py_array_to_sql_string(p_values))


         # Step 4: Clean up output to make sure infinity and nan are cast properly
         insert_string = re.sub('Infinity', "'Infinity'::double precision", insert_string)
         insert_string = re.sub('infinity', "'Infinity'::double precision", insert_string)
         insert_string = re.sub('Nan', "'Nan'::double precision", insert_string)
         insert_string = re.sub('nan', "'Nan'::double precision", insert_string)
         insert_string = re.sub('NaN', "'Nan'::double precision", insert_string)
         plpy.execute(insert_string)

         plpy.execute("SET client_min_messages TO %s" % old_msg_level)

 # ========================================================================
 # Input handling for multinomial logistic regression
 def _margins_mlogregr_validate_args(
     schema_madlib,
     source_table,
     out_table,
     dependent_varname,
     independent_varname,
     ref_category,
     input_group_cols,
     marginal_vars,
     max_iter,
     optimizer,
     tolerance):
     """
     Validate the arguments
     """
     if not source_table or source_table in ('null', '') or \
             (not table_exists(source_table)):
         plpy.error("Margins error: Data table does not exist!")

     if table_is_empty(source_table):
         plpy.error("Margins error: Data table is empty!")

     if out_table.lower() in ('null', ''):
         plpy.error("Margins error: Invalid output table name!")

     if table_exists(out_table):
         plpy.error("Margins error: Output table already exists!")

     if not dependent_varname or dependent_varname.lower() in ('null', ''):
         plpy.error("Margins error: Invalid dependent column name!")

     if not scalar_col_has_no_null(source_table, dependent_varname):
         plpy.error("Margins error: Dependent variable has Null values! \
                     Please filter out Null values before using this function!")

     if not independent_varname or independent_varname.lower() in ('null', ''):
         plpy.error("Margins error: Invalid independent column name!")

     if input_group_cols and input_group_cols.lower() in ('null', ''):
         plpy.error("Margins error: Invalid grouping columns name!")

     if ref_category is None:
         plpy.error("Margins: Reference category cannot be null!")

     if ref_category < 0:
         plpy.error("Margins error: Reference category cannot be negative!")

     if max_iter <= 0:
         plpy.error("Margins error: Maximum number of iterations must be positive!")

     if tolerance < 0:
         plpy.error("Margins error: The tolerance cannot be negative!")

     if optimizer is None:
         plpy.error(""" Margins error: Optimizer does not exist.
                    Must be 'newton'/'irls'.
                    """)

     if optimizer == "newton":
         optimizer = "irls"
     elif optimizer not in ("irls"):
         plpy.error(""" Margins error: Optimizer does not exist.
                    Must be 'newton'/'irls'.
                    """)
     if input_group_cols:
         if not columns_exist_in_table(source_table,
                                _string_to_array(input_group_cols), schema_madlib):
             plpy.error("Margins error: Grouping column does not exist!")


 # ========================================================================
 # Main function call for marginal logisitc regression
 def margins_mlogregr(schema_madlib, source_table, out_table
                     , dependent_varname, input_independent_varname
                     , ref_category, input_group_cols, marginal_vars
                     , max_iter, optimizer, tolerance , **kwargs):
     """
     @brief A wrapper function for the various marginal regression analyzes.

     @param source_table String identifying the input table
     @param out_table String identifying the output table to be created
     @param dependent_varname Column containing the dependent variable
     @param independent_varname Column containing the array of independent variables
     @param ref_category Reference category for multinomial logistic regression
     @param grouping_cols Set of columns to group by.
     @param marginal_vars Subset of independent variables to calculate marginal effects for.
     @param max_iter Maximum number of iterations
     @param optimzer Optimizer to be used (newton/irls, cg or idg)
     @param tolerance Resiual tolerance

     @par
     To include an intercept in the model, set one coordinate in the
     <tt>independentVariables</tt> array to 1.

     @return void

     @usage
     For function summary information. Run
     sql> select margins_mlogregr('help');
     OR
     sql> select margins_mlogregr();
     OR
     sql> select margins_mlogregr('?');
     """
     # Reset the message level to avoid random messages
     old_msg_level = plpy.execute("""
                                   SELECT setting
                                   FROM pg_settings
                                   WHERE name='client_min_messages'
                                   """)[0]['setting']
     plpy.execute('SET client_min_messages TO warning')

     # Validate arguments
     _margins_mlogregr_validate_args(schema_madlib,
                                 source_table,
                                 out_table,
                                 dependent_varname,
                                 input_independent_varname,
                                 ref_category,
                                 input_group_cols,
                                 marginal_vars,
                                 max_iter,
                                 optimizer,
                                 tolerance);

     # NOTICE: * support was removed because other modules did not have it.
     # Uncomment the following code if you want to re-add '*' support

     # Check for '*' in indepdendent variables
     #if input_independent_varname == "*":
     #  independent_varname = _internal_return_all_except_dep_vars(
     #                      schema_madlib,
     #                      source_table,
     #                      dependent_varname
     #                      )
     #else:

     independent_varname = input_independent_varname

     # No grouping
     if not input_group_cols:
         # Step 1: Create output table with appropriate column names
         plpy.execute("""
           CREATE TABLE %s (
           margins DOUBLE PRECISION[],
           std_err          DOUBLE PRECISION[],
           t_stats          DOUBLE PRECISION[],
           p_values         DOUBLE PRECISION[])""" % out_table
         )

         # Step 2: Run the regression if the coefficients are not provided
         regr_coef = plpy.execute("""
         SELECT coef from %s.mlogregr('%s',
                            '%s',
                            '%s',
                            %s,
                            '%s',
                            %s,
                            %s)
         """ % (schema_madlib,
                source_table,
                dependent_varname,
                independent_varname,
                max_iter,
                optimizer,
                tolerance,
                ref_category))

         regr_coef = string_to_array(regr_coef[0]['coef'])
         num_categories = plpy.execute(\
                     'SELECT count(DISTINCT %s) as num_cats FROM %s' %
                                       (dependent_varname, source_table))
         num_categories = int(num_categories[0]['num_cats'])

         # If marginal variables are none. Then chose all variables
         if not marginal_vars:
           size = len(regr_coef)
           marginal_vars = range(1,size+1)
         else:
           # For each independent var, get out all the indices
           marginal_vars_only = map(int, string_to_array(marginal_vars))
           marginal_vars = []

           # Calculate the number of idep_vars
           # Note: We assume that the reference variables are not being
           # output from the multinomial regression
           num_indep_vars = len(regr_coef) / (num_categories-1)

           for j in range(num_categories-1):
             for m in marginal_vars_only:
               # Note: The marginal_vars is a base 1 array
               marginal_vars.append(j * num_indep_vars + m)

         # Step 3: Compute the marginal effects
         marginal_log_rst = plpy.execute("""
                   SELECT (%s.marginal_mlogregr(
                               (%s)::INTEGER,
                                %s,
                                %s,
                                %s,
                                %s)).* FROM %s
                 """ % (
                       schema_madlib,
                       dependent_varname,
                       num_categories,
                       ref_category,
                       independent_varname,
                       _internal_py_array_to_sql_string(regr_coef),
                       source_table))

         margins = string_to_array(marginal_log_rst[0]["margins"])
         std_err = string_to_array(marginal_log_rst[0]["std_err"])
         p_values = string_to_array(marginal_log_rst[0]["p_values"])
         t_stats = string_to_array(marginal_log_rst[0]["t_stats"])

         margins  = _internal_get_margins_from_array(margins, marginal_vars)
         std_err = _internal_get_margins_from_array(std_err, marginal_vars)
         t_stats = _internal_get_margins_from_array(t_stats, marginal_vars)
         p_values = _internal_get_margins_from_array(p_values, marginal_vars)

         # Step 4: Insert into table
         insert_string = """
                     INSERT INTO %s VALUES (%s,  %s, %s, %s)
                     """ % (out_table,
                            _internal_py_array_to_sql_string(margins),
                            _internal_py_array_to_sql_string(std_err),
                            _internal_py_array_to_sql_string(t_stats),
                            _internal_py_array_to_sql_string(p_values))


         # Step 5: Clean up output to make sure infinity and nan are cast properly
         insert_string = re.sub('Infinity', "'Infinity'::double precision", insert_string)
         insert_string = re.sub('inf', "'Infinity'::double precision", insert_string)
         insert_string = re.sub('Nan', "'Nan'::double precision", insert_string)
         plpy.execute(insert_string)

         plpy.execute("SET client_min_messages TO %s" % old_msg_level)
	# coding=utf-8

	"""
	@file marginal.py_in

	@brief Marginal Effects: Common functions

	@namespace marginal

	@brief Marginal effects: Common functions
	"""
	import plpy
	import re
	from utilities.utilities import __unique_string
	from utilities.utilities import _string_to_array
	from utilities.validate_args import table_exists
	from utilities.validate_args import columns_exist_in_table
	from utilities.validate_args import table_is_empty
	from utilities.validate_args import scalar_col_has_no_null
	from utilities.validate_args import get_cols

	# use mad_vec to process arrays passed as strings in GPDB < 4.1 and PG < 9.0
	from utilities.utilities import __mad_version
	version_wrapper = __mad_version()
	string_to_array = version_wrapper.select_vecfunc()
	array_to_string = version_wrapper.select_vec_return()

	# ========================================================================

	def _margins_logregr_validate_args(
	schema_madlib,
	source_table,
	out_table,
	dependent_varname,
	independent_varname,
	input_group_cols,
	marginal_vars,
	max_iter,
	optimizer,
	tolerance):
	"""
	Validate the arguments
	"""
	if not source_table or source_table in ('null', '') or \
	(not table_exists(source_table)):
	plpy.error("Margins error: Data table does not exist!")

	if table_is_empty(source_table):
	plpy.error("Margins error: Data table is empty!")

	if out_table.lower() in ('null', ''):
	plpy.error("Margins error: Invalid output table name!")

	if table_exists(out_table):
	plpy.error("Margins error: Output table already exists!")

	if not dependent_varname or dependent_varname.lower() in ('null', ''):
	plpy.error("Margins error: Invalid dependent column name!")

	if not scalar_col_has_no_null(source_table, dependent_varname):
	plpy.error("Margins error: Dependent variable has Null values! \
	Please filter out Null values before using this function!")

	if not independent_varname or independent_varname.lower() in ('null', ''):
	plpy.error("Margins error: Invalid independent column name!")

	if input_group_cols and input_group_cols.lower() in ('null', ''):
	plpy.error("Margins error: Invalid grouping columns name!")

	if max_iter <= 0:
	plpy.error("Margins error: Maximum number of iterations must be positive!")

	if tolerance < 0:
	plpy.error("Margins error: The tolerance cannot be negative!")

	if optimizer is None:
	optimizer = "irls"
	elif optimizer not in ("irls", "cg", "igd"):
	plpy.error(""" Margins error: Optimizer does not exist.
	Must be 'newton'/'irls', 'cg', or 'igd'.
	""")

	if optimizer == "newton":
	optimizer = "irls"
	elif optimizer not in ("irls", "cg", "igd"):
	plpy.error(""" Margins error: Optimizer %s does not exist.
	Must be 'newton'/'irls', 'cg', or 'igd'.
	""" % optimizer)
	if input_group_cols:
	if not columns_exist_in_table(source_table,
	_string_to_array(input_group_cols), schema_madlib):
	plpy.error("Margins error: Grouping column does not exist!")


	# ========================================================================
	# Returns relevant ouputs
	# Note: The marginal_vars index list is 1 base
	def _internal_get_margins_from_array(marginal_coef, marginal_vars):

	if min(marginal_vars) < 1:
	plpy.error('''Margins error: All indices in the marginal coefficients array must be >= 1.''')
	if max(marginal_vars) > len(marginal_coef):
	plpy.error('''Margins error: All indices in the marginal coefficients array must be <= the number of independent variables.''')

	return [marginal_coef[i-1] for i in marginal_vars]


	# ========================================================================
	# Convert numeric array to SQL string
	def _internal_py_array_to_sql_string(array):
	return "ARRAY[%s]" % ','.join(map(str, array))


	# ========================================================================
	# Convert numeric array to SQL string
	def _internal_return_all_except_dep_vars(schema_madlib,
	source_table,
	dependent_varname):
	cols = get_cols(source_table, schema_madlib)
	outstr_array = []
	for i in range(len(cols)):
	if cols[i] != dependent_varname:
	outstr_array.append(cols[i])

	return "ARRAY[%s]" % ','.join(outstr_array)

	# ========================================================================
	# Main function call for marginal logisitc regression
	def margins_logregr(schema_madlib, source_table, out_table
	, dependent_varname, input_independent_varname
	, input_group_cols, marginal_vars
	, max_iter, optimizer, tolerance , **kwargs):
	"""
	@brief A wrapper function for the various marginal regression analyzes.

	@param source_table String identifying the input table
	@param out_table String identifying the output table to be created
	@param dependent_varname Column containing the dependent variable
	@param independent_varname Column containing the array of independent variables
	@param grouping_cols Set of columns to group by.
	@param marginal_vars Subset of independent variables to calculate marginal effects for.
	@param max_iter Maximum number of iterations
	@param optimzer Optimizer to be used (newton/irls, cg or idg)
	@param tolerance Resiual tolerance

	@par
	To include an intercept in the model, set one coordinate in the
	<tt>independentVariables</tt> array to 1.

	@return void

	@usage
	For function summary information. Run
	sql> select margins_logregr('help');
	OR
	sql> select margins_logregr();
	OR
	sql> select margins_logregr('?');
	For function usage information. Run
	sql> select margins_logregr('usage');
	"""
	# Reset the message level to avoid random messages
	old_msg_level = plpy.execute("""
	SELECT setting
	FROM pg_settings
	WHERE name='client_min_messages'
	""")[0]['setting']
	plpy.execute('SET client_min_messages TO warning')

	# Validate arguments
	_margins_logregr_validate_args(schema_madlib,
	source_table,
	out_table,
	dependent_varname,
	input_independent_varname,
	input_group_cols,
	marginal_vars,
	max_iter,
	optimizer,
	tolerance);

	# NOTICE: * support was removed because other modules did not have it.
	# Uncomment the following code if you want to re-add '*' support

	# Check for '*' in indepdendent variables
	#if input_independent_varname == "*":
	# independent_varname = _internal_return_all_except_dep_vars(
	# schema_madlib,
	# source_table,
	# dependent_varname
	# )
	#else:

	independent_varname = input_independent_varname

	# No grouping
	if not input_group_cols:
	# Step 1: Create output table with appropriate column names
	plpy.execute("""
	CREATE TABLE %s (
	margins DOUBLE PRECISION[],
	std_err DOUBLE PRECISION[],
	t_stats DOUBLE PRECISION[],
	p_values DOUBLE PRECISION[])""" % out_table
	)

	# Step 2: Run the regression if the coefficients are not provided
	temp_table = __unique_string();
	plpy.execute("""
	SELECT %s.logregr_train('%s',
	'%s',
	'%s',
	'%s',
	NULL,
	%s,
	'%s',
	%s)
	""" % (schema_madlib,
	source_table,
	temp_table,
	dependent_varname,
	independent_varname,
	max_iter,
	optimizer,
	tolerance))

	regr_coef = plpy.execute('SELECT coef from %s' % temp_table)
	regr_coef = string_to_array(regr_coef[0]['coef'])

	# If marginal variables are none. Then chose all variables
	if not marginal_vars:
	marginal_vars = range(1,len(regr_coef)+1)
	else:
	marginal_vars = map(int, string_to_array(marginal_vars))

	plpy.execute('DROP TABLE IF EXISTS %s' % temp_table)

	# Step 3: Compute the marginal effects
	marginal_log_rst = plpy.execute("""
	SELECT (%s.marginal_logregr(
	(%s)::BOOLEAN,
	%s,
	%s)).* FROM %s
	""" % (
	schema_madlib,
	dependent_varname,
	independent_varname,
	_internal_py_array_to_sql_string(regr_coef),
	source_table))

	# convert SQL array to a python list
	# (taking into account GP4.1 exception behavior)
	margins = string_to_array(marginal_log_rst[0]["margins"])
	std_err = string_to_array(marginal_log_rst[0]["std_err"])
	p_values = string_to_array(marginal_log_rst[0]["p_values"])
	t_stats = string_to_array(marginal_log_rst[0]["t_stats"])

	# get subset of indices that need to be returned as solution
	margins = _internal_get_margins_from_array(margins, marginal_vars)
	std_err = _internal_get_margins_from_array(std_err, marginal_vars)
	t_stats = _internal_get_margins_from_array(t_stats, marginal_vars)
	p_values = _internal_get_margins_from_array(p_values, marginal_vars)

	# Step 4:
	insert_string = """
	INSERT INTO %s VALUES (%s, %s, %s, %s)
	""" % (out_table,
	_internal_py_array_to_sql_string(margins),
	_internal_py_array_to_sql_string(std_err),
	_internal_py_array_to_sql_string(t_stats),
	_internal_py_array_to_sql_string(p_values))


	# Step 4: Clean up output to make sure infinity and nan are cast properly
	insert_string = re.sub('Infinity', "'Infinity'::double precision", insert_string)
	insert_string = re.sub('infinity', "'Infinity'::double precision", insert_string)
	insert_string = re.sub('Nan', "'Nan'::double precision", insert_string)
	insert_string = re.sub('nan', "'Nan'::double precision", insert_string)
	insert_string = re.sub('NaN', "'Nan'::double precision", insert_string)
	plpy.execute(insert_string)

	plpy.execute("SET client_min_messages TO %s" % old_msg_level)

	# ========================================================================
	# Input handling for multinomial logistic regression
	def _margins_mlogregr_validate_args(
	schema_madlib,
	source_table,
	out_table,
	dependent_varname,
	independent_varname,
	ref_category,
	input_group_cols,
	marginal_vars,
	max_iter,
	optimizer,
	tolerance):
	"""
	Validate the arguments
	"""
	if not source_table or source_table in ('null', '') or \
	(not table_exists(source_table)):
	plpy.error("Margins error: Data table does not exist!")

	if table_is_empty(source_table):
	plpy.error("Margins error: Data table is empty!")

	if out_table.lower() in ('null', ''):
	plpy.error("Margins error: Invalid output table name!")

	if table_exists(out_table):
	plpy.error("Margins error: Output table already exists!")

	if not dependent_varname or dependent_varname.lower() in ('null', ''):
	plpy.error("Margins error: Invalid dependent column name!")

	if not scalar_col_has_no_null(source_table, dependent_varname):
	plpy.error("Margins error: Dependent variable has Null values! \
	Please filter out Null values before using this function!")

	if not independent_varname or independent_varname.lower() in ('null', ''):
	plpy.error("Margins error: Invalid independent column name!")

	if input_group_cols and input_group_cols.lower() in ('null', ''):
	plpy.error("Margins error: Invalid grouping columns name!")

	if ref_category is None:
	plpy.error("Margins: Reference category cannot be null!")

	if ref_category < 0:
	plpy.error("Margins error: Reference category cannot be negative!")

	if max_iter <= 0:
	plpy.error("Margins error: Maximum number of iterations must be positive!")

	if tolerance < 0:
	plpy.error("Margins error: The tolerance cannot be negative!")

	if optimizer is None:
	plpy.error(""" Margins error: Optimizer does not exist.
	Must be 'newton'/'irls'.
	""")

	if optimizer == "newton":
	optimizer = "irls"
	elif optimizer not in ("irls"):
	plpy.error(""" Margins error: Optimizer does not exist.
	Must be 'newton'/'irls'.
	""")
	if input_group_cols:
	if not columns_exist_in_table(source_table,
	_string_to_array(input_group_cols), schema_madlib):
	plpy.error("Margins error: Grouping column does not exist!")



	# ========================================================================
	# Main function call for marginal logisitc regression
	def margins_mlogregr(schema_madlib, source_table, out_table
	, dependent_varname, input_independent_varname
	, ref_category, input_group_cols, marginal_vars
	, max_iter, optimizer, tolerance , **kwargs):
	"""
	@brief A wrapper function for the various marginal regression analyzes.

	@param source_table String identifying the input table
	@param out_table String identifying the output table to be created
	@param dependent_varname Column containing the dependent variable
	@param independent_varname Column containing the array of independent variables
	@param ref_category Reference category for multinomial logistic regression
	@param grouping_cols Set of columns to group by.
	@param marginal_vars Subset of independent variables to calculate marginal effects for.
	@param max_iter Maximum number of iterations
	@param optimzer Optimizer to be used (newton/irls, cg or idg)
	@param tolerance Resiual tolerance

	@par
	To include an intercept in the model, set one coordinate in the
	<tt>independentVariables</tt> array to 1.

	@return void

	@usage
	For function summary information. Run
	sql> select margins_mlogregr('help');
	OR
	sql> select margins_mlogregr();
	OR
	sql> select margins_mlogregr('?');
	"""
	# Reset the message level to avoid random messages
	old_msg_level = plpy.execute("""
	SELECT setting
	FROM pg_settings
	WHERE name='client_min_messages'
	""")[0]['setting']
	plpy.execute('SET client_min_messages TO warning')

	# Validate arguments
	_margins_mlogregr_validate_args(schema_madlib,
	source_table,
	out_table,
	dependent_varname,
	input_independent_varname,
	ref_category,
	input_group_cols,
	marginal_vars,
	max_iter,
	optimizer,
	tolerance);

	# NOTICE: * support was removed because other modules did not have it.
	# Uncomment the following code if you want to re-add '*' support

	# Check for '*' in indepdendent variables
	#if input_independent_varname == "*":
	# independent_varname = _internal_return_all_except_dep_vars(
	# schema_madlib,
	# source_table,
	# dependent_varname
	# )
	#else:

	independent_varname = input_independent_varname

	# No grouping
	if not input_group_cols:
	# Step 1: Create output table with appropriate column names
	plpy.execute("""
	CREATE TABLE %s (
	margins DOUBLE PRECISION[],
	std_err DOUBLE PRECISION[],
	t_stats DOUBLE PRECISION[],
	p_values DOUBLE PRECISION[])""" % out_table
	)

	# Step 2: Run the regression if the coefficients are not provided
	regr_coef = plpy.execute("""
	SELECT coef from %s.mlogregr('%s',
	'%s',
	'%s',
	%s,
	'%s',
	%s,
	%s)
	""" % (schema_madlib,
	source_table,
	dependent_varname,
	independent_varname,
	max_iter,
	optimizer,
	tolerance,
	ref_category))

	regr_coef = string_to_array(regr_coef[0]['coef'])
	num_categories = plpy.execute(\
	'SELECT count(DISTINCT %s) as num_cats FROM %s' %
	(dependent_varname, source_table))
	num_categories = int(num_categories[0]['num_cats'])

	# If marginal variables are none. Then chose all variables
	if not marginal_vars:
	size = len(regr_coef)
	marginal_vars = range(1,size+1)
	else:
	# For each independent var, get out all the indices
	marginal_vars_only = map(int, string_to_array(marginal_vars))
	marginal_vars = []

	# Calculate the number of idep_vars
	# Note: We assume that the reference variables are not being
	# output from the multinomial regression
	num_indep_vars = len(regr_coef) / (num_categories-1)

	for j in range(num_categories-1):
	for m in marginal_vars_only:
	# Note: The marginal_vars is a base 1 array
	marginal_vars.append(j * num_indep_vars + m)

	# Step 3: Compute the marginal effects
	marginal_log_rst = plpy.execute("""
	SELECT (%s.marginal_mlogregr(
	(%s)::INTEGER,
	%s,
	%s,
	%s,
	%s)).* FROM %s
	""" % (
	schema_madlib,
	dependent_varname,
	num_categories,
	ref_category,
	independent_varname,
	_internal_py_array_to_sql_string(regr_coef),
	source_table))

	margins = string_to_array(marginal_log_rst[0]["margins"])
	std_err = string_to_array(marginal_log_rst[0]["std_err"])
	p_values = string_to_array(marginal_log_rst[0]["p_values"])
	t_stats = string_to_array(marginal_log_rst[0]["t_stats"])

	margins = _internal_get_margins_from_array(margins, marginal_vars)
	std_err = _internal_get_margins_from_array(std_err, marginal_vars)
	t_stats = _internal_get_margins_from_array(t_stats, marginal_vars)
	p_values = _internal_get_margins_from_array(p_values, marginal_vars)

	# Step 4: Insert into table
	insert_string = """
	INSERT INTO %s VALUES (%s, %s, %s, %s)
	""" % (out_table,
	_internal_py_array_to_sql_string(margins),
	_internal_py_array_to_sql_string(std_err),
	_internal_py_array_to_sql_string(t_stats),
	_internal_py_array_to_sql_string(p_values))


	# Step 5: Clean up output to make sure infinity and nan are cast properly
	insert_string = re.sub('Infinity', "'Infinity'::double precision", insert_string)
	insert_string = re.sub('inf', "'Infinity'::double precision", insert_string)
	insert_string = re.sub('Nan', "'Nan'::double precision", insert_string)
	plpy.execute(insert_string)

	plpy.execute("SET client_min_messages TO %s" % old_msg_level)