blob: 15c5f9d4f7b49e9cdf11a510281670d165571c82 [file] [log] [blame]
"""
@file margins.py_in
@brief Marginal Effects with Interactions: Contains the main interface function
and other functions that are common to the various methods and related to
database constructs.
@namespace marginal
"""
import plpy
from utilities.utilities import _assert
from utilities.validate_args import table_exists
from utilities.validate_args import columns_exist_in_table
from utilities.validate_args import table_is_empty
from utilities.utilities import py_list_to_sql_string
from utilities.utilities import add_postfix
from utilities.control import MinWarning
from margins_builder import MarginalEffectsBuilder
from margins_builder import TermBase
import re
#------------------------------------------------------------------------------
def margins(schema_madlib, model_table, out_table, x_design=None,
source_table=None, marginal_vars=None, *args, **kwargs):
"""
Call the appropriate margins functions depending on the regression type
found in the model table.
Args:
@param schema_madlib
@param model_table
@param out_table
@param x_design
@param source_table
@param marginal_vars
@param args
@param kwargs
Returns:
None
"""
with MinWarning('warning'):
# 1) validate all arguments
margins_validate_model_table(model_table)
if model_table:
model_summary_table = add_postfix(model_table, "_summary")
else:
raise ValueError("Margins error: Invalid regression model table name!")
if not source_table:
source_table = plpy.execute("SELECT source_table FROM {0}".
format(model_summary_table))[0]['source_table']
margins_validate_args(out_table, source_table, x_design)
# 2) get the regression type ...
reg_type = plpy.execute(
"SELECT method from {0}".format(model_summary_table))[0]['method']
margins_method = None
if not reg_type:
plpy.error("Margins error: Regression type cannot be obtained "
"from the model table. 'margins()' currently only "
"supported for linregr, logregr, mlogregr, or coxph")
elif reg_type in ("linregr", "linear", "linear_regression",
"linear regression"):
margins_method = margins_linregr
elif reg_type in ("logregr", "logistic", "logistic_regression",
"logistic regression"):
margins_method = margins_logregr
elif reg_type in ("mlogregr", "multilogistic",
"multilogistic_regression",
"multinomial logistic regression"):
margins_method = margins_mlogregr
elif reg_type in ("coxph", "cox_proportional_hazards",
"cox prop", "cox proportional hazards"):
margins_method = margins_coxph
else:
plpy.error("""
Margins not supported for this model table.
Re-run training {linregr, logregr, mlogregr, coxph}_train
before using margins().""")
# 3) ... and call the appropriate sub-function
margins_method(schema_madlib, model_table, out_table, source_table,
x_design, marginal_vars, *args, **kwargs)
#------------------------------------------------------------------------------
def margins_help(schema_madlib, message, **kwargs):
"""
Help function for margins
Args:
@param schema_madlib
@param message: string, Help message string
@param kwargs
Returns:
String. Help/usage information
"""
if not message:
help_string = """
-----------------------------------------------------------------------
SUMMARY
-----------------------------------------------------------------------
Functionality: Calculate marginal effects for binomial/multinomial logistic
regression with interaction terms.
A marginal effect (ME) or partial effect measures the effect on the
conditional mean of a response (dependent variable) for a change in one of the
regressors (independent variable).
We currently only support margins for linear, logistic,
multinomial logistic regression, and cox proportional hazard.
For more details on function usage:
SELECT {schema_madlib}.margins('usage')
"""
elif message in ['usage', 'help', '?']:
help_string = """
-----------------------------------------------------------------------
USAGE
-----------------------------------------------------------------------
SELECT {schema_madlib}.margins(
'model_table', -- Name of table containing regression model
'output_table', -- Name of result table
'x_design', -- Design of the independent variables
-- (Optional, if not provided or NULL then independent variable list
is assumed to have no interaction terms)
'source_table', -- Source table to apply marginal effects on
-- (Optional, if not provided or NULL then assume training table as the source)
'marginal_vars' -- Variable identifiers (strings same as those in x_design) to
-- calculate marginal effects on
-- (Optional, if not provided or NULL then marginal effects for all
-- basis variables will be returned)
);
-----------------------------------------------------------------------
OUTPUT
-----------------------------------------------------------------------
The output table ('output_table' above) has the following columns
variables INTEGER[], -- Indices of the basis variables,
-- will be same as marginal vars if provided
margins DOUBLE PRECISION[], -- Marginal effects
std_err DOUBLE PRECISION[], -- Standard errors using delta method
z_stats DOUBLE PRECISION[], -- z-stats of the standard errors
p_values DOUBLE PRECISION[], -- p-values of the standard errors
"""
else:
help_string = "No such option. Use {schema_madlib}.margins()"
return help_string.format(schema_madlib=schema_madlib)
# ---------------------------------------------------------------------
def margins_validate_model_table(model_table):
"""
Args:
@param schema_madlib
@param model_table
Returns:
"""
_assert(model_table and
model_table.strip().lower() not in ('null', ''),
"Margins error: Invalid regression model table name!")
_assert(table_exists(model_table),
"Margins error: Specified Model table ({0}) is missing! "
"Rerun underlying regression".format(model_table))
model_summary_table = add_postfix(model_table, '_summary')
_assert(table_exists(model_summary_table),
"Margins error: Summary for model table ({0}) is missing! "
"Rerun underlying regression".format(model_summary_table))
_assert(columns_exist_in_table(model_summary_table,
['method', 'source_table', 'out_table', 'dependent_varname',
'independent_varname']),
"Margins error: Invalid model summary table ({0})"
" - some required columns missing".format(model_summary_table))
return True
#------------------------------------------------------------------------------
def margins_validate_args(out_table, source_table, x_design=None,
**kwargs):
_assert(out_table and
out_table.strip().lower() not in ('null', ''),
"Margins error: Invalid output table name!")
_assert(not table_exists(out_table, only_first_schema=True),
"Margins error: Output table already exists!")
_assert(source_table and source_table.strip().lower() not in ('null', ''),
"Margins error: Invalid data table name!")
_assert(table_exists(source_table),
"Margins error: Data table ({0}) is missing!".format(source_table))
_assert(not table_is_empty(source_table),
"Margins error: Data table ({0}) is empty!".format(source_table))
# -------------------------------------------------------------------------
def _get_categorical_set_unset_strings(x_design_parser, indep_name='x', shortened_output=False):
"""
Return strings that represent the discrete difference strings (set and unset)
string, along with the indices for all the categorical variables.
"""
subset_indicators = x_design_parser.get_subset_indicator_terms()
# subset_cat_indices represents the indicator variables
# that are present in x_design_parser.subset_basis
indicators_set_string = []
indicators_unset_string = []
create_indicator_string = lambda x: (py_list_to_sql_string(x,
array_type="double precision"))
for each_term in subset_indicators:
set_list, unset_list = x_design_parser.get_discrete_diff_arrays(
each_term, indep_name, shortened_output=shortened_output)
set_list[:] = [i for i in set_list if i is not None]
unset_list[:] = [i for i in unset_list if i is not None]
# create two matrices - 1 with all set expressions,
# 1 with all unset expressions
indicators_set_string.append(create_indicator_string(set_list))
indicators_unset_string.append(create_indicator_string(unset_list))
# create strings that represent the categorical discrete differences
if subset_indicators:
return [py_list_to_sql_string(i, array_type="double precision")
for i in (indicators_set_string, indicators_unset_string)]
else:
return ("NULL::double precision[]", "NULL::double precision[]")
#------------------------------------------------------------------------------
def _parse_marginal_vars(marginal_var_str):
"""
Marginal vars is supposed to be a list of identifiers separated by ','.
It's possible that the user inputs it as an array string
eg: ARRAY[1, 2, 3, 4] or '{2, 4, 5}'. We strip out the 'ARRAY' if present in
front and also strip square/curly brackets.
"""
if str.lower(marginal_var_str.strip()[:6]) == "array[":
marginal_var_str = marginal_var_str[5:]
marginal_var_str = marginal_var_str.strip(" {}[]")
marginal_var_list = []
# reg = re.compile('".*?"|[^\,]+')
# all_elem = reg.findall(marginal_var_str.strip())
comma_reg = re.compile(r'((?:[^,"]|"[^"]*")+)')
all_elem = [i.strip() for i in
comma_reg.split(marginal_var_str.strip())[1::2]]
for each_str in all_elem:
each_str = each_str.strip()
if not (each_str.startswith('"') and each_str.endswith('"')):
if not re.match(r'^\w+$', each_str):
raise ValueError("Invalid identifier '{0}' in marginal_var. "
"Quote an identifier with non-alphanumeric "
"characters using double quotes.".
format(each_str))
each_str = each_str.lower()
# if each_str is not quoted then always keep a lower-case copy
# (i.e. each_str is case-insensitive)
# else:
# # if each_str is quoted then we keep the case intact, but
# # strip the double-quotes.
# each_str = each_str.strip('"')
marginal_var_list.append(each_str)
return marginal_var_list
#------------------------------------------------------------------------------
def _get_regression_arguments(schema_madlib, model_table):
"""
Return all necessary argument values from the model and summary table of
underlying regression
Args:
@params model_table: str, Name of the model table containing regression
model
Returns:
Tuple:
coef: list, Coefficient values from regression.
independent_varname: str, The string passed as independent_varname
in underlying regression.
grouping_col: str, If grouping was used in the underlying regression,
then this argument gives the grouping columns
as a comma-separated list. This value is None
if no grouping was used.
"""
if model_table:
model_summary_table = add_postfix(model_table, "_summary")
else:
raise ValueError("Margins error: Invalid regression model table name!")
coef = plpy.execute("SELECT coef as x FROM {0}".format(model_table))[0]['x']
independent_varname = plpy.execute("SELECT independent_varname as x FROM {0}".
format(model_summary_table))[0]['x']
if columns_exist_in_table(model_summary_table, ['grouping_col'], schema_madlib):
grouping_col = plpy.execute("SELECT grouping_col AS x FROM {0}".
format(model_summary_table))[0]['x']
else:
grouping_col = None
if any(i is None for i in (coef, independent_varname)):
plpy.error("Margins error: Model table ({0}) is missing important parameters. "
"Rerun logistic regression.".format(model_table))
return coef, independent_varname, grouping_col
#------------------------------------------------------------------------------
def _get_parser(coef, x_design, marginal_vars=None):
"""
Args:
@param x_design: str, String representation of the design elements
(see MarginalEffectsBuilder for example)
@param marginal_vars: str, comma-separated list of integers - indexes
of independent variables to output.
If None, then all variables are output.
Returns:
Tuple.
x_design_parser: Object representing a marginal effects parser
marginal_var_list: List of marginal variables
"""
if marginal_vars:
marginal_vars = marginal_vars.strip()
n_indep = len(coef)
if not x_design:
x_design = ','.join(str(i+1) for i in range(n_indep))
x_design_parser = MarginalEffectsBuilder(x_design)
_assert(x_design_parser.n_total_terms == n_indep,
"Margins error: Invalid x_design. Number of terms in x_design ({0}) "
"not same as number of independent variables ({1})".
format(x_design_parser.n_total_terms, n_indep))
if marginal_vars:
# basis terms are not necessarily continguous
# example: basis_terms = [1, 2, 3, 5, 8, 11]
# If all variables are needed in output then
# marginal_var_list would be [1, 2, 3, 4, 5, 6] (max value would be len(basis_terms))
marginal_var_list = _parse_marginal_vars(marginal_vars)
_assert(all(TermBase(x) in x_design_parser.inverse_basis_terms
for x in marginal_var_list),
"Margins error: marginal_vars argument contains values not "
"present as a basis variable in x_design")
else:
# marginal_var_list
marginal_var_list = x_design_parser.get_sorted_basis_identifiers()
# do not return any answer for reference terms
ref_terms_removed = []
if x_design_parser.reference_terms:
for each_ref in x_design_parser.reference_terms.values():
if each_ref.identifier in marginal_var_list:
marginal_var_list.remove(each_ref.identifier)
ref_terms_removed.append(each_ref.identifier)
x_design_parser.subset_basis = marginal_var_list
if ref_terms_removed:
plpy.warning("""
Warning: Reference terms (Variable(s): {ref_terms}) are present in output variables. "
No marginal effects (and statistics) will be provided for these terms.
""".format(ref_terms=', '.join(str(i) for i in ref_terms_removed)))
return x_design_parser
#------------------------------------------------------------------------------
#------------------------------------------------------------------------------
# Linear regression marginal effects
#------------------------------------------------------------------------------
def margins_linregr(schema_madlib, model_table, out_table, source_table,
x_design=None, marginal_vars=None, *args, **kwargs):
"""
Args:
@param schema_madlib
@param model_table
@param out_table
@param x_design
@param source_table
@param marginal_vars
Returns:
None
Raises:
"""
# 1) Get all arguments and validate them
coef, independent_varname, grouping_col = _get_regression_arguments(schema_madlib, model_table)
# 2) Parse x_design and marginal_vars argument
x_design_parser = _get_parser(coef, x_design, marginal_vars)
# subset_basis contains indices for a 1-base array
if not x_design_parser.subset_basis:
raise ValueError("Invalid marginal variables selected.")
# 3) Create a matrix representing the 2nd partial derivative
# (1st order wrt x and 2nd order wrt \beta). For categorical, this is
# the discrete difference.
indep_name = 'x'
derivative_list = x_design_parser.create_2nd_derivative_matrix(indep_name)
derivative_str = py_list_to_sql_string(
[py_list_to_sql_string(i, array_type="double precision")
for i in derivative_list],
array_type="double precision")
# grouping arguments
if not grouping_col:
grouping_str1 = ""
grouping_str2 = ""
using_str = ""
join_str = ","
else:
grouping_str1 = grouping_col + ","
grouping_str2 = "GROUP BY " + grouping_col
using_str = "USING (" + grouping_col + ")"
join_str = "LEFT OUTER JOIN "
execute_string = """
CREATE TABLE {out_table} AS
SELECT
{grouping_str1}
{index_array} as variables,
(res).margins as margins,
(res).std_err as std_err,
(res).z_stats as z_stats,
(res).p_values as p_values
FROM (
SELECT
{grouping_str1}
{schema_madlib}.__margins_int_linregr_agg(
{indep_name}
, coef
, variance_covariance
, {derivative_str})
as res
FROM (
SELECT
{grouping_str1}
{independent_varname} as {indep_name}
FROM {source_table} as s
) q1 {join_str} {model_table} as m {using_str}
{grouping_str2}
) q2
""".format(schema_madlib=schema_madlib,
source_table=source_table,
out_table=out_table,
independent_varname=independent_varname,
indep_name=indep_name,
index_array=py_list_to_sql_string(
[x_design_parser.basis_terms[i]
for i in x_design_parser.subset_basis],
array_type="text"),
model_table=model_table,
derivative_str=derivative_str,
grouping_str1=grouping_str1,
grouping_str2=grouping_str2,
using_str=using_str,
join_str=join_str)
plpy.execute(execute_string)
#------------------------------------------------------------------------------
# Logistic regression marginal effects
#------------------------------------------------------------------------------
def margins_logregr(schema_madlib, model_table, out_table, source_table,
x_design=None, marginal_vars=None,
*args, **kwargs):
"""
Args:
@param schema_madlib
@param model_table
@param out_table
@param x_design
@param source_table
@param marginal_vars
Returns:
None
Raises:
ValueError
"""
# 1) Get all arguments and validate them
coef, independent_varname, grouping_col = _get_regression_arguments(schema_madlib, model_table)
# 2) Get the x_design and marginal_vars arguments
x_design_parser = _get_parser(coef, x_design, marginal_vars)
# subset_basis contains indices for a 1-base array
if not x_design_parser.subset_basis:
raise ValueError("Invalid marginal variables selected.")
# 3) Create a matrix representing the 2nd partial derivative
# (1st order wrt x and 2nd order wrt \beta)
indep_name = 'x'
if x_design_parser.contains_interaction():
derivative_list = x_design_parser.create_2nd_derivative_matrix(indep_name)
derivative_str = py_list_to_sql_string(
[py_list_to_sql_string(i, array_type="double precision")
for i in derivative_list],
array_type="double precision")
else:
derivative_str = "NULL::double precision[]"
# 4) Build the margins for indicator variables as discrete differences
set_string, unset_string = _get_categorical_set_unset_strings(
x_design_parser, indep_name, shortened_output=True)
if x_design_parser.contains_indicators():
zero_base_categorical_indices = py_list_to_sql_string(
[i - 1 for i in x_design_parser.get_all_indicator_indices()],
array_type="double precision")
else:
zero_base_categorical_indices = "NULL::double precision[]"
# grouping arguments
if not grouping_col:
grouping_str1 = ""
grouping_str2 = ""
using_str = ""
join_str = ","
else:
grouping_str1 = grouping_col + ","
grouping_str2 = "GROUP BY " + grouping_col
using_str = "USING (" + grouping_col + ")"
join_str = "LEFT OUTER JOIN "
execute_string = """
CREATE TABLE {out_table} AS
SELECT
{grouping_str1}
{index_array} as variables,
(res).margins as margins,
(res).std_err as std_err,
(res).z_stats as z_stats,
(res).p_values as p_values
FROM (
SELECT
{grouping_str1}
{schema_madlib}.__margins_int_logregr_agg(
{indep_name}
, coef
, variance_covariance
, {zero_base_subset_indices}
, {derivative_str}
, {zero_base_categorical_indices}
, {set_string}
, {unset_string})
as res
FROM (
SELECT
{grouping_str1}
{independent_varname} as {indep_name}
FROM {source_table} as s
) q1 {join_str} {model_table} as m {using_str}
{grouping_str2}
) q2
""".format(schema_madlib=schema_madlib,
source_table=source_table,
out_table=out_table,
independent_varname=independent_varname,
indep_name=indep_name,
index_array=py_list_to_sql_string(
[x_design_parser.basis_terms[i]
for i in x_design_parser.subset_basis],
array_type="text"),
zero_base_categorical_indices=zero_base_categorical_indices,
model_table=model_table,
derivative_str=derivative_str,
set_string=set_string,
unset_string=unset_string,
grouping_str1=grouping_str1,
grouping_str2=grouping_str2,
using_str=using_str,
join_str=join_str,
zero_base_subset_indices=py_list_to_sql_string(
[i - 1 for i in x_design_parser.subset_basis],
array_type="double precision"))
plpy.execute(execute_string)
#------------------------------------------------------------------------------
# Multinomial Logistic regression marginal effects
#------------------------------------------------------------------------------
def margins_mlogregr(schema_madlib, model_table, out_table, source_table,
x_design=None, marginal_vars=None,
*args, **kwargs):
"""
Args:
@param schema_madlib
@param model_table
@param out_table
@param x_design
@param source_table
@param marginal_vars
Returns:
None
Raises:
"""
# 1) Get all arguments and validate them
coef, independent_varname, grouping_col = _get_regression_arguments(schema_madlib, model_table)
summary_table = add_postfix(model_table, "_summary")
# 2) Parse x_design and marginal_vars argument
x_design_parser = _get_parser(coef, x_design, marginal_vars)
if not x_design_parser.subset_basis:
raise ValueError("Invalid marginal variables selected.")
# 3) Create a matrix representing the 2nd partial derivative
# (1st order wrt x and 2nd order wrt \beta)
indep_name = 'x'
if x_design_parser.contains_interaction():
derivative_list = x_design_parser.create_2nd_derivative_matrix(indep_name)
derivative_str = py_list_to_sql_string(
[py_list_to_sql_string(i, array_type="double precision")
for i in derivative_list],
array_type="double precision")
else:
derivative_str = "NULL::double precision[]"
# 4) Build the margins for indicator variables as discrete differences
set_string, unset_string = _get_categorical_set_unset_strings(
x_design_parser, indep_name, shortened_output=True)
if x_design_parser.contains_indicators():
zero_base_categorical_indices = py_list_to_sql_string(
[i - 1 for i in x_design_parser.get_all_indicator_indices()],
array_type="double precision")
else:
zero_base_categorical_indices = "NULL::double precision[]"
execute_string = """
CREATE TABLE {out_table} AS
SELECT
category,
ref_category,
{index_array} as variables,
margins,
std_err,
z_stats,
p_values
FROM
(
SELECT
ref_category,
({schema_madlib}.__mlogregr_format(
(res).margins, {num_basis}, num_categories, ref_category)
).category,
({schema_madlib}.__mlogregr_format(
(res).margins, {num_basis}, num_categories, ref_category)
).coef as margins,
({schema_madlib}.__mlogregr_format(
(res).std_err, {num_basis}, num_categories, ref_category)
).coef as std_err,
({schema_madlib}.__mlogregr_format(
(res).z_stats, {num_basis}, num_categories, ref_category)
).coef as z_stats,
({schema_madlib}.__mlogregr_format(
(res).p_values, {num_basis}, num_categories, ref_category)
).coef as p_values
FROM
(
SELECT
num_categories,
ref_category,
{schema_madlib}.__margins_int_mlogregr_agg(
{indep_name}
, coef
, variance_covariance
, {zero_base_subset_indices}
, {derivative_str}
, {zero_base_categorical_indices}
, {set_string}
, {unset_string})
as res
FROM
(
SELECT
{independent_varname} as {indep_name}
FROM {source_table} as s
) q1, {summary_table} as m
GROUP BY num_categories, ref_category
) q2
) q3
""".format(schema_madlib=schema_madlib,
source_table=source_table,
out_table=out_table,
independent_varname=independent_varname,
indep_name=indep_name,
index_array=py_list_to_sql_string(
[x_design_parser.basis_terms[i]
for i in x_design_parser.subset_basis],
array_type="text"),
zero_base_subset_indices=py_list_to_sql_string(
[i - 1 for i in x_design_parser.subset_basis],
array_type="double precision"),
zero_base_categorical_indices=zero_base_categorical_indices,
summary_table=summary_table,
derivative_str=derivative_str,
set_string=set_string,
unset_string=unset_string,
num_basis=len(x_design_parser.subset_basis))
plpy.execute(execute_string)
# -------------------------------------------------------------------------
#------------------------------------------------------------------------------
# Cox Proportional Hazards marginal effects
#------------------------------------------------------------------------------
def margins_coxph(schema_madlib, model_table, out_table, source_table,
x_design=None, marginal_vars=None,
*args, **kwargs):
"""
Args:
@param schema_madlib
@param model_table
@param out_table
@param x_design
@param source_table
@param marginal_vars
Returns:
None
Raises:
"""
# 1) Get all arguments and validate them
coef, independent_varname, grouping_col = _get_regression_arguments(schema_madlib, model_table)
summary_table = add_postfix(model_table, "_summary")
strata_cols = plpy.execute("SELECT strata from " + summary_table)[0]["strata"]
if not strata_cols:
strata_col_str = ""
strata_cols = ""
else:
strata_col_str = "GROUP BY " + strata_cols
strata_cols += ","
# 2) Parse x_design and marginal_vars argument
x_design_parser = _get_parser(coef, x_design, marginal_vars)
if not x_design_parser.subset_basis:
raise ValueError("Invalid marginal variables selected.")
# 3) Create a matrix representing the 2nd partial derivative
# (1st order wrt x and 2nd order wrt \beta)
indep_name = 'x'
if x_design_parser.contains_interaction():
derivative_list = x_design_parser.create_2nd_derivative_matrix(indep_name)
derivative_str = py_list_to_sql_string(
[py_list_to_sql_string(i, array_type="double precision")
for i in derivative_list],
array_type="double precision")
else:
derivative_str = "NULL::double precision[]"
# 4) Build the margins for indicator variables as discrete differences
set_string, unset_string = _get_categorical_set_unset_strings(
x_design_parser, indep_name, shortened_output=True)
if x_design_parser.contains_indicators():
zero_base_categorical_indices = py_list_to_sql_string(
[i - 1 for i in x_design_parser.get_all_indicator_indices()],
array_type="double precision")
else:
zero_base_categorical_indices = "NULL::double precision[]"
execute_string = """
CREATE TABLE {out_table} AS
SELECT
{index_array} as variables,
(res).margins as margins,
(res).std_err as std_err,
(res).z_stats as z_stats,
(res).p_values as p_values
FROM (
SELECT
{schema_madlib}.__margins_int_coxph_agg(
{indep_name}
, coef
, hessian
, {zero_base_subset_indices}
, {derivative_str}
, {zero_base_categorical_indices}
, {set_string}
, {unset_string})
as res
FROM (
SELECT
{independent_varname} as {indep_name}
FROM {source_table} as s
) q1, {model_table} as m
) q2
""".format(schema_madlib=schema_madlib,
source_table=source_table,
out_table=out_table,
independent_varname=independent_varname,
indep_name=indep_name,
index_array=py_list_to_sql_string(
[x_design_parser.basis_terms[i]
for i in x_design_parser.subset_basis],
array_type="text"),
strata_col_str=strata_col_str,
strata_cols=strata_cols,
model_table=model_table,
zero_base_subset_indices=py_list_to_sql_string(
[i - 1 for i in x_design_parser.subset_basis],
array_type="double precision"),
zero_base_categorical_indices=zero_base_categorical_indices,
derivative_str=derivative_str,
set_string=set_string,
unset_string=unset_string,
num_basis=len(x_design_parser.subset_basis))
plpy.execute(execute_string)
# -------------------------------------------------------------------------