| <!-- HTML header for doxygen 1.8.4--> |
| <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> |
| <html xmlns="http://www.w3.org/1999/xhtml"> |
| <head> |
| <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/> |
| <meta http-equiv="X-UA-Compatible" content="IE=9"/> |
| <meta name="generator" content="Doxygen 1.8.4"/> |
| <meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/> |
| <title>MADlib: Marginal Effects</title> |
| <link href="tabs.css" rel="stylesheet" type="text/css"/> |
| <script type="text/javascript" src="jquery.js"></script> |
| <script type="text/javascript" src="dynsections.js"></script> |
| <link href="navtree.css" rel="stylesheet" type="text/css"/> |
| <script type="text/javascript" src="resize.js"></script> |
| <script type="text/javascript" src="navtree.js"></script> |
| <script type="text/javascript"> |
| $(document).ready(initResizable); |
| $(window).load(resizeHeight); |
| </script> |
| <link href="search/search.css" rel="stylesheet" type="text/css"/> |
| <script type="text/javascript" src="search/search.js"></script> |
| <script type="text/javascript"> |
| $(document).ready(function() { searchBox.OnSelectItem(0); }); |
| </script> |
| <script type="text/x-mathjax-config"> |
| MathJax.Hub.Config({ |
| extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"], |
| jax: ["input/TeX","output/HTML-CSS"], |
| }); |
| </script><script src="../mathjax/MathJax.js"></script> |
| <link href="doxygen.css" rel="stylesheet" type="text/css" /> |
| <link href="madlib_extra.css" rel="stylesheet" type="text/css"/> |
| <!-- google analytics --> |
| <script> |
| (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ |
| (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), |
| m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) |
| })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); |
| ga('create', 'UA-45382226-1', 'auto'); |
| ga('send', 'pageview'); |
| </script> |
| </head> |
| <body> |
| <div id="top"><!-- do not remove this div, it is closed by doxygen! --> |
| <div id="titlearea"> |
| <table cellspacing="0" cellpadding="0"> |
| <tbody> |
| <tr style="height: 56px;"> |
| <td style="padding-left: 0.5em;"> |
| <div id="projectname">MADlib |
|  <span id="projectnumber">1.2</span> <span style="font-size:10pt; font-style:italic"><a href="../latest/./group__grp__marginal.html"> A newer version is available</a></span> |
| </div> |
| <div id="projectbrief">User Documentation</div> |
| </td> |
| <!--BEGIN VERSIONS LINKS--> |
| <td style="padding-left: 0.5em;"> |
| <div class="versionlist"><ul> |
| <li class="head">More versions:</li> |
| <li><a href="../v1.1/index.html">v1.1</a></li> |
| <li><a href="../v1.0/index.html">v1.0</a></li> |
| <li><a href="../v0.7/index.html">v0.7</a></li> |
| <li><a href="../v0.5/index.html">v0.5</a></li></ul> |
| </div> |
| </td> |
| <td> <div id="MSearchBox" class="MSearchBoxInactive"> |
| <span class="left"> |
| <img id="MSearchSelect" src="search/mag_sel.png" |
| onmouseover="return searchBox.OnSearchSelectShow()" |
| onmouseout="return searchBox.OnSearchSelectHide()" |
| alt=""/> |
| <input type="text" id="MSearchField" value="Search" accesskey="S" |
| onfocus="searchBox.OnSearchFieldFocus(true)" |
| onblur="searchBox.OnSearchFieldFocus(false)" |
| onkeyup="searchBox.OnSearchFieldChange(event)"/> |
| </span><span class="right"> |
| <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a> |
| </span> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <!-- end header part --> |
| <!-- Generated by Doxygen 1.8.4 --> |
| <script type="text/javascript"> |
| var searchBox = new SearchBox("searchBox", "search",false,'Search'); |
| </script> |
| </div><!-- top --> |
| <div id="side-nav" class="ui-resizable side-nav-resizable"> |
| <div id="nav-tree"> |
| <div id="nav-tree-contents"> |
| <div id="nav-sync" class="sync"></div> |
| </div> |
| </div> |
| <div id="splitbar" style="-moz-user-select:none;" |
| class="ui-resizable-handle"> |
| </div> |
| </div> |
| <script type="text/javascript"> |
| $(document).ready(function(){initNavTree('group__grp__marginal.html','');}); |
| </script> |
| <div id="doc-content"> |
| <!-- window showing the filter options --> |
| <div id="MSearchSelectWindow" |
| onmouseover="return searchBox.OnSearchSelectShow()" |
| onmouseout="return searchBox.OnSearchSelectHide()" |
| onkeydown="return searchBox.OnSearchSelectKey(event)"> |
| <a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark"> </span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark"> </span>Files</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark"> </span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark"> </span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark"> </span>Groups</a></div> |
| |
| <!-- iframe showing the search results (closed by default) --> |
| <div id="MSearchResultsWindow"> |
| <iframe src="javascript:void(0)" frameborder="0" |
| name="MSearchResults" id="MSearchResults"> |
| </iframe> |
| </div> |
| |
| <div class="header"> |
| <div class="headertitle"> |
| <div class="title">Marginal Effects<div class="ingroups"><a class="el" href="group__grp__glm.html">Generalized Linear Models</a></div></div> </div> |
| </div><!--header--> |
| <div class="contents"> |
| <dl class="section user"><dt>About</dt><dd></dd></dl> |
| <p>A marginal effect (ME) or partial effect measures the effect on the conditional mean of \( y \) of a change in one of the regressors, say \(X_k\). In the linear regression model, the ME equals the relevant slope coefficient, greatly simplifying analysis. For nonlinear models, we require specialized algorithms for calculating ME.</p> |
| <p>The standard approach to modeling dichotomous/binary variables (so \(y \in \{0, 1\} \)) is to estimate a generalized linear model under the assumption that \( y \) follows some form of Bernoulli distribution. Thus the expected value of \( y \) becomes, </p> |
| <p class="formulaDsp"> |
| \[ y = G(X' \beta), \] |
| </p> |
| <p>where G is the specified binomial distribution. For logistic regression, the function \( G \) represents the inverse logit function.</p> |
| <p>In logistic regression: </p> |
| <p class="formulaDsp"> |
| \[ P = \frac{1}{1 + e^{-(\beta_0 + \beta_1 x_1 + \dots + \beta_j x_j)}} = \frac{1}{1 + e^{-z}} \implies \frac{\partial P}{\partial X_k} = \beta_k \cdot \frac{1}{1 + e^{-z}} \cdot \frac{e^{-z}}{1 + e^{-z}} \\ = \beta_k \cdot P \cdot (1-P) \] |
| </p> |
| <p>There are several methods for calculating the marginal effects for dichotomous dependent variables. This package uses the average of the marginal effects at every sample observation.</p> |
| <p>This is calculated as follows: </p> |
| <p class="formulaDsp"> |
| \[ \frac{\partial y}{\partial x_k} = \beta_k \frac{\sum_{i=1}^n P(y_i = 1)(1-P(y_i = 1))}{n}, \\ \text{where}, P(y_i=1) = G(X^{(i)}\beta) \] |
| </p> |
| <p>We use the delta method for calculating standard errors on the marginal effects.</p> |
| <dl class="section user"><dt>Input</dt><dd></dd></dl> |
| <p>The input parameters are expected to be of the following form: </p> |
| <pre> <em>margins_logregr</em> ( |
| <em>sourceTable</em> VARCHAR, |
| <em>outputTable</em> VARCHAR, |
| <em>dependentVariable</em> VARCHAR, |
| <em>independentVariable</em> VARCHAR, |
| [<em>groupingCol</em> VARCHAR, |
| <em>marginal_vars</em> INTEGER[], |
| <em>max_iter</em> INTEGER, |
| <em>optimizer</em> VARCHAR, |
| <em>tolerance</em> DOUBLE PRECISION |
| ] |
| )</pre><p>For multinomial logistic regression the input parameters are expected to be of the following form: </p> |
| <pre> <em>margins_mlogregr</em> ( |
| <em>sourceTable</em> VARCHAR, |
| <em>outputTable</em> VARCHAR, |
| <em>dependentVariable</em> VARCHAR, |
| <em>independentVariable</em> VARCHAR, |
| [<em>groupingCol</em> VARCHAR, |
| <em>referenceCategory</em> INTEGER, |
| <em>marginal_vars</em> INTEGER[], |
| <em>max_iter</em> INTEGER, |
| <em>optimizer</em> VARCHAR, |
| <em>tolerance</em> DOUBLE PRECISION |
| ] |
| )</pre><dl class="section warning"><dt>Warning</dt><dd>The <b>'groupingCol'</b> input parameter for <em>margins_logregr</em> and <em>margins_mlogregr</em> is a placeholder in the Madlib V1.0. These inputs will be implemented in a future release. </dd></dl> |
| <dl class="section user"><dt>Usage</dt><dd></dd></dl> |
| <p><b> The Interface</b></p> |
| <p>For logistic regression, one can call the following function </p> |
| <pre> |
| SELECT madlib.<a class="el" href="marginal_8sql__in.html#ad37e0349ac3924daba27c4600311ed71">margins_logregr</a>( |
| <em>'source_table'</em>, -- name of input table, VARCHAR |
| <em>'out_table'</em>, -- name of output table, VARCHAR |
| <em>'dependent_varname'</em>, -- dependent variable, VARCHAR |
| <em>'independent_varname'</em>, -- independent variable, VARCHAR |
| [ <em>'grouping_cols'</em>, -- comma separated list of grouping vars, VARCHAR (Default NULL) |
| <em>'marginal_effect_variables'</em>, -- Index list (base 1) with positions representing which marginal variable to calculate, INTEGER[] (Default NULL) |
| <em> max_iter</em>, -- Maximum number of iterations to run the logistic regression INTEGER (Default 20) |
| <em>'optimizer'</em>, -- Optimizer used for logistic regression VARCHAR (Default &amp; recommended 'irls') |
| <em>tolerance</em>, -- Tolerance for the logistic regression DOUBLE PRECISION (default 1e-4) |
| ] |
| ); |
| </pre><p>For multinomial logistic regression, one can call the following function </p> |
| <pre> |
| SELECT madlib.margins_mlogregr( |
| <em>'source_table'</em>, -- name of input table, VARCHAR |
| <em>'out_table'</em>, -- name of output table, VARCHAR |
| <em>'dependent_varname'</em>, -- dependent variable, VARCHAR |
| <em>'independent_varname'</em>, -- independent variable, VARCHAR |
| [ <em>'reference_category'</em>, -- Reference category for multinomial logistic regression INTEGER |
| <em>'grouping_cols'</em>, -- comma separated list of grouping vars, VARCHAR (Default NULL) |
| <em>'marginal_effect_variables'</em>, -- Index list (base 1) with positions representing which marginal variable to calculate, INTEGER[] (Default NULL) |
| <em> max_iter</em>, -- Maximum number of iterations to run the logistic regression INTEGER (Default 20) |
| <em>'optimizer'</em>, -- Optimizer used for logistic regression VARCHAR (Default &amp; recommended 'irls') |
| <em>tolerance</em>, -- Tolerance for the logistic regression DOUBLE PRECISION (default 1e-4) |
| ] |
| ); |
| </pre><p>Output is stored in the <em>out_table</em>: </p> |
| <pre> |
| [ margins | std_err | t_stats | p_values | |
| +------+---------+-------+----------+ |
| </pre><p><b> Multinomial-Logistic Regression Notes</b></p> |
| <ul> |
| <li>The reference category ranges from [0, numCategories-1]. The default reference category is zero.</li> |
| <li>The marginal effects and supporting statistics are output in the following order: for a problem with K independent variables (1, ..., K) and J categories (0, ..., J-1), the output is \( \{ m_{k_1, j_0} \ldots m_{k_K, j_{0}}, m_{k_1, j_1} \ldots m_{k_K, j_{1}}, \ldots, m_{k_1, j_{J-1}} \ldots m_{k_K, j_{J-1}} \} \). The order is NOT CONSISTENT with mlogregr, robust_variance_mlogregr and clustered_variance_mlogregr. This is deliberate because the interfaces of all the other functions will be moved to match that used in marginal.</li> |
| <li>Selectively picking the marginal effects of the independent variables will return the marginal effects of ALL categories associated with the independent variable.</li> |
| <li>Here the <em>'independent_varname'</em> can be the name of a column, which contains array of numeric values. It can also have a format of string 'array[1, x1, x2, x3]', where <em>x1</em>, <em>x2</em> and <em>x3</em> are all column names.</li> |
| <li>Here the <em>'vars'</em> is an index list (base 1) with the set of indices amongst the independent variables that must be selected. One can also choose <em>NULL</em> to pick all independent variables.</li> |
| </ul> |
| <dl class="section user"><dt>Examples</dt><dd><ol type="1"> |
| <li>Create the sample data set: <pre class="fragment">sql> SELECT * FROM data; |
| id | second_attack | treatment | trait_anxiety |
| ----+---------------+-----------+--------------- |
| 1 | 1 | 1 | 70 |
| 3 | 1 | 1 | 50 |
| 5 | 1 | 0 | 40 |
| 7 | 1 | 0 | 75 |
| 9 | 1 | 0 | 70 |
| 11 | 0 | 1 | 65 |
| 13 | 0 | 1 | 45 |
| 15 | 0 | 1 | 40 |
| 17 | 0 | 0 | 55 |
| ... |
| </pre></li> |
| <li>For function summary information, run <pre class="fragment">sql> select margins_logregr('help'); |
| OR |
| sql> select margins_logregr(); |
| OR |
| sql> select margins_logregr(''); |
| </pre></li> |
| <li>Run the logistic regression function and then compute the marginal effects of all variables in the regression: <pre class="fragment">sql> select margins_logregr('patients', 'result_table', 'second_attack', 'ARRAY[1, treatment, trait_anxiety]'); |
| sql> select * from result_table; |
| margins | {-0.970665392796,-0.156214190168,0.0181587690137} |
| coef | {-6.36346994178179,-1.02410605239327,0.119044916668605} |
| std_err | {0.802871454422,0.292691682191,0.0137459874022} |
| t_stats | {-1.2089922832,-0.533715850748,1.32102325446} |
| p_values | {0.243212810329,0.600447858606,0.204000202116} |
| </pre></li> |
| <li>Alternate syntax: Run the logistic regression function and then compute the marginal effects of all variables in the regression: <pre class="fragment">sql> select margins_logregr('patients', 'result_table', 'second_attack', 'ARRAY[1, treatment, trait_anxiety]', NULL, NULL); |
| sql> select * from result_table; |
| margins | {-0.970665392796,-0.156214190168,0.0181587690137} |
| coef | {-6.36346994178179,-1.02410605239327,0.119044916668605} |
| std_err | {0.802871454422,0.292691682191,0.0137459874022} |
| t_stats | {-1.2089922832,-0.533715850748,1.32102325446} |
| p_values | {0.243212810329,0.600447858606,0.204000202116} |
| </pre></li> |
| <li>Run the logistic regression function and then compute the marginal effects of the first variable in the regression: <pre class="fragment">sql> select margins_logregr('patients', 'result_table', 'second_attack', 'ARRAY[1, treatment, trait_anxiety]', NULL, ARRAY[1]); |
| sql> select * from result_table; |
| margins | {-0.970665392796} |
| coef | {-6.36346994178179} |
| std_err | {0.802871454422} |
| t_stats | {-1.2089922832} |
| p_values | {0.243212810329} |
| </pre></li> |
| </ol> |
| </dd></dl> |
| <dl class="section user"><dt>Examples</dt><dd><ol type="1"> |
| <li>Create the sample data set (the full dataset has 3 categories): <pre class="fragment">sql> SELECT * FROM data; |
| id | feature_1 | feature_2 | category |
| ----+---------------+-----------+--------------- |
| 1 | 1 | 35 | 1 |
| 3 | 2 | 33 | 0 |
| 5 | 3 | 39 | 1 |
| 7 | 1 | 37 | 1 |
| 9 | 2 | 31 | 1 |
| 11 | 3 | 36 | 1 |
| 13 | 2 | 36 | 1 |
| 15 | 2 | 36 | 0 |
| 17 | 2 | 31 | 5 |
| ... |
| </pre></li> |
| <li>For function summary information, run <pre class="fragment">sql> select margins_mlogregr('help'); |
| OR |
| sql> select margins_mlogregr(); |
| OR |
| sql> select margins_mlogregr(''); |
| </pre></li> |
| <li>Run the regression function and then compute the marginal effects of all variables in the regression (see docs for detailed order) <pre class="fragment">sql> select margins_mlogregr('test_data', 'result_table', 'category', 'ARRAY[1, feature_1, feature_2]'); |
| sql> select * from result_table; |
| margins | {0.741613239156,-0.032868883552,-0.0144502990691,-0.972055011831,0.112337273885,0.0172621628253} |
| std_err | {0.183172236055,0.044184899499,0.00332608999704,0.263532615748,0.0555196094594,0.00457999429836} |
| t_stats | {4.04872078394,-0.743894043547,-4.34453038911,-3.68855676202,2.02338011702,3.76903587663} |
| p_values | {7.43784735554e-05,0.457840607871,2.24855476205e-05,0.000292799037776,0.0444060346517,0.000217384008015} |
| </pre></li> |
| <li>Run the regression and compute the marginal effects for the first independent variable (all categories) <pre class="fragment">sql> select margins_mlogregr('test_data', 'result_table', 'category', 'ARRAY[1, feature_1, feature_2]', 0, NULL, ARRAY[1]); |
| sql> select * from result_table; |
| margins | {0.741613239156,-0.972055011831} |
| std_err | {0.183172236055,0.263532615748} |
| t_stats | {4.04872078394,-3.68855676202} |
| p_values | {7.43784735554e-05,0.000292799037776} |
| </pre></li> |
| </ol> |
| </dd></dl> |
| <pre class="fragment">-# For function usage information. |
| sql> select margins_mlogregr('usage'); |
| </pre><dl class="section user"><dt>Literature</dt><dd></dd></dl> |
| <p>[1] mfx function in STATA: <a href="http://www.stata.com/help.cgi?mfx_option">http://www.stata.com/help.cgi?mfx_option</a></p> |
| <dl class="section see"><dt>See Also</dt><dd>File <a class="el" href="marginal_8sql__in.html" title="SQL functions for linear regression. ">marginal.sql_in</a> documenting the SQL functions. </dd></dl> |
| </div><!-- contents --> |
| </div><!-- doc-content --> |
| <!-- start footer part --> |
| <div id="nav-path" class="navpath"><!-- id is needed for treeview function! --> |
| <ul> |
| <li class="footer">Generated on Thu Jan 9 2014 20:35:40 for MADlib by |
| <a href="http://www.doxygen.org/index.html"> |
| <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.4 </li> |
| </ul> |
| </div> |
| </body> |
| </html> |