blob: f63606201bea4edae708c8fbffb135175f331d31 [file] [log] [blame]
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8"/>
<title>MADlib: logistic.sql_in File Reference</title>
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<link href="doxygen.css" rel="stylesheet" type="text/css" />
<link href="navtree.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="resize.js"></script>
<script type="text/javascript" src="navtree.js"></script>
<script type="text/javascript">
$(document).ready(initResizable);
</script>
<link href="search/search.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="search/search.js"></script>
<script type="text/javascript">
$(document).ready(function() { searchBox.OnSelectItem(0); });
</script>
<script src="../mathjax/MathJax.js">
MathJax.Hub.Config({
extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
jax: ["input/TeX","output/HTML-CSS"],
});
</script>
</head>
<body>
<div id="top"><!-- do not remove this div! -->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
<tbody>
<tr style="height: 56px;">
<td style="padding-left: 0.5em;">
<div id="projectname">MADlib
&#160;<span id="projectnumber">0.7</span> <span style="font-size:10pt; font-style:italic"><a href="../latest/./logistic_8sql__in.html"> A newer version is available</a></span>
</div>
<div id="projectbrief">User Documentation</div>
</td>
</tr>
</tbody>
</table>
</div>
<!-- Generated by Doxygen 1.7.5.1 -->
<script type="text/javascript">
var searchBox = new SearchBox("searchBox", "search",false,'Search');
</script>
<script type="text/javascript" src="dynsections.js"></script>
<div id="navrow1" class="tabs">
<ul class="tablist">
<li><a href="index.html"><span>Main&#160;Page</span></a></li>
<li><a href="modules.html"><span>Modules</span></a></li>
<li class="current"><a href="files.html"><span>Files</span></a></li>
<li>
<div id="MSearchBox" class="MSearchBoxInactive">
<span class="left">
<img id="MSearchSelect" src="search/mag_sel.png"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
alt=""/>
<input type="text" id="MSearchField" value="Search" accesskey="S"
onfocus="searchBox.OnSearchFieldFocus(true)"
onblur="searchBox.OnSearchFieldFocus(false)"
onkeyup="searchBox.OnSearchFieldChange(event)"/>
</span><span class="right">
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
</span>
</div>
</li>
</ul>
</div>
<div id="navrow2" class="tabs2">
<ul class="tablist">
<li><a href="files.html"><span>File&#160;List</span></a></li>
<li><a href="globals.html"><span>File&#160;Members</span></a></li>
</ul>
</div>
</div>
<div id="side-nav" class="ui-resizable side-nav-resizable">
<div id="nav-tree">
<div id="nav-tree-contents">
</div>
</div>
<div id="splitbar" style="-moz-user-select:none;"
class="ui-resizable-handle">
</div>
</div>
<script type="text/javascript">
initNavTree('logistic_8sql__in.html','');
</script>
<div id="doc-content">
<div class="header">
<div class="summary">
<a href="#func-members">Functions</a> </div>
<div class="headertitle">
<div class="title">logistic.sql_in File Reference</div> </div>
</div>
<div class="contents">
<p>SQL functions for logistic regression.
<a href="#details">More...</a></p>
<p><a href="logistic_8sql__in_source.html">Go to the source code of this file.</a></p>
<table class="memberdecls">
<tr><td colspan="2"><h2><a name="func-members"></a>
Functions</h2></td></tr>
<tr><td class="memItemLeft" align="right" valign="top">void&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="logistic_8sql__in.html#a32880a39de2e36b6c6be72691a6a4a40">logregr_train</a> (varchar tbl_source, varchar tbl_output, varchar dep_col, varchar ind_col, varchar grouping_col, integer max_iter, varchar optimizer, float8 tolerance, boolean verbose)</td></tr>
<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Compute logistic-regression coefficients and diagnostic statistics. <a href="#a32880a39de2e36b6c6be72691a6a4a40"></a><br/></td></tr>
<tr><td class="memItemLeft" align="right" valign="top">float8&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="logistic_8sql__in.html#a4ded9be5c8b111dbb3109efaad83d69e">logistic</a> (float8 x)</td></tr>
<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Evaluate the usual logistic function in an under-/overflow-safe way. <a href="#a4ded9be5c8b111dbb3109efaad83d69e"></a><br/></td></tr>
</table>
<hr/><a name="details" id="details"></a><h2>Detailed Description</h2>
<div class="textblock"><dl class="date"><dt><b>Date:</b></dt><dd>January 2011</dd></dl>
<dl class="see"><dt><b>See also:</b></dt><dd>For a brief introduction to <a class="el" href="logistic_8sql__in.html#a4ded9be5c8b111dbb3109efaad83d69e" title="Evaluate the usual logistic function in an under-/overflow-safe way.">logistic</a> regression, see the module description <a class="el" href="group__grp__logreg.html">Logistic Regression</a>. </dd></dl>
<p>Definition in file <a class="el" href="logistic_8sql__in_source.html">logistic.sql_in</a>.</p>
</div><hr/><h2>Function Documentation</h2>
<a class="anchor" id="a4ded9be5c8b111dbb3109efaad83d69e"></a><!-- doxytag: member="logistic.sql_in::logistic" ref="a4ded9be5c8b111dbb3109efaad83d69e" args="(float8 x)" -->
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">float8 logistic </td>
<td>(</td>
<td class="paramtype">float8&#160;</td>
<td class="paramname"><em>x</em></td><td>)</td>
<td></td>
</tr>
</table>
</div>
<div class="memdoc">
<dl><dt><b>Parameters:</b></dt><dd>
<table class="params">
<tr><td class="paramname">x</td><td></td></tr>
</table>
</dd>
</dl>
<dl class="return"><dt><b>Returns:</b></dt><dd>\( \frac{1}{1 + \exp(-x)} \)</dd></dl>
<p>Evaluating this expression directly can lead to under- or overflows. This function performs the evaluation in a safe manner, making use of the following observations:</p>
<p>In order for the outcome of \( \exp(x) \) to be within the range of the minimum positive double-precision number (i.e., \( 2^{-1074} \)) and the maximum positive double-precision number (i.e., \( (1 + (1 - 2^{-52})) \cdot 2^{1023} \)), \( x \) has to lie between the natural logarithms of these numbers, so roughly between -744 and 709. However, \( 1 + \exp(x) \) will just evaluate to 1 if \( \exp(x) \) is less than the machine epsilon (i.e., \( 2^{-52} \)) or, equivalently, if \( x \) is less than the natural logarithm of that; i.e., in any case if \( x \) is less than -37. Note that taking the reciprocal of the largest double-precision number will not cause an underflow. Hence, no further checks are necessary. </p>
<p>Definition at line <a class="el" href="logistic_8sql__in_source.html#l00598">598</a> of file <a class="el" href="logistic_8sql__in_source.html">logistic.sql_in</a>.</p>
</div>
</div>
<a class="anchor" id="a32880a39de2e36b6c6be72691a6a4a40"></a><!-- doxytag: member="logistic.sql_in::logregr_train" ref="a32880a39de2e36b6c6be72691a6a4a40" args="(varchar tbl_source, varchar tbl_output, varchar dep_col, varchar ind_col, varchar grouping_col, integer max_iter, varchar optimizer, float8 tolerance, boolean verbose)" -->
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">void logregr_train </td>
<td>(</td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>tbl_source</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>tbl_output</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>dep_col</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>ind_col</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>grouping_col</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">integer&#160;</td>
<td class="paramname"><em>max_iter</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>optimizer</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">float8&#160;</td>
<td class="paramname"><em>tolerance</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">boolean&#160;</td>
<td class="paramname"><em>verbose</em>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</div>
<div class="memdoc">
<p>To include an intercept in the model, set one coordinate of the independent-variables array (the column named by <code>ind_col</code>) to 1.</p>
<dl><dt><b>Parameters:</b></dt><dd>
<table class="params">
<tr><td class="paramname">tbl_source</td><td>Name of the source relation containing the training data </td></tr>
<tr><td class="paramname">tbl_output</td><td>Name of the output relation to store the model results. Columns of the output relation are as follows:<ul>
<li><code>coef FLOAT8[]</code> - Array of coefficients, \( \boldsymbol c \)</li>
<li><code>log_likelihood FLOAT8</code> - Log-likelihood \( l(\boldsymbol c) \)</li>
<li><code>std_err FLOAT8[]</code> - Array of standard errors, \( \mathit{se}(c_1), \dots, \mathit{se}(c_k) \)</li>
<li><code>z_stats FLOAT8[]</code> - Array of Wald z-statistics, \( \boldsymbol z \)</li>
<li><code>p_values FLOAT8[]</code> - Array of Wald p-values, \( \boldsymbol p \)</li>
<li><code>odds_ratios FLOAT8[]</code> - Array of odds ratios, \( \mathit{odds}(c_1), \dots, \mathit{odds}(c_k) \)</li>
<li><code>condition_no FLOAT8</code> - The condition number of matrix \( X^T A X \) during the iteration immediately <em>preceding</em> convergence (i.e., \( A \) is computed using the coefficients of the previous iteration) </li>
</ul>
</td></tr>
<tr><td class="paramname">dep_col</td><td>Name of the dependent column (of type BOOLEAN) </td></tr>
<tr><td class="paramname">ind_col</td><td>Name of the independent column (of type DOUBLE PRECISION[]) </td></tr>
<tr><td class="paramname">grouping_col</td><td>Comma delimited list of column names to group-by </td></tr>
<tr><td class="paramname">max_iter</td><td>The maximum number of iterations </td></tr>
<tr><td class="paramname">optimizer</td><td>The optimizer to use (either <code>'irls'</code>/<code>'newton'</code> for iteratively reweighted least squares or <code>'cg'</code> for conjugate gradient) </td></tr>
<tr><td class="paramname">tolerance</td><td>The difference between log-likelihood values in successive iterations that should indicate convergence. This value must be non-negative. A value of zero disables the convergence criterion, so execution will only stop after <code>max_iter</code> iterations. </td></tr>
<tr><td class="paramname">verbose</td><td>If true, any error or warning message will be printed to the console (irrespective of the 'client_min_messages' set by server). If false, no error/warning message is printed to console.</td></tr>
</table>
</dd>
</dl>
<dl class="user"><dt><b>Usage:</b></dt><dd><ul>
<li>Get vector of coefficients \( \boldsymbol c \) and all diagnostic statistics:<br/>
<pre>SELECT logregr_train('<em>sourceName</em>', '<em>outName</em>',
'<em>dependentVariable</em>', '<em>independentVariables</em>');
SELECT * from outName;
</pre></li>
<li>Get vector of coefficients \( \boldsymbol c \):<br/>
<pre>SELECT coef from outName;</pre></li>
<li>Get a subset of the output columns, e.g., only the array of coefficients \( \boldsymbol c \), the log-likelihood \( l(\boldsymbol c) \), and the array of p-values \( \boldsymbol p \): <pre>SELECT coef, log_likelihood, p_values FROM outName;</pre></li>
</ul>
</dd></dl>
<dl class="note"><dt><b>Note:</b></dt><dd>This function starts an iterative algorithm. It is not an aggregate function. Source, output, and column names have to be passed as strings (due to limitations of the SQL syntax). </dd></dl>
<p>Definition at line <a class="el" href="logistic_8sql__in_source.html#l00495">495</a> of file <a class="el" href="logistic_8sql__in_source.html">logistic.sql_in</a>.</p>
</div>
</div>
</div>
</div>
<div id="nav-path" class="navpath">
<ul>
<li class="navelem"><a class="el" href="logistic_8sql__in.html">logistic.sql_in</a> </li>
<!-- window showing the filter options -->
<div id="MSearchSelectWindow"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
onkeydown="return searchBox.OnSearchSelectKey(event)">
<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Files</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Functions</a></div>
<!-- iframe showing the search results (closed by default) -->
<div id="MSearchResultsWindow">
<iframe src="javascript:void(0)" frameborder="0"
name="MSearchResults" id="MSearchResults">
</iframe>
</div>
<li class="footer">Generated on Fri May 10 2013 01:37:13 for MADlib by
<a href="http://www.doxygen.org/index.html">
<img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.7.5.1 </li>
</ul>
</div>
</body>
</html>