| <!-- HTML header for doxygen 1.8.4--> |
| <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> |
| <html xmlns="http://www.w3.org/1999/xhtml"> |
| <head> |
| <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/> |
| <meta http-equiv="X-UA-Compatible" content="IE=9"/> |
| <meta name="generator" content="Doxygen 1.8.4"/> |
| <meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/> |
| <title>MADlib: Linear Regression</title> |
| <link href="tabs.css" rel="stylesheet" type="text/css"/> |
| <script type="text/javascript" src="jquery.js"></script> |
| <script type="text/javascript" src="dynsections.js"></script> |
| <link href="navtree.css" rel="stylesheet" type="text/css"/> |
| <script type="text/javascript" src="resize.js"></script> |
| <script type="text/javascript" src="navtree.js"></script> |
| <script type="text/javascript"> |
// Enable the draggable splitter between the nav tree and the content pane
// once the DOM is ready (initResizable is defined in resize.js).
$(document).ready(initResizable);
// Recompute the resizable panel height only after all assets have loaded,
// since image/iframe sizes affect the final layout.
$(window).load(resizeHeight);
| </script> |
| <link href="search/search.css" rel="stylesheet" type="text/css"/> |
| <script type="text/javascript" src="search/search.js"></script> |
| <script type="text/javascript"> |
| $(document).ready(function() { searchBox.OnSelectItem(0); }); |
| </script> |
| <script type="text/x-mathjax-config"> |
// MathJax configuration: TeX input with the AMS math/symbol extensions,
// rendered via the HTML-CSS output processor.
MathJax.Hub.Config({
  extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
  // No trailing comma after the last property: a dangling comma in an
  // object literal is a syntax error in legacy IE parsers.
  jax: ["input/TeX", "output/HTML-CSS"]
});
| </script><script src="../mathjax/MathJax.js"></script> |
| <link href="doxygen.css" rel="stylesheet" type="text/css" /> |
| <link href="madlib_extra.css" rel="stylesheet" type="text/css"/> |
| <!-- google analytics --> |
| <script> |
// Standard Google Analytics (analytics.js) async loader snippet:
// defines a ga() command queue and injects the analytics.js script
// before the first <script> element on the page.
// The script URL is explicit https: (not protocol-relative "//...") so
// tracking still loads when this generated documentation is opened
// locally over the file:// protocol.
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-45382226-1', 'auto');   // property UA-45382226-1, auto cookie domain
ga('send', 'pageview');                  // record one pageview per page load
| </script> |
| </head> |
| <body> |
| <div id="top"><!-- do not remove this div, it is closed by doxygen! --> |
| <div id="titlearea"> |
| <table cellspacing="0" cellpadding="0"> |
| <tbody> |
| <tr style="height: 56px;"> |
| <td style="padding-left: 0.5em;"> |
| <div id="projectname">MADlib |
|  <span id="projectnumber">1.4.1</span> |
| </div> |
| <div id="projectbrief">User Documentation</div> |
| </td> |
| <td> <div id="MSearchBox" class="MSearchBoxInactive"> |
| <span class="left"> |
| <img id="MSearchSelect" src="search/mag_sel.png" |
| onmouseover="return searchBox.OnSearchSelectShow()" |
| onmouseout="return searchBox.OnSearchSelectHide()" |
| alt=""/> |
| <input type="text" id="MSearchField" value="Search" accesskey="S" |
| onfocus="searchBox.OnSearchFieldFocus(true)" |
| onblur="searchBox.OnSearchFieldFocus(false)" |
| onkeyup="searchBox.OnSearchFieldChange(event)"/> |
| </span><span class="right"> |
| <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a> |
| </span> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <!-- end header part --> |
| <!-- Generated by Doxygen 1.8.4 --> |
| <script type="text/javascript"> |
| var searchBox = new SearchBox("searchBox", "search",false,'Search'); |
| </script> |
| </div><!-- top --> |
| <div id="side-nav" class="ui-resizable side-nav-resizable"> |
| <div id="nav-tree"> |
| <div id="nav-tree-contents"> |
| <div id="nav-sync" class="sync"></div> |
| </div> |
| </div> |
| <div id="splitbar" style="-moz-user-select:none;" |
| class="ui-resizable-handle"> |
| </div> |
| </div> |
| <script type="text/javascript"> |
| $(document).ready(function(){initNavTree('group__grp__linreg.html','');}); |
| </script> |
| <div id="doc-content"> |
| <!-- window showing the filter options --> |
| <div id="MSearchSelectWindow" |
| onmouseover="return searchBox.OnSearchSelectShow()" |
| onmouseout="return searchBox.OnSearchSelectHide()" |
| onkeydown="return searchBox.OnSearchSelectKey(event)"> |
| <a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark"> </span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark"> </span>Files</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark"> </span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark"> </span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark"> </span>Groups</a></div> |
| |
| <!-- iframe showing the search results (closed by default) --> |
| <div id="MSearchResultsWindow"> |
| <iframe src="javascript:void(0)" frameborder="0" |
| name="MSearchResults" id="MSearchResults"> |
| </iframe> |
| </div> |
| |
| <div class="header"> |
| <div class="headertitle"> |
| <div class="title">Linear Regression<div class="ingroups"><a class="el" href="group__grp__glm.html">Generalized Linear Models</a></div></div> </div> |
| </div><!--header--> |
| <div class="contents"> |
<div class="toc"><b>Contents</b>
| <ul> |
| <li class="level1"> |
| <a href="#train">Training Function</a> </li> |
| <li class="level1"> |
| <a href="#predict">Prediction Function</a> </li> |
| <li class="level1"> |
| <a href="#examples">Examples</a> </li> |
| <li class="level1"> |
| <a href="#background">Technical Background</a> </li> |
| <li class="level1"> |
| <a href="#literature">Literature</a> </li> |
| <li class="level1"> |
| <a href="#related">Related Topics</a> </li> |
| </ul> |
| </div><p>Linear regression models a linear relationship of a scalar dependent variable \( y \) to one or more explanatory independent variables \( x \) to build a model of coefficients.</p> |
| <p><a class="anchor" id="train"></a></p> |
| <dl class="section user"><dt>Training Function</dt><dd></dd></dl> |
| <p>The linear regression training function has the following syntax. </p> |
| <pre class="syntax"> |
| linregr_train( source_table, |
| out_table, |
| dependent_varname, |
| independent_varname, |
| grouping_cols, |
| heteroskedasticity_option |
| ) |
| </pre><p><b>Arguments</b> </p> |
| <dl class="arglist"> |
| <dt>source_table </dt> |
| <dd><p class="startdd">TEXT. The name of the table containing the training data.</p> |
| <p class="enddd"></p> |
| </dd> |
| <dt>out_table </dt> |
| <dd><p class="startdd">TEXT. Name of the generated table containing the output model.</p> |
| <p>The output table contains the following columns. </p> |
| <table class="output"> |
| <tr> |
| <th><...> </th><td>Any grouping columns provided during training. Present only if the grouping option is used. </td></tr> |
| <tr> |
| <th>coef </th><td>FLOAT8[]. Vector of the coefficients of the regression. </td></tr> |
| <tr> |
| <th>r2 </th><td>FLOAT8. R-squared coefficient of determination of the model. </td></tr> |
| <tr> |
| <th>std_err </th><td>FLOAT8[]. Vector of the standard error of the coefficients. </td></tr> |
| <tr> |
| <th>t_stats </th><td>FLOAT8[]. Vector of the t-statistics of the coefficients. </td></tr> |
| <tr> |
| <th>p_values </th><td>FLOAT8[]. Vector of the p-values of the coefficients. </td></tr> |
| <tr> |
<th>condition_no </th><td>FLOAT8. The condition number of the \(X^{*}X\) matrix. A high condition number is usually an indication that there may be some numeric instability in the result yielding a less reliable model. A high condition number often results when there is a significant amount of collinearity in the underlying design matrix, in which case other regression techniques, such as elastic net regression, may be more appropriate. </td></tr>
| <tr> |
<th>bp_stats </th><td>FLOAT8. The Breusch-Pagan statistic of heteroskedasticity. Present only if the heteroskedasticity argument was set to TRUE when the model was trained. </td></tr>
| <tr> |
<th>bp_p_value </th><td>FLOAT8. The Breusch-Pagan calculated p-value. Present only if the heteroskedasticity parameter was set to TRUE when the model was trained. </td></tr>
| <tr> |
| <th>num_rows_processed </th><td>INTEGER. The number of rows that are actually used in each group. </td></tr> |
| <tr> |
| <th>num_missing_rows_skipped </th><td>INTEGER. The number of rows that have NULL values in the dependent and independent variables, and were skipped in the computation for each group. </td></tr> |
| </table> |
| <p>A summary table named <out_table>_summary is created together with the output table. It has the following columns: </p> |
| <table class="output"> |
| <tr> |
| <th>source_table </th><td>The data source table name </td></tr> |
| <tr> |
| <th>out_table </th><td>The output table name </td></tr> |
| <tr> |
| <th>dependent_varname </th><td>The dependent variable </td></tr> |
| <tr> |
| <th>independent_varname </th><td>The independent variables </td></tr> |
| <tr> |
| <th>num_rows_processed </th><td>The total number of rows that were used in the computation. </td></tr> |
| <tr> |
| <th>num_missing_rows_skipped </th><td>The total number of rows that were skipped because of NULL values in them. </td></tr> |
| </table> |
| <dl class="section note"><dt>Note</dt><dd>For p-values, we just return the computation result directly. Other statistical packages, like 'R', produce the same result, but on printing the result to screen, another format function is used and any p-value that is smaller than the machine epsilon (the smallest positive floating-point number 'x' such that '1 + x != 1') will be printed on screen as "< xxx" (xxx is the value of the machine epsilon). Although the result may look different, they are in fact the same. </dd></dl> |
| </dd> |
| <dt>dependent_varname </dt> |
| <dd><p class="startdd">TEXT. Expression to evaluate for the dependent variable.</p> |
| <p class="enddd"></p> |
| </dd> |
| <dt>independent_varname </dt> |
| <dd><p class="startdd">TEXT. Expression list to evaluate for the independent variables. An intercept variable is not assumed. It is common to provide an explicit intercept term by including a single constant <code>1</code> term in the independent variable list.</p> |
| <p class="enddd"></p> |
| </dd> |
| <dt>grouping_cols (optional) </dt> |
| <dd><p class="startdd">TEXT, default: NULL. An expression list used to group the input dataset into discrete groups, running one regression per group. Similar to the SQL <code>GROUP BY</code> clause. When this value is null, no grouping is used and a single result model is generated.</p> |
| <p class="enddd"></p> |
| </dd> |
| <dt>heteroskedasticity_option (optional) </dt> |
| <dd>BOOLEAN, default: FALSE. When TRUE, the heteroskedasticity of the model is also calculated and returned with the results. </dd> |
| </dl> |
| <p><a class="anchor" id="warning"></a></p> |
| <dl class="section warning"><dt>Warning</dt><dd>The aggregate 'linregr' has been deprecated in favor of the function 'linregr_train'. If the aggregate 'linregr' is used to output the results of linear regression to a table, it is recommended to follow the general pattern shown below (replace text within '<...>' with the appropriate variable names). <pre class="syntax"> |
| CREATE TABLE <output table> AS |
| SELECT (r).* |
| FROM ( |
| SELECT linregr(<dependent variable>, <independent variable>) as r |
| FROM <source table> |
| ) q; |
| </pre></dd></dl> |
| <p><a class="anchor" id="predict"></a></p> |
| <dl class="section user"><dt>Prediction Function</dt><dd><pre class="syntax"> |
| linregr_predict(coef, col_ind) |
| </pre> <b>Arguments</b> <dl class="arglist"> |
| <dt>coef </dt> |
| <dd><p class="startdd">FLOAT8[]. Vector of the coefficients of regression.</p> |
| <p class="enddd"></p> |
| </dd> |
| <dt>col_ind </dt> |
<dd><p class="startdd">FLOAT8[]. An array containing the values of the independent variables (the same expression list used for training). </p>
| <p class="enddd"><a class="anchor" id="examples"></a></p> |
| </dd> |
| </dl> |
| </dd></dl> |
| <dl class="section user"><dt>Examples</dt><dd><ol type="1"> |
| <li>Create an input data set. <pre class="example"> |
| CREATE TABLE houses (id INT, tax INT, bedroom INT, bath FLOAT, price INT, |
| size INT, lot INT); |
| COPY houses FROM STDIN WITH DELIMITER '|'; |
| 1 | 590 | 2 | 1 | 50000 | 770 | 22100 |
| 2 | 1050 | 3 | 2 | 85000 | 1410 | 12000 |
| 3 | 20 | 3 | 1 | 22500 | 1060 | 3500 |
| 4 | 870 | 2 | 2 | 90000 | 1300 | 17500 |
| 5 | 1320 | 3 | 2 | 133000 | 1500 | 30000 |
| 6 | 1350 | 2 | 1 | 90500 | 820 | 25700 |
| 7 | 2790 | 3 | 2.5 | 260000 | 2130 | 25000 |
| 8 | 680 | 2 | 1 | 142500 | 1170 | 22000 |
| 9 | 1840 | 3 | 2 | 160000 | 1500 | 19000 |
| 10 | 3680 | 4 | 2 | 240000 | 2790 | 20000 |
| 11 | 1660 | 3 | 1 | 87000 | 1030 | 17500 |
| 12 | 1620 | 3 | 2 | 118600 | 1250 | 20000 |
| 13 | 3100 | 3 | 2 | 140000 | 1760 | 38000 |
| 14 | 2070 | 2 | 3 | 148000 | 1550 | 14000 |
| 15 | 650 | 3 | 1.5 | 65000 | 1450 | 12000 |
| \. |
| </pre></li> |
| <li>Train a regression model. First, a single regression for all the data. <pre class="example"> |
| SELECT madlib.linregr_train( 'houses', |
| 'houses_linregr', |
| 'price', |
| 'ARRAY[1, tax, bath, size]' |
| ); |
| </pre></li> |
| <li>Generate three output models, one for each value of "bedroom". <pre class="example"> |
| SELECT madlib.linregr_train( 'houses', |
| 'houses_linregr_bedroom', |
| 'price', |
| 'ARRAY[1, tax, bath, size]', |
| 'bedroom' |
| ); |
| </pre></li> |
| <li>Examine the resulting models. <pre class="example"> |
| -- Set extended display on for easier reading of output |
| \x ON |
| SELECT * FROM houses_linregr; |
| </pre> Result: <pre class="result"> |
| -[ RECORD 1 ]+--------------------------------------------------------------------------- |
| coef | {-12849.4168959872,28.9613922651765,10181.6290712648,50.516894915354} |
| r2 | 0.768577580597443 |
| std_err | {33453.0344331391,15.8992104963997,19437.7710925923,32.928023174087} |
| t_stats | {-0.38410317968819,1.82156166004184,0.523806408809133,1.53416118083605} |
| p_values | {0.708223134615422,0.0958005827189772,0.610804093526536,0.153235085548186} |
| condition_no | 9002.50457085737 |
| </pre></li> |
| <li>View the results grouped by bedroom. <pre class="example"> |
| SELECT * FROM houses_linregr_bedroom; |
| </pre> Result: <pre class="result"> |
| -[ RECORD 1 ]+-------------------------------------------------------------------------- |
| bedroom | 2 |
| coef | {-84242.0345406597,55.4430144648696,-78966.9753675319,225.611910021192} |
| r2 | 0.968809546465313 |
| std_err | {35018.9991665742,19.5731125320686,23036.8071292552,49.0448678148784} |
| t_stats | {-2.40560942761235,2.83261103077151,-3.42786111480046,4.60011251070697} |
| p_values | {0.250804617665239,0.21605133377602,0.180704400437373,0.136272031474122} |
| condition_no | 10086.1048721726 |
| -[ RECORD 2 ]+-------------------------------------------------------------------------- |
| bedroom | 4 |
| coef | {0.0112536020318378,41.4132554771633,0.0225072040636757,31.3975496688276} |
| r2 | 1 |
| std_err | {0,0,0,0} |
| t_stats | {Infinity,Infinity,Infinity,Infinity} |
| p_values | |
| condition_no | Infinity |
| -[ RECORD 3 ]+-------------------------------------------------------------------------- |
| bedroom | 3 |
| coef | {-88155.8292501601,27.1966436294429,41404.0293363612,62.637521075324} |
| r2 | 0.841699901311252 |
| std_err | {57867.9999702625,17.8272309154689,43643.1321511114,70.8506824863954} |
| t_stats | {-1.52339512849005,1.52556747362508,0.948695185143966,0.884077878676067} |
| p_values | {0.188161432894871,0.187636685729869,0.386340032374927,0.417132778705789} |
| condition_no | 11722.6225642147 |
| </pre> Alternatively you can unnest the results for easier reading of output. <pre class="example"> |
| \x OFF |
| SELECT unnest(ARRAY['intercept','tax','bath','size']) as attribute, |
| unnest(coef) as coefficient, |
| unnest(std_err) as standard_error, |
| unnest(t_stats) as t_stat, |
| unnest(p_values) as pvalue |
| FROM houses_linregr; |
| </pre></li> |
| <li>Use the prediction function to evaluate residuals. <pre class="example"> |
| SELECT houses.*, |
       madlib.linregr_predict( m.coef,
                               ARRAY[1,tax,bath,size]
                             ) as predict,
        price -
        madlib.linregr_predict( m.coef,
                                ARRAY[1,tax,bath,size]
                              ) as residual
| FROM houses, houses_linregr m; |
| </pre></li> |
| </ol> |
| </dd></dl> |
| <p><a class="anchor" id="notes"></a></p> |
| <dl class="section user"><dt>Note</dt><dd>All table names can be optionally schema qualified (current_schemas() would be searched if a schema name is not provided) and all table and column names should follow case-sensitivity and quoting rules per the database. (For instance, 'mytable' and 'MyTable' both resolve to the same entity, i.e. 'mytable'. If mixed-case or multi-byte characters are desired for entity names then the string should be double-quoted; in this case the input would be '"MyTable"').</dd></dl> |
| <p><a class="anchor" id="background"></a></p> |
| <dl class="section user"><dt>Technical Background</dt><dd></dd></dl> |
| <p>Ordinary least-squares (OLS) linear regression refers to a stochastic model in which the conditional mean of the dependent variable (usually denoted \( Y \)) is an affine function of the vector of independent variables (usually denoted \( \boldsymbol x \)). That is, </p> |
| <p class="formulaDsp"> |
| \[ E[Y \mid \boldsymbol x] = \boldsymbol c^T \boldsymbol x \] |
| </p> |
| <p> for some unknown vector of coefficients \( \boldsymbol c \). The assumption is that the residuals are i.i.d. distributed Gaussians. That is, the (conditional) probability density of \( Y \) is given by </p> |
| <p class="formulaDsp"> |
| \[ f(y \mid \boldsymbol x) = \frac{1}{\sqrt{2 \pi \sigma^2}} \cdot \exp\left(-\frac{1}{2 \sigma^2} \cdot (y - \boldsymbol x^T \boldsymbol c)^2 \right) \,. \] |
| </p> |
| <p> OLS linear regression finds the vector of coefficients \( \boldsymbol c \) that maximizes the likelihood of the observations.</p> |
| <p>Let</p> |
| <ul> |
| <li>\( \boldsymbol y \in \mathbf R^n \) denote the vector of observed dependent variables, with \( n \) rows, containing the observed values of the dependent variable,</li> |
| <li>\( X \in \mathbf R^{n \times k} \) denote the design matrix with \( k \) columns and \( n \) rows, containing all observed vectors of independent variables. \( \boldsymbol x_i \) as rows,</li> |
| <li>\( X^T \) denote the transpose of \( X \),</li> |
| <li>\( X^+ \) denote the pseudo-inverse of \( X \).</li> |
| </ul> |
| <p>Maximizing the likelihood is equivalent to maximizing the log-likelihood \( \sum_{i=1}^n \log f(y_i \mid \boldsymbol x_i) \), which simplifies to minimizing the <b>residual sum of squares</b> \( RSS \) (also called sum of squared residuals or sum of squared errors of prediction), </p> |
| <p class="formulaDsp"> |
| \[ RSS = \sum_{i=1}^n ( y_i - \boldsymbol c^T \boldsymbol x_i )^2 = (\boldsymbol y - X \boldsymbol c)^T (\boldsymbol y - X \boldsymbol c) \,. \] |
| </p> |
| <p> The first-order conditions yield that the \( RSS \) is minimized at </p> |
| <p class="formulaDsp"> |
| \[ \boldsymbol c = (X^T X)^+ X^T \boldsymbol y \,. \] |
| </p> |
| <p>Computing the <b>total sum of squares</b> \( TSS \), the <b>explained sum of squares</b> \( ESS \) (also called the regression sum of squares), and the <b>coefficient of determination</b> \( R^2 \) is done according to the following formulas: </p> |
| <p class="formulaDsp"> |
| \begin{align*} ESS & = \boldsymbol y^T X \boldsymbol c - \frac{ \| y \|_1^2 }{n} \\ TSS & = \sum_{i=1}^n y_i^2 - \frac{ \| y \|_1^2 }{n} \\ R^2 & = \frac{ESS}{TSS} \end{align*} |
| </p> |
| <p> Note: The last equality follows from the definition \( R^2 = 1 - \frac{RSS}{TSS} \) and the fact that for linear regression \( TSS = RSS + ESS \). A proof of the latter can be found, e.g., at: <a href="http://en.wikipedia.org/wiki/Sum_of_squares">http://en.wikipedia.org/wiki/Sum_of_squares</a></p> |
| <p>We estimate the variance \( Var[Y - \boldsymbol c^T \boldsymbol x \mid \boldsymbol x] \) as </p> |
| <p class="formulaDsp"> |
| \[ \sigma^2 = \frac{RSS}{n - k} \] |
| </p> |
| <p> and compute the t-statistic for coefficient \( i \) as </p> |
| <p class="formulaDsp"> |
| \[ t_i = \frac{c_i}{\sqrt{\sigma^2 \cdot \left( (X^T X)^{-1} \right)_{ii} }} \,. \] |
| </p> |
<p>The \( p \)-value for coefficient \( i \) gives the probability of seeing a value at least as extreme as the one observed, provided that the null hypothesis ( \( c_i = 0 \)) is true. Letting \( F_\nu \) denote the cumulative distribution function of student-t with \( \nu \) degrees of freedom, the \( p \)-value for coefficient \( i \) is therefore </p>
| <p class="formulaDsp"> |
| \[ p_i = \Pr(|T| \geq |t_i|) = 2 \cdot (1 - F_{n - k}( |t_i| )) \] |
| </p> |
| <p> where \( T \) is a student-t distributed random variable with mean 0.</p> |
| <p>The condition number [2] \( \kappa(X) = \|X\|_2\cdot\|X^{-1}\|_2\) is computed as the product of two spectral norms [3]. The spectral norm of a matrix \(X\) is the largest singular value of \(X\) i.e. the square root of the largest eigenvalue of the positive-semidefinite matrix \(X^{*}X\):</p> |
| <p class="formulaDsp"> |
| \[ \|X\|_2 = \sqrt{\lambda_{\max}\left(X^{*}X\right)}\ , \] |
| </p> |
| <p> where \(X^{*}\) is the conjugate transpose of \(X\). The condition number of a linear regression problem is a worst-case measure of how sensitive the result is to small perturbations of the input. A large condition number (say, more than 1000) indicates the presence of significant multicollinearity.</p> |
| <p><a class="anchor" id="literature"></a></p> |
| <dl class="section user"><dt>Literature</dt><dd></dd></dl> |
| <p>[1] Cosma Shalizi: Statistics 36-350: Data Mining, Lecture Notes, 21 October 2009, <a href="http://www.stat.cmu.edu/~cshalizi/350/lectures/17/lecture-17.pdf">http://www.stat.cmu.edu/~cshalizi/350/lectures/17/lecture-17.pdf</a></p> |
| <p>[2] Wikipedia: Condition Number, <a href="http://en.wikipedia.org/wiki/Condition_number">http://en.wikipedia.org/wiki/Condition_number</a>.</p> |
| <p>[3] Wikipedia: Spectral Norm, <a href="http://en.wikipedia.org/wiki/Spectral_norm#Spectral_norm">http://en.wikipedia.org/wiki/Spectral_norm#Spectral_norm</a></p> |
| <p>[4] Wikipedia: Breusch–Pagan test, <a href="http://en.wikipedia.org/wiki/Breusch%E2%80%93Pagan_test">http://en.wikipedia.org/wiki/Breusch%E2%80%93Pagan_test</a></p> |
| <p>[5] Wikipedia: Heteroscedasticity-consistent standard errors, <a href="http://en.wikipedia.org/wiki/Heteroscedasticity-consistent_standard_errors">http://en.wikipedia.org/wiki/Heteroscedasticity-consistent_standard_errors</a></p> |
| <p><a class="anchor" id="related"></a></p> |
| <dl class="section user"><dt>Related Topics</dt><dd></dd></dl> |
| <p><a class="el" href="group__grp__robust.html">Robust Variance</a></p> |
| <p><a class="el" href="group__grp__clustered__errors.html">Clustered Variance</a></p> |
| <p><a class="el" href="group__grp__validation.html">Cross Validation</a></p> |
| <p>File <a class="el" href="linear_8sql__in.html" title="SQL functions for linear regression. ">linear.sql_in</a>, source file for the SQL functions </p> |
| </div><!-- contents --> |
| </div><!-- doc-content --> |
| <!-- start footer part --> |
| <div id="nav-path" class="navpath"><!-- id is needed for treeview function! --> |
| <ul> |
| <li class="footer">Generated on Thu Jan 9 2014 20:25:06 for MADlib by |
| <a href="http://www.doxygen.org/index.html"> |
| <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.4 </li> |
| </ul> |
| </div> |
| </body> |
| </html> |