| <!-- HTML header for doxygen 1.8.4--> |
| <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> |
| <html xmlns="http://www.w3.org/1999/xhtml"> |
| <head> |
| <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/> |
| <meta http-equiv="X-UA-Compatible" content="IE=9"/> |
| <meta name="generator" content="Doxygen 1.8.4"/> |
| <meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/> |
| <title>MADlib: Linear Regression</title> |
| <link href="tabs.css" rel="stylesheet" type="text/css"/> |
| <script type="text/javascript" src="jquery.js"></script> |
| <script type="text/javascript" src="dynsections.js"></script> |
| <link href="navtree.css" rel="stylesheet" type="text/css"/> |
| <script type="text/javascript" src="resize.js"></script> |
| <script type="text/javascript" src="navtree.js"></script> |
| <script type="text/javascript"> |
// Enable the draggable splitter between the nav tree and the content pane
// once the DOM is ready (initResizable is defined in resize.js).
$(document).ready(initResizable);
// Recompute the resizable panel height only after all assets have loaded,
// since image/iframe sizes affect the final layout.
$(window).load(resizeHeight);
| </script> |
| <link href="search/search.css" rel="stylesheet" type="text/css"/> |
| <script type="text/javascript" src="search/search.js"></script> |
| <script type="text/javascript"> |
| $(document).ready(function() { searchBox.OnSelectItem(0); }); |
| </script> |
| <script type="text/x-mathjax-config"> |
// MathJax configuration: TeX input with the AMS math/symbol extensions,
// rendered via the HTML-CSS output processor.
MathJax.Hub.Config({
  extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
  // No trailing comma after the last property: a dangling comma in an
  // object literal is a syntax error in legacy IE parsers.
  jax: ["input/TeX", "output/HTML-CSS"]
});
| </script><script src="../mathjax/MathJax.js"></script> |
| <link href="doxygen.css" rel="stylesheet" type="text/css" /> |
| <link href="madlib_extra.css" rel="stylesheet" type="text/css"/> |
| <!-- google analytics --> |
| <script> |
// Standard Google Analytics (analytics.js) async loader snippet:
// defines a ga() command queue and injects the analytics.js script
// before the first <script> element on the page.
// The script URL is explicit https: (not protocol-relative "//...") so
// tracking still loads when this generated documentation is opened
// locally over the file:// protocol.
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-45382226-1', 'auto');   // property UA-45382226-1, auto cookie domain
ga('send', 'pageview');                  // record one pageview per page load
| </script> |
| </head> |
| <body> |
| <div id="top"><!-- do not remove this div, it is closed by doxygen! --> |
| <div id="titlearea"> |
| <table cellspacing="0" cellpadding="0"> |
| <tbody> |
| <tr style="height: 56px;"> |
| <td style="padding-left: 0.5em;"> |
| <div id="projectname">MADlib |
|  <span id="projectnumber">1.4.1</span> |
| </div> |
| <div id="projectbrief">User Documentation</div> |
| </td> |
| <td> <div id="MSearchBox" class="MSearchBoxInactive"> |
| <span class="left"> |
| <img id="MSearchSelect" src="search/mag_sel.png" |
| onmouseover="return searchBox.OnSearchSelectShow()" |
| onmouseout="return searchBox.OnSearchSelectHide()" |
| alt=""/> |
| <input type="text" id="MSearchField" value="Search" accesskey="S" |
| onfocus="searchBox.OnSearchFieldFocus(true)" |
| onblur="searchBox.OnSearchFieldFocus(false)" |
| onkeyup="searchBox.OnSearchFieldChange(event)"/> |
| </span><span class="right"> |
| <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a> |
| </span> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <!-- end header part --> |
| <!-- Generated by Doxygen 1.8.4 --> |
| <script type="text/javascript"> |
| var searchBox = new SearchBox("searchBox", "search",false,'Search'); |
| </script> |
| </div><!-- top --> |
| <div id="side-nav" class="ui-resizable side-nav-resizable"> |
| <div id="nav-tree"> |
| <div id="nav-tree-contents"> |
| <div id="nav-sync" class="sync"></div> |
| </div> |
| </div> |
| <div id="splitbar" style="-moz-user-select:none;" |
| class="ui-resizable-handle"> |
| </div> |
| </div> |
| <script type="text/javascript"> |
| $(document).ready(function(){initNavTree('group__grp__linreg.html','');}); |
| </script> |
| <div id="doc-content"> |
| <!-- window showing the filter options --> |
| <div id="MSearchSelectWindow" |
| onmouseover="return searchBox.OnSearchSelectShow()" |
| onmouseout="return searchBox.OnSearchSelectHide()" |
| onkeydown="return searchBox.OnSearchSelectKey(event)"> |
| <a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark"> </span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark"> </span>Files</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark"> </span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark"> </span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark"> </span>Groups</a></div> |
| |
| <!-- iframe showing the search results (closed by default) --> |
| <div id="MSearchResultsWindow"> |
| <iframe src="javascript:void(0)" frameborder="0" |
| name="MSearchResults" id="MSearchResults"> |
| </iframe> |
| </div> |
| |
| <div class="header"> |
| <div class="headertitle"> |
| <div class="title">Linear Regression<div class="ingroups"><a class="el" href="group__grp__glm.html">Generalized Linear Models</a></div></div> </div> |
| </div><!--header--> |
| <div class="contents"> |
<div class="toc"><b>Contents</b>
| <ul> |
| <li class="level1"> |
| <a href="#train">Training Function</a> </li> |
| <li class="level1"> |
| <a href="#predict">Prediction Function</a> </li> |
| <li class="level1"> |
| <a href="#examples">Examples</a> </li> |
| <li class="level1"> |
| <a href="#background">Technical Background</a> </li> |
| <li class="level1"> |
| <a href="#literature">Literature</a> </li> |
| <li class="level1"> |
| <a href="#related">Related Topics</a> </li> |
| </ul> |
| </div><p>Linear regression models a linear relationship of a scalar dependent variable \( y \) to one or more explanatory independent variables \( x \) to build a model of coefficients.</p> |
| <p><a class="anchor" id="train"></a></p> |
| <dl class="section user"><dt>Training Function</dt><dd></dd></dl> |
| <p>The linear regression training function has the following syntax. </p> |
| <pre class="syntax"> |
| linregr_train( source_table, |
| out_table, |
| dependent_varname, |
| independent_varname, |
| grouping_cols, |
| heteroskedasticity_option |
| ) |
| </pre><p><b>Arguments</b> </p> |
| <dl class="arglist"> |
| <dt>source_table </dt> |
| <dd><p class="startdd">TEXT. The name of the table containing the training data.</p> |
| <p class="enddd"></p> |
| </dd> |
| <dt>out_table </dt> |
| <dd><p class="startdd">TEXT. Name of the generated table containing the output model.</p> |
| <p>The output table contains the following columns. </p> |
| <table class="output"> |
| <tr> |
| <th><...> </th><td>Any grouping columns provided during training. Present only if the grouping option is used. </td></tr> |
| <tr> |
| <th>coef </th><td>FLOAT8[]. Vector of the coefficients of the regression. </td></tr> |
| <tr> |
| <th>r2 </th><td>FLOAT8. R-squared coefficient of determination of the model. </td></tr> |
| <tr> |
| <th>std_err </th><td>FLOAT8[]. Vector of the standard error of the coefficients. </td></tr> |
| <tr> |
| <th>t_stats </th><td>FLOAT8[]. Vector of the t-statistics of the coefficients. </td></tr> |
| <tr> |
| <th>p_values </th><td>FLOAT8[]. Vector of the p-values of the coefficients. </td></tr> |
| <tr> |
<th>condition_no </th><td>FLOAT8. The condition number of the \(X^{*}X\) matrix. A high condition number is usually an indication that there may be some numeric instability in the result yielding a less reliable model. A high condition number often results when there is a significant amount of collinearity in the underlying design matrix, in which case other regression techniques, such as elastic net regression, may be more appropriate. </td></tr>
| <tr> |
<th>bp_stats </th><td>FLOAT8. The Breusch-Pagan statistic of heteroskedasticity. Present only if the heteroskedasticity argument was set to TRUE when the model was trained. </td></tr>
| <tr> |
<th>bp_p_value </th><td>FLOAT8. The Breusch-Pagan calculated p-value. Present only if the heteroskedasticity parameter was set to TRUE when the model was trained. </td></tr>
| <tr> |
| <th>num_rows_processed </th><td>INTEGER. The number of rows that are actually used in each group. </td></tr> |
| <tr> |
| <th>num_missing_rows_skipped </th><td>INTEGER. The number of rows that have NULL values in the dependent and independent variables, and were skipped in the computation for each group. </td></tr> |
| </table> |
| <p>A summary table named <out_table>_summary is created together with the output table. It has the following columns: </p> |
| <table class="output"> |
| <tr> |
| <th>source_table </th><td>The data source table name </td></tr> |
| <tr> |
| <th>out_table </th><td>The output table name </td></tr> |
| <tr> |
| <th>dependent_varname </th><td>The dependent variable </td></tr> |
| <tr> |
| <th>independent_varname </th><td>The independent variables </td></tr> |
| <tr> |
| <th>num_rows_processed </th><td>The total number of rows that were used in the computation. </td></tr> |
| <tr> |
| <th>num_missing_rows_skipped </th><td>The total number of rows that were skipped because of NULL values in them. </td></tr> |
| </table> |
| <dl class="section note"><dt>Note</dt><dd>For p-values, we just return the computation result directly. Other statistical packages, like 'R', produce the same result, but on printing the result to screen, another format function is used and any p-value that is smaller than the machine epsilon (the smallest positive floating-point number 'x' such that '1 + x != 1') will be printed on screen as "< xxx" (xxx is the value of the machine epsilon). Although the result may look different, they are in fact the same. </dd></dl> |
| </dd> |
| <dt>dependent_varname </dt> |
| <dd><p class="startdd">TEXT. Expression to evaluate for the dependent variable.</p> |
| <p class="enddd"></p> |
| </dd> |
| <dt>independent_varname </dt> |
| <dd><p class="startdd">TEXT. Expression list to evaluate for the independent variables. An intercept variable is not assumed. It is common to provide an explicit intercept term by including a single constant <code>1</code> term in the independent variable list.</p> |
| <p class="enddd"></p> |
| </dd> |
| <dt>grouping_cols (optional) </dt> |
| <dd><p class="startdd">TEXT, default: NULL. An expression list used to group the input dataset into discrete groups, running one regression per group. Similar to the SQL <code>GROUP BY</code> clause. When this value is null, no grouping is used and a single result model is generated.</p> |
| <p class="enddd"></p> |
| </dd> |
| <dt>heteroskedasticity_option (optional) </dt> |
| <dd>BOOLEAN, default: FALSE. When TRUE, the heteroskedasticity of the model is also calculated and returned with the results. </dd> |
| </dl> |
| <p><a class="anchor" id="warning"></a></p> |
| <dl class="section warning"><dt>Warning</dt><dd>The aggregate 'linregr' has been deprecated in favor of the function 'linregr_train'. If the aggregate 'linregr' is used to output the results of linear regression to a table, it is recommended to follow the general pattern shown below (replace text within '<...>' with the appropriate variable names). <pre class="syntax"> |
| CREATE TABLE <output table> AS |
| SELECT (r).* |
| FROM ( |
| SELECT linregr(<dependent variable>, <independent variable>) as r |
| FROM <source table> |
| ) q; |
| </pre></dd></dl> |
| <p><a class="anchor" id="predict"></a></p> |
| <dl class="section user"><dt>Prediction Function</dt><dd><pre class="syntax"> |
| linregr_predict(coef, col_ind) |
| </pre> <b>Arguments</b> <dl class="arglist"> |
| <dt>coef </dt> |
| <dd><p class="startdd">FLOAT8[]. Vector of the coefficients of regression.</p> |
| <p class="enddd"></p> |
| </dd> |
| <dt>col_ind </dt> |
<dd><p class="startdd">FLOAT8[]. An array containing the values of the independent variables (the same expression list used for training). </p>
| <p class="enddd"><a class="anchor" id="examples"></a></p> |
| </dd> |
| </dl> |
| </dd></dl> |
| <dl class="section user"><dt>Examples</dt><dd><ol type="1"> |
| <li>Create an input data set. <pre class="example"> |
| CREATE TABLE houses (id INT, tax INT, bedroom INT, bath FLOAT, price INT, |
| size INT, lot INT); |
| COPY houses FROM STDIN WITH DELIMITER '|'; |
| 1 | 590 | 2 | 1 | 50000 | 770 | 22100 |
| 2 | 1050 | 3 | 2 | 85000 | 1410 | 12000 |
| 3 | 20 | 3 | 1 | 22500 | 1060 | 3500 |
| 4 | 870 | 2 | 2 | 90000 | 1300 | 17500 |
| 5 | 1320 | 3 | 2 | 133000 | 1500 | 30000 |
| 6 | 1350 | 2 | 1 | 90500 | 820 | 25700 |
| 7 | 2790 | 3 | 2.5 | 260000 | 2130 | 25000 |
| 8 | 680 | 2 | 1 | 142500 | 1170 | 22000 |
| 9 | 1840 | 3 | 2 | 160000 | 1500 | 19000 |
| 10 | 3680 | 4 | 2 | 240000 | 2790 | 20000 |
| 11 | 1660 | 3 | 1 | 87000 | 1030 | 17500 |
| 12 | 1620 | 3 | 2 | 118600 | 1250 | 20000 |
| 13 | 3100 | 3 | 2 | 140000 | 1760 | 38000 |
| 14 | 2070 | 2 | 3 | 148000 | 1550 | 14000 |
| 15 | 650 | 3 | 1.5 | 65000 | 1450 | 12000 |
| \. |
| </pre></li> |
| <li>Train a regression model. First, a single regression for all the data. <pre class="example"> |
| SELECT madlib.linregr_train( 'houses', |
| 'houses_linregr', |
| 'price', |
| 'ARRAY[1, tax, bath, size]' |
| ); |
| </pre></li> |
| <li>Generate three output models, one for each value of "bedroom". <pre class="example"> |
| SELECT madlib.linregr_train( 'houses', |
| 'houses_linregr_bedroom', |
| 'price', |
| 'ARRAY[1, tax, bath, size]', |
| 'bedroom' |
| ); |
| </pre></li> |
| <li>Examine the resulting models. <pre class="example"> |
| -- Set extended display on for easier reading of output |
| \x ON |
| SELECT * FROM houses_linregr; |
| </pre> Result: <pre class="result"> |
| -[ RECORD 1 ]+--------------------------------------------------------------------------- |
| coef | {-12849.4168959872,28.9613922651765,10181.6290712648,50.516894915354} |
| r2 | 0.768577580597443 |
| std_err | {33453.0344331391,15.8992104963997,19437.7710925923,32.928023174087} |
| t_stats | {-0.38410317968819,1.82156166004184,0.523806408809133,1.53416118083605} |
| p_values | {0.708223134615422,0.0958005827189772,0.610804093526536,0.153235085548186} |
| condition_no | 9002.50457085737 |
| </pre></li> |
| <li>View the results grouped by bedroom. <pre class="example"> |
| SELECT * FROM houses_linregr_bedroom; |
| </pre> Result: <pre class="result"> |
| -[ RECORD 1 ]+-------------------------------------------------------------------------- |
| bedroom | 2 |
| coef | {-84242.0345406597,55.4430144648696,-78966.9753675319,225.611910021192} |
| r2 | 0.968809546465313 |
| std_err | {35018.9991665742,19.5731125320686,23036.8071292552,49.0448678148784} |
| t_stats | {-2.40560942761235,2.83261103077151,-3.42786111480046,4.60011251070697} |
| p_values | {0.250804617665239,0.21605133377602,0.180704400437373,0.136272031474122} |
| condition_no | 10086.1048721726 |
| -[ RECORD 2 ]+-------------------------------------------------------------------------- |
| bedroom | 4 |
| coef | {0.0112536020318378,41.4132554771633,0.0225072040636757,31.3975496688276} |
| r2 | 1 |
| std_err | {0,0,0,0} |
| t_stats | {Infinity,Infinity,Infinity,Infinity} |
| p_values | |
| condition_no | Infinity |
| -[ RECORD 3 ]+-------------------------------------------------------------------------- |
| bedroom | 3 |
| coef | {-88155.8292501601,27.1966436294429,41404.0293363612,62.637521075324} |
| r2 | 0.841699901311252 |
| std_err | {57867.9999702625,17.8272309154689,43643.1321511114,70.8506824863954} |
| t_stats | {-1.52339512849005,1.52556747362508,0.948695185143966,0.884077878676067} |
| p_values | {0.188161432894871,0.187636685729869,0.386340032374927,0.417132778705789} |
| condition_no | 11722.6225642147 |
| </pre> Alternatively you can unnest the results for easier reading of output. <pre class="example"> |
| \x OFF |
| SELECT unnest(ARRAY['intercept','tax','bath','size']) as attribute, |
| unnest(coef) as coefficient, |
| unnest(std_err) as standard_error, |
| unnest(t_stats) as t_stat, |
| unnest(p_values) as pvalue |
| FROM houses_linregr; |
| </pre></li> |
| <li>Use the prediction function to evaluate residuals. <pre class="example"> |
| SELECT houses.*, |
       madlib.linregr_predict( m.coef,
                               ARRAY[1,tax,bath,size]
                             ) as predict,
        price -
        madlib.linregr_predict( m.coef,
                                ARRAY[1,tax,bath,size]
                              ) as residual
| FROM houses, houses_linregr m; |
| </pre></li> |
| </ol> |
| </dd></dl> |
| <p><a class="anchor" id="notes"></a></p> |
| <dl class="section user"><dt>Note</dt><dd>All table names can be optionally schema qualified (current_schemas() would be searched if a schema name is not provided) and all table and column names should follow case-sensitivity and quoting rules per the database. (For instance, 'mytable' and 'MyTable' both resolve to the same entity, i.e. 'mytable'. If mixed-case or multi-byte characters are desired for entity names then the string should be double-quoted; in this case the input would be '"MyTable"').</dd></dl> |
| <p><a class="anchor" id="background"></a></p> |
| <dl class="section user"><dt>Technical Background</dt><dd></dd></dl> |
| <p>Ordinary least-squares (OLS) linear regression refers to a stochastic model in which the conditional mean of the dependent variable (usually denoted \( Y \)) is an affine function of the vector of independent variables (usually denoted \( \boldsymbol x \)). That is, </p> |
| <p class="formulaDsp"> |
| \[ E[Y \mid \boldsymbol x] = \boldsymbol c^T \boldsymbol x \] |
| </p> |
| <p> for some unknown vector of coefficients \( \boldsymbol c \). The assumption is that the residuals are i.i.d. distributed Gaussians. That is, the (conditional) probability density of \( Y \) is given by </p> |
| <p class="formulaDsp"> |
| \[ f(y \mid \boldsymbol x) = \frac{1}{\sqrt{2 \pi \sigma^2}} \cdot \exp\left(-\frac{1}{2 \sigma^2} \cdot (y - \boldsymbol x^T \boldsymbol c)^2 \right) \,. \] |
| </p> |
| <p> OLS linear regression finds the vector of coefficients \( \boldsymbol c \) that maximizes the likelihood of the observations.</p> |
| <p>Let</p> |
| <ul> |
| <li>\( \boldsymbol y \in \mathbf R^n \) denote the vector of observed dependent variables, with \( n \) rows, containing the observed values of the dependent variable,</li> |
| <li>\( X \in \mathbf R^{n \times k} \) denote the design matrix with \( k \) columns and \( n \) rows, containing all observed vectors of independent variables. \( \boldsymbol x_i \) as rows,</li> |
| <li>\( X^T \) denote the transpose of \( X \),</li> |
| <li>\( X^+ \) denote the pseudo-inverse of \( X \).</li> |
| </ul> |
| <p>Maximizing the likelihood is equivalent to maximizing the log-likelihood \( \sum_{i=1}^n \log f(y_i \mid \boldsymbol x_i) \), which simplifies to minimizing the <b>residual sum of squares</b> \( RSS \) (also called sum of squared residuals or sum of squared errors of prediction), </p> |
| <p class="formulaDsp"> |
| \[ RSS = \sum_{i=1}^n ( y_i - \boldsymbol c^T \boldsymbol x_i )^2 = (\boldsymbol y - X \boldsymbol c)^T (\boldsymbol y - X \boldsymbol c) \,. \] |
| </p> |
| <p> The first-order conditions yield that the \( RSS \) is minimized at </p> |
| <p class="formulaDsp"> |
| \[ \boldsymbol c = (X^T X)^+ X^T \boldsymbol y \,. \] |
| </p> |
| <p>Computing the <b>total sum of squares</b> \( TSS \), the <b>explained sum of squares</b> \( ESS \) (also called the regression sum of squares), and the <b>coefficient of determination</b> \( R^2 \) is done according to the following formulas: </p> |
| <p class="formulaDsp"> |
| \begin{align*} ESS & = \boldsymbol y^T X \boldsymbol c - \frac{ \| y \|_1^2 }{n} \\ TSS & = \sum_{i=1}^n y_i^2 - \frac{ \| y \|_1^2 }{n} \\ R^2 & = \frac{ESS}{TSS} \end{align*} |
| </p> |
| <p> Note: The last equality follows from the definition \( R^2 = 1 - \frac{RSS}{TSS} \) and the fact that for linear regression \( TSS = RSS + ESS \). A proof of the latter can be found, e.g., at: <a href="http://en.wikipedia.org/wiki/Sum_of_squares">http://en.wikipedia.org/wiki/Sum_of_squares</a></p> |
| <p>We estimate the variance \( Var[Y - \boldsymbol c^T \boldsymbol x \mid \boldsymbol x] \) as </p> |
| <p class="formulaDsp"> |
| \[ \sigma^2 = \frac{RSS}{n - k} \] |
| </p> |
| <p> and compute the t-statistic for coefficient \( i \) as </p> |
| <p class="formulaDsp"> |
| \[ t_i = \frac{c_i}{\sqrt{\sigma^2 \cdot \left( (X^T X)^{-1} \right)_{ii} }} \,. \] |
| </p> |
<p>The \( p \)-value for coefficient \( i \) gives the probability of seeing a value at least as extreme as the one observed, provided that the null hypothesis ( \( c_i = 0 \)) is true. Letting \( F_\nu \) denote the cumulative distribution function of student-t with \( \nu \) degrees of freedom, the \( p \)-value for coefficient \( i \) is therefore </p>
| <p class="formulaDsp"> |
| \[ p_i = \Pr(|T| \geq |t_i|) = 2 \cdot (1 - F_{n - k}( |t_i| )) \] |
| </p> |
| <p> where \( T \) is a student-t distributed random variable with mean 0.</p> |
| <p>The condition number [2] \( \kappa(X) = \|X\|_2\cdot\|X^{-1}\|_2\) is computed as the product of two spectral norms [3]. The spectral norm of a matrix \(X\) is the largest singular value of \(X\) i.e. the square root of the largest eigenvalue of the positive-semidefinite matrix \(X^{*}X\):</p> |
| <p class="formulaDsp"> |
| \[ \|X\|_2 = \sqrt{\lambda_{\max}\left(X^{*}X\right)}\ , \] |
| </p> |
| <p> where \(X^{*}\) is the conjugate transpose of \(X\). The condition number of a linear regression problem is a worst-case measure of how sensitive the result is to small perturbations of the input. A large condition number (say, more than 1000) indicates the presence of significant multicollinearity.</p> |
| <p><a class="anchor" id="literature"></a></p> |
| <dl class="section user"><dt>Literature</dt><dd></dd></dl> |
| <p>[1] Cosma Shalizi: Statistics 36-350: Data Mining, Lecture Notes, 21 October 2009, <a href="http://www.stat.cmu.edu/~cshalizi/350/lectures/17/lecture-17.pdf">http://www.stat.cmu.edu/~cshalizi/350/lectures/17/lecture-17.pdf</a></p> |
| <p>[2] Wikipedia: Condition Number, <a href="http://en.wikipedia.org/wiki/Condition_number">http://en.wikipedia.org/wiki/Condition_number</a>.</p> |
| <p>[3] Wikipedia: Spectral Norm, <a href="http://en.wikipedia.org/wiki/Spectral_norm#Spectral_norm">http://en.wikipedia.org/wiki/Spectral_norm#Spectral_norm</a></p> |
| <p>[4] Wikipedia: Breusch–Pagan test, <a href="http://en.wikipedia.org/wiki/Breusch%E2%80%93Pagan_test">http://en.wikipedia.org/wiki/Breusch%E2%80%93Pagan_test</a></p> |
| <p>[5] Wikipedia: Heteroscedasticity-consistent standard errors, <a href="http://en.wikipedia.org/wiki/Heteroscedasticity-consistent_standard_errors">http://en.wikipedia.org/wiki/Heteroscedasticity-consistent_standard_errors</a></p> |
| <p><a class="anchor" id="related"></a></p> |
| <dl class="section user"><dt>Related Topics</dt><dd></dd></dl> |
| <p><a class="el" href="group__grp__robust.html">Robust Variance</a></p> |
| <p><a class="el" href="group__grp__clustered__errors.html">Clustered Variance</a></p> |
| <p><a class="el" href="group__grp__validation.html">Cross Validation</a></p> |
| <p>File <a class="el" href="linear_8sql__in.html" title="SQL functions for linear regression. ">linear.sql_in</a>, source file for the SQL functions </p> |
| </div><!-- contents --> |
| </div><!-- doc-content --> |
| <!-- start footer part --> |
| <div id="nav-path" class="navpath"><!-- id is needed for treeview function! --> |
| <ul> |
| <li class="footer">Generated on Thu Jan 9 2014 20:25:06 for MADlib by |
| <a href="http://www.doxygen.org/index.html"> |
| <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.4 </li> |
| </ul> |
| </div> |
| </body> |
| </html> |