blob: bd90c60f02d4138b4fa95bb50ce235c39eeb61fc [file] [log] [blame]
<!-- HTML header for doxygen 1.8.4-->
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
<meta name="generator" content="Doxygen 1.8.4"/>
<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/>
<title>MADlib: Pearson&#39;s Correlation</title>
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="dynsections.js"></script>
<link href="navtree.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="resize.js"></script>
<script type="text/javascript" src="navtree.js"></script>
<script type="text/javascript">
$(document).ready(initResizable);
$(window).load(resizeHeight);
</script>
<link href="search/search.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="search/search.js"></script>
<script type="text/javascript">
$(document).ready(function() { searchBox.OnSelectItem(0); });
</script>
<script type="text/x-mathjax-config">
MathJax.Hub.Config({
extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
jax: ["input/TeX","output/HTML-CSS"],
});
</script><script src="../mathjax/MathJax.js"></script>
<link href="doxygen.css" rel="stylesheet" type="text/css" />
<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
<!-- google analytics -->
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-45382226-1', 'auto');
ga('send', 'pageview');
</script>
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
<tbody>
<tr style="height: 56px;">
<td id="projectlogo"><a href="http://madlib.incubator.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ ></a></td>
<td style="padding-left: 0.5em;">
<div id="projectname">
<span id="projectnumber">1.6</span> <span style="font-size:10pt; font-style:italic"><a href="../latest/./group__grp__correlation.html"> A newer version is available</a></span>
</div>
<div id="projectbrief">User Documentation</div>
</td>
<td> <div id="MSearchBox" class="MSearchBoxInactive">
<span class="left">
<img id="MSearchSelect" src="search/mag_sel.png"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
alt=""/>
<input type="text" id="MSearchField" value="Search" accesskey="S"
onfocus="searchBox.OnSearchFieldFocus(true)"
onblur="searchBox.OnSearchFieldFocus(false)"
onkeyup="searchBox.OnSearchFieldChange(event)"/>
</span><span class="right">
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
</span>
</div>
</td>
</tr>
</tbody>
</table>
</div>
<!-- end header part -->
<!-- Generated by Doxygen 1.8.4 -->
<script type="text/javascript">
var searchBox = new SearchBox("searchBox", "search",false,'Search');
</script>
</div><!-- top -->
<div id="side-nav" class="ui-resizable side-nav-resizable">
<div id="nav-tree">
<div id="nav-tree-contents">
<div id="nav-sync" class="sync"></div>
</div>
</div>
<div id="splitbar" style="-moz-user-select:none;"
class="ui-resizable-handle">
</div>
</div>
<script type="text/javascript">
$(document).ready(function(){initNavTree('group__grp__correlation.html','');});
</script>
<div id="doc-content">
<!-- window showing the filter options -->
<div id="MSearchSelectWindow"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
onkeydown="return searchBox.OnSearchSelectKey(event)">
<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Files</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark">&#160;</span>Groups</a></div>
<!-- iframe showing the search results (closed by default) -->
<div id="MSearchResultsWindow">
<iframe src="javascript:void(0)" frameborder="0"
name="MSearchResults" id="MSearchResults">
</iframe>
</div>
<div class="header">
<div class="headertitle">
<div class="title">Pearson's Correlation<div class="ingroups"><a class="el" href="group__grp__desc__stats.html">Descriptive Statistics</a></div></div> </div>
</div><!--header-->
<div class="contents">
<div class="toc"><b>Contents</b> </p>
<ul>
<li>
<a href="#usage">Correlation Function</a> </li>
<li>
<a href="#examples">Examples</a> </li>
<li>
<a href="#seealso">See Also</a> </li>
</ul>
</div><p>A correlation function is the degree and direction of association of two variables&mdash;how well one random variable can be predicted from the other. The coefficient of correlation varies from -1 to 1. A coefficient of 1 implies perfect correlation, 0 means no correlation, and -1 means perfect anti-correlation.</p>
<p>This function provides a cross-correlation matrix for all pairs of numeric columns in a <em>source_table</em>. A correlation matrix describes correlation among \( M \) variables. It is a square symmetrical \( M \)x \(M \) matrix with the \( (ij) \)th element equal to the correlation coefficient between the \(i\)th and the \(j\)th variable. The diagonal elements (correlations of variables with themselves) are always equal to 1.0.</p>
<p><a class="anchor" id="usage"></a></p>
<dl class="section user"><dt>Correlation Function</dt><dd></dd></dl>
<p>The correlation function has the following syntax: </p>
<pre class="syntax">
correlation( source_table,
output_table,
target_cols
)
</pre> <dl class="arglist">
<dt>source_table </dt>
<dd>TEXT. The name of the data containing the input data. </dd>
<dt>output_table </dt>
<dd>TEXT. The name of the table where the cross-correlation matrix will be saved. The output is a table with N+2 columns and N rows, where N is the number of target columns. It contains the following columns. <table class="output">
<tr>
<th>column_position </th><td>The first column contains the position of the variable in the '<em>source_table</em>'. </td></tr>
<tr>
<th>variable </th><td>The second column contains the row-header for the variables. </td></tr>
<tr>
<th>remaining columns </th><td>The remainder of the table is the NxN correlation matrix for the pairs of numeric '<em>source_table</em>' columns. </td></tr>
</table>
The output table is arranged as a lower-triangular matrix with the upper triangle set to NULL and the diagonal elements set to 1.0. To obtain the result from the '<em>output_table</em>' in this matrix format ensure to order the elements using the '<em>column_position</em>', as shown in the example below. <pre class="example">
SELECT * FROM output_table ORDER BY column_position;
</pre> </dd>
<dt>target_cols (optional) </dt>
<dd>TEXT, default: '*'. A comma-separated list of the columns to correlate. If NULL or <code>'*'</code>, results are produced for all numeric columns. </dd>
</dl>
<p><a class="anchor" id="examples"></a></p>
<dl class="section user"><dt>Examples</dt><dd><ol type="1">
<li>View online help for the correlation function. <pre class="example">
SELECT madlib.correlation();
</pre></li>
<li>Create an input data set. <pre class="example">
DROP TABLE IF EXISTS example_data;
CREATE TABLE example_data(
id SERIAL, outlook TEXT,
temperature FLOAT8, humidity FLOAT8,
windy TEXT, class TEXT);
COPY example_data FROM STDIN WITH DELIMITER '|';
1 | sunny | 85 | 85 | false | Don't Play
2 | sunny | 80 | 90 | true | Don't Play
3 | overcast | 83 | 78 | false | Play
4 | rain | 70 | 96 | false | Play
5 | rain | 68 | 80 | false | Play
6 | rain | 65 | 70 | true | Don't Play
7 | overcast | 64 | 65 | true | Play
8 | sunny | 72 | 95 | false | Don't Play
9 | sunny | 69 | 70 | false | Play
10 | rain | 75 | 80 | false | Play
11 | sunny | 75 | 70 | true | Play
12 | overcast | 72 | 90 | true | Play
13 | overcast | 81 | 75 | false | Play
14 | rain | 71 | 80 | true | Don't Play
15 | | 100 | 100 | true |
16 | | 110 | 100 | true |
\.
</pre></li>
<li>Run the <a class="el" href="correlation_8sql__in.html#a1d17ea5ec13e198d6d310b6aa79173de">correlation()</a> function on the data set. <pre class="example">
-- Correlate all numeric columns
SELECT madlib.correlation( 'example_data',
'example_data_output'
);
-- Setting target_cols to NULL or '*' also correlates all numeric columns
SELECT madlib.correlation( 'example_data',
'example_data_output',
'*'
);
-- Correlate only the temperature and humidity columns
SELECT madlib.correlation( 'example_data',
'example_data_output',
'temperature, humidity'
);
</pre></li>
<li>View the correlation matrix. <pre class="example">
SELECT * FROM example_data_output ORDER BY column_position;
</pre> Result: <pre class="result">
column_position | variable | temperature | humidity
-----------------+-------------+-------------------+----------
1 | temperature | 1 |
2 | humidity | 0.616876934548786 | 1.0
(2 rows)
</pre></li>
</ol>
</dd></dl>
<p><a class="anchor" id="related"></a></p>
<dl class="section user"><dt>Related Topics</dt><dd></dd></dl>
<p>File <a class="el" href="correlation_8sql__in.html" title="SQL functions for correlation computation. ">correlation.sql_in</a> documenting the SQL functions</p>
<p><a class="el" href="group__grp__summary.html">Summary</a> for general descriptive statistics for a table </p>
</div><!-- contents -->
</div><!-- doc-content -->
<!-- start footer part -->
<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
<ul>
<li class="footer">Generated on Thu Jul 3 2014 17:38:01 for MADlib by
<a href="http://www.doxygen.org/index.html">
<img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.4 </li>
</ul>
</div>
</body>
</html>