<!-- blob: db9a0cfaf4d54eed2df26f1dad6ee31ab2ceb4b6 [file] [log] [blame] -->
<!-- HTML header for doxygen 1.8.4-->
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
<meta name="generator" content="Doxygen 1.8.10"/>
<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/>
<title>MADlib: rf.sql_in File Reference</title>
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="dynsections.js"></script>
<link href="navtree.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="resize.js"></script>
<script type="text/javascript" src="navtreedata.js"></script>
<script type="text/javascript" src="navtree.js"></script>
<script type="text/javascript">
$(document).ready(initResizable);
$(window).load(resizeHeight);
</script>
<link href="search/search.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="search/searchdata.js"></script>
<script type="text/javascript" src="search/search.js"></script>
<script type="text/javascript">
$(document).ready(function() { init_search(); });
</script>
<!-- hack in the navigation tree -->
<script type="text/javascript" src="navtree_hack.js"></script>
<link href="doxygen.css" rel="stylesheet" type="text/css" />
<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
<!-- google analytics -->
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-45382226-1', 'madlib.net');
ga('send', 'pageview');
</script>
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
<tbody>
<tr style="height: 56px;">
<td id="projectlogo"><a href="http://madlib.net"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/></a></td>
<td style="padding-left: 0.5em;">
<div id="projectname">
<span id="projectnumber">1.8dev</span>
</div>
<div id="projectbrief">User Documentation for MADlib</div>
</td>
<td> <div id="MSearchBox" class="MSearchBoxInactive">
<span class="left">
<img id="MSearchSelect" src="search/mag_sel.png"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
alt=""/>
<input type="text" id="MSearchField" value="Search" accesskey="S"
onfocus="searchBox.OnSearchFieldFocus(true)"
onblur="searchBox.OnSearchFieldFocus(false)"
onkeyup="searchBox.OnSearchFieldChange(event)"/>
</span><span class="right">
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
</span>
</div>
</td>
</tr>
</tbody>
</table>
</div>
<!-- end header part -->
<!-- Generated by Doxygen 1.8.10 -->
<script type="text/javascript">
var searchBox = new SearchBox("searchBox", "search",false,'Search');
</script>
</div><!-- top -->
<div id="side-nav" class="ui-resizable side-nav-resizable">
<div id="nav-tree">
<div id="nav-tree-contents">
<div id="nav-sync" class="sync"></div>
</div>
</div>
<div id="splitbar" style="-moz-user-select:none;"
class="ui-resizable-handle">
</div>
</div>
<script type="text/javascript">
$(document).ready(function(){initNavTree('rf_8sql__in.html','');});
</script>
<div id="doc-content">
<!-- window showing the filter options -->
<div id="MSearchSelectWindow"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
onkeydown="return searchBox.OnSearchSelectKey(event)">
</div>
<!-- iframe showing the search results (closed by default) -->
<div id="MSearchResultsWindow">
<iframe src="javascript:void(0)" frameborder="0"
name="MSearchResults" id="MSearchResults">
</iframe>
</div>
<div class="header">
<div class="summary">
<a href="#func-members">Functions</a> </div>
<div class="headertitle">
<div class="title">rf.sql_in File Reference</div> </div>
</div><!--header-->
<div class="contents">
<p>Random forest APIs and main control logic written in PL/pgSQL
<a href="#details">More...</a></p>
<table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="func-members"></a>
Functions</h2></td></tr>
<tr class="memitem:a3cf718282802b63dc0a0d19b34f6829b"><td class="memItemLeft" align="right" valign="top">rf_train_result&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="rf_8sql__in.html#a3cf718282802b63dc0a0d19b34f6829b">rf_train</a> (text split_criterion, text training_table_name, text result_rf_table_name, int num_trees, int features_per_node, float sampling_percentage, text continuous_feature_names, text feature_col_names, text id_col_name, text class_col_name, text how2handle_missing_value, int max_tree_depth, float node_prune_threshold, float node_split_threshold, int verbosity)</td></tr>
<tr class="memdesc:a3cf718282802b63dc0a0d19b34f6829b"><td class="mdescLeft">&#160;</td><td class="mdescRight">This API is defined for training a random forest. The training function provides a number of parameters that enable more flexible control over how an RF is generated. It constructs the RF based on a training set stored in a database table, each row of which defines a set of features, an ID, and a labeled class. Features could be either discrete or continuous. All the DTs of the result RF will be kept in a single table. <a href="#a3cf718282802b63dc0a0d19b34f6829b">More...</a><br /></td></tr>
<tr class="separator:a3cf718282802b63dc0a0d19b34f6829b"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a3981c021e89c0c5f40ab436d96848845"><td class="memItemLeft" align="right" valign="top">rf_train_result&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="rf_8sql__in.html#a3981c021e89c0c5f40ab436d96848845">rf_train</a> (text split_criterion, text training_table_name, text result_rf_table_name)</td></tr>
<tr class="memdesc:a3981c021e89c0c5f40ab436d96848845"><td class="mdescLeft">&#160;</td><td class="mdescRight">This API (short form) is defined for training a random forest. For convenience, a short form of the training API with three parameters is also defined. This one needs only the split criterion name, the name of the table where training data is kept, and the name of the table where the trained RF should be kept. All other parameters in the full form will take their default values. <a href="#a3981c021e89c0c5f40ab436d96848845">More...</a><br /></td></tr>
<tr class="separator:a3981c021e89c0c5f40ab436d96848845"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a60b3f093409a14331b7601522ac0ac42"><td class="memItemLeft" align="right" valign="top">set&lt; text &gt;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="rf_8sql__in.html#a60b3f093409a14331b7601522ac0ac42">rf_display</a> (text rf_table_name, int[] tree_id, int max_depth)</td></tr>
<tr class="memdesc:a60b3f093409a14331b7601522ac0ac42"><td class="mdescLeft">&#160;</td><td class="mdescRight">Display the trees in the random forest in a human-readable format. <a href="#a60b3f093409a14331b7601522ac0ac42">More...</a><br /></td></tr>
<tr class="separator:a60b3f093409a14331b7601522ac0ac42"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a8ff04ad76d40eed5036706b365403376"><td class="memItemLeft" align="right" valign="top">set&lt; text &gt;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="rf_8sql__in.html#a8ff04ad76d40eed5036706b365403376">rf_display</a> (text rf_table_name, int[] tree_id)</td></tr>
<tr class="memdesc:a8ff04ad76d40eed5036706b365403376"><td class="mdescLeft">&#160;</td><td class="mdescRight">Display the trees in the random forest in a human-readable format. This function displays all the levels of the specified trees. <a href="#a8ff04ad76d40eed5036706b365403376">More...</a><br /></td></tr>
<tr class="separator:a8ff04ad76d40eed5036706b365403376"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:af89e4b67475e2e57039382467fa43747"><td class="memItemLeft" align="right" valign="top">set&lt; text &gt;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="rf_8sql__in.html#af89e4b67475e2e57039382467fa43747">rf_display</a> (text rf_table_name)</td></tr>
<tr class="memdesc:af89e4b67475e2e57039382467fa43747"><td class="mdescLeft">&#160;</td><td class="mdescRight">Display the trees in the random forest in a human-readable format. This function displays all the levels of all trees in the RF. <a href="#af89e4b67475e2e57039382467fa43747">More...</a><br /></td></tr>
<tr class="separator:af89e4b67475e2e57039382467fa43747"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a116584e8a5194a4277e964900ee5383d"><td class="memItemLeft" align="right" valign="top">rf_classify_result&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="rf_8sql__in.html#a116584e8a5194a4277e964900ee5383d">rf_classify</a> (text rf_table_name, text classification_table_name, text result_table_name, boolean is_serial_classification, int verbosity)</td></tr>
<tr class="memdesc:a116584e8a5194a4277e964900ee5383d"><td class="mdescLeft">&#160;</td><td class="mdescRight">Classify dataset using a trained RF. <a href="#a116584e8a5194a4277e964900ee5383d">More...</a><br /></td></tr>
<tr class="separator:a116584e8a5194a4277e964900ee5383d"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a19941b576b2d8ec04d3ffa7e766ba5a3"><td class="memItemLeft" align="right" valign="top">rf_classify_result&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="rf_8sql__in.html#a19941b576b2d8ec04d3ffa7e766ba5a3">rf_classify</a> (text rf_table_name, text classification_table_name, text result_table_name, int verbosity)</td></tr>
<tr class="memdesc:a19941b576b2d8ec04d3ffa7e766ba5a3"><td class="mdescLeft">&#160;</td><td class="mdescRight">Classify dataset using a trained RF. This function does the same thing as the full version defined above except that it will only use parallel classification. <a href="#a19941b576b2d8ec04d3ffa7e766ba5a3">More...</a><br /></td></tr>
<tr class="separator:a19941b576b2d8ec04d3ffa7e766ba5a3"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a57cd1d51be539e0da4fff351f8b477fe"><td class="memItemLeft" align="right" valign="top">rf_classify_result&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="rf_8sql__in.html#a57cd1d51be539e0da4fff351f8b477fe">rf_classify</a> (text rf_table_name, text classification_table_name, text result_table_name)</td></tr>
<tr class="memdesc:a57cd1d51be539e0da4fff351f8b477fe"><td class="mdescLeft">&#160;</td><td class="mdescRight">Classify dataset using a trained RF. This function does the same thing as the full version defined above except that it will only use parallel classification and run in quiet mode. <a href="#a57cd1d51be539e0da4fff351f8b477fe">More...</a><br /></td></tr>
<tr class="separator:a57cd1d51be539e0da4fff351f8b477fe"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:ac6745c3d4cae3443b217f3ba66d11ac4"><td class="memItemLeft" align="right" valign="top">float8&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="rf_8sql__in.html#ac6745c3d4cae3443b217f3ba66d11ac4">rf_score</a> (text rf_table_name, text scoring_table_name, int verbosity)</td></tr>
<tr class="memdesc:ac6745c3d4cae3443b217f3ba66d11ac4"><td class="mdescLeft">&#160;</td><td class="mdescRight">Check the accuracy of a trained RF with a scoring set. <a href="#ac6745c3d4cae3443b217f3ba66d11ac4">More...</a><br /></td></tr>
<tr class="separator:ac6745c3d4cae3443b217f3ba66d11ac4"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a9fd5da138e06924e89541ce4035ce8e1"><td class="memItemLeft" align="right" valign="top">float8&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="rf_8sql__in.html#a9fd5da138e06924e89541ce4035ce8e1">rf_score</a> (text rf_table_name, text scoring_table_name)</td></tr>
<tr class="memdesc:a9fd5da138e06924e89541ce4035ce8e1"><td class="mdescLeft">&#160;</td><td class="mdescRight">Check the accuracy of a trained RF with a scoring set in quiet mode. <a href="#a9fd5da138e06924e89541ce4035ce8e1">More...</a><br /></td></tr>
<tr class="separator:a9fd5da138e06924e89541ce4035ce8e1"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:af33b77b75df225ee65a8acf18705256e"><td class="memItemLeft" align="right" valign="top">boolean&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="rf_8sql__in.html#af33b77b75df225ee65a8acf18705256e">rf_clean</a> (text rf_table_name)</td></tr>
<tr class="memdesc:af33b77b75df225ee65a8acf18705256e"><td class="mdescLeft">&#160;</td><td class="mdescRight">Clean up the trained random forest table and any relevant tables. <a href="#af33b77b75df225ee65a8acf18705256e">More...</a><br /></td></tr>
<tr class="separator:af33b77b75df225ee65a8acf18705256e"><td class="memSeparator" colspan="2">&#160;</td></tr>
</table>
<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
<div class="textblock"><dl class="section date"><dt>Date</dt><dd>April 5, 2012 </dd></dl>
</div><h2 class="groupheader">Function Documentation</h2>
<a class="anchor" id="a116584e8a5194a4277e964900ee5383d"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">rf_classify_result rf_classify </td>
<td>(</td>
<td class="paramtype">text&#160;</td>
<td class="paramname"><em>rf_table_name</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">text&#160;</td>
<td class="paramname"><em>classification_table_name</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">text&#160;</td>
<td class="paramname"><em>result_table_name</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">boolean&#160;</td>
<td class="paramname"><em>is_serial_classification</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">int&#160;</td>
<td class="paramname"><em>verbosity</em>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</div><div class="memdoc">
<p>The classification result will be stored in the table which is defined as:</p>
<p>CREATE TABLE classification_result ( id INT|BIGINT, class SUPPORTED_DATA_TYPE, prob FLOAT );</p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">rf_table_name</td><td>The name of RF table. It can't be NULL. </td></tr>
<tr><td class="paramname">classification_table_name</td><td>The name of the table/view that keeps the data to be classified. It can't be NULL and must exist. </td></tr>
<tr><td class="paramname">result_table_name</td><td>The name of result table. It can't be NULL and must exist. </td></tr>
<tr><td class="paramname">is_serial_classification</td><td>Whether to classify with all trees at once or one by one. It can't be NULL. </td></tr>
<tr><td class="paramname">verbosity</td><td>&gt; 0 means this function runs in verbose mode. It can't be NULL.</td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>An rf_classify_result object. </dd></dl>
</div>
</div>
<a class="anchor" id="a19941b576b2d8ec04d3ffa7e766ba5a3"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">rf_classify_result rf_classify </td>
<td>(</td>
<td class="paramtype">text&#160;</td>
<td class="paramname"><em>rf_table_name</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">text&#160;</td>
<td class="paramname"><em>classification_table_name</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">text&#160;</td>
<td class="paramname"><em>result_table_name</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">int&#160;</td>
<td class="paramname"><em>verbosity</em>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</div><div class="memdoc">
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">rf_table_name</td><td>The name of RF table. It can't be NULL. </td></tr>
<tr><td class="paramname">classification_table_name</td><td>The name of the table/view that keeps the data to be classified. It can't be NULL and must exist. </td></tr>
<tr><td class="paramname">result_table_name</td><td>The name of result table. It can't be NULL and must exist. </td></tr>
<tr><td class="paramname">verbosity</td><td>&gt; 0 means this function runs in verbose mode. It can't be NULL.</td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>An rf_classify_result object. </dd></dl>
</div>
</div>
<a class="anchor" id="a57cd1d51be539e0da4fff351f8b477fe"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">rf_classify_result rf_classify </td>
<td>(</td>
<td class="paramtype">text&#160;</td>
<td class="paramname"><em>rf_table_name</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">text&#160;</td>
<td class="paramname"><em>classification_table_name</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">text&#160;</td>
<td class="paramname"><em>result_table_name</em>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</div><div class="memdoc">
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">rf_table_name</td><td>The name of RF table. It can't be NULL. </td></tr>
<tr><td class="paramname">classification_table_name</td><td>The name of the table/view that keeps the data to be classified. It can't be NULL and must exist. </td></tr>
<tr><td class="paramname">result_table_name</td><td>The name of result table. It can't be NULL and must exist.</td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>An rf_classify_result object. </dd></dl>
</div>
</div>
<a class="anchor" id="af33b77b75df225ee65a8acf18705256e"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">boolean rf_clean </td>
<td>(</td>
<td class="paramtype">text&#160;</td>
<td class="paramname"><em>rf_table_name</em></td><td>)</td>
<td></td>
</tr>
</table>
</div><div class="memdoc">
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">rf_table_name</td><td>The name of RF table. It can't be NULL.</td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>The status of that cleanup operation. </dd></dl>
</div>
</div>
<a class="anchor" id="a60b3f093409a14331b7601522ac0ac42"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">set&lt;text&gt; rf_display </td>
<td>(</td>
<td class="paramtype">text&#160;</td>
<td class="paramname"><em>rf_table_name</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">int[]&#160;</td>
<td class="paramname"><em>tree_id</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">int&#160;</td>
<td class="paramname"><em>max_depth</em>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</div><div class="memdoc">
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">rf_table_name</td><td>The name of RF table. It can't be NULL and must exist. </td></tr>
<tr><td class="paramname">tree_id</td><td>The trees to be displayed. If it's NULL, we display all the trees. </td></tr>
<tr><td class="paramname">max_depth</td><td>The max depth to be displayed. If it's NULL, this function will show all levels.</td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>The text representing the trees in the random forest in a human-readable format. </dd></dl>
</div>
</div>
<a class="anchor" id="a8ff04ad76d40eed5036706b365403376"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">set&lt;text&gt; rf_display </td>
<td>(</td>
<td class="paramtype">text&#160;</td>
<td class="paramname"><em>rf_table_name</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">int[]&#160;</td>
<td class="paramname"><em>tree_id</em>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</div><div class="memdoc">
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">rf_table_name</td><td>The name of RF table. It can't be NULL and must exist. </td></tr>
<tr><td class="paramname">tree_id</td><td>The trees to be displayed. If it's NULL, we display all the trees.</td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>The text representing the trees in the random forest in a human-readable format. </dd></dl>
</div>
</div>
<a class="anchor" id="af89e4b67475e2e57039382467fa43747"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">set&lt;text&gt; rf_display </td>
<td>(</td>
<td class="paramtype">text&#160;</td>
<td class="paramname"><em>rf_table_name</em></td><td>)</td>
<td></td>
</tr>
</table>
</div><div class="memdoc">
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">rf_table_name</td><td>The name of RF table. It can't be NULL and must exist.</td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>The text representing the trees in the random forest in a human-readable format. </dd></dl>
</div>
</div>
<a class="anchor" id="ac6745c3d4cae3443b217f3ba66d11ac4"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">float8 rf_score </td>
<td>(</td>
<td class="paramtype">text&#160;</td>
<td class="paramname"><em>rf_table_name</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">text&#160;</td>
<td class="paramname"><em>scoring_table_name</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">int&#160;</td>
<td class="paramname"><em>verbosity</em>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</div><div class="memdoc">
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">rf_table_name</td><td>The name of RF table. It can't be NULL. </td></tr>
<tr><td class="paramname">scoring_table_name</td><td>The name of the table/view that keeps the data to be scored. It can't be NULL and must exist. </td></tr>
<tr><td class="paramname">verbosity</td><td>&gt; 0 means this function runs in verbose mode. It can't be NULL.</td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>The estimated accuracy information. </dd></dl>
</div>
</div>
<a class="anchor" id="a9fd5da138e06924e89541ce4035ce8e1"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">float8 rf_score </td>
<td>(</td>
<td class="paramtype">text&#160;</td>
<td class="paramname"><em>rf_table_name</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">text&#160;</td>
<td class="paramname"><em>scoring_table_name</em>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</div><div class="memdoc">
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">rf_table_name</td><td>The name of RF table. It can't be NULL. </td></tr>
<tr><td class="paramname">scoring_table_name</td><td>The name of the table/view that keeps the data to be scored. It can't be NULL and must exist.</td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>The estimated accuracy information. </dd></dl>
</div>
</div>
<a class="anchor" id="a3cf718282802b63dc0a0d19b34f6829b"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">rf_train_result rf_train </td>
<td>(</td>
<td class="paramtype">text&#160;</td>
<td class="paramname"><em>split_criterion</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">text&#160;</td>
<td class="paramname"><em>training_table_name</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">text&#160;</td>
<td class="paramname"><em>result_rf_table_name</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">int&#160;</td>
<td class="paramname"><em>num_trees</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">int&#160;</td>
<td class="paramname"><em>features_per_node</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">float&#160;</td>
<td class="paramname"><em>sampling_percentage</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">text&#160;</td>
<td class="paramname"><em>continuous_feature_names</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">text&#160;</td>
<td class="paramname"><em>feature_col_names</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">text&#160;</td>
<td class="paramname"><em>id_col_name</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">text&#160;</td>
<td class="paramname"><em>class_col_name</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">text&#160;</td>
<td class="paramname"><em>how2handle_missing_value</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">int&#160;</td>
<td class="paramname"><em>max_tree_depth</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">float&#160;</td>
<td class="paramname"><em>node_prune_threshold</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">float&#160;</td>
<td class="paramname"><em>node_split_threshold</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">int&#160;</td>
<td class="paramname"><em>verbosity</em>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</div><div class="memdoc">
<p>We discretize continuous features on local regions during training rather than discretizing on the whole dataset prior to training because local discretization takes into account the context sensitivity.</p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">split_criterion</td><td>The name of the split criterion that should be used for tree construction. The valid values are ‘infogain’, ‘gainratio’, and ‘gini’. It can't be NULL. Information gain (infogain) and Gini index (gini) are biased toward multivalued attributes. Gain ratio (gainratio) adjusts for this bias. However, it tends to prefer unbalanced splits in which one partition is much smaller than the others. </td></tr>
<tr><td class="paramname">training_table_name</td><td>The name of the table/view with the training data. It can't be NULL and must exist. </td></tr>
<tr><td class="paramname">result_rf_table_name</td><td>The name of the table where the resulting trees will be stored. It can't be NULL and must not exist. </td></tr>
<tr><td class="paramname">num_trees</td><td>The number of trees to be trained. If it's NULL, 10 will be used. </td></tr>
<tr><td class="paramname">features_per_node</td><td>The number of features to be considered when finding a best split. If it's NULL, sqrt(p), where p is the number of features, will be used. </td></tr>
<tr><td class="paramname">sampling_percentage</td><td>The percentage of records sampled to train a tree. If it's NULL, 0.632 bootstrap will be used. </td></tr>
<tr><td class="paramname">continuous_feature_names</td><td>A comma-separated list of the names of the features whose values are continuous. NULL means there are no continuous features. </td></tr>
<tr><td class="paramname">feature_col_names</td><td>A comma-separated list of names of the table columns, each of which defines a feature. NULL means all the columns except the ID and Class columns will be treated as features. </td></tr>
<tr><td class="paramname">id_col_name</td><td>The name of the column containing id of each record. It can't be NULL. </td></tr>
<tr><td class="paramname">class_col_name</td><td>The name of the column containing correct class of each record. It can't be NULL. </td></tr>
<tr><td class="paramname">how2handle_missing_value</td><td>The way to handle missing value. The valid values are 'explicit' and 'ignore'. It can't be NULL. </td></tr>
<tr><td class="paramname">max_tree_depth</td><td>The maximum tree depth. It can't be NULL. </td></tr>
<tr><td class="paramname">node_prune_threshold</td><td>The minimum percentage of the number of records required in a child node. It can't be NULL. Its range is [0.0, 1.0]. This threshold only applies to non-root nodes. Therefore, if the ratio (p) of the sampled training set size of a tree (the number of rows) to the total training set size is less than or equal to the value of this parameter, then the tree has only one node (the root node). If its value is 1, then p is necessarily less than or equal to 1, so the tree has only one node (the root node). If its value is 0, then no nodes will be pruned by this parameter. </td></tr>
<tr><td class="paramname">node_split_threshold</td><td>The minimum percentage of the number of records required in a node in order for a further split to be possible. It can't be NULL. Its range is [0.0, 1.0]. If the ratio (p) of the sampled training set size of a tree (the number of rows) to the total training set size is less than the value of this parameter, then the root node will be a leaf, so the trained tree has only one node. If p is equal to the value of this parameter, then the trained tree has only two levels, since only the root node will grow. If its value is 0, then trees can grow extensively. </td></tr>
<tr><td class="paramname">verbosity</td><td>&gt; 0 means this function runs in verbose mode. It can't be NULL.</td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>An rf_train_result object. </dd></dl>
</div>
</div>
<a class="anchor" id="a3981c021e89c0c5f40ab436d96848845"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">rf_train_result rf_train </td>
<td>(</td>
<td class="paramtype">text&#160;</td>
<td class="paramname"><em>split_criterion</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">text&#160;</td>
<td class="paramname"><em>training_table_name</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">text&#160;</td>
<td class="paramname"><em>result_rf_table_name</em>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</div><div class="memdoc">
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">split_criterion</td><td>The split criterion used for tree construction. The valid values are infogain, gainratio, or gini. It can't be NULL. </td></tr>
<tr><td class="paramname">training_table_name</td><td>The name of the table/view with the training data. It can't be NULL and must exist. </td></tr>
<tr><td class="paramname">result_rf_table_name</td><td>The name of the table where the resulting trees will be stored. It can't be NULL and must not exist.</td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>An rf_train_result object. </dd></dl>
</div>
</div>
</div><!-- contents -->
</div><!-- doc-content -->
<!-- start footer part -->
<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
<ul>
<li class="navelem"><a class="el" href="dir_704eb8350b43e1ca74c0f90ed1ba450e.html">methods</a></li><li class="navelem"><a class="el" href="dir_fbc4f2259ae1d6f6cc75298ebbd15532.html">cart</a></li><li class="navelem"><a class="el" href="dir_42a199e31e82b0c41cf7961a25e597db.html">src</a></li><li class="navelem"><a class="el" href="dir_64bd18b6b0e4b6a9c2cd2ca1d5a05b4c.html">pg_gp</a></li><li class="navelem"><a class="el" href="rf_8sql__in.html">rf.sql_in</a></li>
<li class="footer">Generated on Fri Apr 8 2016 16:09:58 for MADlib by
<a href="http://www.doxygen.org/index.html">
<img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.10 </li>
</ul>
</div>
</body>
</html>