| <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> |
| <html xmlns="http://www.w3.org/1999/xhtml"> |
| <head> |
| <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/> |
| <title>MADlib: dt_preproc.sql_in Source File</title> |
| |
| <link href="tabs.css" rel="stylesheet" type="text/css"/> |
| <link href="doxygen.css" rel="stylesheet" type="text/css" /> |
| <link href="navtree.css" rel="stylesheet" type="text/css"/> |
| <script type="text/javascript" src="jquery.js"></script> |
| <script type="text/javascript" src="resize.js"></script> |
| <script type="text/javascript" src="navtree.js"></script> |
| <script type="text/javascript"> |
| $(document).ready(initResizable); |
| </script> |
| <link href="search/search.css" rel="stylesheet" type="text/css"/> |
| <script type="text/javascript" src="search/search.js"></script> |
| <script type="text/javascript"> |
| $(document).ready(function() { searchBox.OnSelectItem(0); }); |
| </script> |
| <script src="../mathjax/MathJax.js"> |
| MathJax.Hub.Config({ |
| extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"], |
| jax: ["input/TeX","output/HTML-CSS"], |
| }); |
| </script> |
| </head> |
| <body> |
| <div id="top"><!-- do not remove this div! --> |
| |
| |
| <div id="titlearea"> |
| <table cellspacing="0" cellpadding="0"> |
| <tbody> |
| <tr style="height: 56px;"> |
| |
| |
| <td style="padding-left: 0.5em;"> |
| <div id="projectname">MADlib |
|  <span id="projectnumber">0.6</span> <span style="font-size:10pt; font-style:italic"><a href="../latest/./dt__preproc_8sql__in_source.html"> A newer version is available</a></span> |
| </div> |
| <div id="projectbrief">User Documentation</div> |
| </td> |
| |
| |
| |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| |
| <!-- Generated by Doxygen 1.7.5.1 --> |
| <script type="text/javascript"> |
| var searchBox = new SearchBox("searchBox", "search",false,'Search'); |
| </script> |
| <script type="text/javascript" src="dynsections.js"></script> |
| <div id="navrow1" class="tabs"> |
| <ul class="tablist"> |
| <li><a href="index.html"><span>Main Page</span></a></li> |
| <li><a href="modules.html"><span>Modules</span></a></li> |
| <li class="current"><a href="files.html"><span>Files</span></a></li> |
| <li> |
| <div id="MSearchBox" class="MSearchBoxInactive"> |
| <span class="left"> |
| <img id="MSearchSelect" src="search/mag_sel.png" |
| onmouseover="return searchBox.OnSearchSelectShow()" |
| onmouseout="return searchBox.OnSearchSelectHide()" |
| alt=""/> |
| <input type="text" id="MSearchField" value="Search" accesskey="S" |
| onfocus="searchBox.OnSearchFieldFocus(true)" |
| onblur="searchBox.OnSearchFieldFocus(false)" |
| onkeyup="searchBox.OnSearchFieldChange(event)"/> |
| </span><span class="right"> |
| <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a> |
| </span> |
| </div> |
| </li> |
| </ul> |
| </div> |
| <div id="navrow2" class="tabs2"> |
| <ul class="tablist"> |
| <li><a href="files.html"><span>File List</span></a></li> |
| <li><a href="globals.html"><span>File Members</span></a></li> |
| </ul> |
| </div> |
| </div> |
| <div id="side-nav" class="ui-resizable side-nav-resizable"> |
| <div id="nav-tree"> |
| <div id="nav-tree-contents"> |
| </div> |
| </div> |
| <div id="splitbar" style="-moz-user-select:none;" |
| class="ui-resizable-handle"> |
| </div> |
| </div> |
| <script type="text/javascript"> |
| initNavTree('dt__preproc_8sql__in.html',''); |
| </script> |
| <div id="doc-content"> |
| <div class="header"> |
| <div class="headertitle"> |
| <div class="title">dt_preproc.sql_in</div> </div> |
| </div> |
| <div class="contents"> |
| <a href="dt__preproc_8sql__in.html">Go to the documentation of this file.</a><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">/* ----------------------------------------------------------------------- */</span><span class="comment">/** </span> |
| <a name="l00002"></a>00002 <span class="comment"> *</span> |
| <a name="l00003"></a>00003 <span class="comment"> * @file dt_preproc.sql_in</span> |
| <a name="l00004"></a>00004 <span class="comment"> *</span> |
| <a name="l00005"></a>00005 <span class="comment"> * @brief Functions used in C4.5 and random forest for data preprocessing.</span> |
| <a name="l00006"></a>00006 <span class="comment"> *</span> |
| <a name="l00007"></a>00007 <span class="comment"> * @create April 5, 2012</span> |
| <a name="l00008"></a>00008 <span class="comment"> * @modified July 19, 2012</span> |
| <a name="l00009"></a>00009 <span class="comment"> *</span> |
| <a name="l00010"></a>00010 <span class="comment"> */</span><span class="comment">/* ----------------------------------------------------------------------- */</span> |
| <a name="l00011"></a>00011 |
| <a name="l00012"></a>00012 |
| <a name="l00013"></a>00013 m4_include(`SQLCommon.m4<span class="stringliteral">')</span> |
| <a name="l00014"></a>00014 <span class="stringliteral"></span> |
| <a name="l00015"></a>00015 <span class="stringliteral">/* Own macro definitions */</span> |
| <a name="l00016"></a>00016 <span class="stringliteral">m4_ifelse(</span> |
| <a name="l00017"></a>00017 <span class="stringliteral"> m4_eval(</span> |
| <a name="l00018"></a>00018 <span class="stringliteral"> m4_ifdef(`__GREENPLUM__'</span>, 1, 0) && |
| <a name="l00019"></a>00019 __DBMS_VERSION_MAJOR__ * 100 + __DBMS_VERSION_MINOR__ < 401 |
| <a name="l00020"></a>00020 ), 1, |
| <a name="l00021"></a>00021 `m4_define(`__GREENPLUM_PRE_4_1__<span class="charliteral">')'</span> |
| <a name="l00022"></a>00022 ) |
| <a name="l00023"></a>00023 m4_ifelse( |
| <a name="l00024"></a>00024 m4_eval( |
| <a name="l00025"></a>00025 m4_ifdef(`__POSTGRESQL__<span class="stringliteral">', 1, 0) &&</span> |
| <a name="l00026"></a>00026 <span class="stringliteral"> __DBMS_VERSION_MAJOR__ < 9</span> |
| <a name="l00027"></a>00027 <span class="stringliteral"> ), 1,</span> |
| <a name="l00028"></a>00028 <span class="stringliteral"> `m4_define(`__POSTGRESQL_PRE_9_0__'</span>)<span class="stringliteral">'</span> |
| <a name="l00029"></a>00029 <span class="stringliteral">)</span> |
| <a name="l00030"></a>00030 <span class="stringliteral">m4_ifelse(</span> |
| <a name="l00031"></a>00031 <span class="stringliteral"> m4_eval(</span> |
| <a name="l00032"></a>00032 <span class="stringliteral"> m4_ifdef(`__GREENPLUM__'</span>, 1, 0) && |
| <a name="l00033"></a>00033 __DBMS_VERSION_MAJOR__ * 10000 + |
| <a name="l00034"></a>00034 __DBMS_VERSION_MINOR__ * 100 + |
| <a name="l00035"></a>00035 __DBMS_VERSION_PATCH__ >= 40201 |
| <a name="l00036"></a>00036 ), 1, |
| <a name="l00037"></a>00037 `m4_define(`__GREENPLUM_GE_4_2_1__<span class="charliteral">')'</span> |
| <a name="l00038"></a>00038 ) |
| <a name="l00039"></a>00039 |
| <a name="l00040"></a>00040 <span class="comment">/*</span> |
| <a name="l00041"></a>00041 <span class="comment"> * The file contains the functions to encode a training/classification table for </span> |
| <a name="l00042"></a>00042 <span class="comment"> * C4.5 and random forest (RF). Given a training table, we encode it into 4 tables: </span> |
| <a name="l00043"></a>00043 <span class="comment"> * + A table that contains the distinct values and their assigned IDs for all</span> |
| <a name="l00044"></a>00044 <span class="comment"> * features. We call it the Key-Value(KV) table for features.</span> |
| <a name="l00045"></a>00045 <span class="comment"> * + A table that contains the distinct labels and their assigned IDs for the </span> |
| <a name="l00046"></a>00046 <span class="comment"> * class column. We call it the KV table for class.</span> |
| <a name="l00047"></a>00047 <span class="comment"> * + A table that contains metadata descriptions about the columns of the training</span> |
| <a name="l00048"></a>00048 <span class="comment"> * table. We call it the metatable.</span> |
| <a name="l00049"></a>00049 <span class="comment"> * + A table that contains an encoded version of the training table using the </span> |
| <a name="l00050"></a>00050 <span class="comment"> * KV tables. We call it the encoded table.</span> |
| <a name="l00051"></a>00051 <span class="comment"> *</span> |
| <a name="l00052"></a>00052 <span class="comment"> * For a classification table, we only need the first three tables. We will use</span> |
| <a name="l00053"></a>00053 <span class="comment"> * Golf dataset as an example to illustrate the generated tables:</span> |
| <a name="l00054"></a>00054 <span class="comment"> *</span> |
| <a name="l00055"></a>00055 <span class="comment"> * testdb=# select * from golf order by id;</span> |
| <a name="l00056"></a>00056 <span class="comment"> * id | outlook | temperature | humidity | windy | class </span> |
| <a name="l00057"></a>00057 <span class="comment"> * ----+----------+-------------+----------+--------+--------------</span> |
| <a name="l00058"></a>00058 <span class="comment"> * 1 | sunny | 85 | 85 | false | Do not Play</span> |
| <a name="l00059"></a>00059 <span class="comment"> * 2 | sunny | 80 | 90 | true | Do not Play</span> |
| <a name="l00060"></a>00060 <span class="comment"> * 3 | overcast | 83 | 78 | false | Play</span> |
| <a name="l00061"></a>00061 <span class="comment"> * 4 | rain | 70 | 96 | false | Play</span> |
| <a name="l00062"></a>00062 <span class="comment"> * 5 | rain | 68 | 80 | false | Play</span> |
| <a name="l00063"></a>00063 <span class="comment"> * 6 | rain | 65 | 70 | true | Do not Play</span> |
| <a name="l00064"></a>00064 <span class="comment"> * 7 | overcast | 64 | 65 | true | Play</span> |
| <a name="l00065"></a>00065 <span class="comment"> * 8 | sunny | 72 | 95 | false | Do not Play</span> |
| <a name="l00066"></a>00066 <span class="comment"> * 9 | sunny | 69 | 70 | false | Play</span> |
| <a name="l00067"></a>00067 <span class="comment"> * 10 | rain | 75 | 80 | false | Play</span> |
| <a name="l00068"></a>00068 <span class="comment"> * 11 | sunny | 75 | 70 | true | Play</span> |
| <a name="l00069"></a>00069 <span class="comment"> * 12 | overcast | 72 | 90 | true | Play</span> |
| <a name="l00070"></a>00070 <span class="comment"> * 13 | overcast | 81 | 75 | false | Play</span> |
| <a name="l00071"></a>00071 <span class="comment"> * 14 | rain | 71 | 80 | true | Do not Play</span> |
| <a name="l00072"></a>00072 <span class="comment"> * (14 rows)</span> |
| <a name="l00073"></a>00073 <span class="comment"> *</span> |
| <a name="l00074"></a>00074 <span class="comment"> *</span> |
| <a name="l00075"></a>00075 <span class="comment"> * The metatable contains the information of the columns in the training table.</span> |
| <a name="l00076"></a>00076 <span class="comment"> * For each column, it has a record whose structure is defined as:</span> |
| <a name="l00077"></a>00077 <span class="comment"> *</span> |
| <a name="l00078"></a>00078 <span class="comment"> * +id The ID assigned to a feature/class/id column. For the class </span> |
| <a name="l00079"></a>00079 <span class="comment"> * colum,it's 0. To be determistic, the IDs for feature columns</span> |
| <a name="l00080"></a>00080 <span class="comment"> * starts at 1 and are assigned according to the alphabet order </span> |
| <a name="l00081"></a>00081 <span class="comment"> * of the column names. The ID for the id column is the largest</span> |
| <a name="l00082"></a>00082 <span class="comment"> * feature ID plus one. </span> |
| <a name="l00083"></a>00083 <span class="comment"> * +column_name The name of the class/feature/id column.</span> |
| <a name="l00084"></a>00084 <span class="comment"> * +column_type 'c' means the column is a class. </span> |
| <a name="l00085"></a>00085 <span class="comment"> * 'f' means it's a feature column.</span> |
| <a name="l00086"></a>00086 <span class="comment"> * 'i' means it's an id column.</span> |
| <a name="l00087"></a>00087 <span class="comment"> * +is_cont 't' means the feature is continuous.</span> |
| <a name="l00088"></a>00088 <span class="comment"> * 'f' means it's discrete.</span> |
| <a name="l00089"></a>00089 <span class="comment"> * +table_oid The OID of the KV table for features/class. </span> |
| <a name="l00090"></a>00090 <span class="comment"> * For the id column, there is no KV table.</span> |
| <a name="l00091"></a>00091 <span class="comment"> * +num_dist_value The number of distinct values for a feature/class column. </span> |
| <a name="l00092"></a>00092 <span class="comment"> *</span> |
| <a name="l00093"></a>00093 <span class="comment"> * The metatable for the Golf dataset looks like this:</span> |
| <a name="l00094"></a>00094 <span class="comment"> * testdb=# select * from golf_meta order by id;</span> |
| <a name="l00095"></a>00095 <span class="comment"> * id | column_name | column_type | is_cont | table_oid | num_dist_value </span> |
| <a name="l00096"></a>00096 <span class="comment"> * ----+-------------+-------------+---------+-----------+----------------</span> |
| <a name="l00097"></a>00097 <span class="comment"> * 0 | class | c | f | 787672 | 2</span> |
| <a name="l00098"></a>00098 <span class="comment"> * 1 | humidity | f | t | 787749 | 9</span> |
| <a name="l00099"></a>00099 <span class="comment"> * 2 | outlook | f | f | 787749 | 3</span> |
| <a name="l00100"></a>00100 <span class="comment"> * 3 | temperature | f | t | 787749 | 12</span> |
| <a name="l00101"></a>00101 <span class="comment"> * 4 | windy | f | f | 787749 | 2</span> |
| <a name="l00102"></a>00102 <span class="comment"> * 5 | id | i | f | | </span> |
| <a name="l00103"></a>00103 <span class="comment"> * (6 rows)</span> |
| <a name="l00104"></a>00104 <span class="comment"> * </span> |
| <a name="l00105"></a>00105 <span class="comment"> * The KV table for features contains a record for each distinct value. The record</span> |
| <a name="l00106"></a>00106 <span class="comment"> * structure is:</span> |
| <a name="l00107"></a>00107 <span class="comment"> * +fid The ID assigned to a feature.</span> |
| <a name="l00108"></a>00108 <span class="comment"> * +fval For a discrete feature, it's the distinct value. </span> |
| <a name="l00109"></a>00109 <span class="comment"> * For a continuous feature, it's NULL.</span> |
| <a name="l00110"></a>00110 <span class="comment"> * +code For a discrete feature, it's the assigned key. </span> |
| <a name="l00111"></a>00111 <span class="comment"> * For a continuous feature, it's the average value.</span> |
| <a name="l00112"></a>00112 <span class="comment"> *</span> |
| <a name="l00113"></a>00113 <span class="comment"> * testdb=# select * from golf_kv_features order by fid, code;</span> |
| <a name="l00114"></a>00114 <span class="comment"> * fid | fval | code </span> |
| <a name="l00115"></a>00115 <span class="comment"> * -----+----------+------------------</span> |
| <a name="l00116"></a>00116 <span class="comment"> * 1 | | 80.2857142857143</span> |
| <a name="l00117"></a>00117 <span class="comment"> * 2 | overcast | 1</span> |
| <a name="l00118"></a>00118 <span class="comment"> * 2 | rain | 2</span> |
| <a name="l00119"></a>00119 <span class="comment"> * 2 | sunny | 3</span> |
| <a name="l00120"></a>00120 <span class="comment"> * 3 | | 73.5714285714286</span> |
| <a name="l00121"></a>00121 <span class="comment"> * 4 | false | 1</span> |
| <a name="l00122"></a>00122 <span class="comment"> * 4 | true | 2</span> |
| <a name="l00123"></a>00123 <span class="comment"> * (7 rows)</span> |
| <a name="l00124"></a>00124 <span class="comment"> *</span> |
| <a name="l00125"></a>00125 <span class="comment"> * The KV table for class labels contains a record for each label. The record</span> |
| <a name="l00126"></a>00126 <span class="comment"> * structure is the same as the KV table for features.</span> |
| <a name="l00127"></a>00127 <span class="comment"> * testdb=# select * from golf_kv_class order by fid, code;</span> |
| <a name="l00128"></a>00128 <span class="comment"> * fid | fval | code </span> |
| <a name="l00129"></a>00129 <span class="comment"> * -----+--------------+------</span> |
| <a name="l00130"></a>00130 <span class="comment"> * 0 | Do not Play | 1</span> |
| <a name="l00131"></a>00131 <span class="comment"> * 0 | Play | 2</span> |
| <a name="l00132"></a>00132 <span class="comment"> * </span> |
| <a name="l00133"></a>00133 <span class="comment"> * The encoded table has a record for each cell in the training table. The record</span> |
| <a name="l00134"></a>00134 <span class="comment"> * structure is:</span> |
| <a name="l00135"></a>00135 <span class="comment"> * +id The ID from the training table. </span> |
| <a name="l00136"></a>00136 <span class="comment"> * +fid The ID assigned to a feature</span> |
| <a name="l00137"></a>00137 <span class="comment"> * +fval For a discrete feature, it's the key. </span> |
| <a name="l00138"></a>00138 <span class="comment"> * For a continuous feature, it's the original feature value.</span> |
| <a name="l00139"></a>00139 <span class="comment"> * +is_cont 't' if the feature is continuous, or 'f' for the discrete one.</span> |
| <a name="l00140"></a>00140 <span class="comment"> * +class The encoded value of the class label.</span> |
| <a name="l00141"></a>00141 <span class="comment"> * </span> |
| <a name="l00142"></a>00142 <span class="comment"> * For Golf dataset, the vertical encoded table looks like this:</span> |
| <a name="l00143"></a>00143 <span class="comment"> * testdb=# select * from golf_ed order by fid, id;</span> |
| <a name="l00144"></a>00144 <span class="comment"> * id | fid | fval | is_cont | class </span> |
| <a name="l00145"></a>00145 <span class="comment"> * ----+-----+------+---------+-------</span> |
| <a name="l00146"></a>00146 <span class="comment"> * 1 | 1 | 85 | t | 1</span> |
| <a name="l00147"></a>00147 <span class="comment"> * 2 | 1 | 90 | t | 1</span> |
| <a name="l00148"></a>00148 <span class="comment"> * 3 | 1 | 78 | t | 2</span> |
| <a name="l00149"></a>00149 <span class="comment"> * 4 | 1 | 96 | t | 2</span> |
| <a name="l00150"></a>00150 <span class="comment"> * 5 | 1 | 80 | t | 2</span> |
| <a name="l00151"></a>00151 <span class="comment"> * 6 | 1 | 70 | t | 1</span> |
| <a name="l00152"></a>00152 <span class="comment"> * 7 | 1 | 65 | t | 2</span> |
| <a name="l00153"></a>00153 <span class="comment"> * 8 | 1 | 95 | t | 1</span> |
| <a name="l00154"></a>00154 <span class="comment"> * 9 | 1 | 70 | t | 2</span> |
| <a name="l00155"></a>00155 <span class="comment"> * 10 | 1 | 80 | t | 2</span> |
| <a name="l00156"></a>00156 <span class="comment"> * 11 | 1 | 70 | t | 2</span> |
| <a name="l00157"></a>00157 <span class="comment"> * 12 | 1 | 90 | t | 2</span> |
| <a name="l00158"></a>00158 <span class="comment"> * 13 | 1 | 75 | t | 2</span> |
| <a name="l00159"></a>00159 <span class="comment"> * 14 | 1 | 80 | t | 1</span> |
| <a name="l00160"></a>00160 <span class="comment"> * 1 | 2 | 3 | f | 1</span> |
| <a name="l00161"></a>00161 <span class="comment"> * 2 | 2 | 3 | f | 1</span> |
| <a name="l00162"></a>00162 <span class="comment"> * 3 | 2 | 1 | f | 2</span> |
| <a name="l00163"></a>00163 <span class="comment"> * 4 | 2 | 2 | f | 2</span> |
| <a name="l00164"></a>00164 <span class="comment"> * 5 | 2 | 2 | f | 2</span> |
| <a name="l00165"></a>00165 <span class="comment"> * 6 | 2 | 2 | f | 1</span> |
| <a name="l00166"></a>00166 <span class="comment"> * 7 | 2 | 1 | f | 2</span> |
| <a name="l00167"></a>00167 <span class="comment"> * 8 | 2 | 3 | f | 1</span> |
| <a name="l00168"></a>00168 <span class="comment"> * 9 | 2 | 3 | f | 2</span> |
| <a name="l00169"></a>00169 <span class="comment"> * 10 | 2 | 2 | f | 2</span> |
| <a name="l00170"></a>00170 <span class="comment"> * 11 | 2 | 3 | f | 2</span> |
| <a name="l00171"></a>00171 <span class="comment"> * 12 | 2 | 1 | f | 2</span> |
| <a name="l00172"></a>00172 <span class="comment"> * 13 | 2 | 1 | f | 2</span> |
| <a name="l00173"></a>00173 <span class="comment"> * 14 | 2 | 2 | f | 1</span> |
| <a name="l00174"></a>00174 <span class="comment"> * 1 | 3 | 85 | t | 1</span> |
| <a name="l00175"></a>00175 <span class="comment"> * 2 | 3 | 80 | t | 1</span> |
| <a name="l00176"></a>00176 <span class="comment"> * 3 | 3 | 83 | t | 2</span> |
| <a name="l00177"></a>00177 <span class="comment"> * 4 | 3 | 70 | t | 2</span> |
| <a name="l00178"></a>00178 <span class="comment"> * 5 | 3 | 68 | t | 2</span> |
| <a name="l00179"></a>00179 <span class="comment"> * 6 | 3 | 65 | t | 1</span> |
| <a name="l00180"></a>00180 <span class="comment"> * 7 | 3 | 64 | t | 2</span> |
| <a name="l00181"></a>00181 <span class="comment"> * 8 | 3 | 72 | t | 1</span> |
| <a name="l00182"></a>00182 <span class="comment"> * 9 | 3 | 69 | t | 2</span> |
| <a name="l00183"></a>00183 <span class="comment"> * 10 | 3 | 75 | t | 2</span> |
| <a name="l00184"></a>00184 <span class="comment"> * 11 | 3 | 75 | t | 2</span> |
| <a name="l00185"></a>00185 <span class="comment"> * 12 | 3 | 72 | t | 2</span> |
| <a name="l00186"></a>00186 <span class="comment"> * 13 | 3 | 81 | t | 2</span> |
| <a name="l00187"></a>00187 <span class="comment"> * 14 | 3 | 71 | t | 1</span> |
| <a name="l00188"></a>00188 <span class="comment"> * 1 | 4 | 1 | f | 1</span> |
| <a name="l00189"></a>00189 <span class="comment"> * 2 | 4 | 2 | f | 1</span> |
| <a name="l00190"></a>00190 <span class="comment"> * 3 | 4 | 1 | f | 2</span> |
| <a name="l00191"></a>00191 <span class="comment"> * 4 | 4 | 1 | f | 2</span> |
| <a name="l00192"></a>00192 <span class="comment"> * 5 | 4 | 1 | f | 2</span> |
| <a name="l00193"></a>00193 <span class="comment"> * 6 | 4 | 2 | f | 1</span> |
| <a name="l00194"></a>00194 <span class="comment"> * 7 | 4 | 2 | f | 2</span> |
| <a name="l00195"></a>00195 <span class="comment"> * 8 | 4 | 1 | f | 1</span> |
| <a name="l00196"></a>00196 <span class="comment"> * 9 | 4 | 1 | f | 2</span> |
| <a name="l00197"></a>00197 <span class="comment"> * 10 | 4 | 1 | f | 2</span> |
| <a name="l00198"></a>00198 <span class="comment"> * 11 | 4 | 2 | f | 2</span> |
| <a name="l00199"></a>00199 <span class="comment"> * 12 | 4 | 2 | f | 2</span> |
| <a name="l00200"></a>00200 <span class="comment"> * 13 | 4 | 1 | f | 2</span> |
| <a name="l00201"></a>00201 <span class="comment"> * 14 | 4 | 2 | f | 1</span> |
| <a name="l00202"></a>00202 <span class="comment"> * (56 rows)</span> |
| <a name="l00203"></a>00203 <span class="comment"> *</span> |
| <a name="l00204"></a>00204 <span class="comment"> * On databases that support compression, we can leverage that feature</span> |
| <a name="l00205"></a>00205 <span class="comment"> * to reduce the space required for keeping the encoded table.</span> |
| <a name="l00206"></a>00206 <span class="comment"> *</span> |
| <a name="l00207"></a>00207 <span class="comment"> * For classification, we will use the metatable and KV tables to encode </span> |
| <a name="l00208"></a>00208 <span class="comment"> * the table (horizontal table) to be classified into some like this: </span> |
| <a name="l00209"></a>00209 <span class="comment"> *</span> |
| <a name="l00210"></a>00210 <span class="comment"> * testdb# select * from golf_ed order by id;</span> |
| <a name="l00211"></a>00211 <span class="comment"> * id | fvals | class </span> |
| <a name="l00212"></a>00212 <span class="comment"> * ----+-------------+-------</span> |
| <a name="l00213"></a>00213 <span class="comment"> * 1 | {85,3,85,1} | 1</span> |
| <a name="l00214"></a>00214 <span class="comment"> * 2 | {90,3,80,2} | 1</span> |
| <a name="l00215"></a>00215 <span class="comment"> * 3 | {78,1,83,1} | 2</span> |
| <a name="l00216"></a>00216 <span class="comment"> * 4 | {96,2,70,1} | 2</span> |
| <a name="l00217"></a>00217 <span class="comment"> * 5 | {80,2,68,1} | 2</span> |
| <a name="l00218"></a>00218 <span class="comment"> * 6 | {70,2,65,2} | 1</span> |
| <a name="l00219"></a>00219 <span class="comment"> * 7 | {65,1,64,2} | 2</span> |
| <a name="l00220"></a>00220 <span class="comment"> * 8 | {95,3,72,1} | 1</span> |
| <a name="l00221"></a>00221 <span class="comment"> * 9 | {70,3,69,1} | 2</span> |
| <a name="l00222"></a>00222 <span class="comment"> * 10 | {80,2,75,1} | 2</span> |
| <a name="l00223"></a>00223 <span class="comment"> * 11 | {70,3,75,2} | 2</span> |
| <a name="l00224"></a>00224 <span class="comment"> * 12 | {90,1,72,2} | 2</span> |
| <a name="l00225"></a>00225 <span class="comment"> * 13 | {75,1,81,1} | 2</span> |
| <a name="l00226"></a>00226 <span class="comment"> * 14 | {80,2,71,2} | 1</span> |
| <a name="l00227"></a>00227 <span class="comment"> * (14 rows)</span> |
| <a name="l00228"></a>00228 <span class="comment"> *</span> |
| <a name="l00229"></a>00229 <span class="comment"> * In general, each record in the new encoded table has the following structure:</span> |
| <a name="l00230"></a>00230 <span class="comment"> * +id The ID from the classification table.</span> |
| <a name="l00231"></a>00231 <span class="comment"> * +fvals An array contains all the features' values for a given ID.</span> |
| <a name="l00232"></a>00232 <span class="comment"> * For a discrete feature, the element in the array is the key.</span> |
| <a name="l00233"></a>00233 <span class="comment"> * For a continuous feature, it's the original value.</span> |
| <a name="l00234"></a>00234 <span class="comment"> * +class The encoded value of a class label.</span> |
| <a name="l00235"></a>00235 <span class="comment"> *</span> |
| <a name="l00236"></a>00236 <span class="comment"> */</span> |
| <a name="l00237"></a>00237 |
| <a name="l00238"></a>00238 |
| <a name="l00239"></a>00239 <span class="comment">/*</span> |
| <a name="l00240"></a>00240 <span class="comment"> * The UDT for keeping the time for each step of the encoding procedure.</span> |
| <a name="l00241"></a>00241 <span class="comment"> *</span> |
| <a name="l00242"></a>00242 <span class="comment"> * pre_proc_time The time of pre-processing.</span> |
| <a name="l00243"></a>00243 <span class="comment"> * breakup_tbl_time The time of breaking up the training table.</span> |
| <a name="l00244"></a>00244 <span class="comment"> * gen_kv_time The time of generating KV-table for </span> |
| <a name="l00245"></a>00245 <span class="comment"> * features/class. </span> |
| <a name="l00246"></a>00246 <span class="comment"> * gen_enc_time The time of generating encoded table.</span> |
| <a name="l00247"></a>00247 <span class="comment"> * post_proc_time The time of post-processing.</span> |
| <a name="l00248"></a>00248 <span class="comment"> *</span> |
| <a name="l00249"></a>00249 <span class="comment"> */</span> |
| <a name="l00250"></a>00250 DROP TYPE IF EXISTS MADLIB_SCHEMA.__enc_tbl_result; |
| <a name="l00251"></a>00251 CREATE TYPE MADLIB_SCHEMA.__enc_tbl_result AS |
| <a name="l00252"></a>00252 ( |
| <a name="l00253"></a>00253 pre_proc_time INTERVAL, |
| <a name="l00254"></a>00254 breakup_tbl_time INTERVAL, |
| <a name="l00255"></a>00255 gen_kv_time INTERVAL, |
| <a name="l00256"></a>00256 gen_enc_time INTERVAL, |
| <a name="l00257"></a>00257 post_proc_time INTERVAL |
| <a name="l00258"></a>00258 ); |
| <a name="l00259"></a>00259 |
| <a name="l00260"></a>00260 |
| <a name="l00261"></a>00261 <span class="comment">/*</span> |
| <a name="l00262"></a>00262 <span class="comment"> * @brief Check if the input table has unsupported data type or not.</span> |
| <a name="l00263"></a>00263 <span class="comment"> * Check if the id column of input table has duplicated value or not.</span> |
| <a name="l00264"></a>00264 <span class="comment"> *</span> |
| <a name="l00265"></a>00265 <span class="comment"> * @param full_table_name The full table name.</span> |
| <a name="l00266"></a>00266 <span class="comment"> * @param feature_columns The array including all feature names.</span> |
| <a name="l00267"></a>00267 <span class="comment"> * @param id_column The name of the ID column. </span> |
| <a name="l00268"></a>00268 <span class="comment"> * @param class_column The name of the class column. </span> |
| <a name="l00269"></a>00269 <span class="comment"> *</span> |
| <a name="l00270"></a>00270 <span class="comment"> * @return If the table has unsupported data types, then raise exception</span> |
| <a name="l00271"></a>00271 <span class="comment"> * otherwise return nothing.</span> |
| <a name="l00272"></a>00272 <span class="comment"> *</span> |
| <a name="l00273"></a>00273 <span class="comment"> */</span> |
| <a name="l00274"></a>00274 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__validate_input_table |
| <a name="l00275"></a>00275 ( |
| <a name="l00276"></a>00276 full_table_name TEXT, |
| <a name="l00277"></a>00277 feature_columns TEXT[], |
| <a name="l00278"></a>00278 id_column TEXT, |
| <a name="l00279"></a>00279 class_column TEXT |
| <a name="l00280"></a>00280 ) |
| <a name="l00281"></a>00281 RETURNS <span class="keywordtype">void</span> AS $$ |
| <a name="l00282"></a>00282 DECLARE |
| <a name="l00283"></a>00283 rec RECORD; |
| <a name="l00284"></a>00284 stmt TEXT; |
| <a name="l00285"></a>00285 all_columns TEXT := ''; |
| <a name="l00286"></a>00286 index INT; |
| <a name="l00287"></a>00287 BEGIN |
| <a name="l00288"></a>00288 -- find the first (LIMIT 1) unsupported data type if the input table has. |
| <a name="l00289"></a>00289 stmt= 'SELECT atttypid |
| <a name="l00290"></a>00290 FROM pg_attribute |
| <a name="l00291"></a>00291 WHERE attrelid ='||quote_literal(full_table_name)||'::regclass AND |
| <a name="l00292"></a>00292 attnum > 0 AND |
| <a name="l00293"></a>00293 (not attisdropped) AND |
| <a name="l00294"></a>00294 atttypid NOT IN |
| <a name="l00295"></a>00295 ( |
| <a name="l00296"></a>00296 SELECT unnest |
| <a name="l00297"></a>00297 ( |
| <a name="l00298"></a>00298 ARRAY[ |
| <a name="l00299"></a>00299 ''SMALLINT''::regtype::oid, |
| <a name="l00300"></a>00300 ''INT''::regtype::oid, |
| <a name="l00301"></a>00301 ''BIGINT''::regtype::oid, |
| <a name="l00302"></a>00302 ''FLOAT8''::regtype::oid, |
| <a name="l00303"></a>00303 ''REAL''::regtype::oid, |
| <a name="l00304"></a>00304 ''DECIMAL''::regtype::oid, |
| <a name="l00305"></a>00305 ''INET''::regtype::oid, |
| <a name="l00306"></a>00306 ''CIDR''::regtype::oid, |
| <a name="l00307"></a>00307 ''MACADDR''::regtype::oid, |
| <a name="l00308"></a>00308 ''BOOLEAN''::regtype::oid, |
| <a name="l00309"></a>00309 ''CHAR''::regtype::oid, |
| <a name="l00310"></a>00310 ''VARCHAR''::regtype::oid, |
| <a name="l00311"></a>00311 ''TEXT''::regtype::oid, |
| <a name="l00312"></a>00312 ''"<span class="keywordtype">char</span>"''::regtype::oid, |
| <a name="l00313"></a>00313 ''DATE''::regtype::oid, |
| <a name="l00314"></a>00314 ''TIME''::regtype::oid, |
| <a name="l00315"></a>00315 ''TIMETZ''::regtype::oid, |
| <a name="l00316"></a>00316 ''TIMESTAMP''::regtype::oid, |
| <a name="l00317"></a>00317 ''TIMESTAMPTZ''::regtype::oid, |
| <a name="l00318"></a>00318 ''INTERVAL''::regtype::oid |
| <a name="l00319"></a>00319 ] |
| <a name="l00320"></a>00320 ) |
| <a name="l00321"></a>00321 ) '; |
| <a name="l00322"></a>00322 |
| <a name="l00323"></a>00323 IF (feature_columns IS NOT NULL) THEN |
| <a name="l00324"></a>00324 -- If user do not specify feature columns, we use all those columns. |
| <a name="l00325"></a>00325 -- Otherwise, we just need to check those specified columns. |
| <a name="l00326"></a>00326 index = array_lower(feature_columns, 1); |
| <a name="l00327"></a>00327 WHILE (index <= array_upper(feature_columns, 1)) LOOP |
| <a name="l00328"></a>00328 all_columns = all_columns || |
| <a name="l00329"></a>00329 quote_literal(feature_columns[index]) || |
| <a name="l00330"></a>00330 ','; |
| <a name="l00331"></a>00331 index = index+1; |
| <a name="l00332"></a>00332 END LOOP; |
| <a name="l00333"></a>00333 |
| <a name="l00334"></a>00334 all_columns = all_columns || quote_literal(id_column) || ','; |
| <a name="l00335"></a>00335 all_columns = all_columns || quote_literal(class_column); |
| <a name="l00336"></a>00336 stmt = stmt ||' AND attname IN ('||all_columns||') '; |
| <a name="l00337"></a>00337 END IF; |
| <a name="l00338"></a>00338 |
| <a name="l00339"></a>00339 stmt = stmt||' LIMIT 1;'; |
| <a name="l00340"></a>00340 |
| <a name="l00341"></a>00341 EXECUTE stmt INTO rec; |
| <a name="l00342"></a>00342 |
| <a name="l00343"></a>00343 IF (rec IS NOT NULL) THEN |
| <a name="l00344"></a>00344 -- Print the first unsupported data type, and supported types. |
| <a name="l00345"></a>00345 RAISE EXCEPTION 'Unsupported data type [%]. Supported types include: |
| <a name="l00346"></a>00346 SMALLINT, INT, BIGINT, FLOAT8, REAL, |
| <a name="l00347"></a>00347 DECIMAL, INET, CIDR, MACADDR, BOOLEAN, |
| <a name="l00348"></a>00348 CHAR, VARCHAR, TEXT, "<span class="keywordtype">char</span>", |
| <a name="l00349"></a>00349 DATE, TIME, TIMETZ, TIMESTAMP, TIMESTAMPTZ, and INTERVAL', |
| <a name="l00350"></a>00350 rec.atttypid::regtype; |
| <a name="l00351"></a>00351 END IF; |
| <a name="l00352"></a>00352 |
| <a name="l00353"></a>00353 SELECT MADLIB_SCHEMA.__format |
| <a name="l00354"></a>00354 ('SELECT % AS n |
| <a name="l00355"></a>00355 FROM % |
| <a name="l00356"></a>00356 GROUP BY % |
| <a name="l00357"></a>00357 HAVING COUNT(%) > 1 |
| <a name="l00358"></a>00358 LIMIT 1', |
| <a name="l00359"></a>00359 ARRAY[ |
| <a name="l00360"></a>00360 id_column, |
| <a name="l00361"></a>00361 full_table_name, |
| <a name="l00362"></a>00362 id_column, |
| <a name="l00363"></a>00363 id_column |
| <a name="l00364"></a>00364 ] |
| <a name="l00365"></a>00365 ) |
| <a name="l00366"></a>00366 INTO stmt; |
| <a name="l00367"></a>00367 |
| <a name="l00368"></a>00368 EXECUTE stmt INTO rec; |
| <a name="l00369"></a>00369 |
| <a name="l00370"></a>00370 -- check if the <span class="keywordtype">id</span> column has duplicated value |
| <a name="l00371"></a>00371 PERFORM MADLIB_SCHEMA.__assert |
| <a name="l00372"></a>00372 ( |
| <a name="l00373"></a>00373 rec IS NULL, |
| <a name="l00374"></a>00374 'The training table ' || full_table_name || ' must not have duplicated <span class="keywordtype">id</span>' |
| <a name="l00375"></a>00375 ); |
| <a name="l00376"></a>00376 |
| <a name="l00377"></a>00377 RETURN; |
| <a name="l00378"></a>00378 END |
| <a name="l00379"></a>00379 $$ LANGUAGE PLPGSQL; |
| <a name="l00380"></a>00380 |
| <a name="l00381"></a>00381 |
| <a name="l00382"></a>00382 <span class="comment">/*</span> |
| <a name="l00383"></a>00383 <span class="comment"> * @brief Get the class table name by the metatable name.</span> |
| <a name="l00384"></a>00384 <span class="comment"> *</span> |
| <a name="l00385"></a>00385 <span class="comment"> * @param meta_tbl_name The full name of the metatable.</span> |
| <a name="l00386"></a>00386 <span class="comment"> *</span> |
| <a name="l00387"></a>00387 <span class="comment"> * @return The name of the class table</span> |
| <a name="l00388"></a>00388 <span class="comment"> *</span> |
| <a name="l00389"></a>00389 <span class="comment"> */</span> |
| <a name="l00390"></a>00390 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__get_classtable_name |
| <a name="l00391"></a>00391 ( |
| <a name="l00392"></a>00392 meta_tbl_name TEXT |
| <a name="l00393"></a>00393 ) |
| <a name="l00394"></a>00394 RETURNS TEXT AS $$ |
| <a name="l00395"></a>00395 DECLARE |
| <a name="l00396"></a>00396 classtable_name TEXT; |
| <a name="l00397"></a>00397 curstmt TEXT; |
| <a name="l00398"></a>00398 BEGIN |
| <a name="l00399"></a>00399 |
| <a name="l00400"></a>00400 PERFORM MADLIB_SCHEMA.__assert_table |
| <a name="l00401"></a>00401 ( |
| <a name="l00402"></a>00402 meta_tbl_name, |
| <a name="l00403"></a>00403 't' |
| <a name="l00404"></a>00404 ); |
| <a name="l00405"></a>00405 |
| <a name="l00406"></a>00406 curstmt = MADLIB_SCHEMA.__format |
| <a name="l00407"></a>00407 ( |
| <a name="l00408"></a>00408 'SELECT MADLIB_SCHEMA.__regclass_to_text |
| <a name="l00409"></a>00409 (table_oid) as table_name |
| <a name="l00410"></a>00410 FROM % |
| <a name="l00411"></a>00411 WHERE column_type = ''c''', |
| <a name="l00412"></a>00412 ARRAY[ |
| <a name="l00413"></a>00413 meta_tbl_name |
| <a name="l00414"></a>00414 ] |
| <a name="l00415"></a>00415 ); |
| <a name="l00416"></a>00416 |
| <a name="l00417"></a>00417 EXECUTE curstmt INTO classtable_name; |
| <a name="l00418"></a>00418 |
| <a name="l00419"></a>00419 RETURN classtable_name; |
| <a name="l00420"></a>00420 END |
| <a name="l00421"></a>00421 $$ LANGUAGE PLPGSQL; |
| <a name="l00422"></a>00422 |
| <a name="l00423"></a>00423 |
| <a name="l00424"></a>00424 <span class="comment">/*</span> |
| <a name="l00425"></a>00425 <span class="comment"> * @brief Drop the metatable and KV tables </span> |
| <a name="l00426"></a>00426 <span class="comment"> * for the features and the class.</span> |
| <a name="l00427"></a>00427 <span class="comment"> *</span> |
| <a name="l00428"></a>00428 <span class="comment"> * @param meta_tbl_name The full name of the metatable.</span> |
| <a name="l00429"></a>00429 <span class="comment"> *</span> |
| <a name="l00430"></a>00430 <span class="comment"> */</span> |
| <a name="l00431"></a>00431 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__drop_metatable |
| <a name="l00432"></a>00432 ( |
| <a name="l00433"></a>00433 meta_tbl_name TEXT |
| <a name="l00434"></a>00434 ) |
| <a name="l00435"></a>00435 RETURNS <span class="keywordtype">void</span> AS $$ |
| <a name="l00436"></a>00436 DECLARE |
| <a name="l00437"></a>00437 curstmt TEXT; |
| <a name="l00438"></a>00438 name TEXT; |
| <a name="l00439"></a>00439 BEGIN |
| <a name="l00440"></a>00440 IF (meta_tbl_name is NULL ) THEN |
| <a name="l00441"></a>00441 RETURN; |
| <a name="l00442"></a>00442 END IF; |
| <a name="l00443"></a>00443 |
| <a name="l00444"></a>00444 PERFORM MADLIB_SCHEMA.__assert_table(meta_tbl_name, 't'); |
| <a name="l00445"></a>00445 |
| <a name="l00446"></a>00446 -- get the Key-Value tables |
| <a name="l00447"></a>00447 curstmt = MADLIB_SCHEMA.__format |
| <a name="l00448"></a>00448 ( |
| <a name="l00449"></a>00449 'SELECT MADLIB_SCHEMA.__regclass_to_text |
| <a name="l00450"></a>00450 (table_oid) as table_name |
| <a name="l00451"></a>00451 FROM |
| <a name="l00452"></a>00452 ( |
| <a name="l00453"></a>00453 SELECT table_oid |
| <a name="l00454"></a>00454 FROM % |
| <a name="l00455"></a>00455 WHERE table_oid IS NOT NULL |
| <a name="l00456"></a>00456 GROUP BY table_oid |
| <a name="l00457"></a>00457 ) t', |
| <a name="l00458"></a>00458 ARRAY[ |
| <a name="l00459"></a>00459 meta_tbl_name |
| <a name="l00460"></a>00460 ] |
| <a name="l00461"></a>00461 ); |
| <a name="l00462"></a>00462 |
| <a name="l00463"></a>00463 -- drop all the Key-Value tables |
| <a name="l00464"></a>00464 FOR name IN EXECUTE curstmt LOOP |
| <a name="l00465"></a>00465 EXECUTE 'DROP TABLE IF EXISTS ' || name || ' CASCADE;'; |
| <a name="l00466"></a>00466 END LOOP; |
| <a name="l00467"></a>00467 |
| <a name="l00468"></a>00468 -- drop the metatable |
| <a name="l00469"></a>00469 EXECUTE 'DROP TABLE ' || meta_tbl_name || ' CASCADE;'; |
| <a name="l00470"></a>00470 END |
| <a name="l00471"></a>00471 $$ LANGUAGE PLPGSQL; |
| <a name="l00472"></a>00472 |
| <a name="l00473"></a>00473 |
| <a name="l00474"></a>00474 <span class="comment">/*</span> |
| <a name="l00475"></a>00475 <span class="comment"> * @brief Create the metatable.</span> |
| <a name="l00476"></a>00476 <span class="comment"> </span> |
| <a name="l00477"></a>00477 <span class="comment"> * @param meta_tbl_name The full name of the metatable.</span> |
| <a name="l00478"></a>00478 <span class="comment"> *</span> |
| <a name="l00479"></a>00479 <span class="comment"> */</span> |
| <a name="l00480"></a>00480 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__create_metatable |
| <a name="l00481"></a>00481 ( |
| <a name="l00482"></a>00482 meta_tbl_name TEXT |
| <a name="l00483"></a>00483 ) |
| <a name="l00484"></a>00484 RETURNS <span class="keywordtype">void</span> AS $$ |
| <a name="l00485"></a>00485 DECLARE |
| <a name="l00486"></a>00486 curstmt TEXT; |
| <a name="l00487"></a>00487 result INT := 0; |
| <a name="l00488"></a>00488 BEGIN |
| <a name="l00489"></a>00489 -- the maximum length of an identifier is 63 |
| <a name="l00490"></a>00490 PERFORM MADLIB_SCHEMA.__assert |
| <a name="l00491"></a>00491 ( |
| <a name="l00492"></a>00492 length(MADLIB_SCHEMA.__strip_schema_name(meta_tbl_name)) <= 63, |
| <a name="l00493"></a>00493 'The maximum length of ' || |
| <a name="l00494"></a>00494 MADLIB_SCHEMA.__strip_schema_name(meta_tbl_name) || |
| <a name="l00495"></a>00495 ' is 63' |
| <a name="l00496"></a>00496 ); |
| <a name="l00497"></a>00497 |
| <a name="l00498"></a>00498 -- must not be existence |
| <a name="l00499"></a>00499 PERFORM MADLIB_SCHEMA.__assert_table(meta_tbl_name, 'f'); |
| <a name="l00500"></a>00500 |
| <a name="l00501"></a>00501 -- 'f' for feature, 'c' for class, 'i' for <span class="keywordtype">id</span> |
| <a name="l00502"></a>00502 -- 't' for continuous value, 'f' for discrete value |
| <a name="l00503"></a>00503 curstmt = MADLIB_SCHEMA.__format |
| <a name="l00504"></a>00504 ( |
| <a name="l00505"></a>00505 'CREATE TABLE %( |
| <a name="l00506"></a>00506 <span class="keywordtype">id</span> INT, |
| <a name="l00507"></a>00507 column_name TEXT, |
| <a name="l00508"></a>00508 column_type TEXT, |
| <a name="l00509"></a>00509 is_cont BOOL, |
| <a name="l00510"></a>00510 table_oid OID, |
| <a name="l00511"></a>00511 num_dist_value INT |
| <a name="l00512"></a>00512 ) m4_ifdef(`__GREENPLUM__', `DISTRIBUTED BY (<span class="keywordtype">id</span>)')', |
| <a name="l00513"></a>00513 meta_tbl_name |
| <a name="l00514"></a>00514 ); |
| <a name="l00515"></a>00515 EXECUTE curstmt; |
| <a name="l00516"></a>00516 END |
| <a name="l00517"></a>00517 $$ LANGUAGE PLPGSQL; |
| <a name="l00518"></a>00518 |
| <a name="l00519"></a>00519 |
| <a name="l00520"></a>00520 <span class="comment">/*</span> |
| <a name="l00521"></a>00521 <span class="comment"> * @brief Insert a record to the metatable</span> |
| <a name="l00522"></a>00522 <span class="comment"> * A row in the metatable represents a column's information.</span> |
| <a name="l00523"></a>00523 <span class="comment"> *</span> |
| <a name="l00524"></a>00524 <span class="comment"> * @param meta_tbl_name The full name of the metatable.</span> |
| <a name="l00525"></a>00525 <span class="comment"> * @param column_name The name of the column.</span> |
| <a name="l00526"></a>00526 <span class="comment"> * @param column_type The type of the column.</span> |
| <a name="l00527"></a>00527 <span class="comment"> * 'i' means id, 'c' means class, 'f' means feature.</span> |
| <a name="l00528"></a>00528 <span class="comment"> * @param is_cont True if the column is continuous.</span> |
| <a name="l00529"></a>00529 <span class="comment"> * @param table_name The full name of key-value table for the column.</span> |
| <a name="l00530"></a>00530 <span class="comment"> * The OID of this table will be stored.</span> |
| <a name="l00531"></a>00531 <span class="comment"> * @param num_dist_value The number of distinct values for the column.</span> |
| <a name="l00532"></a>00532 <span class="comment"> *</span> |
| <a name="l00533"></a>00533 <span class="comment"> * @note The null value will be included in the distinct values.</span> |
| <a name="l00534"></a>00534 <span class="comment"> *</span> |
| <a name="l00535"></a>00535 <span class="comment"> */</span> |
| <a name="l00536"></a>00536 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__insert_into_metatable |
| <a name="l00537"></a>00537 ( |
| <a name="l00538"></a>00538 meta_tbl_name TEXT, |
| <a name="l00539"></a>00539 col_index INT, |
| <a name="l00540"></a>00540 column_name TEXT, |
| <a name="l00541"></a>00541 column_type CHAR, |
| <a name="l00542"></a>00542 is_cont BOOLEAN, |
| <a name="l00543"></a>00543 table_name TEXT, |
| <a name="l00544"></a>00544 num_dist_value INT |
| <a name="l00545"></a>00545 ) |
| <a name="l00546"></a>00546 RETURNS <span class="keywordtype">void</span> AS $$ |
| <a name="l00547"></a>00547 DECLARE |
| <a name="l00548"></a>00548 curstmt TEXT := ''; |
| <a name="l00549"></a>00549 tbl_txt TEXT := 'NULL'; |
| <a name="l00550"></a>00550 BEGIN |
| <a name="l00551"></a>00551 PERFORM MADLIB_SCHEMA.__assert |
| <a name="l00552"></a>00552 ( |
| <a name="l00553"></a>00553 column_type = 'f' OR column_type = 'i' OR column_type = 'c', |
| <a name="l00554"></a>00554 'column type must be ''f'', ''i'' or ''c''' |
| <a name="l00555"></a>00555 ); |
| <a name="l00556"></a>00556 IF (table_name IS NOT NULL) THEN |
| <a name="l00557"></a>00557 tbl_txt = '''' || table_name || ''''; |
| <a name="l00558"></a>00558 END IF; |
| <a name="l00559"></a>00559 |
| <a name="l00560"></a>00560 curstmt = MADLIB_SCHEMA.__format |
| <a name="l00561"></a>00561 ( |
| <a name="l00562"></a>00562 'INSERT INTO % VALUES |
| <a name="l00563"></a>00563 (%, ''%'', ''%'', ''%'', %::regclass, %);', |
| <a name="l00564"></a>00564 ARRAY[ |
| <a name="l00565"></a>00565 meta_tbl_name, |
| <a name="l00566"></a>00566 col_index::TEXT, |
| <a name="l00567"></a>00567 column_name, |
| <a name="l00568"></a>00568 column_type, |
| <a name="l00569"></a>00569 MADLIB_SCHEMA.__to_char(is_cont), |
| <a name="l00570"></a>00570 tbl_txt, |
| <a name="l00571"></a>00571 num_dist_value::TEXT |
| <a name="l00572"></a>00572 ] |
| <a name="l00573"></a>00573 ); |
| <a name="l00574"></a>00574 |
| <a name="l00575"></a>00575 EXECUTE curstmt; |
| <a name="l00576"></a>00576 END |
| <a name="l00577"></a>00577 $$ LANGUAGE PLPGSQL; |
| <a name="l00578"></a>00578 |
| <a name="l00579"></a>00579 |
| <a name="l00580"></a>00580 <span class="comment">/*</span> |
| <a name="l00581"></a>00581 <span class="comment"> * @brief Validate if the metatable exists or not.</span> |
| <a name="l00582"></a>00582 <span class="comment"> * Validate if the tables in "table_oid" column exists or not.</span> |
| <a name="l00583"></a>00583 <span class="comment"> *</span> |
| <a name="l00584"></a>00584 <span class="comment"> * @param meta_tbl_name The full name of the metatable.</span> |
| <a name="l00585"></a>00585 <span class="comment"> *</span> |
| <a name="l00586"></a>00586 <span class="comment"> */</span> |
| <a name="l00587"></a>00587 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__validate_metatable |
| <a name="l00588"></a>00588 ( |
| <a name="l00589"></a>00589 meta_tbl_name TEXT |
| <a name="l00590"></a>00590 ) |
| <a name="l00591"></a>00591 RETURNS VOID AS $$ |
| <a name="l00592"></a>00592 DECLARE |
| <a name="l00593"></a>00593 curstmt TEXT; |
| <a name="l00594"></a>00594 name TEXT; |
| <a name="l00595"></a>00595 BEGIN |
| <a name="l00596"></a>00596 PERFORM MADLIB_SCHEMA.__assert_table(meta_tbl_name, 't'); |
| <a name="l00597"></a>00597 |
| <a name="l00598"></a>00598 -- if one of those KV tables doesn't exist, |
| <a name="l00599"></a>00599 -- we raise exception. |
| <a name="l00600"></a>00600 curstmt = MADLIB_SCHEMA.__format |
| <a name="l00601"></a>00601 ( |
| <a name="l00602"></a>00602 'SELECT MADLIB_SCHEMA.__assert_table |
| <a name="l00603"></a>00603 (MADLIB_SCHEMA.__regclass_to_text(table_oid), ''t'') |
| <a name="l00604"></a>00604 FROM |
| <a name="l00605"></a>00605 ( |
| <a name="l00606"></a>00606 SELECT table_oid |
| <a name="l00607"></a>00607 FROM % |
| <a name="l00608"></a>00608 WHERE table_oid IS NOT NULL |
| <a name="l00609"></a>00609 GROUP BY table_oid |
| <a name="l00610"></a>00610 ) t', |
| <a name="l00611"></a>00611 ARRAY[ |
| <a name="l00612"></a>00612 meta_tbl_name |
| <a name="l00613"></a>00613 ] |
| <a name="l00614"></a>00614 ); |
| <a name="l00615"></a>00615 EXECUTE curstmt; |
| <a name="l00616"></a>00616 END |
| <a name="l00617"></a>00617 $$ LANGUAGE PLPGSQL; |
| <a name="l00618"></a>00618 |
| <a name="l00619"></a>00619 <span class="comment">/*</span> |
| <a name="l00620"></a>00620 <span class="comment"> * @brief Get the number of distinct values for the feature with given ID.</span> |
| <a name="l00621"></a>00621 <span class="comment"> *</span> |
| <a name="l00622"></a>00622 <span class="comment"> * @param meta_tbl_name The full name of the metatable.</span> |
| <a name="l00623"></a>00623 <span class="comment"> * @param feature_id The ID of the feature in the metatable.</span> |
| <a name="l00624"></a>00624 <span class="comment"> *</span> |
| <a name="l00625"></a>00625 <span class="comment"> * @return The number of the distinct values for the given feature.</span> |
| <a name="l00626"></a>00626 <span class="comment"> *</span> |
| <a name="l00627"></a>00627 <span class="comment"> */</span> |
| <a name="l00628"></a>00628 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__distinct_feature_value |
| <a name="l00629"></a>00629 ( |
| <a name="l00630"></a>00630 meta_tbl_name TEXT, |
| <a name="l00631"></a>00631 feature_id INT |
| <a name="l00632"></a>00632 ) |
| <a name="l00633"></a>00633 RETURNS INT4 AS $$ |
| <a name="l00634"></a>00634 DECLARE |
| <a name="l00635"></a>00635 curstmt TEXT := ''; |
| <a name="l00636"></a>00636 result INT4 := 0; |
| <a name="l00637"></a>00637 BEGIN |
| <a name="l00638"></a>00638 curstmt = MADLIB_SCHEMA.__format |
| <a name="l00639"></a>00639 ( |
| <a name="l00640"></a>00640 'SELECT num_dist_value |
| <a name="l00641"></a>00641 FROM % |
| <a name="l00642"></a>00642 WHERE column_type=''f'' AND <span class="keywordtype">id</span> = %', |
| <a name="l00643"></a>00643 meta_tbl_name, |
| <a name="l00644"></a>00644 feature_id::TEXT |
| <a name="l00645"></a>00645 ); |
| <a name="l00646"></a>00646 |
| <a name="l00647"></a>00647 EXECUTE curstmt INTO result; |
| <a name="l00648"></a>00648 |
| <a name="l00649"></a>00649 RETURN result; |
| <a name="l00650"></a>00650 END |
| <a name="l00651"></a>00651 $$ LANGUAGE PLPGSQL; |
| <a name="l00652"></a>00652 |
| <a name="l00653"></a>00653 |
| <a name="l00654"></a>00654 <span class="comment">/*</span> |
| <a name="l00655"></a>00655 <span class="comment"> * @brief Get the number of features.</span> |
| <a name="l00656"></a>00656 <span class="comment"> *</span> |
| <a name="l00657"></a>00657 <span class="comment"> * @param meta_tbl_name The full name of the metatable.</span> |
| <a name="l00658"></a>00658 <span class="comment"> *</span> |
| <a name="l00659"></a>00659 <span class="comment"> * @return The number of features in the training table.</span> |
| <a name="l00660"></a>00660 <span class="comment"> *</span> |
| <a name="l00661"></a>00661 <span class="comment"> */</span> |
| <a name="l00662"></a>00662 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__num_of_feature |
| <a name="l00663"></a>00663 ( |
| <a name="l00664"></a>00664 meta_tbl_name TEXT |
| <a name="l00665"></a>00665 ) |
| <a name="l00666"></a>00666 RETURNS INT4 AS $$ |
| <a name="l00667"></a>00667 DECLARE |
| <a name="l00668"></a>00668 curstmt TEXT := ''; |
| <a name="l00669"></a>00669 result INT4 := 0; |
| <a name="l00670"></a>00670 BEGIN |
| <a name="l00671"></a>00671 curstmt = MADLIB_SCHEMA.__format |
| <a name="l00672"></a>00672 ( |
| <a name="l00673"></a>00673 'SELECT COUNT(*) |
| <a name="l00674"></a>00674 FROM % |
| <a name="l00675"></a>00675 WHERE column_type=''f''', |
| <a name="l00676"></a>00676 meta_tbl_name |
| <a name="l00677"></a>00677 ); |
| <a name="l00678"></a>00678 |
| <a name="l00679"></a>00679 EXECUTE curstmt INTO result; |
| <a name="l00680"></a>00680 |
| <a name="l00681"></a>00681 RETURN result; |
| <a name="l00682"></a>00682 END |
| <a name="l00683"></a>00683 $$ LANGUAGE PLPGSQL; |
| <a name="l00684"></a>00684 |
| <a name="l00685"></a>00685 |
| <a name="l00686"></a>00686 <span class="comment">/*</span> |
| <a name="l00687"></a>00687 <span class="comment"> * @brief Get the number of distinct class values.</span> |
| <a name="l00688"></a>00688 <span class="comment"> *</span> |
| <a name="l00689"></a>00689 <span class="comment"> * @param meta_tbl_name The full name of the metatable.</span> |
| <a name="l00690"></a>00690 <span class="comment"> *</span> |
| <a name="l00691"></a>00691 <span class="comment"> * @return The number of class labels in the training table.</span> |
| <a name="l00692"></a>00692 <span class="comment"> *</span> |
| <a name="l00693"></a>00693 <span class="comment"> */</span> |
| <a name="l00694"></a>00694 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__num_of_class |
| <a name="l00695"></a>00695 ( |
| <a name="l00696"></a>00696 meta_tbl_name TEXT |
| <a name="l00697"></a>00697 ) |
| <a name="l00698"></a>00698 RETURNS INT4 AS $$ |
| <a name="l00699"></a>00699 DECLARE |
| <a name="l00700"></a>00700 curstmt TEXT := ''; |
| <a name="l00701"></a>00701 result INT4 := 0; |
| <a name="l00702"></a>00702 class_table_name TEXT := ''; |
| <a name="l00703"></a>00703 BEGIN |
| <a name="l00704"></a>00704 curstmt = MADLIB_SCHEMA.__format |
| <a name="l00705"></a>00705 ( |
| <a name="l00706"></a>00706 'SELECT MADLIB_SCHEMA.__regclass_to_text(table_oid) |
| <a name="l00707"></a>00707 FROM % |
| <a name="l00708"></a>00708 WHERE column_type=''c''', |
| <a name="l00709"></a>00709 meta_tbl_name |
| <a name="l00710"></a>00710 ); |
| <a name="l00711"></a>00711 |
| <a name="l00712"></a>00712 EXECUTE curstmt INTO class_table_name; |
| <a name="l00713"></a>00713 |
| <a name="l00714"></a>00714 curstmt = MADLIB_SCHEMA.__format |
| <a name="l00715"></a>00715 ( |
| <a name="l00716"></a>00716 'SELECT COUNT(code) |
| <a name="l00717"></a>00717 FROM %', |
| <a name="l00718"></a>00718 class_table_name |
| <a name="l00719"></a>00719 ); |
| <a name="l00720"></a>00720 |
| <a name="l00721"></a>00721 EXECUTE curstmt INTO result; |
| <a name="l00722"></a>00722 |
| <a name="l00723"></a>00723 RETURN result; |
| <a name="l00724"></a>00724 END |
| <a name="l00725"></a>00725 $$ LANGUAGE PLPGSQL; |
| <a name="l00726"></a>00726 |
| <a name="l00727"></a>00727 |
| <a name="l00728"></a>00728 <span class="comment">/*</span> |
| <a name="l00729"></a>00729 <span class="comment"> * @brief Get the feature name by the specified feature ID.</span> |
| <a name="l00730"></a>00730 <span class="comment"> *</span> |
| <a name="l00731"></a>00731 <span class="comment"> * @param feature_index The ID of the feature.</span> |
| <a name="l00732"></a>00732 <span class="comment"> * @param meta_tbl_name The full name of the metatable.</span> |
| <a name="l00733"></a>00733 <span class="comment"> *</span> |
| <a name="l00734"></a>00734 <span class="comment"> * @return The feature name.</span> |
| <a name="l00735"></a>00735 <span class="comment"> *</span> |
| <a name="l00736"></a>00736 <span class="comment"> */</span> |
| <a name="l00737"></a>00737 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__get_feature_name |
| <a name="l00738"></a>00738 ( |
| <a name="l00739"></a>00739 feature_index INT, |
| <a name="l00740"></a>00740 meta_tbl_name TEXT |
| <a name="l00741"></a>00741 ) |
| <a name="l00742"></a>00742 RETURNS TEXT AS $$ |
| <a name="l00743"></a>00743 DECLARE |
| <a name="l00744"></a>00744 curstmt TEXT; |
| <a name="l00745"></a>00745 result TEXT := ''; |
| <a name="l00746"></a>00746 BEGIN |
| <a name="l00747"></a>00747 curstmt = MADLIB_SCHEMA.__format |
| <a name="l00748"></a>00748 ( |
| <a name="l00749"></a>00749 'SELECT column_name |
| <a name="l00750"></a>00750 FROM % |
| <a name="l00751"></a>00751 WHERE <span class="keywordtype">id</span> = % AND column_type = ''f'';', |
| <a name="l00752"></a>00752 meta_tbl_name, |
| <a name="l00753"></a>00753 MADLIB_SCHEMA.__to_char(feature_index) |
| <a name="l00754"></a>00754 ); |
| <a name="l00755"></a>00755 |
| <a name="l00756"></a>00756 EXECUTE curstmt INTO result; |
| <a name="l00757"></a>00757 |
| <a name="l00758"></a>00758 RETURN result; |
| <a name="l00759"></a>00759 END |
| <a name="l00760"></a>00760 $$ LANGUAGE PLPGSQL; |
| <a name="l00761"></a>00761 |
| <a name="l00762"></a>00762 |
| <a name="l00763"></a>00763 <span class="comment">/*</span> |
| <a name="l00764"></a>00764 <span class="comment"> * @brief Get the column value by the specified column ID and code.</span> |
| <a name="l00765"></a>00765 <span class="comment"> *</span> |
| <a name="l00766"></a>00766 <span class="comment"> * @param column_index The ID of the column.</span> |
| <a name="l00767"></a>00767 <span class="comment"> * @param code The code of the column value.</span> |
| <a name="l00768"></a>00768 <span class="comment"> * @param column_type The type of the column.</span> |
| <a name="l00769"></a>00769 <span class="comment"> * 'i' means id, 'c' means class, 'f' means feature.</span> |
| <a name="l00770"></a>00770 <span class="comment"> * @param meta_tbl_name The full name of the metatable.</span> |
| <a name="l00771"></a>00771 <span class="comment"> * </span> |
| <a name="l00772"></a>00772 <span class="comment"> * @return The column's value corresponding to the give code. </span> |
| <a name="l00773"></a>00773 <span class="comment"> *</span> |
| <a name="l00774"></a>00774 <span class="comment"> */</span> |
| <a name="l00775"></a>00775 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__get_column_value |
| <a name="l00776"></a>00776 ( |
| <a name="l00777"></a>00777 column_index INT, |
| <a name="l00778"></a>00778 code INT, |
| <a name="l00779"></a>00779 column_type CHAR, |
| <a name="l00780"></a>00780 meta_tbl_name TEXT |
| <a name="l00781"></a>00781 ) |
| <a name="l00782"></a>00782 RETURNS TEXT AS $$ |
| <a name="l00783"></a>00783 DECLARE |
| <a name="l00784"></a>00784 curstmt TEXT; |
| <a name="l00785"></a>00785 names TEXT[]; |
| <a name="l00786"></a>00786 result TEXT := ''; |
| <a name="l00787"></a>00787 tmp_txt TEXT := ' WHERE column_type = ''c'''; |
| <a name="l00788"></a>00788 BEGIN |
| <a name="l00789"></a>00789 PERFORM MADLIB_SCHEMA.__assert |
| <a name="l00790"></a>00790 ( |
| <a name="l00791"></a>00791 code IS NOT NULL, |
| <a name="l00792"></a>00792 'the code of the value should not be null' |
| <a name="l00793"></a>00793 ); |
| <a name="l00794"></a>00794 |
| <a name="l00795"></a>00795 IF (column_type <> 'c') THEN |
| <a name="l00796"></a>00796 tmp_txt = MADLIB_SCHEMA.__format |
| <a name="l00797"></a>00797 ( |
| <a name="l00798"></a>00798 ' WHERE <span class="keywordtype">id</span> = % AND column_type = ''%''', |
| <a name="l00799"></a>00799 column_index::TEXT, |
| <a name="l00800"></a>00800 column_type::TEXT |
| <a name="l00801"></a>00801 ); |
| <a name="l00802"></a>00802 END IF; |
| <a name="l00803"></a>00803 |
| <a name="l00804"></a>00804 curstmt = MADLIB_SCHEMA.__format |
| <a name="l00805"></a>00805 ( |
| <a name="l00806"></a>00806 'SELECT |
| <a name="l00807"></a>00807 ARRAY[column_name, |
| <a name="l00808"></a>00808 MADLIB_SCHEMA.__regclass_to_text(table_oid)] |
| <a name="l00809"></a>00809 FROM % |
| <a name="l00810"></a>00810 %', |
| <a name="l00811"></a>00811 meta_tbl_name, |
| <a name="l00812"></a>00812 tmp_txt |
| <a name="l00813"></a>00813 ); |
| <a name="l00814"></a>00814 |
| <a name="l00815"></a>00815 EXECUTE curstmt INTO names; |
| <a name="l00816"></a>00816 |
| <a name="l00817"></a>00817 PERFORM MADLIB_SCHEMA.__assert(names[1] IS NOT NULL, 'No such column name'); |
| <a name="l00818"></a>00818 PERFORM MADLIB_SCHEMA.__assert(names[2] IS NOT NULL, 'No such table name'); |
| <a name="l00819"></a>00819 |
| <a name="l00820"></a>00820 curstmt = MADLIB_SCHEMA.__format |
| <a name="l00821"></a>00821 ( |
| <a name="l00822"></a>00822 'SELECT MADLIB_SCHEMA.__to_char(fval) |
| <a name="l00823"></a>00823 FROM % |
| <a name="l00824"></a>00824 WHERE code = %;', |
| <a name="l00825"></a>00825 names[2], |
| <a name="l00826"></a>00826 code::TEXT |
| <a name="l00827"></a>00827 ); |
| <a name="l00828"></a>00828 |
| <a name="l00829"></a>00829 EXECUTE curstmt INTO result; |
| <a name="l00830"></a>00830 |
| <a name="l00831"></a>00831 IF (result IS NULL) THEN |
| <a name="l00832"></a>00832 result = 'NULL'; |
| <a name="l00833"></a>00833 END IF; |
| <a name="l00834"></a>00834 |
| <a name="l00835"></a>00835 RETURN result; |
| <a name="l00836"></a>00836 END |
| <a name="l00837"></a>00837 $$ LANGUAGE PLPGSQL; |
| <a name="l00838"></a>00838 |
| <a name="l00839"></a>00839 |
| <a name="l00840"></a>00840 <span class="comment">/*</span> |
| <a name="l00841"></a>00841 <span class="comment"> * @brief Get the feature value by the specified feature ID and code.</span> |
| <a name="l00842"></a>00842 <span class="comment"> *</span> |
| <a name="l00843"></a>00843 <span class="comment"> * @param feature_index The ID of the feature.</span> |
| <a name="l00844"></a>00844 <span class="comment"> * @param code The code of the feature value.</span> |
| <a name="l00845"></a>00845 <span class="comment"> * @param meta_tbl_name The full name of the metatable. </span> |
| <a name="l00846"></a>00846 <span class="comment"> *</span> |
| <a name="l00847"></a>00847 <span class="comment"> * @return The value of specified code of the feature </span> |
| <a name="l00848"></a>00848 <span class="comment"> * whose id specified in feature_index.</span> |
| <a name="l00849"></a>00849 <span class="comment"> *</span> |
| <a name="l00850"></a>00850 <span class="comment"> */</span> |
| <a name="l00851"></a>00851 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__get_feature_value |
| <a name="l00852"></a>00852 ( |
| <a name="l00853"></a>00853 feature_index INT, |
| <a name="l00854"></a>00854 code INT, |
| <a name="l00855"></a>00855 meta_tbl_name TEXT |
| <a name="l00856"></a>00856 ) |
| <a name="l00857"></a>00857 RETURNS TEXT AS $$ |
| <a name="l00858"></a>00858 DECLARE |
| <a name="l00859"></a>00859 result TEXT := ''; |
| <a name="l00860"></a>00860 BEGIN |
| <a name="l00861"></a>00861 result = MADLIB_SCHEMA.__get_column_value |
| <a name="l00862"></a>00862 ( |
| <a name="l00863"></a>00863 feature_index, |
| <a name="l00864"></a>00864 code, |
| <a name="l00865"></a>00865 'f', |
| <a name="l00866"></a>00866 meta_tbl_name |
| <a name="l00867"></a>00867 ); |
| <a name="l00868"></a>00868 |
| <a name="l00869"></a>00869 RETURN result; |
| <a name="l00870"></a>00870 END |
| <a name="l00871"></a>00871 $$ LANGUAGE PLPGSQL; |
| <a name="l00872"></a>00872 |
| <a name="l00873"></a>00873 |
| <a name="l00874"></a>00874 <span class="comment">/*</span> |
| <a name="l00875"></a>00875 <span class="comment"> * @brief Get the ID column name.</span> |
| <a name="l00876"></a>00876 <span class="comment"> *</span> |
| <a name="l00877"></a>00877 <span class="comment"> * @param meta_tbl_name The full name of the metatable.</span> |
| <a name="l00878"></a>00878 <span class="comment"> *</span> |
| <a name="l00879"></a>00879 <span class="comment"> * @return The ID column name.</span> |
| <a name="l00880"></a>00880 <span class="comment"> *</span> |
| <a name="l00881"></a>00881 <span class="comment"> */</span> |
| <a name="l00882"></a>00882 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__get_id_column_name |
| <a name="l00883"></a>00883 ( |
| <a name="l00884"></a>00884 meta_tbl_name TEXT |
| <a name="l00885"></a>00885 ) |
| <a name="l00886"></a>00886 RETURNS TEXT AS $$ |
| <a name="l00887"></a>00887 DECLARE |
| <a name="l00888"></a>00888 curstmt TEXT; |
| <a name="l00889"></a>00889 result TEXT := ''; |
| <a name="l00890"></a>00890 BEGIN |
| <a name="l00891"></a>00891 PERFORM MADLIB_SCHEMA.__assert_table |
| <a name="l00892"></a>00892 ( |
| <a name="l00893"></a>00893 meta_tbl_name, |
| <a name="l00894"></a>00894 't' |
| <a name="l00895"></a>00895 ); |
| <a name="l00896"></a>00896 |
| <a name="l00897"></a>00897 curstmt = MADLIB_SCHEMA.__format |
| <a name="l00898"></a>00898 ( |
| <a name="l00899"></a>00899 'SELECT column_name |
| <a name="l00900"></a>00900 FROM % |
| <a name="l00901"></a>00901 WHERE column_type = ''i'' |
| <a name="l00902"></a>00902 LIMIT 1', |
| <a name="l00903"></a>00903 meta_tbl_name |
| <a name="l00904"></a>00904 ); |
| <a name="l00905"></a>00905 |
| <a name="l00906"></a>00906 EXECUTE curstmt INTO result; |
| <a name="l00907"></a>00907 |
| <a name="l00908"></a>00908 RETURN result; |
| <a name="l00909"></a>00909 END |
| <a name="l00910"></a>00910 $$ LANGUAGE PLPGSQL; |
| <a name="l00911"></a>00911 |
| <a name="l00912"></a>00912 |
| <a name="l00913"></a>00913 <span class="comment">/*</span> |
| <a name="l00914"></a>00914 <span class="comment"> * @brief Get the class column name.</span> |
| <a name="l00915"></a>00915 <span class="comment"> *</span> |
| <a name="l00916"></a>00916 <span class="comment"> * @param meta_tbl_name The full name of the metatable.</span> |
| <a name="l00917"></a>00917 <span class="comment"> *</span> |
| <a name="l00918"></a>00918 <span class="comment"> * @return The class column name. </span> |
| <a name="l00919"></a>00919 <span class="comment"> *</span> |
| <a name="l00920"></a>00920 <span class="comment"> */</span> |
| <a name="l00921"></a>00921 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__get_class_column_name |
| <a name="l00922"></a>00922 ( |
| <a name="l00923"></a>00923 meta_tbl_name TEXT |
| <a name="l00924"></a>00924 ) |
| <a name="l00925"></a>00925 RETURNS TEXT AS $$ |
| <a name="l00926"></a>00926 DECLARE |
| <a name="l00927"></a>00927 curstmt TEXT; |
| <a name="l00928"></a>00928 result TEXT := ''; |
| <a name="l00929"></a>00929 BEGIN |
| <a name="l00930"></a>00930 PERFORM MADLIB_SCHEMA.__assert_table |
| <a name="l00931"></a>00931 ( |
| <a name="l00932"></a>00932 meta_tbl_name, |
| <a name="l00933"></a>00933 't' |
| <a name="l00934"></a>00934 ); |
| <a name="l00935"></a>00935 |
| <a name="l00936"></a>00936 curstmt = MADLIB_SCHEMA.__format |
| <a name="l00937"></a>00937 ( |
| <a name="l00938"></a>00938 'SELECT column_name |
| <a name="l00939"></a>00939 FROM % |
| <a name="l00940"></a>00940 WHERE column_type = ''c'' LIMIT 1', |
| <a name="l00941"></a>00941 meta_tbl_name |
| <a name="l00942"></a>00942 ); |
| <a name="l00943"></a>00943 |
| <a name="l00944"></a>00944 EXECUTE curstmt INTO result; |
| <a name="l00945"></a>00945 |
| <a name="l00946"></a>00946 RETURN result; |
| <a name="l00947"></a>00947 END |
| <a name="l00948"></a>00948 $$ LANGUAGE PLPGSQL; |
| <a name="l00949"></a>00949 |
| <a name="l00950"></a>00950 |
| <a name="l00951"></a>00951 <span class="comment">/*</span> |
| <a name="l00952"></a>00952 <span class="comment"> * @brief Get the class value by the specified code.</span> |
| <a name="l00953"></a>00953 <span class="comment"> *</span> |
| <a name="l00954"></a>00954 <span class="comment"> * @param code The code of the class value.</span> |
| <a name="l00955"></a>00955 <span class="comment"> * @param meta_tbl_name The full name of the metatable.</span> |
| <a name="l00956"></a>00956 <span class="comment"> *</span> |
| <a name="l00957"></a>00957 <span class="comment"> * @return The class value corresponding to the code. </span> |
| <a name="l00958"></a>00958 <span class="comment"> *</span> |
| <a name="l00959"></a>00959 <span class="comment"> */</span> |
| <a name="l00960"></a>00960 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__get_class_value |
| <a name="l00961"></a>00961 ( |
| <a name="l00962"></a>00962 code INT, |
| <a name="l00963"></a>00963 meta_tbl_name TEXT |
| <a name="l00964"></a>00964 ) |
| <a name="l00965"></a>00965 RETURNS TEXT AS $$ |
| <a name="l00966"></a>00966 DECLARE |
| <a name="l00967"></a>00967 result TEXT := ''; |
| <a name="l00968"></a>00968 BEGIN |
| <a name="l00969"></a>00969 result = MADLIB_SCHEMA.__get_column_value(0, code, 'c', meta_tbl_name); |
| <a name="l00970"></a>00970 |
| <a name="l00971"></a>00971 RETURN result; |
| <a name="l00972"></a>00972 END |
| <a name="l00973"></a>00973 $$ LANGUAGE PLPGSQL; |
| <a name="l00974"></a>00974 |
| <a name="l00975"></a>00975 |
| <a name="l00976"></a>00976 <span class="comment">/*</span> |
| <a name="l00977"></a>00977 <span class="comment"> * @brief breakup each record from the training table.</span> |
| <a name="l00978"></a>00978 <span class="comment"> * For example, we have the training table t(id, f1, f2, f3, class), </span> |
| <a name="l00979"></a>00979 <span class="comment"> * then the breakup table is bt(id, fid, fval, is_cont, class). </span> |
| <a name="l00980"></a>00980 <span class="comment"> * The id column of the two tables is the same. Each feature will be</span> |
| <a name="l00981"></a>00981 <span class="comment"> * encoded to continuous numeric number. Assume that t has values</span> |
| <a name="l00982"></a>00982 <span class="comment"> * (1, 'a', 1, 10, '+') </span> |
| <a name="l00983"></a>00983 <span class="comment"> * (2, 'b', 2, 8, '-')</span> |
| <a name="l00984"></a>00984 <span class="comment"> * (3, 'd', null, 2, '+') </span> |
| <a name="l00985"></a>00985 <span class="comment"> * and all of them are discrete features, then the values of bt are </span> |
| <a name="l00986"></a>00986 <span class="comment"> * (1, 1, 'a', 'f', '+') </span> |
| <a name="l00987"></a>00987 <span class="comment"> * (2, 1, 'b', 'f', '-') </span> |
| <a name="l00988"></a>00988 <span class="comment"> * (3, 1, 'd', 'f', '+')</span> |
| <a name="l00989"></a>00989 <span class="comment"> * (1, 2, 1, 'f', '+') </span> |
| <a name="l00990"></a>00990 <span class="comment"> * (2, 2, 2, 'f', '-') </span> |
| <a name="l00991"></a>00991 <span class="comment"> * (3, 2, null, 'f', '+')</span> |
| <a name="l00992"></a>00992 <span class="comment"> * (1, 3, 10, 'f', '+') </span> |
| <a name="l00993"></a>00993 <span class="comment"> * (2, 3, 8, 'f', '-') </span> |
| <a name="l00994"></a>00994 <span class="comment"> * (3, 3, 2, 'f', '+')</span> |
| <a name="l00995"></a>00995 <span class="comment"> *</span> |
| <a name="l00996"></a>00996 <span class="comment"> * @param input_tbl_name The full name of the input training table.</span> |
| <a name="l00997"></a>00997 <span class="comment"> * @param breakup_tbl_name The name of the breakup table.</span> |
| <a name="l00998"></a>00998 <span class="comment"> * @param kv_cls_name The name of the key-value table for class column.</span> |
| <a name="l00999"></a>00999 <span class="comment"> * @param id_col_name The name of the ID column. </span> |
| <a name="l01000"></a>01000 <span class="comment"> * @param attr_col_names The array contains all the features' names.</span> |
| <a name="l01001"></a>01001 <span class="comment"> * @param is_conts The subscript of the array denotes the feature index.</span> |
| <a name="l01002"></a>01002 <span class="comment"> * Each value of the array denotes the feature is</span> |
| <a name="l01003"></a>01003 <span class="comment"> * continuous ('t') or discrete ('f')</span> |
| <a name="l01004"></a>01004 <span class="comment"> * @param verbosity > 0 means this function runs in verbose mode. </span> |
| <a name="l01005"></a>01005 <span class="comment"> *</span> |
| <a name="l01006"></a>01006 <span class="comment"> * @return The name of the breakup table, which will be used to generate the encoded</span> |
| <a name="l01007"></a>01007 <span class="comment"> * table.</span> |
| <a name="l01008"></a>01008 <span class="comment"> *</span> |
| <a name="l01009"></a>01009 <span class="comment"> */</span> |
| <a name="l01010"></a>01010 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__breakup_table |
| <a name="l01011"></a>01011 ( |
| <a name="l01012"></a>01012 input_tbl_name TEXT, |
| <a name="l01013"></a>01013 breakup_tbl_name TEXT, |
| <a name="l01014"></a>01014 kv_cls_name TEXT, |
| <a name="l01015"></a>01015 id_col_name TEXT, |
| <a name="l01016"></a>01016 cls_col_name TEXT, |
| <a name="l01017"></a>01017 attr_col_names TEXT[], |
| <a name="l01018"></a>01018 is_conts BOOL[], |
| <a name="l01019"></a>01019 h2hmv_routine_id INT, |
| <a name="l01020"></a>01020 verbosity INT |
| <a name="l01021"></a>01021 ) |
| <a name="l01022"></a>01022 RETURNS VOID AS $$ |
| <a name="l01023"></a>01023 DECLARE |
| <a name="l01024"></a>01024 curstmt TEXT; |
| <a name="l01025"></a>01025 exec_begin TIMESTAMP; |
| <a name="l01026"></a>01026 where_txt TEXT := ''; |
| <a name="l01027"></a>01027 fval_txt TEXT := 'fval'; |
| <a name="l01028"></a>01028 BEGIN |
| <a name="l01029"></a>01029 exec_begin = clock_timestamp(); |
| <a name="l01030"></a>01030 |
| <a name="l01031"></a>01031 EXECUTE 'DROP TABLE IF EXISTS ' || breakup_tbl_name; |
| <a name="l01032"></a>01032 |
| <a name="l01033"></a>01033 m4_changequote(`>>>', `<<<') |
| <a name="l01034"></a>01034 m4_ifdef(>>>__GREENPLUM_GE_4_2_1__<<<, >>> |
| <a name="l01035"></a>01035 -- if the DB is GPDB and its <a class="code" href="utilities_8sql__in.html#a9ce7ffa76acc8a0638e9bc48be99f36a" title="Return MADlib build information.">version</a> is greater than or equal |
| <a name="l01036"></a>01036 -- to 4.2, then we will use RLE compression for the encoded table. |
| <a name="l01037"></a>01037 curstmt = MADLIB_SCHEMA.__format |
| <a name="l01038"></a>01038 ( |
| <a name="l01039"></a>01039 'CREATE TEMP TABLE % |
| <a name="l01040"></a>01040 ( |
| <a name="l01041"></a>01041 <span class="keywordtype">id</span> BIGINT ENCODING (compresstype=RLE_TYPE), |
| <a name="l01042"></a>01042 fid INT ENCODING (compresstype=RLE_TYPE), |
| <a name="l01043"></a>01043 fval TEXT ENCODING (compresstype=RLE_TYPE), |
| <a name="l01044"></a>01044 is_cont BOOL ENCODING (compresstype=RLE_TYPE), |
| <a name="l01045"></a>01045 class INT ENCODING (compresstype=RLE_TYPE) |
| <a name="l01046"></a>01046 ) |
| <a name="l01047"></a>01047 WITH(appendonly=true, orientation=column) |
| <a name="l01048"></a>01048 DISTRIBUTED BY(<span class="keywordtype">id</span>)', |
| <a name="l01049"></a>01049 ARRAY[ |
| <a name="l01050"></a>01050 breakup_tbl_name |
| <a name="l01051"></a>01051 ] |
| <a name="l01052"></a>01052 ); |
| <a name="l01053"></a>01053 <<<, >>> |
| <a name="l01054"></a>01054 curstmt = MADLIB_SCHEMA.__format |
| <a name="l01055"></a>01055 ( |
| <a name="l01056"></a>01056 'CREATE TEMP TABLE % |
| <a name="l01057"></a>01057 ( |
| <a name="l01058"></a>01058 <span class="keywordtype">id</span> BIGINT, |
| <a name="l01059"></a>01059 fid INT, |
| <a name="l01060"></a>01060 fval TEXT, |
| <a name="l01061"></a>01061 is_cont BOOL, |
| <a name="l01062"></a>01062 class INT |
| <a name="l01063"></a>01063 ) |
| <a name="l01064"></a>01064 m4_ifdef(`__GREENPLUM__', `DISTRIBUTED BY (<span class="keywordtype">id</span>)')', |
| <a name="l01065"></a>01065 ARRAY[ |
| <a name="l01066"></a>01066 breakup_tbl_name |
| <a name="l01067"></a>01067 ] |
| <a name="l01068"></a>01068 ); |
| <a name="l01069"></a>01069 <<<) |
| <a name="l01070"></a>01070 m4_changequote(>>>`<<<, >>>'<<<) |
| <a name="l01071"></a>01071 |
| <a name="l01072"></a>01072 EXECUTE curstmt; |
| <a name="l01073"></a>01073 |
| <a name="l01074"></a>01074 -- the supported missing value representation (' ', '?' and NULL) will |
| <a name="l01075"></a>01075 -- be replace with NULL for easy processing later. |
| <a name="l01076"></a>01076 -- the function __to_char is needed because on some databases an explicit |
| <a name="l01077"></a>01077 -- cast to text is unavailable. |
| <a name="l01078"></a>01078 IF (h2hmv_routine_id = 1) THEN |
| <a name="l01079"></a>01079 where_txt = ' WHERE NULLIF(NULLIF(btrim(fval, '' ''), ''?''), '''') IS NOT NULL'; |
| <a name="l01080"></a>01080 ELSE |
| <a name="l01081"></a>01081 fval_txt = ' CASE WHEN NULLIF(NULLIF(btrim(fval, '' ''), ''?''), '''') IS NULL THEN |
| <a name="l01082"></a>01082 NULL |
| <a name="l01083"></a>01083 ELSE |
| <a name="l01084"></a>01084 fval |
| <a name="l01085"></a>01085 END '; |
| <a name="l01086"></a>01086 END IF; |
| <a name="l01087"></a>01087 |
| <a name="l01088"></a>01088 IF (cls_col_name IS NULL) THEN |
| <a name="l01089"></a>01089 -- if the kv_cls_name is null, then the class column will be null |
| <a name="l01090"></a>01090 curstmt = MADLIB_SCHEMA.__format |
| <a name="l01091"></a>01091 ( |
| <a name="l01092"></a>01092 'INSERT INTO %(<span class="keywordtype">id</span>, fid, fval, is_cont, class) |
| <a name="l01093"></a>01093 SELECT <span class="keywordtype">id</span>, fid, % as fval, is_cont, class |
| <a name="l01094"></a>01094 FROM |
| <a name="l01095"></a>01095 ( |
| <a name="l01096"></a>01096 SELECT %, generate_series(1, %) as fid, |
| <a name="l01097"></a>01097 unnest(array[MADLIB_SCHEMA.__to_char(%)]) as fval, |
| <a name="l01098"></a>01098 unnest(array[''%''::BOOL]::BOOL[]) as is_cont, NULL as class |
| <a name="l01099"></a>01099 FROM |
| <a name="l01100"></a>01100 % t1 |
| <a name="l01101"></a>01101 ) t |
| <a name="l01102"></a>01102 %', |
| <a name="l01103"></a>01103 ARRAY[ |
| <a name="l01104"></a>01104 breakup_tbl_name, |
| <a name="l01105"></a>01105 fval_txt, |
| <a name="l01106"></a>01106 id_col_name, |
| <a name="l01107"></a>01107 array_upper(is_conts, 1)::TEXT, |
| <a name="l01108"></a>01108 array_to_string(attr_col_names, '), MADLIB_SCHEMA.__to_char('), |
| <a name="l01109"></a>01109 array_to_string(is_conts, ''','''), |
| <a name="l01110"></a>01110 input_tbl_name, |
| <a name="l01111"></a>01111 where_txt |
| <a name="l01112"></a>01112 ] |
| <a name="l01113"></a>01113 ); |
| <a name="l01114"></a>01114 |
| <a name="l01115"></a>01115 ELSE |
| <a name="l01116"></a>01116 -- for scoring, as the class column may have some values which are not |
| <a name="l01117"></a>01117 -- appear in the training table, we need use left join here to ensure all |
| <a name="l01118"></a>01118 -- the rows of the input table were breakup. Here, we simple encode those |
| <a name="l01119"></a>01119 -- values to 0. Therefore, during scoring, the samples with 0 (encoded |
| <a name="l01120"></a>01120 -- value) as class label will be recognized as mis-classified. |
| <a name="l01121"></a>01121 curstmt = MADLIB_SCHEMA.__format |
| <a name="l01122"></a>01122 ( |
| <a name="l01123"></a>01123 'INSERT INTO %(<span class="keywordtype">id</span>, fid, fval, is_cont, class) |
| <a name="l01124"></a>01124 SELECT <span class="keywordtype">id</span>, fid, % as fval, is_cont, class |
| <a name="l01125"></a>01125 FROM |
| <a name="l01126"></a>01126 ( |
| <a name="l01127"></a>01127 SELECT %, generate_series(1, %) as fid, |
| <a name="l01128"></a>01128 unnest(array[MADLIB_SCHEMA.__to_char(%)]) as fval, |
| <a name="l01129"></a>01129 unnest(array[''%''::BOOL]::BOOL[]) as is_cont, |
| <a name="l01130"></a>01130 coalesce(code, 0::INT) as class |
| <a name="l01131"></a>01131 FROM |
| <a name="l01132"></a>01132 % t1 LEFT JOIN % t2 |
| <a name="l01133"></a>01133 ON MADLIB_SCHEMA.__to_char(t1.%) = t2.fval |
| <a name="l01134"></a>01134 ) t |
| <a name="l01135"></a>01135 %', |
| <a name="l01136"></a>01136 ARRAY[ |
| <a name="l01137"></a>01137 breakup_tbl_name, |
| <a name="l01138"></a>01138 fval_txt, |
| <a name="l01139"></a>01139 id_col_name, |
| <a name="l01140"></a>01140 array_upper(is_conts, 1)::TEXT, |
| <a name="l01141"></a>01141 array_to_string(attr_col_names, '), MADLIB_SCHEMA.__to_char('), |
| <a name="l01142"></a>01142 array_to_string(is_conts, ''','''), |
| <a name="l01143"></a>01143 input_tbl_name, |
| <a name="l01144"></a>01144 kv_cls_name, |
| <a name="l01145"></a>01145 cls_col_name, |
| <a name="l01146"></a>01146 where_txt |
| <a name="l01147"></a>01147 ] |
| <a name="l01148"></a>01148 ); |
| <a name="l01149"></a>01149 END IF; |
| <a name="l01150"></a>01150 |
| <a name="l01151"></a>01151 EXECUTE curstmt; |
| <a name="l01152"></a>01152 |
| <a name="l01153"></a>01153 IF (verbosity > 0) THEN |
| <a name="l01154"></a>01154 RAISE INFO '%', curstmt; |
| <a name="l01155"></a>01155 RAISE INFO 'time of breaking up the training table:%', |
| <a name="l01156"></a>01156 clock_timestamp() - exec_begin; |
| <a name="l01157"></a>01157 END IF; |
| <a name="l01158"></a>01158 END |
| <a name="l01159"></a>01159 $$ LANGUAGE PLPGSQL; |
| <a name="l01160"></a>01160 |
| <a name="l01161"></a>01161 |
| <a name="l01162"></a>01162 <span class="comment">/*</span> |
| <a name="l01163"></a>01163 <span class="comment"> * @brief Generate the vertical encoded table from the breakup table.</span> |
| <a name="l01164"></a>01164 <span class="comment"> *</span> |
| <a name="l01165"></a>01165 <span class="comment"> * @param breakup_tbl_name The full name of the breakup table.</span> |
| <a name="l01166"></a>01166 <span class="comment"> * @param enc_tbl_name The name of the encoded table. its schema is:</span> |
| <a name="l01167"></a>01167 <span class="comment"> * id BIGINT, </span> |
| <a name="l01168"></a>01168 <span class="comment"> * fid INT, </span> |
| <a name="l01169"></a>01169 <span class="comment"> * fval FLOAT8, </span> |
| <a name="l01170"></a>01170 <span class="comment"> * is_cont BOOL, </span> |
| <a name="l01171"></a>01171 <span class="comment"> * class INT</span> |
| <a name="l01172"></a>01172 <span class="comment"> * @param kv_attr_name The name of the key-value table contains the encoded</span> |
| <a name="l01173"></a>01173 <span class="comment"> * result for all the features. For continuous feature,</span> |
| <a name="l01174"></a>01174 <span class="comment"> * it kept the average value of it if in 'explicit' mode;</span> |
| <a name="l01175"></a>01175 <span class="comment"> * nothing will kept if in 'ignore' mode.</span> |
| <a name="l01176"></a>01176 <span class="comment"> * @param is_tbl_tmp If ture we will create the encoded table as a temp one.</span> |
| <a name="l01177"></a>01177 <span class="comment"> * @param verbosity > 0 means this function runs in verbose mode. </span> |
| <a name="l01178"></a>01178 <span class="comment"> *</span> |
| <a name="l01179"></a>01179 <span class="comment"> */</span> |
| <a name="l01180"></a>01180 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__gen_vertical_encoded_table |
| <a name="l01181"></a>01181 ( |
| <a name="l01182"></a>01182 breakup_tbl_name TEXT, |
| <a name="l01183"></a>01183 enc_tbl_name TEXT, |
| <a name="l01184"></a>01184 kv_attr_name TEXT, |
| <a name="l01185"></a>01185 is_tbl_tmp BOOL, |
| <a name="l01186"></a>01186 verbosity INT |
| <a name="l01187"></a>01187 ) |
| <a name="l01188"></a>01188 RETURNS VOID AS $$ |
| <a name="l01189"></a>01189 DECLARE |
| <a name="l01190"></a>01190 curstmt TEXT; |
| <a name="l01191"></a>01191 exec_begin TIMESTAMP; |
| <a name="l01192"></a>01192 tmp_txt TEXT = ''; |
| <a name="l01193"></a>01193 BEGIN |
| <a name="l01194"></a>01194 IF (is_tbl_tmp) THEN |
| <a name="l01195"></a>01195 tmp_txt = ' TEMP '; |
| <a name="l01196"></a>01196 END IF; |
| <a name="l01197"></a>01197 |
| <a name="l01198"></a>01198 EXECUTE 'DROP TABLE IF EXISTS ' || enc_tbl_name; |
| <a name="l01199"></a>01199 |
| <a name="l01200"></a>01200 m4_changequote(`>>>', `<<<') |
| <a name="l01201"></a>01201 m4_ifdef(>>>__GREENPLUM_GE_4_2_1__<<<, >>> |
| <a name="l01202"></a>01202 curstmt = MADLIB_SCHEMA.__format |
| <a name="l01203"></a>01203 ( |
| <a name="l01204"></a>01204 'CREATE % TABLE % |
| <a name="l01205"></a>01205 ( |
| <a name="l01206"></a>01206 <span class="keywordtype">id</span> BIGINT ENCODING (compresstype=RLE_TYPE), |
| <a name="l01207"></a>01207 fid INT ENCODING (compresstype=RLE_TYPE), |
| <a name="l01208"></a>01208 fval FLOAT8 ENCODING (compresstype=RLE_TYPE), |
| <a name="l01209"></a>01209 is_cont BOOL ENCODING (compresstype=RLE_TYPE), |
| <a name="l01210"></a>01210 class INT ENCODING (compresstype=RLE_TYPE) |
| <a name="l01211"></a>01211 ) |
| <a name="l01212"></a>01212 WITH(appendonly=true, orientation=column) |
| <a name="l01213"></a>01213 DISTRIBUTED BY(<span class="keywordtype">id</span>)', |
| <a name="l01214"></a>01214 ARRAY[ |
| <a name="l01215"></a>01215 tmp_txt, |
| <a name="l01216"></a>01216 enc_tbl_name |
| <a name="l01217"></a>01217 ] |
| <a name="l01218"></a>01218 ); |
| <a name="l01219"></a>01219 <<<, >>> |
| <a name="l01220"></a>01220 curstmt = MADLIB_SCHEMA.__format |
| <a name="l01221"></a>01221 ( |
| <a name="l01222"></a>01222 'CREATE % TABLE % |
| <a name="l01223"></a>01223 ( |
| <a name="l01224"></a>01224 <span class="keywordtype">id</span> BIGINT, |
| <a name="l01225"></a>01225 fid INT, |
| <a name="l01226"></a>01226 fval FLOAT8, |
| <a name="l01227"></a>01227 is_cont BOOL, |
| <a name="l01228"></a>01228 class INT |
| <a name="l01229"></a>01229 ) |
| <a name="l01230"></a>01230 m4_ifdef(`__GREENPLUM__', `DISTRIBUTED BY (<span class="keywordtype">id</span>)')', |
| <a name="l01231"></a>01231 ARRAY[ |
| <a name="l01232"></a>01232 tmp_txt, |
| <a name="l01233"></a>01233 enc_tbl_name |
| <a name="l01234"></a>01234 ] |
| <a name="l01235"></a>01235 ); |
| <a name="l01236"></a>01236 <<<) |
| <a name="l01237"></a>01237 m4_changequote(>>>`<<<, >>>'<<<) |
| <a name="l01238"></a>01238 |
| <a name="l01239"></a>01239 IF (verbosity > 0) THEN |
| <a name="l01240"></a>01240 RAISE INFO '%', curstmt; |
| <a name="l01241"></a>01241 END IF; |
| <a name="l01242"></a>01242 EXECUTE curstmt; |
| <a name="l01243"></a>01243 |
| <a name="l01244"></a>01244 -- Generating the encoded table through join the breakup table with |
| <a name="l01245"></a>01245 -- the KV table for all the features |
| <a name="l01246"></a>01246 curstmt = MADLIB_SCHEMA.__format |
| <a name="l01247"></a>01247 ( |
| <a name="l01248"></a>01248 'INSERT INTO %(<span class="keywordtype">id</span>, fid, fval, is_cont, class) |
| <a name="l01249"></a>01249 SELECT p.<span class="keywordtype">id</span> AS <span class="keywordtype">id</span>, p.fid AS fid, |
| <a name="l01250"></a>01250 CASE WHEN (p.is_cont AND p.fval IS NOT NULL) THEN |
| <a name="l01251"></a>01251 p.fval::FLOAT8 |
| <a name="l01252"></a>01252 ELSE |
| <a name="l01253"></a>01253 m.code::FLOAT8 |
| <a name="l01254"></a>01254 END AS fval, |
| <a name="l01255"></a>01255 p.is_cont AS is_cont, |
| <a name="l01256"></a>01256 p.class::INT AS class |
| <a name="l01257"></a>01257 FROM |
| <a name="l01258"></a>01258 % p LEFT JOIN % m |
| <a name="l01259"></a>01259 ON |
| <a name="l01260"></a>01260 m.fid = p.fid AND |
| <a name="l01261"></a>01261 (coalesce(m.fval, '''') = (coalesce(p.fval, '''')))', |
| <a name="l01262"></a>01262 ARRAY[ |
| <a name="l01263"></a>01263 enc_tbl_name, |
| <a name="l01264"></a>01264 breakup_tbl_name, |
| <a name="l01265"></a>01265 kv_attr_name |
| <a name="l01266"></a>01266 ] |
| <a name="l01267"></a>01267 ); |
| <a name="l01268"></a>01268 |
| <a name="l01269"></a>01269 IF (verbosity > 0) THEN |
| <a name="l01270"></a>01270 RAISE INFO '%', curstmt; |
| <a name="l01271"></a>01271 END IF; |
| <a name="l01272"></a>01272 EXECUTE curstmt; |
| <a name="l01273"></a>01273 END |
| <a name="l01274"></a>01274 $$ LANGUAGE PLPGSQL; |
| <a name="l01275"></a>01275 |
| <a name="l01276"></a>01276 |
| <a name="l01277"></a>01277 <span class="comment">/*</span> |
| <a name="l01278"></a>01278 <span class="comment"> * @brief Generate the horizontal table from a given vertical table.</span> |
| <a name="l01279"></a>01279 <span class="comment"> *</span> |
| <a name="l01280"></a>01280 <span class="comment"> * @param hor_tbl_name The full name of the horizontal table.</span> |
| <a name="l01281"></a>01281 <span class="comment"> * @param ver_tbl_name The full name of the vertical table.</span> |
| <a name="l01282"></a>01282 <span class="comment"> * @param meta_tbl_name The full name of the meta data table.</span> |
| <a name="l01283"></a>01283 <span class="comment"> * @param verbosity > 0 means this function runs in verbose mode. </span> |
| <a name="l01284"></a>01284 <span class="comment"> *</span> |
| <a name="l01285"></a>01285 <span class="comment"> */</span> |
| <a name="l01286"></a>01286 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__gen_horizontal_encoded_table |
| <a name="l01287"></a>01287 ( |
| <a name="l01288"></a>01288 hor_tbl_name TEXT, |
| <a name="l01289"></a>01289 ver_tbl_name TEXT, |
| <a name="l01290"></a>01290 attr_count INT, |
| <a name="l01291"></a>01291 verbosity INT |
| <a name="l01292"></a>01292 ) |
| <a name="l01293"></a>01293 RETURNS VOID AS $$ |
| <a name="l01294"></a>01294 DECLARE |
| <a name="l01295"></a>01295 curstmt TEXT; |
| <a name="l01296"></a>01296 exec_begin TIMESTAMP; |
| <a name="l01297"></a>01297 BEGIN |
| <a name="l01298"></a>01298 exec_begin = clock_timestamp(); |
| <a name="l01299"></a>01299 |
| <a name="l01300"></a>01300 EXECUTE 'DROP TABLE IF EXISTS ' || hor_tbl_name; |
| <a name="l01301"></a>01301 curstmt = MADLIB_SCHEMA.__format |
| <a name="l01302"></a>01302 ( |
| <a name="l01303"></a>01303 'CREATE TEMP TABLE %(<span class="keywordtype">id</span>, fvals, class) AS |
| <a name="l01304"></a>01304 SELECT |
| <a name="l01305"></a>01305 <span class="keywordtype">id</span>, |
| <a name="l01306"></a>01306 MADLIB_SCHEMA.__array_indexed_agg(fval, %, fid) as fvals, |
| <a name="l01307"></a>01307 min(class)::INT as class |
| <a name="l01308"></a>01308 FROM % |
| <a name="l01309"></a>01309 GROUP BY <span class="keywordtype">id</span> |
| <a name="l01310"></a>01310 m4_ifdef(`__GREENPLUM__', `DISTRIBUTED BY (<span class="keywordtype">id</span>)')', |
| <a name="l01311"></a>01311 ARRAY[ |
| <a name="l01312"></a>01312 hor_tbl_name, |
| <a name="l01313"></a>01313 attr_count::TEXT, |
| <a name="l01314"></a>01314 ver_tbl_name |
| <a name="l01315"></a>01315 ] |
| <a name="l01316"></a>01316 ); |
| <a name="l01317"></a>01317 EXECUTE curstmt; |
| <a name="l01318"></a>01318 |
| <a name="l01319"></a>01319 IF (verbosity > 0) THEN |
| <a name="l01320"></a>01320 RAISE INFO 'time of generating horizontal table from vertical table:%', |
| <a name="l01321"></a>01321 clock_timestamp() - exec_begin; |
| <a name="l01322"></a>01322 END IF; |
| <a name="l01323"></a>01323 END |
| <a name="l01324"></a>01324 $$ LANGUAGE PLPGSQL; |
| <a name="l01325"></a>01325 |
| <a name="l01326"></a>01326 |
| <a name="l01327"></a>01327 <span class="comment">/*</span> |
| <a name="l01328"></a>01328 <span class="comment"> * @brief Encode the continuous and discrete features and the class column.</span> |
| <a name="l01329"></a>01329 <span class="comment"> * In 'ignore' mode, for each discrete feature/class, we will use </span> |
| <a name="l01330"></a>01330 <span class="comment"> * continuous integer to encode each distinct value (null value </span> |
| <a name="l01331"></a>01331 <span class="comment"> * will be excluded). Continuous feature will not be processed. </span> |
| <a name="l01332"></a>01332 <span class="comment"> * In 'explicit' mode, null value will be included for discrete </span> |
| <a name="l01333"></a>01333 <span class="comment"> * feature. For continuous feature, null value will be replaced by</span> |
| <a name="l01334"></a>01334 <span class="comment"> * the average value of this feature.</span> |
| <a name="l01335"></a>01335 <span class="comment"> * </span> |
| <a name="l01336"></a>01336 <span class="comment"> * @param kv_attr_name The name of the key-value table contains the encoded</span> |
| <a name="l01337"></a>01337 <span class="comment"> * result for all the features. For continuous feature,</span> |
| <a name="l01338"></a>01338 <span class="comment"> * it kept the average value of it if in 'explicit' mode;</span> |
| <a name="l01339"></a>01339 <span class="comment"> * nothing will kept if in 'ignore' mode.</span> |
| <a name="l01340"></a>01340 <span class="comment"> * @param breakup_tbl_name The name of the breakup table from raw training table.</span> |
| <a name="l01341"></a>01341 <span class="comment"> * @param h2hmv_routine_id The ID of the routine which specifies </span> |
| <a name="l01342"></a>01342 <span class="comment"> * How to handle missing value(h2hmv).</span> |
| <a name="l01343"></a>01343 <span class="comment"> * @param verbosity > 0 means this function runs in verbose mode.</span> |
| <a name="l01344"></a>01344 <span class="comment"> *</span> |
| <a name="l01345"></a>01345 <span class="comment"> *</span> |
| <a name="l01346"></a>01346 <span class="comment"> */</span> |
| <a name="l01347"></a>01347 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__encode_columns |
| <a name="l01348"></a>01348 ( |
| <a name="l01349"></a>01349 kv_attr_name TEXT, |
| <a name="l01350"></a>01350 breakup_tbl_name TEXT, |
| <a name="l01351"></a>01351 h2hmv_routine_id INT, |
| <a name="l01352"></a>01352 verbosity INT |
| <a name="l01353"></a>01353 ) |
| <a name="l01354"></a>01354 RETURNS VOID AS $$ |
| <a name="l01355"></a>01355 DECLARE |
| <a name="l01356"></a>01356 curstmt TEXT; |
| <a name="l01357"></a>01357 tmp_txt TEXT = ''; |
| <a name="l01358"></a>01358 BEGIN |
| <a name="l01359"></a>01359 |
| <a name="l01360"></a>01360 -- This table will be used to generate the KV table |
| <a name="l01361"></a>01361 -- for the discrete features and retrieve the number |
| <a name="l01362"></a>01362 -- of distinct values for a feature outside of this |
| <a name="l01363"></a>01363 -- function. Therefore, don't drop this table in this |
| <a name="l01364"></a>01364 -- function. |
| <a name="l01365"></a>01365 EXECUTE 'DROP TABLE IF EXISTS tmp_dist_table'; |
| <a name="l01366"></a>01366 curstmt = MADLIB_SCHEMA.__format |
| <a name="l01367"></a>01367 ( |
| <a name="l01368"></a>01368 'CREATE TEMP TABLE tmp_dist_table AS |
| <a name="l01369"></a>01369 SELECT fid, fval, is_cont |
| <a name="l01370"></a>01370 FROM % |
| <a name="l01371"></a>01371 GROUP BY fid, fval, is_cont', |
| <a name="l01372"></a>01372 ARRAY[ |
| <a name="l01373"></a>01373 breakup_tbl_name |
| <a name="l01374"></a>01374 ] |
| <a name="l01375"></a>01375 ); |
| <a name="l01376"></a>01376 IF (verbosity > 0) THEN |
| <a name="l01377"></a>01377 RAISE INFO '%', curstmt; |
| <a name="l01378"></a>01378 END IF; |
| <a name="l01379"></a>01379 |
| <a name="l01380"></a>01380 EXECUTE curstmt; |
| <a name="l01381"></a>01381 |
| <a name="l01382"></a>01382 -- create the KV table for all the features and |
| <a name="l01383"></a>01383 -- populate the keys of the discrete features |
| <a name="l01384"></a>01384 -- to the table. |
| <a name="l01385"></a>01385 EXECUTE 'DROP TABLE IF EXISTS ' || kv_attr_name; |
| <a name="l01386"></a>01386 curstmt = MADLIB_SCHEMA.__format |
| <a name="l01387"></a>01387 ( |
| <a name="l01388"></a>01388 'CREATE TABLE %(fid, fval, code) AS |
| <a name="l01389"></a>01389 SELECT |
| <a name="l01390"></a>01390 fid, |
| <a name="l01391"></a>01391 fval, |
| <a name="l01392"></a>01392 (rank() OVER (PARTITION BY fid ORDER BY fval))::FLOAT8 AS code |
| <a name="l01393"></a>01393 FROM tmp_dist_table |
| <a name="l01394"></a>01394 WHERE (NOT is_cont) |
| <a name="l01395"></a>01395 m4_ifdef(`__GREENPLUM__', `DISTRIBUTED BY (fid, fval)')', |
| <a name="l01396"></a>01396 ARRAY[ |
| <a name="l01397"></a>01397 kv_attr_name |
| <a name="l01398"></a>01398 ] |
| <a name="l01399"></a>01399 ); |
| <a name="l01400"></a>01400 IF (verbosity > 0) THEN |
| <a name="l01401"></a>01401 RAISE INFO '%', curstmt; |
| <a name="l01402"></a>01402 END IF; |
| <a name="l01403"></a>01403 EXECUTE curstmt; |
| <a name="l01404"></a>01404 |
| <a name="l01405"></a>01405 -- In "explicit" mode, we need to replace the missing |
| <a name="l01406"></a>01406 -- value with the average value. Therefore, we keep |
| <a name="l01407"></a>01407 -- those values to the KV table. |
| <a name="l01408"></a>01408 IF (h2hmv_routine_id = 2) THEN |
| <a name="l01409"></a>01409 curstmt = MADLIB_SCHEMA.__format |
| <a name="l01410"></a>01410 ( |
| <a name="l01411"></a>01411 'INSERT INTO %(fid, fval, code) |
| <a name="l01412"></a>01412 SELECT |
| <a name="l01413"></a>01413 fid, |
| <a name="l01414"></a>01414 null, |
| <a name="l01415"></a>01415 coalesce(<a class="code" href="linalg_8sql__in.html#a1aa37f73fb1cd8d7d106aa518dd8c0b4" title="Compute the average of vectors.">avg</a>(fval::FLOAT8), 0.0) |
| <a name="l01416"></a>01416 FROM |
| <a name="l01417"></a>01417 % s |
| <a name="l01418"></a>01418 WHERE is_cont |
| <a name="l01419"></a>01419 GROUP BY fid', |
| <a name="l01420"></a>01420 ARRAY[ |
| <a name="l01421"></a>01421 kv_attr_name, |
| <a name="l01422"></a>01422 breakup_tbl_name |
| <a name="l01423"></a>01423 ] |
| <a name="l01424"></a>01424 ); |
| <a name="l01425"></a>01425 IF (verbosity > 0) THEN |
| <a name="l01426"></a>01426 RAISE INFO '%', curstmt; |
| <a name="l01427"></a>01427 END IF; |
| <a name="l01428"></a>01428 |
| <a name="l01429"></a>01429 EXECUTE curstmt; |
| <a name="l01430"></a>01430 END IF; |
| <a name="l01431"></a>01431 END |
| <a name="l01432"></a>01432 $$ LANGUAGE PLPGSQL; |
| <a name="l01433"></a>01433 |
| <a name="l01434"></a>01434 |
| <a name="l01435"></a>01435 <span class="comment">/*</span> |
| <a name="l01436"></a>01436 <span class="comment"> * @brief Encode a table for training in C4.5 and RF.</span> |
| <a name="l01437"></a>01437 <span class="comment"> *</span> |
| <a name="l01438"></a>01438 <span class="comment"> * @param input_tbl_name The full name of the input table.</span> |
| <a name="l01439"></a>01439 <span class="comment"> * @param id_col_name The name of id column.</span> |
| <a name="l01440"></a>01440 <span class="comment"> * @param feature_names An array contains all the feature. If it's null, </span> |
| <a name="l01441"></a>01441 <span class="comment"> * we will get all the columns of the input table.</span> |
| <a name="l01442"></a>01442 <span class="comment"> * @param cls_col_name The name of class column.</span> |
| <a name="l01443"></a>01443 <span class="comment"> * @param cont_attr_names An array contains all the continuous feature.</span> |
| <a name="l01444"></a>01444 <span class="comment"> * Null means no continuous feature.</span> |
| <a name="l01445"></a>01445 <span class="comment"> * @param enc_table_name The full name of the encoded table.</span> |
| <a name="l01446"></a>01446 <span class="comment"> * @param meta_tbl_name The full name of the metatable.</span> |
| <a name="l01447"></a>01447 <span class="comment"> * @param h2hmv_routine_id The ID of the routine which specifies </span> |
| <a name="l01448"></a>01448 <span class="comment"> * How to handle missing value(h2hmv).</span> |
| <a name="l01449"></a>01449 <span class="comment"> * @param verbosity > 0 means this function runs in verbose mode. </span> |
| <a name="l01450"></a>01450 <span class="comment"> *</span> |
| <a name="l01451"></a>01451 <span class="comment"> */</span> |
| <a name="l01452"></a>01452 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__encode_table |
| <a name="l01453"></a>01453 ( |
| <a name="l01454"></a>01454 input_tbl_name TEXT, |
| <a name="l01455"></a>01455 id_col_name TEXT, |
| <a name="l01456"></a>01456 feature_names TEXT[], |
| <a name="l01457"></a>01457 cls_col_name TEXT, |
| <a name="l01458"></a>01458 cont_attr_names TEXT[], |
| <a name="l01459"></a>01459 enc_table_name TEXT, |
| <a name="l01460"></a>01460 meta_tbl_name TEXT, |
| <a name="l01461"></a>01461 h2hmv_routine_id INT, |
| <a name="l01462"></a>01462 verbosity INT |
| <a name="l01463"></a>01463 ) |
| <a name="l01464"></a>01464 RETURNS VOID AS $$ |
| <a name="l01465"></a>01465 DECLARE |
| <a name="l01466"></a>01466 curstmt TEXT := ''; |
| <a name="l01467"></a>01467 attr_col_names TEXT[]; |
| <a name="l01468"></a>01468 lit_attr_col_names TEXT[]; |
| <a name="l01469"></a>01469 kv_attr_name TEXT := enc_table_name || '_col'; |
| <a name="l01470"></a>01470 kv_cls_name TEXT := enc_table_name || '_class'; |
| <a name="l01471"></a>01471 is_conts BOOL[]; |
| <a name="l01472"></a>01472 breakup_tbl_name TEXT := 'tmp_breakup_table'; |
| <a name="l01473"></a>01473 exec_begin TIMESTAMP; |
| <a name="l01474"></a>01474 ret MADLIB_SCHEMA.__enc_tbl_result; |
| <a name="l01475"></a>01475 BEGIN |
| <a name="l01476"></a>01476 exec_begin = clock_timestamp(); |
| <a name="l01477"></a>01477 |
| <a name="l01478"></a>01478 -- validate the training table |
| <a name="l01479"></a>01479 PERFORM MADLIB_SCHEMA.__validate_input_table |
| <a name="l01480"></a>01480 ( |
| <a name="l01481"></a>01481 input_tbl_name, |
| <a name="l01482"></a>01482 feature_names, |
| <a name="l01483"></a>01483 id_col_name, |
| <a name="l01484"></a>01484 cls_col_name |
| <a name="l01485"></a>01485 ); |
| <a name="l01486"></a>01486 |
| <a name="l01487"></a>01487 -- create metatable |
| <a name="l01488"></a>01488 PERFORM MADLIB_SCHEMA.__create_metatable(meta_tbl_name); |
| <a name="l01489"></a>01489 |
| <a name="l01490"></a>01490 -- retrieve all the features' names |
| <a name="l01491"></a>01491 IF (feature_names IS NULL) THEN |
| <a name="l01492"></a>01492 m4_changequote(`>>>', `<<<') |
| <a name="l01493"></a>01493 m4_ifdef(`__HAS_ORDERED_AGGREGATES__', >>> |
| <a name="l01494"></a>01494 curstmt = MADLIB_SCHEMA.__format |
| <a name="l01495"></a>01495 ( |
| <a name="l01496"></a>01496 'SELECT <a class="code" href="array__ops_8sql__in.html#af9f60293134ba4ce05b1f97f6faba822" title="ARRAY_AGG aggregate for compatibility with GPDB < 4.1 and Postgres < 9.0 This is a slower solution th...">array_agg</a>(quote_ident(attname) ORDER BY attname) as attnames |
| <a name="l01497"></a>01497 FROM pg_attribute |
| <a name="l01498"></a>01498 WHERE attrelid = ''%''::regclass and attnum > 0 AND |
| <a name="l01499"></a>01499 attname <> ''%'' AND |
| <a name="l01500"></a>01500 attname <> ''%'' AND |
| <a name="l01501"></a>01501 NOT attisdropped;', |
| <a name="l01502"></a>01502 ARRAY[ |
| <a name="l01503"></a>01503 input_tbl_name, |
| <a name="l01504"></a>01504 id_col_name, |
| <a name="l01505"></a>01505 cls_col_name |
| <a name="l01506"></a>01506 ] |
| <a name="l01507"></a>01507 ); |
| <a name="l01508"></a>01508 |
| <a name="l01509"></a>01509 EXECUTE curstmt INTO attr_col_names; |
| <a name="l01510"></a>01510 <<<, >>> |
| <a name="l01511"></a>01511 curstmt = MADLIB_SCHEMA.__format |
| <a name="l01512"></a>01512 ( |
| <a name="l01513"></a>01513 'SELECT ARRAY |
| <a name="l01514"></a>01514 ( |
| <a name="l01515"></a>01515 SELECT quote_ident(attname) |
| <a name="l01516"></a>01516 FROM pg_attribute |
| <a name="l01517"></a>01517 WHERE attrelid = ''%''::regclass and attnum > 0 AND |
| <a name="l01518"></a>01518 attname <> ''%'' AND |
| <a name="l01519"></a>01519 attname <> ''%'' AND |
| <a name="l01520"></a>01520 NOT attisdropped |
| <a name="l01521"></a>01521 ORDER BY attname |
| <a name="l01522"></a>01522 LIMIT ALL |
| <a name="l01523"></a>01523 )', |
| <a name="l01524"></a>01524 ARRAY[ |
| <a name="l01525"></a>01525 input_tbl_name, |
| <a name="l01526"></a>01526 id_col_name, |
| <a name="l01527"></a>01527 cls_col_name |
| <a name="l01528"></a>01528 ] |
| <a name="l01529"></a>01529 ); |
| <a name="l01530"></a>01530 EXECUTE curstmt INTO attr_col_names; |
| <a name="l01531"></a>01531 <<<) |
| <a name="l01532"></a>01532 m4_changequote(>>>`<<<, >>>'<<<) |
| <a name="l01533"></a>01533 ELSE |
| <a name="l01534"></a>01534 attr_col_names = MADLIB_SCHEMA.__array_sort(feature_names); |
| <a name="l01535"></a>01535 END IF; |
| <a name="l01536"></a>01536 |
| <a name="l01537"></a>01537 -- an array contains if a feature is continuous or not |
| <a name="l01538"></a>01538 -- the subscript is corresponding to the feature's ID |
| <a name="l01539"></a>01539 is_conts = MADLIB_SCHEMA.__array_elem_in(cont_attr_names, attr_col_names); |
| <a name="l01540"></a>01540 |
| <a name="l01541"></a>01541 ret.pre_proc_time = clock_timestamp() - exec_begin; |
| <a name="l01542"></a>01542 exec_begin = clock_timestamp(); |
| <a name="l01543"></a>01543 |
| <a name="l01544"></a>01544 -- create the KV table for the class column. |
| <a name="l01545"></a>01545 EXECUTE 'DROP TABLE IF EXISTS ' || kv_cls_name; |
| <a name="l01546"></a>01546 curstmt = MADLIB_SCHEMA.__format |
| <a name="l01547"></a>01547 ( |
| <a name="l01548"></a>01548 'CREATE TABLE % AS |
| <a name="l01549"></a>01549 SELECT 0 as fid, |
| <a name="l01550"></a>01550 MADLIB_SCHEMA.__to_char(%) AS fval, |
| <a name="l01551"></a>01551 rank() OVER (ORDER BY %) AS code |
| <a name="l01552"></a>01552 FROM |
| <a name="l01553"></a>01553 ( |
| <a name="l01554"></a>01554 SELECT % FROM % GROUP BY % |
| <a name="l01555"></a>01555 ) t |
| <a name="l01556"></a>01556 m4_ifdef(`__GREENPLUM__', `DISTRIBUTED BY (fval)')', |
| <a name="l01557"></a>01557 ARRAY[ |
| <a name="l01558"></a>01558 kv_cls_name, |
| <a name="l01559"></a>01559 cls_col_name, |
| <a name="l01560"></a>01560 cls_col_name, |
| <a name="l01561"></a>01561 cls_col_name, |
| <a name="l01562"></a>01562 input_tbl_name, |
| <a name="l01563"></a>01563 cls_col_name |
| <a name="l01564"></a>01564 ] |
| <a name="l01565"></a>01565 ); |
| <a name="l01566"></a>01566 IF (verbosity > 0) THEN |
| <a name="l01567"></a>01567 RAISE INFO '%', curstmt; |
| <a name="l01568"></a>01568 END IF; |
| <a name="l01569"></a>01569 EXECUTE curstmt; |
| <a name="l01570"></a>01570 |
| <a name="l01571"></a>01571 ret.gen_kv_time = clock_timestamp() - exec_begin; |
| <a name="l01572"></a>01572 exec_begin = clock_timestamp(); |
| <a name="l01573"></a>01573 |
| <a name="l01574"></a>01574 -- breakup each record of the training table and keep the result |
| <a name="l01575"></a>01575 -- into a new table. |
| <a name="l01576"></a>01576 PERFORM MADLIB_SCHEMA.__breakup_table |
| <a name="l01577"></a>01577 ( |
| <a name="l01578"></a>01578 input_tbl_name, |
| <a name="l01579"></a>01579 breakup_tbl_name, |
| <a name="l01580"></a>01580 kv_cls_name, |
| <a name="l01581"></a>01581 id_col_name, |
| <a name="l01582"></a>01582 cls_col_name, |
| <a name="l01583"></a>01583 attr_col_names, |
| <a name="l01584"></a>01584 is_conts, |
| <a name="l01585"></a>01585 h2hmv_routine_id, |
| <a name="l01586"></a>01586 verbosity |
| <a name="l01587"></a>01587 ); |
| <a name="l01588"></a>01588 |
| <a name="l01589"></a>01589 ret.breakup_tbl_time= clock_timestamp() - exec_begin; |
| <a name="l01590"></a>01590 exec_begin = clock_timestamp(); |
| <a name="l01591"></a>01591 |
| <a name="l01592"></a>01592 -- generate the KV table for both continuous features |
| <a name="l01593"></a>01593 -- and discrete features. |
| <a name="l01594"></a>01594 PERFORM MADLIB_SCHEMA.__encode_columns |
| <a name="l01595"></a>01595 ( |
| <a name="l01596"></a>01596 kv_attr_name, |
| <a name="l01597"></a>01597 breakup_tbl_name, |
| <a name="l01598"></a>01598 h2hmv_routine_id, |
| <a name="l01599"></a>01599 verbosity |
| <a name="l01600"></a>01600 ); |
| <a name="l01601"></a>01601 |
| <a name="l01602"></a>01602 ret.gen_kv_time = ret.gen_kv_time + (clock_timestamp() - exec_begin); |
| <a name="l01603"></a>01603 exec_begin = clock_timestamp(); |
| <a name="l01604"></a>01604 |
| <a name="l01605"></a>01605 -- generate the encoded table using the breakup table |
| <a name="l01606"></a>01606 -- and KV table for all the features. |
| <a name="l01607"></a>01607 PERFORM MADLIB_SCHEMA.__gen_vertical_encoded_table |
| <a name="l01608"></a>01608 ( |
| <a name="l01609"></a>01609 breakup_tbl_name, |
| <a name="l01610"></a>01610 enc_table_name, |
| <a name="l01611"></a>01611 kv_attr_name, |
| <a name="l01612"></a>01612 'f'::BOOL, |
| <a name="l01613"></a>01613 verbosity |
| <a name="l01614"></a>01614 ); |
| <a name="l01615"></a>01615 |
| <a name="l01616"></a>01616 ret.gen_enc_time = clock_timestamp() - exec_begin; |
| <a name="l01617"></a>01617 exec_begin = clock_timestamp(); |
| <a name="l01618"></a>01618 |
| <a name="l01619"></a>01619 SELECT ARRAY(SELECT quote_literal(unnest(attr_col_names))) INTO lit_attr_col_names; |
| <a name="l01620"></a>01620 |
| <a name="l01621"></a>01621 -- put the features' meta information to the metatable |
| <a name="l01622"></a>01622 curstmt = MADLIB_SCHEMA.__format |
| <a name="l01623"></a>01623 ( |
| <a name="l01624"></a>01624 'INSERT INTO % |
| <a name="l01625"></a>01625 SELECT fid as <span class="keywordtype">id</span>, (ARRAY[%])[fid] as column_name, |
| <a name="l01626"></a>01626 ''f'' as column_type, |
| <a name="l01627"></a>01627 is_cont, |
| <a name="l01628"></a>01628 ''%''::regclass::OID, |
| <a name="l01629"></a>01629 count(fid) as num_dist_value |
| <a name="l01630"></a>01630 FROM % t |
| <a name="l01631"></a>01631 GROUP BY fid, is_cont', |
| <a name="l01632"></a>01632 ARRAY[ |
| <a name="l01633"></a>01633 meta_tbl_name, |
| <a name="l01634"></a>01634 array_to_string(lit_attr_col_names, ','), |
| <a name="l01635"></a>01635 kv_attr_name, |
| <a name="l01636"></a>01636 'tmp_dist_table' |
| <a name="l01637"></a>01637 ] |
| <a name="l01638"></a>01638 ); |
| <a name="l01639"></a>01639 |
| <a name="l01640"></a>01640 EXECUTE curstmt; |
| <a name="l01641"></a>01641 |
| <a name="l01642"></a>01642 IF (h2hmv_routine_id = 1) THEN |
| <a name="l01643"></a>01643 -- retrieve the information of the columns (all the values in those |
| <a name="l01644"></a>01644 -- columns are missing), and insert them to the meta table. |
| <a name="l01645"></a>01645 curstmt = MADLIB_SCHEMA.__format |
| <a name="l01646"></a>01646 ( |
| <a name="l01647"></a>01647 'INSERT INTO % |
| <a name="l01648"></a>01648 SELECT <span class="keywordtype">id</span>, (ARRAY[%])[<span class="keywordtype">id</span>] as column_name, |
| <a name="l01649"></a>01649 ''f'' as column_type, ''t'', NULL, 0 |
| <a name="l01650"></a>01650 FROM ( |
| <a name="l01651"></a>01651 SELECT generate_series(1, %) <span class="keywordtype">id</span> |
| <a name="l01652"></a>01652 EXCEPT |
| <a name="l01653"></a>01653 SELECT <span class="keywordtype">id</span> FROM % WHERE column_type = ''f'' |
| <a name="l01654"></a>01654 ) t', |
| <a name="l01655"></a>01655 ARRAY[ |
| <a name="l01656"></a>01656 meta_tbl_name, |
| <a name="l01657"></a>01657 array_to_string(lit_attr_col_names, ','), |
| <a name="l01658"></a>01658 array_upper(attr_col_names, 1)::TEXT, |
| <a name="l01659"></a>01659 meta_tbl_name |
| <a name="l01660"></a>01660 ] |
| <a name="l01661"></a>01661 ); |
| <a name="l01662"></a>01662 EXECUTE curstmt; |
| <a name="l01663"></a>01663 END IF; |
| <a name="l01664"></a>01664 |
| <a name="l01665"></a>01665 -- no need this table |
| <a name="l01666"></a>01666 EXECUTE 'DROP TABLE IF EXISTS tmp_dist_table'; |
| <a name="l01667"></a>01667 |
| <a name="l01668"></a>01668 -- put the class's meta information to the metatable |
| <a name="l01669"></a>01669 curstmt = MADLIB_SCHEMA.__format |
| <a name="l01670"></a>01670 ( |
| <a name="l01671"></a>01671 'INSERT INTO % |
| <a name="l01672"></a>01672 SELECT 0 as <span class="keywordtype">id</span>,''%'', |
| <a name="l01673"></a>01673 ''c'' as column_type, |
| <a name="l01674"></a>01674 ''f''::BOOL, |
| <a name="l01675"></a>01675 ''%''::regclass::OID, |
| <a name="l01676"></a>01676 count(code) as num_dist_value |
| <a name="l01677"></a>01677 FROM % t |
| <a name="l01678"></a>01678 GROUP BY fid', |
| <a name="l01679"></a>01679 ARRAY[ |
| <a name="l01680"></a>01680 meta_tbl_name, |
| <a name="l01681"></a>01681 cls_col_name, |
| <a name="l01682"></a>01682 kv_cls_name, |
| <a name="l01683"></a>01683 kv_cls_name |
| <a name="l01684"></a>01684 ] |
| <a name="l01685"></a>01685 ); |
| <a name="l01686"></a>01686 |
| <a name="l01687"></a>01687 EXECUTE curstmt; |
| <a name="l01688"></a>01688 |
| <a name="l01689"></a>01689 -- put the <span class="keywordtype">id</span>'s meta information to the metatable |
| <a name="l01690"></a>01690 PERFORM MADLIB_SCHEMA.__insert_into_metatable |
| <a name="l01691"></a>01691 ( |
| <a name="l01692"></a>01692 meta_tbl_name, |
| <a name="l01693"></a>01693 array_upper(attr_col_names, 1) + 1, |
| <a name="l01694"></a>01694 id_col_name, |
| <a name="l01695"></a>01695 'i', 'f', NULL, 0 |
| <a name="l01696"></a>01696 ); |
| <a name="l01697"></a>01697 |
| <a name="l01698"></a>01698 -- analyze the table, so that later the optimizer has the statistics |
| <a name="l01699"></a>01699 -- information about this table |
| <a name="l01700"></a>01700 EXECUTE 'ANALYZE ' || enc_table_name; |
| <a name="l01701"></a>01701 |
| <a name="l01702"></a>01702 ret.post_proc_time = clock_timestamp() - exec_begin; |
| <a name="l01703"></a>01703 |
| <a name="l01704"></a>01704 IF (verbosity > 0) THEN |
| <a name="l01705"></a>01705 RAISE INFO 'time of encoding: %', ret; |
| <a name="l01706"></a>01706 END IF; |
| <a name="l01707"></a>01707 END |
| <a name="l01708"></a>01708 $$ LANGUAGE PLPGSQL; |
| <a name="l01709"></a>01709 |
| <a name="l01710"></a>01710 |
| <a name="l01711"></a>01711 <span class="comment">/*</span> |
| <a name="l01712"></a>01712 <span class="comment"> * @brief Encode a table for classification/scoring.</span> |
| <a name="l01713"></a>01713 <span class="comment"> *</span> |
| <a name="l01714"></a>01714 <span class="comment"> * @param input_tbl_name The full name of the input table.</span> |
| <a name="l01715"></a>01715 <span class="comment"> * @param enc_tbl_name The full name of the encoded table.</span> |
| <a name="l01716"></a>01716 <span class="comment"> * @param meta_tbl_name The full name of the metatable.</span> |
| <a name="l01717"></a>01717 <span class="comment"> * @param h2hmv_routine_id The ID of the routine which specifies </span> |
| <a name="l01718"></a>01718 <span class="comment"> * how to handle missing value(h2hmv). </span> |
| <a name="l01719"></a>01719 <span class="comment"> * @param verbosity > 0 means this function runs in verbose mode. </span> |
| <a name="l01720"></a>01720 <span class="comment"> *</span> |
| <a name="l01721"></a>01721 <span class="comment"> */</span> |
| <a name="l01722"></a>01722 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__encode_table |
| <a name="l01723"></a>01723 ( |
| <a name="l01724"></a>01724 input_tbl_name TEXT, |
| <a name="l01725"></a>01725 enc_tbl_name TEXT, |
| <a name="l01726"></a>01726 meta_tbl_name TEXT, |
| <a name="l01727"></a>01727 h2hmv_routine_id INT, |
| <a name="l01728"></a>01728 verbosity INT |
| <a name="l01729"></a>01729 ) |
| <a name="l01730"></a>01730 RETURNS VOID AS $$ |
| <a name="l01731"></a>01731 DECLARE |
| <a name="l01732"></a>01732 curstmt TEXT; |
| <a name="l01733"></a>01733 attr_col_names TEXT[]; |
| <a name="l01734"></a>01734 cls_col_name TEXT; |
| <a name="l01735"></a>01735 id_col_name TEXT; |
| <a name="l01736"></a>01736 kv_attr_name TEXT; |
| <a name="l01737"></a>01737 kv_cls_name TEXT; |
| <a name="l01738"></a>01738 is_conts BOOL[]; |
| <a name="l01739"></a>01739 exec_begin TIMESTAMP; |
| <a name="l01740"></a>01740 breakup_tbl_name TEXT := 'tmp_breakup_table'; |
| <a name="l01741"></a>01741 BEGIN |
| <a name="l01742"></a>01742 exec_begin = clock_timestamp(); |
| <a name="l01743"></a>01743 |
| <a name="l01744"></a>01744 curstmt = MADLIB_SCHEMA.__format |
| <a name="l01745"></a>01745 ( |
| <a name="l01746"></a>01746 'SELECT column_name FROM % WHERE column_type=''i''', |
| <a name="l01747"></a>01747 meta_tbl_name |
| <a name="l01748"></a>01748 ); |
| <a name="l01749"></a>01749 EXECUTE curstmt INTO id_col_name; |
| <a name="l01750"></a>01750 |
| <a name="l01751"></a>01751 curstmt = MADLIB_SCHEMA.__format |
| <a name="l01752"></a>01752 ( |
| <a name="l01753"></a>01753 'SELECT column_name FROM % WHERE column_type=''c''', |
| <a name="l01754"></a>01754 meta_tbl_name |
| <a name="l01755"></a>01755 ); |
| <a name="l01756"></a>01756 EXECUTE curstmt INTO cls_col_name; |
| <a name="l01757"></a>01757 |
| <a name="l01758"></a>01758 IF (NOT MADLIB_SCHEMA.__column_exists(input_tbl_name, cls_col_name)) THEN |
| <a name="l01759"></a>01759 cls_col_name = NULL; |
| <a name="l01760"></a>01760 END IF; |
| <a name="l01761"></a>01761 |
| <a name="l01762"></a>01762 m4_changequote(`>>>', `<<<') |
| <a name="l01763"></a>01763 m4_ifdef(`__HAS_ORDERED_AGGREGATES__', >>> |
| <a name="l01764"></a>01764 curstmt = MADLIB_SCHEMA.__format |
| <a name="l01765"></a>01765 ( |
| <a name="l01766"></a>01766 'SELECT <a class="code" href="array__ops_8sql__in.html#af9f60293134ba4ce05b1f97f6faba822" title="ARRAY_AGG aggregate for compatibility with GPDB < 4.1 and Postgres < 9.0 This is a slower solution th...">array_agg</a>(column_name order by <span class="keywordtype">id</span>) |
| <a name="l01767"></a>01767 FROM % WHERE column_type=''f''', |
| <a name="l01768"></a>01768 meta_tbl_name |
| <a name="l01769"></a>01769 ); |
| <a name="l01770"></a>01770 EXECUTE curstmt INTO attr_col_names; |
| <a name="l01771"></a>01771 |
| <a name="l01772"></a>01772 curstmt = MADLIB_SCHEMA.__format |
| <a name="l01773"></a>01773 ( |
| <a name="l01774"></a>01774 'SELECT |
| <a name="l01775"></a>01775 <a class="code" href="array__ops_8sql__in.html#af9f60293134ba4ce05b1f97f6faba822" title="ARRAY_AGG aggregate for compatibility with GPDB < 4.1 and Postgres < 9.0 This is a slower solution th...">array_agg</a>(is_cont order by <span class="keywordtype">id</span>) |
| <a name="l01776"></a>01776 FROM % |
| <a name="l01777"></a>01777 WHERE column_type=''f''', |
| <a name="l01778"></a>01778 meta_tbl_name |
| <a name="l01779"></a>01779 ); |
| <a name="l01780"></a>01780 EXECUTE curstmt INTO is_conts; |
| <a name="l01781"></a>01781 <<<, >>> |
| <a name="l01782"></a>01782 curstmt = MADLIB_SCHEMA.__format |
| <a name="l01783"></a>01783 ( |
| <a name="l01784"></a>01784 'SELECT ARRAY |
| <a name="l01785"></a>01785 ( |
| <a name="l01786"></a>01786 SELECT column_name |
| <a name="l01787"></a>01787 FROM % WHERE column_type=''f'' |
| <a name="l01788"></a>01788 ORDER BY <span class="keywordtype">id</span> |
| <a name="l01789"></a>01789 LIMIT ALL |
| <a name="l01790"></a>01790 )', |
| <a name="l01791"></a>01791 meta_tbl_name |
| <a name="l01792"></a>01792 ); |
| <a name="l01793"></a>01793 EXECUTE curstmt INTO attr_col_names; |
| <a name="l01794"></a>01794 |
| <a name="l01795"></a>01795 curstmt = MADLIB_SCHEMA.__format |
| <a name="l01796"></a>01796 ( |
| <a name="l01797"></a>01797 'SELECT ARRAY |
| <a name="l01798"></a>01798 ( |
| <a name="l01799"></a>01799 SELECT is_cont |
| <a name="l01800"></a>01800 FROM % |
| <a name="l01801"></a>01801 WHERE column_type=''f'' |
| <a name="l01802"></a>01802 ORDER BY <span class="keywordtype">id</span> |
| <a name="l01803"></a>01803 LIMIT ALL |
| <a name="l01804"></a>01804 )', |
| <a name="l01805"></a>01805 meta_tbl_name |
| <a name="l01806"></a>01806 ); |
| <a name="l01807"></a>01807 EXECUTE curstmt INTO is_conts; |
| <a name="l01808"></a>01808 <<<) |
| <a name="l01809"></a>01809 m4_changequote(>>>`<<<, >>>'<<<) |
| <a name="l01810"></a>01810 |
| <a name="l01811"></a>01811 curstmt = MADLIB_SCHEMA.__format |
| <a name="l01812"></a>01812 ( |
| <a name="l01813"></a>01813 'SELECT |
| <a name="l01814"></a>01814 MADLIB_SCHEMA.__regclass_to_text(table_oid) as table_name |
| <a name="l01815"></a>01815 FROM % |
| <a name="l01816"></a>01816 WHERE column_type=''f'' limit 1', |
| <a name="l01817"></a>01817 meta_tbl_name |
| <a name="l01818"></a>01818 ); |
| <a name="l01819"></a>01819 EXECUTE curstmt INTO kv_attr_name; |
| <a name="l01820"></a>01820 |
| <a name="l01821"></a>01821 curstmt = MADLIB_SCHEMA.__format |
| <a name="l01822"></a>01822 ( |
| <a name="l01823"></a>01823 'SELECT |
| <a name="l01824"></a>01824 MADLIB_SCHEMA.__regclass_to_text(table_oid) as table_name |
| <a name="l01825"></a>01825 FROM % |
| <a name="l01826"></a>01826 WHERE column_type=''c'' limit 1', |
| <a name="l01827"></a>01827 meta_tbl_name |
| <a name="l01828"></a>01828 ); |
| <a name="l01829"></a>01829 EXECUTE curstmt INTO kv_cls_name; |
| <a name="l01830"></a>01830 |
| <a name="l01831"></a>01831 PERFORM MADLIB_SCHEMA.__validate_input_table |
| <a name="l01832"></a>01832 ( |
| <a name="l01833"></a>01833 input_tbl_name, |
| <a name="l01834"></a>01834 NULL, |
| <a name="l01835"></a>01835 id_col_name, |
| <a name="l01836"></a>01836 NULL |
| <a name="l01837"></a>01837 ); |
| <a name="l01838"></a>01838 |
| <a name="l01839"></a>01839 -- breakup each record from the classification/scoring |
| <a name="l01840"></a>01840 -- table and kept the results into a new table. |
| <a name="l01841"></a>01841 PERFORM MADLIB_SCHEMA.__breakup_table |
| <a name="l01842"></a>01842 ( |
| <a name="l01843"></a>01843 input_tbl_name, |
| <a name="l01844"></a>01844 breakup_tbl_name, |
| <a name="l01845"></a>01845 kv_cls_name, |
| <a name="l01846"></a>01846 id_col_name, |
| <a name="l01847"></a>01847 cls_col_name, |
| <a name="l01848"></a>01848 attr_col_names, |
| <a name="l01849"></a>01849 is_conts, |
| <a name="l01850"></a>01850 h2hmv_routine_id, |
| <a name="l01851"></a>01851 verbosity |
| <a name="l01852"></a>01852 ); |
| <a name="l01853"></a>01853 |
| <a name="l01854"></a>01854 -- generate the vertical encoded table. |
| <a name="l01855"></a>01855 PERFORM MADLIB_SCHEMA.__gen_vertical_encoded_table |
| <a name="l01856"></a>01856 ( |
| <a name="l01857"></a>01857 breakup_tbl_name, |
| <a name="l01858"></a>01858 'dt_tmp_ver_table', |
| <a name="l01859"></a>01859 kv_attr_name, |
| <a name="l01860"></a>01860 't'::BOOL, |
| <a name="l01861"></a>01861 verbosity |
| <a name="l01862"></a>01862 ); |
| <a name="l01863"></a>01863 |
| <a name="l01864"></a>01864 -- generate the horizontal encoded table. |
| <a name="l01865"></a>01865 EXECUTE 'DROP TABLE IF EXISTS ' || enc_tbl_name; |
| <a name="l01866"></a>01866 PERFORM MADLIB_SCHEMA.__gen_horizontal_encoded_table |
| <a name="l01867"></a>01867 ( |
| <a name="l01868"></a>01868 enc_tbl_name, |
| <a name="l01869"></a>01869 'dt_tmp_ver_table', |
| <a name="l01870"></a>01870 array_upper(is_conts, 1), |
| <a name="l01871"></a>01871 verbosity |
| <a name="l01872"></a>01872 ); |
| <a name="l01873"></a>01873 |
| <a name="l01874"></a>01874 EXECUTE 'DROP TABLE IF EXISTS dt_tmp_ver_table'; |
| <a name="l01875"></a>01875 |
| <a name="l01876"></a>01876 IF (verbosity > 0) THEN |
| <a name="l01877"></a>01877 RAISE INFO 'Encoding time:%', clock_timestamp() - exec_begin; |
| <a name="l01878"></a>01878 END IF; |
| <a name="l01879"></a>01879 END |
| <a name="l01880"></a>01880 $$ LANGUAGE PLPGSQL; |
| </pre></div></div> |
| </div> |
| <div id="nav-path" class="navpath"> |
| <ul> |
| <li class="navelem"><a class="el" href="dt__preproc_8sql__in.html">dt_preproc.sql_in</a> </li> |
| <!-- window showing the filter options --> |
| <div id="MSearchSelectWindow" |
| onmouseover="return searchBox.OnSearchSelectShow()" |
| onmouseout="return searchBox.OnSearchSelectHide()" |
| onkeydown="return searchBox.OnSearchSelectKey(event)"> |
| <a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark"> </span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark"> </span>Files</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark"> </span>Functions</a></div> |
| |
| <!-- iframe showing the search results (closed by default) --> |
| <div id="MSearchResultsWindow"> |
| <iframe src="javascript:void(0)" frameborder="0" |
| name="MSearchResults" id="MSearchResults"> |
| </iframe> |
| </div> |
| |
| |
| <li class="footer">Generated on Tue Apr 2 2013 14:57:03 for MADlib by |
| <a href="http://www.doxygen.org/index.html"> |
| <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.7.5.1 </li> |
| </ul> |
| </div> |
| |
| |
| </body> |
| </html> |