| <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> |
| <html xmlns="http://www.w3.org/1999/xhtml"> |
| <head> |
| <!-- Metadata, stylesheets and scripts for the Doxygen-generated source listing of lmf.sql_in. --> |
| <!-- "text/xhtml" is not a registered media type, so text/html is declared here. --> |
| <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/> |
| <title>MADlib: lmf.sql_in Source File</title> |
| |
| <link href="tabs.css" rel="stylesheet" type="text/css"/> |
| <link href="doxygen.css" rel="stylesheet" type="text/css" /> |
| <link href="navtree.css" rel="stylesheet" type="text/css"/> |
| <!-- jquery.js comes first; the inline handlers below rely on $ (presumably provided by it). --> |
| <script type="text/javascript" src="jquery.js"></script> |
| <script type="text/javascript" src="resize.js"></script> |
| <script type="text/javascript" src="navtree.js"></script> |
| <script type="text/javascript"> |
| $(document).ready(initResizable); |
| </script> |
| <link href="search/search.css" rel="stylesheet" type="text/css"/> |
| <script type="text/javascript" src="search/search.js"></script> |
| <!-- searchBox is created by an inline script in the page body, so it is only used once the DOM is ready. --> |
| <script type="text/javascript"> |
| $(document).ready(function() { searchBox.OnSelectItem(0); }); |
| </script> |
| <!-- The MathJax configuration is supplied in-line, in the body of the script element that loads MathJax.js. --> |
| <script type="text/javascript" src="../mathjax/MathJax.js"> |
| MathJax.Hub.Config({ |
| extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"], |
| jax: ["input/TeX","output/HTML-CSS"] |
| }); |
| </script> |
| </head> |
| <body> |
| <div id="top"><!-- do not remove this div! --> |
| |
| |
| <div id="titlearea"><!-- Page banner: project name, version number and brief, laid out in a single-cell table --> |
| <table cellspacing="0" cellpadding="0"> |
| <tbody> |
| <tr style="height: 56px;"> |
| |
| |
| <td style="padding-left: 0.5em;"> |
| <div id="projectname">MADlib |
|  <span id="projectnumber">0.6</span> <span style="font-size:10pt; font-style:italic"><a href="../latest/./lmf_8sql__in_source.html"> A newer version is available</a></span><!-- This page documents version 0.6; the link targets the same page name under ../latest/ --> |
| </div> |
| <div id="projectbrief">User Documentation</div> |
| </td> |
| |
| |
| |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| |
| <!-- Generated by Doxygen 1.7.5.1 --><!-- The inline script below creates the global searchBox object used by the search field event handlers and by the ready handler in the head; SearchBox is presumably defined in search/search.js (confirm) --> |
| <script type="text/javascript"> |
| var searchBox = new SearchBox("searchBox", "search",false,'Search'); |
| </script> |
| <script type="text/javascript" src="dynsections.js"></script> |
| <div id="navrow1" class="tabs"><!-- Primary tab bar: top-level pages plus the search box --> |
| <ul class="tablist"> |
| <li><a href="index.html"><span>Main Page</span></a></li> |
| <li><a href="modules.html"><span>Modules</span></a></li> |
| <li class="current"><a href="files.html"><span>Files</span></a></li> |
| <li> |
| <div id="MSearchBox" class="MSearchBoxInactive"> |
| <span class="left"> |
| <img id="MSearchSelect" src="search/mag_sel.png" |
| onmouseover="return searchBox.OnSearchSelectShow()" |
| onmouseout="return searchBox.OnSearchSelectHide()" |
| alt=""/> |
| <!-- title gives the search field an accessible name; it has no label element --> |
| <input type="text" id="MSearchField" value="Search" accesskey="S" title="Search" |
| onfocus="searchBox.OnSearchFieldFocus(true)" |
| onblur="searchBox.OnSearchFieldFocus(false)" |
| onkeyup="searchBox.OnSearchFieldChange(event)"/> |
| </span><span class="right"> |
| <!-- The alt text names this link, which contains nothing but the image --> |
| <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt="Close search results"/></a> |
| </span> |
| </div> |
| </li> |
| </ul> |
| </div> |
| <div id="navrow2" class="tabs2"><!-- Secondary tab bar: file list and file members pages --> |
| <ul class="tablist"> |
| <li><a href="files.html"><span>File List</span></a></li> |
| <li><a href="globals.html"><span>File Members</span></a></li> |
| </ul> |
| </div> |
| </div> |
| <div id="side-nav" class="ui-resizable side-nav-resizable"><!-- Sidebar holding the navigation tree; the tree container is empty in the markup --> |
| <div id="nav-tree"> |
| <div id="nav-tree-contents"> |
| </div> |
| </div> |
| <div id="splitbar" style="-moz-user-select:none;" |
| class="ui-resizable-handle"> |
| </div> |
| </div><!-- The script below calls initNavTree for lmf_8sql__in.html; presumably it fills nav-tree-contents (confirm in navtree.js) --> |
| <script type="text/javascript"> |
| initNavTree('lmf_8sql__in.html',''); |
| </script> |
| <div id="doc-content"> |
| <div class="header"> |
| <div class="headertitle"> |
| <div class="title">lmf.sql_in</div> </div> |
| </div> |
| <div class="contents"> |
| <a href="lmf_8sql__in.html">Go to the documentation of this file.</a><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">/* ----------------------------------------------------------------------- */</span><span class="comment">/**</span> |
| <a name="l00002"></a>00002 <span class="comment"> *</span> |
| <a name="l00003"></a>00003 <span class="comment"> * @file lmf.sql_in</span> |
| <a name="l00004"></a>00004 <span class="comment"> *</span> |
| <a name="l00005"></a>00005 <span class="comment"> * @brief SQL functions for low-rank matrix factorization</span> |
| <a name="l00006"></a>00006 <span class="comment"> * @date June 2012</span> |
| <a name="l00007"></a>00007 <span class="comment"> *</span> |
| <a name="l00008"></a>00008 <span class="comment"> * @sa For a brief introduction to Low-rank Matrix Factorization, see the module</span> |
| <a name="l00009"></a>00009 <span class="comment"> * description \ref grp_lmf.</span> |
| <a name="l00010"></a>00010 <span class="comment"> *</span> |
| <a name="l00011"></a>00011 <span class="comment"> */</span><span class="comment">/* ----------------------------------------------------------------------- */</span> |
| <a name="l00012"></a>00012 |
| <a name="l00013"></a>00013 m4_include(`SQLCommon.m4<span class="stringliteral">')</span> |
| <a name="l00014"></a>00014 <span class="stringliteral"></span><span class="comment"></span> |
| <a name="l00015"></a>00015 <span class="comment">/**</span> |
| <a name="l00016"></a>00016 <span class="comment">@addtogroup grp_lmf</span> |
| <a name="l00017"></a>00017 <span class="comment"></span> |
| <a name="l00018"></a>00018 <span class="comment"></span> |
| <a name="l00019"></a>00019 <span class="comment">@about</span> |
| <a name="l00020"></a>00020 <span class="comment"></span> |
| <a name="l00021"></a>00021 <span class="comment">This module implements "factor model" for representing an incomplete matrix using a low-rank approximation [1].</span> |
| <a name="l00022"></a>00022 <span class="comment">Mathematically, this model seeks to find matrices U and V (also referred to as factors) that, for any given incomplete matrix A, minimize:</span> |
| <a name="l00023"></a>00023 <span class="comment">\f[ \|\boldsymbol A - \boldsymbol UV^{T} \|_2 \f]</span> |
| <a name="l00024"></a>00024 <span class="comment">subject to \f$rank(\boldsymbol UV^{T}) \leq r\f$, where \f$\|\cdot\|_2\f$ denotes the Frobenius norm.</span> |
| <a name="l00025"></a>00025 <span class="comment">Let \f$A\f$ be a \f$m \times n\f$ matrix, then \f$U\f$ will be \f$m \times r\f$ and \f$V\f$ will be \f$n \times r\f$, in dimension, and \f$1 \leq r \ll \min(m, n)\f$.</span> |
| <a name="l00026"></a>00026 <span class="comment">This model is not intended to do the full decomposition, or to be used as part of inverse procedure.</span> |
| <a name="l00027"></a>00027 <span class="comment">This model has been widely used in recommendation systems (e.g., Netflix [2]) and feature selection (e.g., image processing [3]).</span> |
| <a name="l00028"></a>00028 <span class="comment"></span> |
| <a name="l00029"></a>00029 <span class="comment"></span> |
| <a name="l00030"></a>00030 <span class="comment">@input</span> |
| <a name="l00031"></a>00031 <span class="comment"></span> |
| <a name="l00032"></a>00032 <span class="comment">The &lt;b&gt;input matrix&lt;/b&gt; is expected to be of the following form:</span> |
| <a name="l00033"></a>00033 <span class="comment">&lt;pre&gt;{TABLE|VIEW} &lt;em&gt;input_table&lt;/em&gt; (</span> |
| <a name="l00034"></a>00034 <span class="comment"> &lt;em&gt;row&lt;/em&gt; INTEGER,</span> |
| <a name="l00035"></a>00035 <span class="comment"> &lt;em&gt;col&lt;/em&gt; INTEGER,</span> |
| <a name="l00036"></a>00036 <span class="comment"> &lt;em&gt;value&lt;/em&gt; DOUBLE PRECISION</span> |
| <a name="l00037"></a>00037 <span class="comment">)&lt;/pre&gt;</span> |
| <a name="l00038"></a>00038 <span class="comment"></span> |
| <a name="l00039"></a>00039 <span class="comment">Input is contained in a table that describes an incomplete matrix, by having available entries specified as (row, column, value).</span> |
| <a name="l00040"></a>00040 <span class="comment">The input matrix is expected to be 1-based, which means row >= 1, and col >= 1.</span> |
| <a name="l00041"></a>00041 <span class="comment">NULL values are not expected.</span> |
| <a name="l00042"></a>00042 <span class="comment"></span> |
| <a name="l00043"></a>00043 <span class="comment"></span> |
| <a name="l00044"></a>00044 <span class="comment">@usage</span> |
| <a name="l00045"></a>00045 <span class="comment"></span> |
| <a name="l00046"></a>00046 <span class="comment">Please find descriptions of SQL functions in lmf.sql_in</span> |
| <a name="l00047"></a>00047 <span class="comment"></span> |
| <a name="l00048"></a>00048 <span class="comment">The output factor matrices U and V are in flattened format.</span> |
| <a name="l00049"></a>00049 <span class="comment">&lt;pre&gt;RESULT AS (</span> |
| <a name="l00050"></a>00050 <span class="comment"> matrix_u DOUBLE PRECISION[],</span> |
| <a name="l00051"></a>00051 <span class="comment"> matrix_v DOUBLE PRECISION[],</span> |
| <a name="l00052"></a>00052 <span class="comment"> rmse DOUBLE PRECISION</span> |
| <a name="l00053"></a>00053 <span class="comment">);&lt;/pre&gt;</span> |
| <a name="l00054"></a>00054 <span class="comment"></span> |
| <a name="l00055"></a>00055 <span class="comment">The features corresponding to row i are</span> |
| <a name="l00056"></a>00056 <span class="comment">&lt;code&gt;matrix_u[i:i][1:r]&lt;/code&gt;.</span> |
| <a name="l00057"></a>00057 <span class="comment">The features corresponding to column j are</span> |
| <a name="l00058"></a>00058 <span class="comment">&lt;code&gt;matrix_v[j:j][1:r]&lt;/code&gt;.</span> |
| <a name="l00059"></a>00059 <span class="comment"></span> |
| <a name="l00060"></a>00060 <span class="comment"></span> |
| <a name="l00061"></a>00061 <span class="comment">@examp</span> |
| <a name="l00062"></a>00062 <span class="comment"></span> |
| <a name="l00063"></a>00063 <span class="comment">-# Prepare an input table/view:</span> |
| <a name="l00064"></a>00064 <span class="comment">\code</span> |
| <a name="l00065"></a>00065 <span class="comment">CREATE TABLE lmf_data (</span> |
| <a name="l00066"></a>00066 <span class="comment"> row INT,</span> |
| <a name="l00067"></a>00067 <span class="comment"> col INT,</span> |
| <a name="l00068"></a>00068 <span class="comment"> value FLOAT8</span> |
| <a name="l00069"></a>00069 <span class="comment">);</span> |
| <a name="l00070"></a>00070 <span class="comment">\endcode</span> |
| <a name="l00071"></a>00071 <span class="comment">-# Populate the input table with some data. e.g.:</span> |
| <a name="l00072"></a>00072 <span class="comment">\code</span> |
| <a name="l00073"></a>00073 <span class="comment">INSERT INTO lmf_data VALUES (1, 1, 5.0);</span> |
| <a name="l00074"></a>00074 <span class="comment">INSERT INTO lmf_data VALUES (3, 100, 1.0);</span> |
| <a name="l00075"></a>00075 <span class="comment">INSERT INTO lmf_data VALUES (999, 10000, 2.0);</span> |
| <a name="l00076"></a>00076 <span class="comment">\endcode</span> |
| <a name="l00077"></a>00077 <span class="comment">-# Call lmf_igd_run() stored procedure, e.g.:</span> |
| <a name="l00078"></a>00078 <span class="comment">\code</span> |
| <a name="l00079"></a>00079 <span class="comment">SELECT madlib.lmf_igd_run(</span> |
| <a name="l00080"></a>00080 <span class="comment">'lmf_model', -- result table</span> |
| <a name="l00081"></a>00081 <span class="comment">'lmf_data', -- input table</span> |
| <a name="l00082"></a>00082 <span class="comment">'row', 'col', 'value', -- table column names</span> |
| <a name="l00083"></a>00083 <span class="comment">999, -- row dimension</span> |
| <a name="l00084"></a>00084 <span class="comment">10000, -- column dimension</span> |
| <a name="l00085"></a>00085 <span class="comment">3, -- rank (number of features)</span> |
| <a name="l00086"></a>00086 <span class="comment">0.1, -- stepsize</span> |
| <a name="l00087"></a>00087 <span class="comment">2, -- initial value scale factor</span> |
| <a name="l00088"></a>00088 <span class="comment">10, -- maximal number of iterations</span> |
| <a name="l00089"></a>00089 <span class="comment">1e-9); -- error tolerance</span> |
| <a name="l00090"></a>00090 <span class="comment">\endcode</span> |
| <a name="l00091"></a>00091 <span class="comment">Example output (the exact result may not be the same):</span> |
| <a name="l00092"></a>00092 <span class="comment">\code</span> |
| <a name="l00093"></a>00093 <span class="comment">NOTICE:</span> |
| <a name="l00094"></a>00094 <span class="comment">Finished low-rank matrix factorization using incremental gradient</span> |
| <a name="l00095"></a>00095 <span class="comment">DETAIL:</span> |
| <a name="l00096"></a>00096 <span class="comment"> * table : lmf_data (row, col, value)</span> |
| <a name="l00097"></a>00097 <span class="comment">Results:</span> |
| <a name="l00098"></a>00098 <span class="comment"> * RMSE = 4.31144557397543e-05</span> |
| <a name="l00099"></a>00099 <span class="comment">Output:</span> |
| <a name="l00100"></a>00100 <span class="comment"> * view : SELECT * FROM lmf_model WHERE id = 1</span> |
| <a name="l00101"></a>00101 <span class="comment"> lmf_igd_run</span> |
| <a name="l00102"></a>00102 <span class="comment">-------------</span> |
| <a name="l00103"></a>00103 <span class="comment"> 1</span> |
| <a name="l00104"></a>00104 <span class="comment">(1 row)</span> |
| <a name="l00105"></a>00105 <span class="comment">\endcode</span> |
| <a name="l00106"></a>00106 <span class="comment">-# Sanity check of the result. You need the model id that is returned by lmf_igd_run() and also shown in its notice output (assumed to be 1 here), e.g.:</span> |
| <a name="l00107"></a>00107 <span class="comment">\code</span> |
| <a name="l00108"></a>00108 <span class="comment">SELECT array_dims(matrix_u), array_dims(matrix_v) FROM lmf_model WHERE id = 1;</span> |
| <a name="l00109"></a>00109 <span class="comment">\endcode</span> |
| <a name="l00110"></a>00110 <span class="comment">Example output:</span> |
| <a name="l00111"></a>00111 <span class="comment">\code</span> |
| <a name="l00112"></a>00112 <span class="comment"> array_dims | array_dims</span> |
| <a name="l00113"></a>00113 <span class="comment">--------------+----------------</span> |
| <a name="l00114"></a>00114 <span class="comment"> [1:999][1:3] | [1:10000][1:3]</span> |
| <a name="l00115"></a>00115 <span class="comment">(1 row)</span> |
| <a name="l00116"></a>00116 <span class="comment">\endcode</span> |
| <a name="l00117"></a>00117 <span class="comment">-# Query the result value, e.g.:</span> |
| <a name="l00118"></a>00118 <span class="comment">\code</span> |
| <a name="l00119"></a>00119 <span class="comment">SELECT matrix_u[2:2][1:3] AS row_2_features FROM lmf_model WHERE id = 1;</span> |
| <a name="l00120"></a>00120 <span class="comment">\endcode</span> |
| <a name="l00121"></a>00121 <span class="comment">Example output (the exact result may not be the same):</span> |
| <a name="l00122"></a>00122 <span class="comment">\code</span> |
| <a name="l00123"></a>00123 <span class="comment"> row_2_features</span> |
| <a name="l00124"></a>00124 <span class="comment">----------------------------------------------------------</span> |
| <a name="l00125"></a>00125 <span class="comment"> {{0.51117920037359,0.169582297094166,0.837417622096837}}</span> |
| <a name="l00126"></a>00126 <span class="comment">(1 row)</span> |
| <a name="l00127"></a>00127 <span class="comment">\endcode</span> |
| <a name="l00128"></a>00128 <span class="comment"></span> |
| <a name="l00129"></a>00129 <span class="comment"></span> |
| <a name="l00130"></a>00130 <span class="comment">@literature</span> |
| <a name="l00131"></a>00131 <span class="comment"></span> |
| <a name="l00132"></a>00132 <span class="comment">[1] N. Srebro and T. Jaakkola. “Weighted Low-Rank Approximations.” In: ICML. Ed. by T. Fawcett and N. Mishra. AAAI Press, 2003, pp. 720–727. isbn: 1-57735-189-4.</span> |
| <a name="l00133"></a>00133 <span class="comment"></span> |
| <a name="l00134"></a>00134 <span class="comment">[2] Simon Funk, Netflix Update: Try This at Home, December 11 2006, http://sifter.org/~simon/journal/20061211.html</span> |
| <a name="l00135"></a>00135 <span class="comment"></span> |
| <a name="l00136"></a>00136 <span class="comment">[3] J. Wright, A. Ganesh, S. Rao, Y. Peng, and Y. Ma. “Robust Principal Component Analysis: Exact Recovery of Corrupted Low-Rank Matrices via Convex Optimization.” In: NIPS. Ed. by Y. Bengio, D. Schuurmans, J. D. Lafferty, C. K. I. Williams, and A. Culotta. Curran Associates, Inc., 2009, pp. 2080–2088. isbn: 9781615679119.</span> |
| <a name="l00137"></a>00137 <span class="comment"></span> |
| <a name="l00138"></a>00138 <span class="comment">*/</span> |
| <a name="l00139"></a>00139 |
| <a name="l00140"></a>00140 CREATE TYPE MADLIB_SCHEMA.lmf_result AS ( |
| <a name="l00141"></a>00141 matrix_u DOUBLE PRECISION[], |
| <a name="l00142"></a>00142 matrix_v DOUBLE PRECISION[], |
| <a name="l00143"></a>00143 rmse DOUBLE PRECISION |
| <a name="l00144"></a>00144 ); |
| <a name="l00145"></a>00145 |
| <a name="l00146"></a>00146 -------------------------------------------------------------------------- |
| <a name="l00147"></a>00147 -- create SQL functions for IGD optimizer |
| <a name="l00148"></a>00148 -------------------------------------------------------------------------- |
| <a name="l00149"></a>00149 CREATE FUNCTION MADLIB_SCHEMA.lmf_igd_transition( |
| <a name="l00150"></a>00150 state DOUBLE PRECISION[], |
| <a name="l00151"></a>00151 row_num SMALLINT, |
| <a name="l00152"></a>00152 column_num SMALLINT, |
| <a name="l00153"></a>00153 val DOUBLE PRECISION, |
| <a name="l00154"></a>00154 previous_state DOUBLE PRECISION[], |
| <a name="l00155"></a>00155 row_dim SMALLINT, |
| <a name="l00156"></a>00156 column_dim SMALLINT, |
| <a name="l00157"></a>00157 max_rank SMALLINT, |
| <a name="l00158"></a>00158 stepsize DOUBLE PRECISION, |
| <a name="l00159"></a>00159 scale_factor DOUBLE PRECISION) |
| <a name="l00160"></a>00160 RETURNS DOUBLE PRECISION[] |
| <a name="l00161"></a>00161 AS 'MODULE_PATHNAME<span class="stringliteral">'</span> |
| <a name="l00162"></a>00162 <span class="stringliteral">LANGUAGE C IMMUTABLE;</span> |
| <a name="l00163"></a>00163 <span class="stringliteral"></span> |
| <a name="l00164"></a>00164 <span class="stringliteral">CREATE FUNCTION MADLIB_SCHEMA.lmf_igd_merge(</span> |
| <a name="l00165"></a>00165 <span class="stringliteral"> state1 DOUBLE PRECISION[],</span> |
| <a name="l00166"></a>00166 <span class="stringliteral"> state2 DOUBLE PRECISION[])</span> |
| <a name="l00167"></a>00167 <span class="stringliteral">RETURNS DOUBLE PRECISION[]</span> |
| <a name="l00168"></a>00168 <span class="stringliteral">AS '</span>MODULE_PATHNAME<span class="stringliteral">'</span> |
| <a name="l00169"></a>00169 <span class="stringliteral">LANGUAGE C IMMUTABLE STRICT;</span> |
| <a name="l00170"></a>00170 <span class="stringliteral"></span> |
| <a name="l00171"></a>00171 <span class="stringliteral">CREATE FUNCTION MADLIB_SCHEMA.lmf_igd_final(</span> |
| <a name="l00172"></a>00172 <span class="stringliteral"> state DOUBLE PRECISION[])</span> |
| <a name="l00173"></a>00173 <span class="stringliteral">RETURNS DOUBLE PRECISION[]</span> |
| <a name="l00174"></a>00174 <span class="stringliteral">AS '</span>MODULE_PATHNAME<span class="stringliteral">'</span> |
| <a name="l00175"></a>00175 <span class="stringliteral">LANGUAGE C IMMUTABLE STRICT;</span> |
| <a name="l00176"></a>00176 <span class="stringliteral"></span><span class="comment"></span> |
| <a name="l00177"></a>00177 <span class="comment">/**</span> |
| <a name="l00178"></a>00178 <span class="comment"> * @internal</span> |
| <a name="l00179"></a>00179 <span class="comment"> * @brief Perform one iteration of the incremental gradient</span> |
| <a name="l00180"></a>00180 <span class="comment"> * method for computing low-rank matrix factorization</span> |
| <a name="l00181"></a>00181 <span class="comment"> */</span> |
| <a name="l00182"></a>00182 CREATE AGGREGATE MADLIB_SCHEMA.lmf_igd_step( |
| <a name="l00183"></a>00183 /*+ row_num */ SMALLINT, |
| <a name="l00184"></a>00184 /*+ column_num */ SMALLINT, |
| <a name="l00185"></a>00185 /*+ val */ DOUBLE PRECISION, |
| <a name="l00186"></a>00186 /*+ previous_state */ DOUBLE PRECISION[], |
| <a name="l00187"></a>00187 /*+ row_dim */ SMALLINT, |
| <a name="l00188"></a>00188 /*+ column_dim */ SMALLINT, |
| <a name="l00189"></a>00189 /*+ max_rank */ SMALLINT, |
| <a name="l00190"></a>00190 /*+ stepsize */ DOUBLE PRECISION, |
| <a name="l00191"></a>00191 /*+ scale_factor */ DOUBLE PRECISION) ( |
| <a name="l00192"></a>00192 STYPE=DOUBLE PRECISION[], |
| <a name="l00193"></a>00193 SFUNC=MADLIB_SCHEMA.lmf_igd_transition, |
| <a name="l00194"></a>00194 -- m4_ifdef(`__GREENPLUM__',`PREFUNC=MADLIB_SCHEMA.lmf_igd_merge,<span class="stringliteral">')</span> |
| <a name="l00195"></a>00195 <span class="stringliteral"> FINALFUNC=MADLIB_SCHEMA.lmf_igd_final,</span> |
| <a name="l00196"></a>00196 <span class="stringliteral"> INITCOND='</span>{0,0,0,0,0,0,0,0,0}<span class="stringliteral">'</span> |
| <a name="l00197"></a>00197 <span class="stringliteral">);</span> |
| <a name="l00198"></a>00198 <span class="stringliteral"></span> |
| <a name="l00199"></a>00199 <span class="stringliteral">CREATE FUNCTION MADLIB_SCHEMA.internal_lmf_igd_distance(</span> |
| <a name="l00200"></a>00200 <span class="stringliteral"> /*+ state1 */ DOUBLE PRECISION[],</span> |
| <a name="l00201"></a>00201 <span class="stringliteral"> /*+ state2 */ DOUBLE PRECISION[])</span> |
| <a name="l00202"></a>00202 <span class="stringliteral">RETURNS DOUBLE PRECISION AS</span> |
| <a name="l00203"></a>00203 <span class="stringliteral">'</span>MODULE_PATHNAME<span class="stringliteral">'</span> |
| <a name="l00204"></a>00204 <span class="stringliteral">LANGUAGE c IMMUTABLE STRICT;</span> |
| <a name="l00205"></a>00205 <span class="stringliteral"></span> |
| <a name="l00206"></a>00206 <span class="stringliteral">CREATE FUNCTION MADLIB_SCHEMA.internal_lmf_igd_result(</span> |
| <a name="l00207"></a>00207 <span class="stringliteral"> /*+ state */ DOUBLE PRECISION[])</span> |
| <a name="l00208"></a>00208 <span class="stringliteral">RETURNS MADLIB_SCHEMA.lmf_result AS</span> |
| <a name="l00209"></a>00209 <span class="stringliteral">'</span>MODULE_PATHNAME<span class="stringliteral">'</span> |
| <a name="l00210"></a>00210 <span class="stringliteral">LANGUAGE c IMMUTABLE STRICT;</span> |
| <a name="l00211"></a>00211 <span class="stringliteral"></span> |
| <a name="l00212"></a>00212 <span class="stringliteral"></span> |
| <a name="l00213"></a>00213 <span class="stringliteral">CREATE FUNCTION MADLIB_SCHEMA.internal_execute_using_lmf_igd_args(</span> |
| <a name="l00214"></a>00214 <span class="stringliteral"> sql VARCHAR, INTEGER, INTEGER, INTEGER, DOUBLE PRECISION,</span> |
| <a name="l00215"></a>00215 <span class="stringliteral"> DOUBLE PRECISION, INTEGER, DOUBLE PRECISION</span> |
| <a name="l00216"></a>00216 <span class="stringliteral">) RETURNS VOID</span> |
| <a name="l00217"></a>00217 <span class="stringliteral">IMMUTABLE</span> |
| <a name="l00218"></a>00218 <span class="stringliteral">CALLED ON NULL INPUT</span> |
| <a name="l00219"></a>00219 <span class="stringliteral">LANGUAGE c</span> |
| <a name="l00220"></a>00220 <span class="stringliteral">AS '</span>MODULE_PATHNAME<span class="stringliteral">', '</span>exec_sql_using<span class="stringliteral">';</span> |
| <a name="l00221"></a>00221 <span class="stringliteral"></span> |
| <a name="l00222"></a>00222 <span class="stringliteral">CREATE FUNCTION MADLIB_SCHEMA.internal_compute_lmf_igd(</span> |
| <a name="l00223"></a>00223 <span class="stringliteral"> rel_args VARCHAR,</span> |
| <a name="l00224"></a>00224 <span class="stringliteral"> rel_state VARCHAR,</span> |
| <a name="l00225"></a>00225 <span class="stringliteral"> rel_source VARCHAR,</span> |
| <a name="l00226"></a>00226 <span class="stringliteral"> col_row VARCHAR,</span> |
| <a name="l00227"></a>00227 <span class="stringliteral"> col_column VARCHAR,</span> |
| <a name="l00228"></a>00228 <span class="stringliteral"> col_value VARCHAR)</span> |
| <a name="l00229"></a>00229 <span class="stringliteral">RETURNS INTEGER</span> |
| <a name="l00230"></a>00230 <span class="stringliteral">AS $$PythonFunction(convex, lmf_igd, compute_lmf_igd)$$</span> |
| <a name="l00231"></a>00231 <span class="stringliteral">LANGUAGE plpythonu VOLATILE;</span> |
| <a name="l00232"></a>00232 <span class="stringliteral"></span><span class="comment"></span> |
| <a name="l00233"></a>00233 <span class="comment">/**</span> |
| <a name="l00234"></a>00234 <span class="comment"> * @brief Low-rank matrix factorization of an incomplete matrix into two factors</span> |
| <a name="l00235"></a>00235 <span class="comment"> *</span> |
| <a name="l00236"></a>00236 <span class="comment"> * This function takes as input the table representation of an incomplete matrix</span> |
| <a name="l00237"></a>00237 <span class="comment"> * in the sparse (i, j, value) format and decomposes it into the specified set</span> |
| <a name="l00238"></a>00238 <span class="comment"> * of most significant features of the factor matrices U and V. The input matrix</span> |
| <a name="l00239"></a>00239 <span class="comment"> * is expected to have dimension [1:row_dim][1:column_dim], but in sparse</span> |
| <a name="l00240"></a>00240 <span class="comment"> * format.</span> |
| <a name="l00241"></a>00241 <span class="comment"> *</span> |
| <a name="l00242"></a>00242 <span class="comment"> * @param rel_output Name of the table that the factors will be appended to</span> |
| <a name="l00243"></a>00243 <span class="comment"> * @param rel_source Name of the table/view with the source data</span> |
| <a name="l00244"></a>00244 <span class="comment"> * @param col_row Name of the column containing cell row number</span> |
| <a name="l00245"></a>00245 <span class="comment"> * @param col_column Name of the column containing cell column number</span> |
| <a name="l00246"></a>00246 <span class="comment"> * @param col_value Name of the column containing cell value</span> |
| <a name="l00247"></a>00247 <span class="comment"> * @param row_dim Maximum number of rows of input</span> |
| <a name="l00248"></a>00248 <span class="comment"> * @param column_dim Maximum number of columns of input</span> |
| <a name="l00249"></a>00249 <span class="comment"> * @param max_rank Rank of desired approximation</span> |
| <a name="l00250"></a>00250 <span class="comment"> * @param stepsize Hyper-parameter that decides how aggressive the gradient steps are</span> |
| <a name="l00251"></a>00251 <span class="comment"> * @param scale_factor Hyper-parameter that decides scale of initial factors</span> |
| <a name="l00252"></a>00252 <span class="comment"> * @param num_iterations Maximum number of iterations to perform regardless of convergence</span> |
| <a name="l00253"></a>00253 <span class="comment"> * @param tolerance Acceptable level of error in convergence.</span> |
| <a name="l00254"></a>00254 <span class="comment"> *</span> |
| <a name="l00255"></a>00255 <span class="comment"> */</span> |
| <a name="l00256"></a>00256 CREATE FUNCTION MADLIB_SCHEMA.lmf_igd_run( |
| <a name="l00257"></a>00257 rel_output VARCHAR, |
| <a name="l00258"></a>00258 rel_source REGCLASS, |
| <a name="l00259"></a>00259 col_row VARCHAR, |
| <a name="l00260"></a>00260 col_column VARCHAR, |
| <a name="l00261"></a>00261 col_value VARCHAR, |
| <a name="l00262"></a>00262 row_dim INTEGER /*+ DEFAULT 'SELECT max(col_row) FROM rel_source' */, |
| <a name="l00263"></a>00263 column_dim INTEGER <span class="comment">/*+ DEFAULT 'SELECT max(col_column) FROM rel_source' */</span>, |
| <a name="l00264"></a>00264 max_rank INTEGER <span class="comment">/*+ DEFAULT 20 */</span>, |
| <a name="l00265"></a>00265 stepsize DOUBLE PRECISION <span class="comment">/*+ DEFAULT 0.01 */</span>, |
| <a name="l00266"></a>00266 scale_factor DOUBLE PRECISION <span class="comment">/*+ DEFAULT 0.1 */</span>, |
| <a name="l00267"></a>00267 num_iterations INTEGER <span class="comment">/*+ DEFAULT 10 */</span>, |
| <a name="l00268"></a>00268 tolerance DOUBLE PRECISION <span class="comment">/*+ DEFAULT 0.0001 */</span>) |
| <a name="l00269"></a>00269 RETURNS INTEGER AS $$ |
| <a name="l00270"></a>00270 DECLARE |
| <a name="l00271"></a>00271 iteration_run INTEGER; |
| <a name="l00272"></a>00272 model_id INTEGER; |
| <a name="l00273"></a>00273 rmse DOUBLE PRECISION; |
| <a name="l00274"></a>00274 old_messages VARCHAR; |
| <a name="l00275"></a>00275 BEGIN |
| <a name="l00276"></a>00276 RAISE NOTICE 'Matrix % to be factorized: % x %', rel_source, row_dim, column_dim; |
| <a name="l00277"></a>00277 |
| <a name="l00278"></a><a class="code" href="lmf_8sql__in.html#ac1acb1f0e1f7008118f21c83546a4602">00278</a> -- We first setup the argument table. Rationale: We want to avoid all data |
| <a name="l00279"></a>00279 -- conversion between native types and Python code. Instead, we use Python |
| <a name="l00280"></a>00280 -- as a pure driver layer. |
| <a name="l00281"></a>00281 old_messages := |
| <a name="l00282"></a>00282 (SELECT setting FROM pg_settings WHERE name = 'client_min_messages'); |
| <a name="l00283"></a>00283 EXECUTE 'SET client_min_messages TO warning'; |
| <a name="l00284"></a>00284 PERFORM MADLIB_SCHEMA.<a class="code" href="utilities_8sql__in.html#a56501b6f9fabe65d7a6a6beb70a0e000" title="Create the temporary schema if it does not exist yet.">create_schema_pg_temp</a>(); |
| <a name="l00285"></a>00285 -- Unfortunately, the EXECUTE USING syntax is only available starting |
| <a name="l00286"></a>00286 -- PostgreSQL 8.4: |
| <a name="l00287"></a>00287 -- http:<span class="comment">//www.postgresql.org/docs/8.4/static/plpgsql-statements.html#PLPGSQL-STATEMENTS-EXECUTING-DYN</span> |
| <a name="l00288"></a>00288 -- We therefore have to emulate. |
| <a name="l00289"></a>00289 PERFORM MADLIB_SCHEMA.internal_execute_using_lmf_igd_args($sql$ |
| <a name="l00290"></a>00290 DROP TABLE IF EXISTS pg_temp._madlib_lmf_igd_args; |
| <a name="l00291"></a>00291 CREATE TABLE pg_temp._madlib_lmf_igd_args AS |
| <a name="l00292"></a>00292 SELECT |
| <a name="l00293"></a>00293 $1 AS row_dim, |
| <a name="l00294"></a>00294 $2 AS column_dim, |
| <a name="l00295"></a>00295 $3 AS max_rank, |
| <a name="l00296"></a>00296 $4 AS stepsize, |
| <a name="l00297"></a>00297 $5 AS scale_factor, |
| <a name="l00298"></a>00298 $6 AS num_iterations, |
| <a name="l00299"></a>00299 $7 AS tolerance; |
| <a name="l00300"></a>00300 $sql$, |
| <a name="l00301"></a>00301 row_dim, column_dim, max_rank, stepsize, |
| <a name="l00302"></a>00302 scale_factor, num_iterations, tolerance); |
| <a name="l00303"></a>00303 EXECUTE 'SET client_min_messages TO ' || old_messages; |
| <a name="l00304"></a>00304 |
| <a name="l00305"></a>00305 -- Perform actual computation. |
| <a name="l00306"></a>00306 -- Unfortunately, Greenplum and PostgreSQL &lt;= 8.2 do not have conversion |
| <a name="l00307"></a>00307 -- operators from regclass to varchar/text. |
| <a name="l00308"></a>00308 iteration_run := MADLIB_SCHEMA.internal_compute_lmf_igd( |
| <a name="l00309"></a>00309 '_madlib_lmf_igd_args', '_madlib_lmf_igd_state', |
| <a name="l00310"></a>00310 textin(regclassout(rel_source)), col_row, col_column, col_value); |
| <a name="l00311"></a>00311 |
| <a name="l00312"></a>00312 -- create result table if it does not exist |
| <a name="l00313"></a>00313 BEGIN |
| <a name="l00314"></a>00314 EXECUTE 'SELECT 1 FROM ' || rel_output || ' LIMIT 0'; |
| <a name="l00315"></a>00315 EXCEPTION |
| <a name="l00316"></a>00316 WHEN undefined_table THEN |
| <a name="l00317"></a>00317 EXECUTE ' |
| <a name="l00318"></a>00318 CREATE TABLE ' || rel_output || ' ( |
| <a name="l00319"></a>00319 <span class="keywordtype">id</span> SERIAL, |
| <a name="l00320"></a>00320 matrix_u DOUBLE PRECISION[], |
| <a name="l00321"></a>00321 matrix_v DOUBLE PRECISION[], |
| <a name="l00322"></a>00322 rmse DOUBLE PRECISION)'; |
| <a name="l00323"></a>00323 END; |
| <a name="l00324"></a>00324 |
| <a name="l00325"></a>00325 -- A work-around for GPDB not supporting RETURNING for INSERT |
| <a name="l00326"></a>00326 -- We generate an <span class="keywordtype">id</span> using nextval before INSERT |
| <a name="l00327"></a>00327 EXECUTE ' |
| <a name="l00328"></a>00328 SELECT nextval(' || quote_literal(rel_output || '_id_seq') ||'::regclass)' |
| <a name="l00329"></a>00329 INTO model_id; |
| <a name="l00330"></a>00330 |
| <a name="l00331"></a>00331 -- output model |
| <a name="l00332"></a>00332 -- Retrieve result from state table and insert it |
| <a name="l00333"></a>00333 EXECUTE ' |
| <a name="l00334"></a>00334 INSERT INTO ' || rel_output || ' |
| <a name="l00335"></a>00335 SELECT ' || model_id || ', (result).* |
| <a name="l00336"></a>00336 FROM ( |
| <a name="l00337"></a>00337 SELECT MADLIB_SCHEMA.internal_lmf_igd_result(_state) AS result |
| <a name="l00338"></a>00338 FROM _madlib_lmf_igd_state |
| <a name="l00339"></a>00339 WHERE _iteration = ' || iteration_run || ' |
| <a name="l00340"></a>00340 ) subq'; |
| <a name="l00341"></a>00341 |
| <a name="l00342"></a>00342 EXECUTE ' |
| <a name="l00343"></a>00343 SELECT rmse |
| <a name="l00344"></a>00344 FROM ' || rel_output || ' |
| <a name="l00345"></a>00345 WHERE <span class="keywordtype">id</span> = ' || model_id |
| <a name="l00346"></a>00346 INTO rmse; |
| <a name="l00347"></a>00347 |
| <a name="l00348"></a>00348 -- return description |
| <a name="l00349"></a>00349 RAISE NOTICE ' |
| <a name="l00350"></a>00350 Finished low-rank matrix factorization using incremental gradient |
| <a name="l00351"></a>00351 * table : % (%, %, %) |
| <a name="l00352"></a>00352 Results: |
| <a name="l00353"></a>00353 * RMSE = % |
| <a name="l00354"></a>00354 Output: |
| <a name="l00355"></a>00355 * view : SELECT * FROM % WHERE <span class="keywordtype">id</span> = %', |
| <a name="l00356"></a>00356 rel_source, col_row, col_column, col_value, rmse, rel_output, model_id; |
| <a name="l00357"></a>00357 |
| <a name="l00358"></a>00358 RETURN model_id; |
| <a name="l00359"></a>00359 END; |
| <a name="l00360"></a>00360 $$ LANGUAGE plpgsql VOLATILE; |
| <a name="l00361"></a>00361 |
| <a name="l00362"></a>00362 CREATE FUNCTION MADLIB_SCHEMA.<a class="code" href="lmf_8sql__in.html#ac1acb1f0e1f7008118f21c83546a4602" title="Low-rank matrix factorization of an incomplete matrix into two factors.">lmf_igd_run</a>( |
| <a name="l00363"></a>00363 rel_output VARCHAR, |
| <a name="l00364"></a>00364 rel_source REGCLASS, |
| <a name="l00365"></a>00365 col_row VARCHAR, |
| <a name="l00366"></a>00366 col_column VARCHAR, |
| <a name="l00367"></a>00367 col_value VARCHAR, |
| <a name="l00368"></a>00368 row_dim INTEGER, |
| <a name="l00369"></a>00369 column_dim INTEGER, |
| <a name="l00370"></a>00370 max_rank INTEGER, |
| <a name="l00371"></a>00371 stepsize DOUBLE PRECISION, |
| <a name="l00372"></a>00372 scale_factor DOUBLE PRECISION) |
| <a name="l00373"></a>00373 RETURNS INTEGER AS $$ |
| <a name="l00374"></a>00374 SELECT MADLIB_SCHEMA.<a class="code" href="lmf_8sql__in.html#ac1acb1f0e1f7008118f21c83546a4602" title="Low-rank matrix factorization of an incomplete matrix into two factors.">lmf_igd_run</a>($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, 10, 0.0001); |
| <a name="l00375"></a>00375 $$ LANGUAGE sql VOLATILE; |
| <a name="l00376"></a>00376 |
| <a name="l00377"></a>00377 CREATE FUNCTION MADLIB_SCHEMA.lmf_igd_run( |
| <a name="l00378"></a>00378 rel_output VARCHAR, |
| <a name="l00379"></a>00379 rel_source REGCLASS, |
| <a name="l00380"></a>00380 col_row VARCHAR, |
| <a name="l00381"></a>00381 col_column VARCHAR, |
| <a name="l00382"></a>00382 col_value VARCHAR, |
| <a name="l00383"></a>00383 row_dim INTEGER, |
| <a name="l00384"></a>00384 column_dim INTEGER, |
| <a name="l00385"></a>00385 max_rank INTEGER, |
| <a name="l00386"></a>00386 stepsize DOUBLE PRECISION) |
| <a name="l00387"></a>00387 RETURNS INTEGER AS $$ |
| <a name="l00388"></a>00388 -- set scale_factor as default 0.1 |
| <a name="l00389"></a>00389 SELECT MADLIB_SCHEMA.lmf_igd_run($1, $2, $3, $4, $5, $6, $7, $8, $9, 0.1); |
| <a name="l00390"></a>00390 $$ LANGUAGE sql VOLATILE; |
| <a name="l00391"></a>00391 |
| <a name="l00392"></a>00392 CREATE FUNCTION MADLIB_SCHEMA.lmf_igd_run( |
| <a name="l00393"></a>00393 rel_output VARCHAR, |
| <a name="l00394"></a>00394 rel_source REGCLASS, |
| <a name="l00395"></a>00395 col_row VARCHAR, |
| <a name="l00396"></a>00396 col_column VARCHAR, |
| <a name="l00397"></a>00397 col_value VARCHAR, |
| <a name="l00398"></a>00398 row_dim INTEGER, |
| <a name="l00399"></a>00399 column_dim INTEGER, |
| <a name="l00400"></a>00400 max_rank INTEGER) |
| <a name="l00401"></a>00401 RETURNS INTEGER AS $$ |
| <a name="l00402"></a>00402 -- set stepsize as default 0.01 |
| <a name="l00403"></a>00403 SELECT MADLIB_SCHEMA.lmf_igd_run($1, $2, $3, $4, $5, $6, $7, $8, 0.01); |
| <a name="l00404"></a>00404 $$ LANGUAGE sql VOLATILE; |
| <a name="l00405"></a>00405 |
| <a name="l00406"></a>00406 CREATE FUNCTION MADLIB_SCHEMA.lmf_igd_run( |
| <a name="l00407"></a>00407 rel_output VARCHAR, |
| <a name="l00408"></a>00408 rel_source REGCLASS, |
| <a name="l00409"></a>00409 col_row VARCHAR, |
| <a name="l00410"></a>00410 col_column VARCHAR, |
| <a name="l00411"></a>00411 col_value TEXT) |
| <a name="l00412"></a>00412 RETURNS INTEGER AS $$ |
| <a name="l00413"></a>00413 DECLARE |
| <a name="l00414"></a>00414 row_dim INTEGER; |
| <a name="l00415"></a>00415 column_dim INTEGER; |
| <a name="l00416"></a>00416 BEGIN |
| <a name="l00417"></a>00417 EXECUTE ' |
| <a name="l00418"></a>00418 SELECT max(' || col_row || '), max(' || col_column || ') |
| <a name="l00419"></a>00419 FROM ' || textin(regclassout(rel_source)) |
| <a name="l00420"></a>00420 INTO row_dim, column_dim; |
| <a name="l00421"></a>00421 |
| <a name="l00422"></a>00422 RETURN (SELECT MADLIB_SCHEMA.lmf_igd_run($1, $2, $3, $4, $5, row_dim, column_dim, 20)); |
| <a name="l00423"></a>00423 END; |
| <a name="l00424"></a>00424 $$ LANGUAGE plpgsql VOLATILE; |
| <a name="l00425"></a>00425 |
| </pre></div></div> |
| </div> |
| <div id="nav-path" class="navpath"> |
| <ul> |
| <li class="navelem"><a class="el" href="lmf_8sql__in.html">lmf.sql_in</a> </li> |
| <!-- window showing the filter options --> |
| <div id="MSearchSelectWindow" |
| onmouseover="return searchBox.OnSearchSelectShow()" |
| onmouseout="return searchBox.OnSearchSelectHide()" |
| onkeydown="return searchBox.OnSearchSelectKey(event)"> |
| <a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark"> </span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark"> </span>Files</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark"> </span>Functions</a></div> |
| |
| <!-- iframe showing the search results (closed by default) --> |
| <!-- the title gives the frame an accessible name for screen readers --> |
| <div id="MSearchResultsWindow"> |
| <iframe src="javascript:void(0)" frameborder="0" |
| name="MSearchResults" id="MSearchResults" title="Search results"> |
| </iframe> |
| </div> |
| |
| |
| <li class="footer">Generated on Tue Apr 2 2013 14:57:03 for MADlib by |
| <a href="http://www.doxygen.org/index.html"> |
| <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.7.5.1 </li> |
| </ul> |
| </div> |
| |
| |
| </body> |
| </html> |