| <!-- HTML header for doxygen 1.8.4--> |
| <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> |
| <html xmlns="http://www.w3.org/1999/xhtml"> |
| <head> |
| <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/> |
| <meta http-equiv="X-UA-Compatible" content="IE=9"/> |
| <meta name="generator" content="Doxygen 1.8.4"/> |
| <meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/> |
| <title>MADlib: lda.sql_in File Reference</title> |
| <link href="tabs.css" rel="stylesheet" type="text/css"/> |
| <script type="text/javascript" src="jquery.js"></script> |
| <script type="text/javascript" src="dynsections.js"></script> |
| <link href="navtree.css" rel="stylesheet" type="text/css"/> |
| <script type="text/javascript" src="resize.js"></script> |
| <script type="text/javascript" src="navtree.js"></script> |
| <script type="text/javascript"> |
| $(document).ready(initResizable); |
| $(window).load(resizeHeight); |
| </script> |
| <link href="search/search.css" rel="stylesheet" type="text/css"/> |
| <script type="text/javascript" src="search/search.js"></script> |
| <script type="text/javascript"> |
| $(document).ready(function() { searchBox.OnSelectItem(0); }); |
| </script> |
| <script type="text/x-mathjax-config"> |
| MathJax.Hub.Config({ |
| extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"], |
| jax: ["input/TeX","output/HTML-CSS"], |
| }); |
| </script><script src="../mathjax/MathJax.js"></script> |
| <link href="doxygen.css" rel="stylesheet" type="text/css" /> |
| <link href="madlib_extra.css" rel="stylesheet" type="text/css"/> |
| <!-- google analytics --> |
| <script> |
| (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ |
| (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), |
| m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) |
| })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); |
| ga('create', 'UA-45382226-1', 'auto'); |
| ga('send', 'pageview'); |
| </script> |
| </head> |
| <body> |
| <div id="top"><!-- do not remove this div, it is closed by doxygen! --> |
| <div id="titlearea"> |
| <table cellspacing="0" cellpadding="0"> |
| <tbody> |
| <tr style="height: 56px;"> |
| <td id="projectlogo"><a href="http://madlib.incubator.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/></a></td> |
| <td style="padding-left: 0.5em;"> |
| <div id="projectname"> |
| <span id="projectnumber">1.6</span> <span style="font-size:10pt; font-style:italic"><a href="../latest/./lda_8sql__in.html"> A newer version is available</a></span> |
| </div> |
| <div id="projectbrief">User Documentation</div> |
| </td> |
| <td> <div id="MSearchBox" class="MSearchBoxInactive"> |
| <span class="left"> |
| <img id="MSearchSelect" src="search/mag_sel.png" |
| onmouseover="return searchBox.OnSearchSelectShow()" |
| onmouseout="return searchBox.OnSearchSelectHide()" |
| alt=""/> |
| <input type="text" id="MSearchField" value="Search" accesskey="S" |
| onfocus="searchBox.OnSearchFieldFocus(true)" |
| onblur="searchBox.OnSearchFieldFocus(false)" |
| onkeyup="searchBox.OnSearchFieldChange(event)"/> |
| </span><span class="right"> |
| <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a> |
| </span> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <!-- end header part --> |
| <!-- Generated by Doxygen 1.8.4 --> |
| <script type="text/javascript"> |
| var searchBox = new SearchBox("searchBox", "search",false,'Search'); |
| </script> |
| </div><!-- top --> |
| <div id="side-nav" class="ui-resizable side-nav-resizable"> |
| <div id="nav-tree"> |
| <div id="nav-tree-contents"> |
| <div id="nav-sync" class="sync"></div> |
| </div> |
| </div> |
| <div id="splitbar" style="-moz-user-select:none;" |
| class="ui-resizable-handle"> |
| </div> |
| </div> |
| <script type="text/javascript"> |
| $(document).ready(function(){initNavTree('lda_8sql__in.html','');}); |
| </script> |
| <div id="doc-content"> |
| <!-- window showing the filter options --> |
| <div id="MSearchSelectWindow" |
| onmouseover="return searchBox.OnSearchSelectShow()" |
| onmouseout="return searchBox.OnSearchSelectHide()" |
| onkeydown="return searchBox.OnSearchSelectKey(event)"> |
| <a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark"> </span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark"> </span>Files</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark"> </span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark"> </span>Groups</a></div> |
| |
| <!-- iframe showing the search results (closed by default) --> |
| <div id="MSearchResultsWindow"> |
| <iframe src="javascript:void(0)" frameborder="0" |
| name="MSearchResults" id="MSearchResults"> |
| </iframe> |
| </div> |
| |
| <div class="header"> |
| <div class="summary"> |
| <a href="#func-members">Functions</a> </div> |
| <div class="headertitle"> |
| <div class="title">lda.sql_in File Reference</div> </div> |
| </div><!--header--> |
| <div class="contents"> |
| |
| <p>SQL functions for Latent Dirichlet Allocation. |
| <a href="#details">More...</a></p> |
| <table class="memberdecls"> |
| <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="func-members"></a> |
| Functions</h2></td></tr> |
| <tr class="memitem:aeb7593251a4dedb695494f65dc2d1f80"><td class="memItemLeft" align="right" valign="top">set< lda_result > </td><td class="memItemRight" valign="bottom"><a class="el" href="lda_8sql__in.html#aeb7593251a4dedb695494f65dc2d1f80">lda_train</a> (text data_table, text model_table, text output_data_table, int4 voc_size, int4 topic_num, int4 iter_num, float8 alpha, float8 beta)</td></tr> |
| <tr class="memdesc:aeb7593251a4dedb695494f65dc2d1f80"><td class="mdescLeft"> </td><td class="mdescRight">This UDF provides an entry for the lda training process. <a href="#aeb7593251a4dedb695494f65dc2d1f80">More...</a><br/></td></tr> |
| <tr class="separator:aeb7593251a4dedb695494f65dc2d1f80"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:af1fde06c39dd12bb9e5544997f815323"><td class="memItemLeft" align="right" valign="top">set< lda_result > </td><td class="memItemRight" valign="bottom"><a class="el" href="lda_8sql__in.html#af1fde06c39dd12bb9e5544997f815323">lda_predict</a> (text data_table, text model_table, text output_table)</td></tr> |
| <tr class="memdesc:af1fde06c39dd12bb9e5544997f815323"><td class="mdescLeft"> </td><td class="mdescRight">This UDF provides an entry for the lda prediction process. <a href="#af1fde06c39dd12bb9e5544997f815323">More...</a><br/></td></tr> |
| <tr class="separator:af1fde06c39dd12bb9e5544997f815323"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:aaa89e30c8fd0ba41b6feee01ee195330"><td class="memItemLeft" align="right" valign="top">set< lda_result > </td><td class="memItemRight" valign="bottom"><a class="el" href="lda_8sql__in.html#aaa89e30c8fd0ba41b6feee01ee195330">lda_predict</a> (text data_table, text model_table, text output_table, int4 iter_num)</td></tr> |
| <tr class="memdesc:aaa89e30c8fd0ba41b6feee01ee195330"><td class="mdescLeft"> </td><td class="mdescRight">An overloaded version that allows users to specify iter_num. <a href="#aaa89e30c8fd0ba41b6feee01ee195330">More...</a><br/></td></tr> |
| <tr class="separator:aaa89e30c8fd0ba41b6feee01ee195330"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:ace20b6314e5b4e98929a98a6f05d925d"><td class="memItemLeft" align="right" valign="top">set< lda_result > </td><td class="memItemRight" valign="bottom"><a class="el" href="lda_8sql__in.html#ace20b6314e5b4e98929a98a6f05d925d">lda_get_topic_word_count</a> (text model_table, text output_table)</td></tr> |
| <tr class="memdesc:ace20b6314e5b4e98929a98a6f05d925d"><td class="mdescLeft"> </td><td class="mdescRight">This UDF computes the per-topic word counts. <a href="#ace20b6314e5b4e98929a98a6f05d925d">More...</a><br/></td></tr> |
| <tr class="separator:ace20b6314e5b4e98929a98a6f05d925d"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:a455699e72328d6b105c61a3c9e7ae5dc"><td class="memItemLeft" align="right" valign="top">set< lda_result > </td><td class="memItemRight" valign="bottom"><a class="el" href="lda_8sql__in.html#a455699e72328d6b105c61a3c9e7ae5dc">lda_get_word_topic_count</a> (text model_table, text output_table)</td></tr> |
| <tr class="memdesc:a455699e72328d6b105c61a3c9e7ae5dc"><td class="mdescLeft"> </td><td class="mdescRight">This UDF computes the per-word topic counts. <a href="#a455699e72328d6b105c61a3c9e7ae5dc">More...</a><br/></td></tr> |
| <tr class="separator:a455699e72328d6b105c61a3c9e7ae5dc"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:a4ac16d87d50821aadcab0d75d65b3f1b"><td class="memItemLeft" align="right" valign="top">set< lda_result > </td><td class="memItemRight" valign="bottom"><a class="el" href="lda_8sql__in.html#a4ac16d87d50821aadcab0d75d65b3f1b">lda_get_topic_desc</a> (text model_table, text vocab_table, text desc_table, int4 top_k)</td></tr> |
| <tr class="memdesc:a4ac16d87d50821aadcab0d75d65b3f1b"><td class="mdescLeft"> </td><td class="mdescRight">This UDF gets the description for each topic (top-k words) <a href="#a4ac16d87d50821aadcab0d75d65b3f1b">More...</a><br/></td></tr> |
| <tr class="separator:a4ac16d87d50821aadcab0d75d65b3f1b"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:a3cdff7ba417e1a9c1b221f23bc2aac64"><td class="memItemLeft" align="right" valign="top">int4[] </td><td class="memItemRight" valign="bottom"><a class="el" href="lda_8sql__in.html#a3cdff7ba417e1a9c1b221f23bc2aac64">__lda_random_assign</a> (int4 word_count, int4 topic_num)</td></tr> |
| <tr class="memdesc:a3cdff7ba417e1a9c1b221f23bc2aac64"><td class="mdescLeft"> </td><td class="mdescRight">This UDF assigns topics to words in a document randomly. <a href="#a3cdff7ba417e1a9c1b221f23bc2aac64">More...</a><br/></td></tr> |
| <tr class="separator:a3cdff7ba417e1a9c1b221f23bc2aac64"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:ab2640fce837da813a3b6fff38dc713d8"><td class="memItemLeft" align="right" valign="top">int4[] </td><td class="memItemRight" valign="bottom"><a class="el" href="lda_8sql__in.html#ab2640fce837da813a3b6fff38dc713d8">__lda_gibbs_sample</a> (int4[] words, int4[] counts, int4[] doc_topic, int8[] model, float8 alpha, float8 beta, int4 voc_size, int4 topic_num, int4 iter_num)</td></tr> |
| <tr class="memdesc:ab2640fce837da813a3b6fff38dc713d8"><td class="mdescLeft"> </td><td class="mdescRight">This UDF learns the topics of words in a document and is the main step of a Gibbs sampling iteration. The model parameter (including the per-word topic counts and corpus-level topic counts) is passed to this function in the first call and then transferred to the remaining calls through fcinfo->flinfo->fn_extra to allow immediate updates. <a href="#ab2640fce837da813a3b6fff38dc713d8">More...</a><br/></td></tr> |
| <tr class="separator:ab2640fce837da813a3b6fff38dc713d8"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:ad72ac6a8db5e26ef6ca8e7c2563bec94"><td class="memItemLeft" align="right" valign="top">int8[] </td><td class="memItemRight" valign="bottom"><a class="el" href="lda_8sql__in.html#ad72ac6a8db5e26ef6ca8e7c2563bec94">__lda_count_topic_sfunc</a> (int8[] state, int4[] words, int4[] counts, int4[] topic_assignment, int4 voc_size, int4 topic_num)</td></tr> |
| <tr class="memdesc:ad72ac6a8db5e26ef6ca8e7c2563bec94"><td class="mdescLeft"> </td><td class="mdescRight">This UDF is the sfunc for the aggregator computing the topic counts for each word and the topic count in the whole corpus. It scans the topic assignments in a document and updates the topic counts. <a href="#ad72ac6a8db5e26ef6ca8e7c2563bec94">More...</a><br/></td></tr> |
| <tr class="separator:ad72ac6a8db5e26ef6ca8e7c2563bec94"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:ae9987628d56799cd5fadccc92dff3110"><td class="memItemLeft" align="right" valign="top">int8[] </td><td class="memItemRight" valign="bottom"><a class="el" href="lda_8sql__in.html#ae9987628d56799cd5fadccc92dff3110">__lda_count_topic_prefunc</a> (int8[] state1, int8[] state2)</td></tr> |
| <tr class="memdesc:ae9987628d56799cd5fadccc92dff3110"><td class="mdescLeft"> </td><td class="mdescRight">This UDF is the prefunc for the aggregator computing the per-word topic counts. <a href="#ae9987628d56799cd5fadccc92dff3110">More...</a><br/></td></tr> |
| <tr class="separator:ae9987628d56799cd5fadccc92dff3110"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:acdae83c5837155ea3663b34db4521e86"><td class="memItemLeft" align="right" valign="top">aggregate int8[] </td><td class="memItemRight" valign="bottom"><a class="el" href="lda_8sql__in.html#acdae83c5837155ea3663b34db4521e86">__lda_count_topic_agg</a> (int4[], int4[], int4[], int4, int4)</td></tr> |
| <tr class="memdesc:acdae83c5837155ea3663b34db4521e86"><td class="mdescLeft"> </td><td class="mdescRight">This uda computes the word topic counts by scanning and summing up topic assignments in each document. <a href="#acdae83c5837155ea3663b34db4521e86">More...</a><br/></td></tr> |
| <tr class="separator:acdae83c5837155ea3663b34db4521e86"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:a25c3ef12d9808d8a38c5fd2630f3b5a9"><td class="memItemLeft" align="right" valign="top">float8 </td><td class="memItemRight" valign="bottom"><a class="el" href="lda_8sql__in.html#a25c3ef12d9808d8a38c5fd2630f3b5a9">lda_get_perplexity</a> (text model_table, text output_data_table)</td></tr> |
| <tr class="memdesc:a25c3ef12d9808d8a38c5fd2630f3b5a9"><td class="mdescLeft"> </td><td class="mdescRight">This UDF computes the perplexity given the output data table and the model table. <a href="#a25c3ef12d9808d8a38c5fd2630f3b5a9">More...</a><br/></td></tr> |
| <tr class="separator:a25c3ef12d9808d8a38c5fd2630f3b5a9"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:ac140e980888677549a4114bda8f20c90"><td class="memItemLeft" align="right" valign="top">int8[] </td><td class="memItemRight" valign="bottom"><a class="el" href="lda_8sql__in.html#ac140e980888677549a4114bda8f20c90">__lda_perplexity_sfunc</a> (int8[] state, int4[] words, int4[] counts, int4[] doc_topic, int8[][] model, float8 alpha, float8 beta, int4 voc_size, int4 topic_num)</td></tr> |
| <tr class="memdesc:ac140e980888677549a4114bda8f20c90"><td class="mdescLeft"> </td><td class="mdescRight">This UDF is the sfunc for the aggregator computing the perplexity. <a href="#ac140e980888677549a4114bda8f20c90">More...</a><br/></td></tr> |
| <tr class="separator:ac140e980888677549a4114bda8f20c90"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:a78a90c564e975e9a170b83ddeb6bd11c"><td class="memItemLeft" align="right" valign="top">int8[] </td><td class="memItemRight" valign="bottom"><a class="el" href="lda_8sql__in.html#a78a90c564e975e9a170b83ddeb6bd11c">__lda_perplexity_prefunc</a> (int8[] state1, int8[] state2)</td></tr> |
| <tr class="memdesc:a78a90c564e975e9a170b83ddeb6bd11c"><td class="mdescLeft"> </td><td class="mdescRight">This UDF is the prefunc for the aggregator computing the perplexity. <a href="#a78a90c564e975e9a170b83ddeb6bd11c">More...</a><br/></td></tr> |
| <tr class="separator:a78a90c564e975e9a170b83ddeb6bd11c"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:abf691da12a45408203b82940644dc34e"><td class="memItemLeft" align="right" valign="top">float8 </td><td class="memItemRight" valign="bottom"><a class="el" href="lda_8sql__in.html#abf691da12a45408203b82940644dc34e">__lda_perplexity_ffunc</a> (int8[] state)</td></tr> |
| <tr class="memdesc:abf691da12a45408203b82940644dc34e"><td class="mdescLeft"> </td><td class="mdescRight">This UDF is the finalfunc for the aggregator computing the perplexity. <a href="#abf691da12a45408203b82940644dc34e">More...</a><br/></td></tr> |
| <tr class="separator:abf691da12a45408203b82940644dc34e"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:aa9bc2a38ba29636740deb01aad241050"><td class="memItemLeft" align="right" valign="top">aggregate int8[] </td><td class="memItemRight" valign="bottom"><a class="el" href="lda_8sql__in.html#aa9bc2a38ba29636740deb01aad241050">__lda_perplexity_agg</a> (int4[], int4[], int4[], int8[], float8, float8, int4, int4)</td></tr> |
| <tr class="separator:aa9bc2a38ba29636740deb01aad241050"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:a503b42a0e873896dc4313dbc49899a1f"><td class="memItemLeft" align="right" valign="top">set< int8[]> </td><td class="memItemRight" valign="bottom"><a class="el" href="lda_8sql__in.html#a503b42a0e873896dc4313dbc49899a1f">__lda_util_unnest</a> (int8[][] arr)</td></tr> |
| <tr class="memdesc:a503b42a0e873896dc4313dbc49899a1f"><td class="mdescLeft"> </td><td class="mdescRight">Unnest a 2-D array into a set of 1-D arrays. <a href="#a503b42a0e873896dc4313dbc49899a1f">More...</a><br/></td></tr> |
| <tr class="separator:a503b42a0e873896dc4313dbc49899a1f"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:a2db0019220f10a1fca78ed77a5c2dcd5"><td class="memItemLeft" align="right" valign="top">int8[][] </td><td class="memItemRight" valign="bottom"><a class="el" href="lda_8sql__in.html#a2db0019220f10a1fca78ed77a5c2dcd5">__lda_util_transpose</a> (int8[][] matrix)</td></tr> |
| <tr class="memdesc:a2db0019220f10a1fca78ed77a5c2dcd5"><td class="mdescLeft"> </td><td class="mdescRight">Transpose a 2-D array. <a href="#a2db0019220f10a1fca78ed77a5c2dcd5">More...</a><br/></td></tr> |
| <tr class="separator:a2db0019220f10a1fca78ed77a5c2dcd5"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:a9b522ce2d080956d7fb3d5ae79195b8a"><td class="memItemLeft" align="right" valign="top">float8[] </td><td class="memItemRight" valign="bottom"><a class="el" href="lda_8sql__in.html#a9b522ce2d080956d7fb3d5ae79195b8a">__lda_util_norm_with_smoothing</a> (float8[] arr, float8 smooth)</td></tr> |
| <tr class="memdesc:a9b522ce2d080956d7fb3d5ae79195b8a"><td class="mdescLeft"> </td><td class="mdescRight">L1 normalization with smoothing. <a href="#a9b522ce2d080956d7fb3d5ae79195b8a">More...</a><br/></td></tr> |
| <tr class="separator:a9b522ce2d080956d7fb3d5ae79195b8a"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:aa9729b6bcc20d00936b6831382747c2f"><td class="memItemLeft" align="right" valign="top">int4[] </td><td class="memItemRight" valign="bottom"><a class="el" href="lda_8sql__in.html#aa9729b6bcc20d00936b6831382747c2f">__lda_util_index_sort</a> (float8[] arr)</td></tr> |
| <tr class="memdesc:aa9729b6bcc20d00936b6831382747c2f"><td class="mdescLeft"> </td><td class="mdescRight">This UDF returns the index of elements in a sorted order. <a href="#aa9729b6bcc20d00936b6831382747c2f">More...</a><br/></td></tr> |
| <tr class="separator:aa9729b6bcc20d00936b6831382747c2f"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:a0032ed0be0c41a1b6ed08dca93f4a59f"><td class="memItemLeft" align="right" valign="top">set< lda_result > </td><td class="memItemRight" valign="bottom"><a class="el" href="lda_8sql__in.html#a0032ed0be0c41a1b6ed08dca93f4a59f">__lda_util_norm_vocab</a> (text vocab_table, text output_vocab_table)</td></tr> |
| <tr class="memdesc:a0032ed0be0c41a1b6ed08dca93f4a59f"><td class="mdescLeft"> </td><td class="mdescRight">This UDF checks the vocabulary and converts non-contiguous wordids into contiguous integers ranging from 0 to voc_size - 1. <a href="#a0032ed0be0c41a1b6ed08dca93f4a59f">More...</a><br/></td></tr> |
| <tr class="separator:a0032ed0be0c41a1b6ed08dca93f4a59f"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:a85990f0bc07a2ab8834301561662939e"><td class="memItemLeft" align="right" valign="top">set< lda_result > </td><td class="memItemRight" valign="bottom"><a class="el" href="lda_8sql__in.html#a85990f0bc07a2ab8834301561662939e">__lda_util_norm_dataset</a> (text data_table, text norm_vocab_table, text output_data_table)</td></tr> |
| <tr class="memdesc:a85990f0bc07a2ab8834301561662939e"><td class="mdescLeft"> </td><td class="mdescRight">This UDF converts the data table according to the normalized vocabulary, and all rows with non-positive count values will be removed. <a href="#a85990f0bc07a2ab8834301561662939e">More...</a><br/></td></tr> |
| <tr class="separator:a85990f0bc07a2ab8834301561662939e"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:aa4fd0a274f1c400014f2ea9549507436"><td class="memItemLeft" align="right" valign="top">set< lda_result > </td><td class="memItemRight" valign="bottom"><a class="el" href="lda_8sql__in.html#aa4fd0a274f1c400014f2ea9549507436">__lda_util_conorm_data</a> (text data_table, text vocab_table, text output_data_table, text output_vocab_table)</td></tr> |
| <tr class="memdesc:aa4fd0a274f1c400014f2ea9549507436"><td class="mdescLeft"> </td><td class="mdescRight">This UDF extracts the list of wordids from the data table and joins it with the vocabulary table to get the list of common wordids, next it will normalize the vocabulary based on the common wordids and then normalize the data table based on the normalized vocabulary. <a href="#aa4fd0a274f1c400014f2ea9549507436">More...</a><br/></td></tr> |
| <tr class="separator:aa4fd0a274f1c400014f2ea9549507436"><td class="memSeparator" colspan="2"> </td></tr> |
| </table> |
| <a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2> |
| <div class="textblock"><dl class="section date"><dt>Date</dt><dd>Dec 2012</dd></dl> |
| <dl class="section see"><dt>See Also</dt><dd>For an introduction to Latent Dirichlet Allocation models, see the module description <a class="el" href="group__grp__lda.html">Latent Dirichlet Allocation</a>. </dd></dl> |
| </div><h2 class="groupheader">Function Documentation</h2> |
| <a class="anchor" id="acdae83c5837155ea3663b34db4521e86"></a> |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">aggregate int8 [] __lda_count_topic_agg </td> |
| <td>(</td> |
| <td class="paramtype">int4 </td> |
| <td class="paramname">[], </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int4 </td> |
| <td class="paramname">[], </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int4 </td> |
| <td class="paramname">[], </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int4 </td> |
| <td class="paramname">, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int4 </td> |
| <td class="paramname"> </td> |
| </tr> |
| <tr> |
| <td></td> |
| <td>)</td> |
| <td></td><td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">words</td><td>The unique words in the document </td></tr> |
| <tr><td class="paramname">counts</td><td>The count of each unique word in the document </td></tr> |
| <tr><td class="paramname">topic_assignment</td><td>The topic assignments in the document </td></tr> |
| <tr><td class="paramname">voc_size</td><td>The size of vocabulary </td></tr> |
| <tr><td class="paramname">topic_num</td><td>The number of topics </td></tr> |
| </table> |
| </dd> |
| </dl> |
| <dl class="section return"><dt>Returns</dt><dd>The word topic counts (a 1-D array embedding a 2-D array) </dd></dl> |
| |
| </div> |
| </div> |
| <a class="anchor" id="ae9987628d56799cd5fadccc92dff3110"></a> |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">int8 [] __lda_count_topic_prefunc </td> |
| <td>(</td> |
| <td class="paramtype">int8[] </td> |
| <td class="paramname"><em>state1</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int8[] </td> |
| <td class="paramname"><em>state2</em> </td> |
| </tr> |
| <tr> |
| <td></td> |
| <td>)</td> |
| <td></td><td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">state1</td><td>The local word topic counts </td></tr> |
| <tr><td class="paramname">state2</td><td>The local word topic counts </td></tr> |
| </table> |
| </dd> |
| </dl> |
| <dl class="section return"><dt>Returns</dt><dd>The element-wise sum of two local states </dd></dl> |
| |
| </div> |
| </div> |
| <a class="anchor" id="ad72ac6a8db5e26ef6ca8e7c2563bec94"></a> |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">int8 [] __lda_count_topic_sfunc </td> |
| <td>(</td> |
| <td class="paramtype">int8[] </td> |
| <td class="paramname"><em>state</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int4[] </td> |
| <td class="paramname"><em>words</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int4[] </td> |
| <td class="paramname"><em>counts</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int4[] </td> |
| <td class="paramname"><em>topic_assignment</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int4 </td> |
| <td class="paramname"><em>voc_size</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int4 </td> |
| <td class="paramname"><em>topic_num</em> </td> |
| </tr> |
| <tr> |
| <td></td> |
| <td>)</td> |
| <td></td><td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">state</td><td>The topic counts </td></tr> |
| <tr><td class="paramname">words</td><td>The unique words in the document </td></tr> |
| <tr><td class="paramname">counts</td><td>The count of each unique word in the document (sum(counts) = word_count) </td></tr> |
| <tr><td class="paramname">topic_assignment</td><td>The topic assignments in the document </td></tr> |
| <tr><td class="paramname">voc_size</td><td>The size of vocabulary </td></tr> |
| <tr><td class="paramname">topic_num</td><td>The number of topics </td></tr> |
| </table> |
| </dd> |
| </dl> |
| <dl class="section return"><dt>Returns</dt><dd>The updated state </dd></dl> |
| |
| </div> |
| </div> |
| <a class="anchor" id="ab2640fce837da813a3b6fff38dc713d8"></a> |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">int4 [] __lda_gibbs_sample </td> |
| <td>(</td> |
| <td class="paramtype">int4[] </td> |
| <td class="paramname"><em>words</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int4[] </td> |
| <td class="paramname"><em>counts</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int4[] </td> |
| <td class="paramname"><em>doc_topic</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int8[] </td> |
| <td class="paramname"><em>model</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">float8 </td> |
| <td class="paramname"><em>alpha</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">float8 </td> |
| <td class="paramname"><em>beta</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int4 </td> |
| <td class="paramname"><em>voc_size</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int4 </td> |
| <td class="paramname"><em>topic_num</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int4 </td> |
| <td class="paramname"><em>iter_num</em> </td> |
| </tr> |
| <tr> |
| <td></td> |
| <td>)</td> |
| <td></td><td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">words</td><td>The set of unique words in the document </td></tr> |
| <tr><td class="paramname">counts</td><td>The count of each unique word in the document (sum(counts) = word_count) </td></tr> |
| <tr><td class="paramname">doc_topic</td><td>The current per-doc topic counts and topic assignments </td></tr> |
| <tr><td class="paramname">model</td><td>The current model (including the per-word topic counts and the corpus-level topic counts) </td></tr> |
| <tr><td class="paramname">alpha</td><td>The Dirichlet parameter for per-document topic multinomial </td></tr> |
| <tr><td class="paramname">beta</td><td>The Dirichlet parameter for per-topic word multinomial </td></tr> |
| <tr><td class="paramname">voc_size</td><td>The size of vocabulary </td></tr> |
| <tr><td class="paramname">topic_num</td><td>The number of topics </td></tr> |
| <tr><td class="paramname">iter_num</td><td>The number of iterations </td></tr> |
| </table> |
| </dd> |
| </dl> |
| <dl class="section return"><dt>Returns</dt><dd>The learned topic counts and topic assignments </dd></dl> |
| |
| </div> |
| </div> |
| <a class="anchor" id="aa9bc2a38ba29636740deb01aad241050"></a> |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">aggregate int8 [] __lda_perplexity_agg </td> |
| <td>(</td> |
| <td class="paramtype">int4 </td> |
| <td class="paramname">[], </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int4 </td> |
| <td class="paramname">[], </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int4 </td> |
| <td class="paramname">[], </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int8 </td> |
| <td class="paramname">[], </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">float8 </td> |
| <td class="paramname">, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">float8 </td> |
| <td class="paramname">, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int4 </td> |
| <td class="paramname">, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int4 </td> |
| <td class="paramname"> </td> |
| </tr> |
| <tr> |
| <td></td> |
| <td>)</td> |
| <td></td><td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| |
| </div> |
| </div> |
| <a class="anchor" id="abf691da12a45408203b82940644dc34e"></a> |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">float8 __lda_perplexity_ffunc </td> |
| <td>(</td> |
| <td class="paramtype">int8[] </td> |
| <td class="paramname"><em>state</em>)</td><td></td> |
| <td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">state</td><td>The merged state </td></tr> |
| </table> |
| </dd> |
| </dl> |
| <dl class="section return"><dt>Returns</dt><dd>The perplexity </dd></dl> |
| |
| </div> |
| </div> |
| <a class="anchor" id="a78a90c564e975e9a170b83ddeb6bd11c"></a> |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">int8 [] __lda_perplexity_prefunc </td> |
| <td>(</td> |
| <td class="paramtype">int8[] </td> |
| <td class="paramname"><em>state1</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int8[] </td> |
| <td class="paramname"><em>state2</em> </td> |
| </tr> |
| <tr> |
| <td></td> |
| <td>)</td> |
| <td></td><td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">state1</td><td>The local state </td></tr> |
| <tr><td class="paramname">state2</td><td>The local state </td></tr> |
| </table> |
| </dd> |
| </dl> |
| <dl class="section return"><dt>Returns</dt><dd>The merged state </dd></dl> |
| |
| </div> |
| </div> |
| <a class="anchor" id="ac140e980888677549a4114bda8f20c90"></a> |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">int8 [] __lda_perplexity_sfunc </td> |
| <td>(</td> |
| <td class="paramtype">int8[] </td> |
| <td class="paramname"><em>state</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int4[] </td> |
| <td class="paramname"><em>words</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int4[] </td> |
| <td class="paramname"><em>counts</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int4[] </td> |
| <td class="paramname"><em>doc_topic</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int8 </td> |
| <td class="paramname"><em>model</em>[][], </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">float8 </td> |
| <td class="paramname"><em>alpha</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">float8 </td> |
| <td class="paramname"><em>beta</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int4 </td> |
| <td class="paramname"><em>voc_size</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int4 </td> |
| <td class="paramname"><em>topic_num</em> </td> |
| </tr> |
| <tr> |
| <td></td> |
| <td>)</td> |
| <td></td><td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">state</td><td>The cached model plus perplexity </td></tr> |
| <tr><td class="paramname">words</td><td>The unique words in the document </td></tr> |
| <tr><td class="paramname">counts</td><td>The count of each unique word in the document </td></tr> |
| <tr><td class="paramname">doc_topic</td><td>The topic counts in the document </td></tr> |
| <tr><td class="paramname">model</td><td>The learned model </td></tr> |
| <tr><td class="paramname">alpha</td><td>The Dirichlet parameter for per-document topic multinomial </td></tr> |
| <tr><td class="paramname">beta</td><td>The Dirichlet parameter for per-topic word multinomial </td></tr> |
| <tr><td class="paramname">voc_size</td><td>The size of vocabulary </td></tr> |
| <tr><td class="paramname">topic_num</td><td>The number of topics </td></tr> |
| </table> |
| </dd> |
| </dl> |
| <dl class="section return"><dt>Returns</dt><dd>The updated state </dd></dl> |
| |
| </div> |
| </div> |
| <a class="anchor" id="a3cdff7ba417e1a9c1b221f23bc2aac64"></a> |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">int4 [] __lda_random_assign </td> |
| <td>(</td> |
| <td class="paramtype">int4 </td> |
| <td class="paramname"><em>word_count</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int4 </td> |
| <td class="paramname"><em>topic_num</em> </td> |
| </tr> |
| <tr> |
| <td></td> |
| <td>)</td> |
| <td></td><td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">word_count</td><td>The number of words in the document </td></tr> |
| <tr><td class="paramname">topic_num</td><td>The number of topics (specified by the user) </td></tr> |
| </table> |
| </dd> |
| </dl> |
| <dl class="section return"><dt>Returns</dt><dd>The topic counts and topic assignments </dd></dl> |
| |
| </div> |
| </div> |
| <a class="anchor" id="aa4fd0a274f1c400014f2ea9549507436"></a> |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">set&lt;lda_result&gt; __lda_util_conorm_data </td> |
| <td>(</td> |
| <td class="paramtype">text </td> |
| <td class="paramname"><em>data_table</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">text </td> |
| <td class="paramname"><em>vocab_table</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">text </td> |
| <td class="paramname"><em>output_data_table</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">text </td> |
| <td class="paramname"><em>output_vocab_table</em> </td> |
| </tr> |
| <tr> |
| <td></td> |
| <td>)</td> |
| <td></td><td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">data_table</td><td>The data table to be normalized </td></tr> |
| <tr><td class="paramname">vocab_table</td><td>The vocabulary table to be normalized </td></tr> |
| <tr><td class="paramname">output_data_table</td><td>The normalized data table </td></tr> |
| <tr><td class="paramname">output_vocab_table</td><td>The normalized vocabulary table </td></tr> |
| </table> |
| </dd> |
| </dl> |
| |
| </div> |
| </div> |
| <a class="anchor" id="aa9729b6bcc20d00936b6831382747c2f"></a> |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">int4 [] __lda_util_index_sort </td> |
| <td>(</td> |
| <td class="paramtype">float8[] </td> |
| <td class="paramname"><em>arr</em>)</td><td></td> |
| <td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">arr</td><td>The array to be sorted </td></tr> |
| </table> |
| </dd> |
| </dl> |
| <dl class="section return"><dt>Returns</dt><dd>The index of elements </dd></dl> |
| |
| </div> |
| </div> |
| <a class="anchor" id="a85990f0bc07a2ab8834301561662939e"></a> |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">set&lt;lda_result&gt; __lda_util_norm_dataset </td> |
| <td>(</td> |
| <td class="paramtype">text </td> |
| <td class="paramname"><em>data_table</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">text </td> |
| <td class="paramname"><em>norm_vocab_table</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">text </td> |
| <td class="paramname"><em>output_data_table</em> </td> |
| </tr> |
| <tr> |
| <td></td> |
| <td>)</td> |
| <td></td><td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">data_table</td><td>The data table to be normalized </td></tr> |
| <tr><td class="paramname">norm_vocab_table</td><td>The normalized vocabulary table </td></tr> |
| <tr><td class="paramname">output_data_table</td><td>The normalized data table </td></tr> |
| </table> |
| </dd> |
| </dl> |
| |
| </div> |
| </div> |
| <a class="anchor" id="a0032ed0be0c41a1b6ed08dca93f4a59f"></a> |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">set&lt;lda_result&gt; __lda_util_norm_vocab </td> |
| <td>(</td> |
| <td class="paramtype">text </td> |
| <td class="paramname"><em>vocab_table</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">text </td> |
| <td class="paramname"><em>output_vocab_table</em> </td> |
| </tr> |
| <tr> |
| <td></td> |
| <td>)</td> |
| <td></td><td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">vocab_table</td><td>The vocabulary table in the form of &lt;wordid::int4, word::text&gt; </td></tr> |
| <tr><td class="paramname">output_vocab_table</td><td>The regularized vocabulary table </td></tr> |
| </table> |
| </dd> |
| </dl> |
| |
| </div> |
| </div> |
| <a class="anchor" id="a9b522ce2d080956d7fb3d5ae79195b8a"></a> |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">float8 [] __lda_util_norm_with_smoothing </td> |
| <td>(</td> |
| <td class="paramtype">float8[] </td> |
| <td class="paramname"><em>arr</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">float8 </td> |
| <td class="paramname"><em>smooth</em> </td> |
| </tr> |
| <tr> |
| <td></td> |
| <td>)</td> |
| <td></td><td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">arr</td><td>The array to be normalized </td></tr> |
| <tr><td class="paramname">smooth</td><td>The smoothing parameter </td></tr> |
| </table> |
| </dd> |
| </dl> |
| <dl class="section return"><dt>Returns</dt><dd>The normalized vector </dd></dl> |
| |
| </div> |
| </div> |
| <a class="anchor" id="a2db0019220f10a1fca78ed77a5c2dcd5"></a> |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">int8 [][] __lda_util_transpose </td> |
| <td>(</td> |
| <td class="paramtype">int8 </td> |
| <td class="paramname"><em>matrix</em>[][])</td><td></td> |
| <td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">matrix</td><td>The input 2-D array </td></tr> |
| </table> |
| </dd> |
| </dl> |
| <dl class="section return"><dt>Returns</dt><dd>The transposed array </dd></dl> |
| |
| </div> |
| </div> |
| <a class="anchor" id="a503b42a0e873896dc4313dbc49899a1f"></a> |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">set&lt;int8[]&gt; __lda_util_unnest </td> |
| <td>(</td> |
| <td class="paramtype">int8 </td> |
| <td class="paramname"><em>arr</em>[][])</td><td></td> |
| <td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">arr</td><td>The 2-D array to be unnested </td></tr> |
| </table> |
| </dd> |
| </dl> |
| <dl class="section return"><dt>Returns</dt><dd>The unnested 1-D arrays </dd></dl> |
| |
| </div> |
| </div> |
| <a class="anchor" id="a25c3ef12d9808d8a38c5fd2630f3b5a9"></a> |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">float8 lda_get_perplexity </td> |
| <td>(</td> |
| <td class="paramtype">text </td> |
| <td class="paramname"><em>model_table</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">text </td> |
| <td class="paramname"><em>output_data_table</em> </td> |
| </tr> |
| <tr> |
| <td></td> |
| <td>)</td> |
| <td></td><td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">model_table</td><td>The model table generated by lda_train </td></tr> |
| <tr><td class="paramname">output_data_table</td><td>The output data table generated by lda_predict </td></tr> |
| </table> |
| </dd> |
| </dl> |
| <dl class="section return"><dt>Returns</dt><dd>The perplexity </dd></dl> |
| |
| </div> |
| </div> |
| <a class="anchor" id="a4ac16d87d50821aadcab0d75d65b3f1b"></a> |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">set&lt;lda_result&gt; lda_get_topic_desc </td> |
| <td>(</td> |
| <td class="paramtype">text </td> |
| <td class="paramname"><em>model_table</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">text </td> |
| <td class="paramname"><em>vocab_table</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">text </td> |
| <td class="paramname"><em>desc_table</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int4 </td> |
| <td class="paramname"><em>top_k</em> </td> |
| </tr> |
| <tr> |
| <td></td> |
| <td>)</td> |
| <td></td><td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">model_table</td><td>The model table generated by the training process </td></tr> |
| <tr><td class="paramname">vocab_table</td><td>The vocabulary table (&lt;wordid, word&gt;) </td></tr> |
| <tr><td class="paramname">top_k</td><td>The number of top words for each topic description </td></tr> |
| <tr><td class="paramname">desc_table</td><td>The output table for storing the per-topic description </td></tr> |
| </table> |
| </dd> |
| </dl> |
| |
| </div> |
| </div> |
| <a class="anchor" id="ace20b6314e5b4e98929a98a6f05d925d"></a> |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">set&lt;lda_result&gt; lda_get_topic_word_count </td> |
| <td>(</td> |
| <td class="paramtype">text </td> |
| <td class="paramname"><em>model_table</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">text </td> |
| <td class="paramname"><em>output_table</em> </td> |
| </tr> |
| <tr> |
| <td></td> |
| <td>)</td> |
| <td></td><td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">model_table</td><td>The model table generated by the training process </td></tr> |
| <tr><td class="paramname">output_table</td><td>The output table storing the per-topic word counts </td></tr> |
| </table> |
| </dd> |
| </dl> |
| |
| </div> |
| </div> |
| <a class="anchor" id="a455699e72328d6b105c61a3c9e7ae5dc"></a> |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">set&lt;lda_result&gt; lda_get_word_topic_count </td> |
| <td>(</td> |
| <td class="paramtype">text </td> |
| <td class="paramname"><em>model_table</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">text </td> |
| <td class="paramname"><em>output_table</em> </td> |
| </tr> |
| <tr> |
| <td></td> |
| <td>)</td> |
| <td></td><td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">model_table</td><td>The model table generated by the training process </td></tr> |
| <tr><td class="paramname">output_table</td><td>The output table storing the per-word topic counts </td></tr> |
| </table> |
| </dd> |
| </dl> |
| |
| </div> |
| </div> |
| <a class="anchor" id="af1fde06c39dd12bb9e5544997f815323"></a> |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">set&lt;lda_result&gt; lda_predict </td> |
| <td>(</td> |
| <td class="paramtype">text </td> |
| <td class="paramname"><em>data_table</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">text </td> |
| <td class="paramname"><em>model_table</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">text </td> |
| <td class="paramname"><em>output_table</em> </td> |
| </tr> |
| <tr> |
| <td></td> |
| <td>)</td> |
| <td></td><td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">data_table</td><td>Table storing the testing dataset, each row is in the form of &lt;docid, wordid, count&gt; where docid, wordid, and count are all non-negative integers. </td></tr> |
| <tr><td class="paramname">model_table</td><td>Table storing the learned models </td></tr> |
| <tr><td class="paramname">output_table</td><td>Table storing per-document topic counts and topic assignments </td></tr> |
| </table> |
| </dd> |
| </dl> |
| <dl class="section note"><dt>Note</dt><dd>default iter_num = 20 </dd></dl> |
| |
| </div> |
| </div> |
| <a class="anchor" id="aaa89e30c8fd0ba41b6feee01ee195330"></a> |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">set&lt;lda_result&gt; lda_predict </td> |
| <td>(</td> |
| <td class="paramtype">text </td> |
| <td class="paramname"><em>data_table</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">text </td> |
| <td class="paramname"><em>model_table</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">text </td> |
| <td class="paramname"><em>output_table</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int4 </td> |
| <td class="paramname"><em>iter_num</em> </td> |
| </tr> |
| <tr> |
| <td></td> |
| <td>)</td> |
| <td></td><td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| |
| </div> |
| </div> |
| <a class="anchor" id="aeb7593251a4dedb695494f65dc2d1f80"></a> |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">set&lt;lda_result&gt; lda_train </td> |
| <td>(</td> |
| <td class="paramtype">text </td> |
| <td class="paramname"><em>data_table</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">text </td> |
| <td class="paramname"><em>model_table</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">text </td> |
| <td class="paramname"><em>output_data_table</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int4 </td> |
| <td class="paramname"><em>voc_size</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int4 </td> |
| <td class="paramname"><em>topic_num</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">int4 </td> |
| <td class="paramname"><em>iter_num</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">float8 </td> |
| <td class="paramname"><em>alpha</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">float8 </td> |
| <td class="paramname"><em>beta</em> </td> |
| </tr> |
| <tr> |
| <td></td> |
| <td>)</td> |
| <td></td><td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">data_table</td><td>Table storing the training dataset, each row is in the form of &lt;docid, wordid, count&gt; where docid, wordid, and count are all non-negative integers. </td></tr> |
| <tr><td class="paramname">model_table</td><td>Table storing the learned models (voc_size, topic_num, alpha, beta, per-word topic counts, and corpus-level topic counts) </td></tr> |
| <tr><td class="paramname">output_data_table</td><td>Table storing the output data table in the form of &lt;docid, wordcount, words, counts, topic_count, topic_assignment&gt; </td></tr> |
| <tr><td class="paramname">voc_size</td><td>Size of the vocabulary (Note that the wordid should be continuous integers starting from 0 to voc_size - 1. A data validation routine will be called to validate the dataset.) </td></tr> |
| <tr><td class="paramname">topic_num</td><td>Number of topics (e.g. 100) </td></tr> |
| <tr><td class="paramname">iter_num</td><td>Number of iterations (e.g. 60) </td></tr> |
| <tr><td class="paramname">alpha</td><td>Dirichlet parameter for the per-doc topic multinomial (e.g. 50/topic_num) </td></tr> |
| <tr><td class="paramname">beta</td><td>Dirichlet parameter for the per-topic word multinomial (e.g. 0.01) </td></tr> |
| </table> |
| </dd> |
| </dl> |
| |
| </div> |
| </div> |
| </div><!-- contents --> |
| </div><!-- doc-content --> |
| <!-- start footer part --> |
| <div id="nav-path" class="navpath"><!-- id is needed for treeview function! --> |
| <ul> |
| <li class="navelem"><a class="el" href="dir_68267d1309a1af8e8297ef4c3efbcdba.html">src</a></li><li class="navelem"><a class="el" href="dir_efbcf68973d247bbf15f9eecae7f24e3.html">ports</a></li><li class="navelem"><a class="el" href="dir_a4a48839224ef8488facbffa8a397967.html">postgres</a></li><li class="navelem"><a class="el" href="dir_dc596537ad427a4d866006d1a3e1fe29.html">modules</a></li><li class="navelem"><a class="el" href="dir_6ff79b0655deb26abf8f86290b84a97c.html">lda</a></li><li class="navelem"><a class="el" href="lda_8sql__in.html">lda.sql_in</a></li> |
| <li class="footer">Generated on Thu Jul 3 2014 17:38:00 for MADlib by |
| <a href="http://www.doxygen.org/index.html"> |
| <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.4 </li> |
| </ul> |
| </div> |
| </body> |
| </html> |