| <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> |
| <html xmlns="http://www.w3.org/1999/xhtml"> |
| <head> |
| <!-- Document metadata: declared content type / encoding and the page title. --> |
| <meta http-equiv="Content-Type" content="text/html;charset=UTF-8"/> |
| <title>MADlib: viterbi.sql_in Source File</title> |
| |
| <!-- Stylesheets for the tab bars, the general page layout and the navigation tree. --> |
| <link href="tabs.css" rel="stylesheet" type="text/css"/> |
| <link href="doxygen.css" rel="stylesheet" type="text/css" /> |
| <link href="navtree.css" rel="stylesheet" type="text/css"/> |
| <!-- jQuery, then the side-panel and navigation-tree scripts; initResizable runs once the DOM is ready. --> |
| <script type="text/javascript" src="jquery.js"></script> |
| <script type="text/javascript" src="resize.js"></script> |
| <script type="text/javascript" src="navtree.js"></script> |
| <script type="text/javascript"> |
| $(document).ready(initResizable); |
| </script> |
| <!-- Search assets; once the DOM is ready, search item 0 is selected on the global searchBox object. --> |
| <link href="search/search.css" rel="stylesheet" type="text/css"/> |
| <script type="text/javascript" src="search/search.js"></script> |
| <script type="text/javascript"> |
| $(document).ready(function() { searchBox.OnSelectItem(0); }); |
| </script> |
| <!-- MathJax loader; the inline body is the MathJax.Hub.Config call that lists the extensions and input/output jax. --> |
| <script type="text/javascript" src="../mathjax/MathJax.js"> |
| MathJax.Hub.Config({ |
| extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"], |
| jax: ["input/TeX","output/HTML-CSS"] |
| }); |
| </script> |
| </head> |
| <body> |
| <div id="top"><!-- do not remove this div! --> |
| |
| |
| <!-- Title banner: project name, version number, a link to the newer documentation, and the project brief. --> |
| <div id="titlearea"> |
| <!-- Single-row table used purely for layout of the banner. --> |
| <table cellspacing="0" cellpadding="0"> |
| <tbody> |
| <tr style="height: 56px;"> |
| |
| |
| <!-- Only cell of the row: holds the project name block and the brief line. --> |
| <td style="padding-left: 0.5em;"> |
| <div id="projectname">MADlib |
|  <span id="projectnumber">0.7</span> <span style="font-size:10pt; font-style:italic"><a href="../latest/./viterbi_8sql__in_source.html"> A newer version is available</a></span> |
| </div> |
| <div id="projectbrief">User Documentation</div> |
| </td> |
| |
| |
| |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| |
| <!-- Generated by Doxygen 1.7.5.1 --> |
| <script type="text/javascript"> |
| var searchBox = new SearchBox("searchBox", "search",false,'Search'); |
| </script> |
| <script type="text/javascript" src="dynsections.js"></script> |
| <!-- Primary tab bar: top-level page links followed by the search widget. --> |
| <div id="navrow1" class="tabs"> |
| <ul class="tablist"> |
| <li><a href="index.html"><span>Main Page</span></a></li> |
| <li><a href="modules.html"><span>Modules</span></a></li> |
| <li class="current"><a href="files.html"><span>Files</span></a></li> |
| <!-- Search widget; every event handler below calls a method on the global searchBox object. --> |
| <li> |
| <div id="MSearchBox" class="MSearchBoxInactive"> |
| <span class="left"> |
| <!-- Magnifier icon: mouse-over / mouse-out call OnSearchSelectShow / OnSearchSelectHide. --> |
| <img id="MSearchSelect" src="search/mag_sel.png" |
| onmouseover="return searchBox.OnSearchSelectShow()" |
| onmouseout="return searchBox.OnSearchSelectHide()" |
| alt=""/> |
| <!-- Query field: focus, blur and key-up events are forwarded to searchBox; access key is S. --> |
| <input type="text" id="MSearchField" value="Search" accesskey="S" |
| onfocus="searchBox.OnSearchFieldFocus(true)" |
| onblur="searchBox.OnSearchFieldFocus(false)" |
| onkeyup="searchBox.OnSearchFieldChange(event)"/> |
| </span><span class="right"> |
| <!-- Close control: its link invokes searchBox.CloseResultsWindow(). --> |
| <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a> |
| </span> |
| </div> |
| </li> |
| </ul> |
| </div> |
| <!-- Secondary tab bar: links to the file list and the file members index. --> |
| <div id="navrow2" class="tabs2"> |
| <ul class="tablist"> |
| <li><a href="files.html"><span>File List</span></a></li> |
| <li><a href="globals.html"><span>File Members</span></a></li> |
| </ul> |
| </div> |
| </div> |
| <!-- Side panel for the navigation tree; the nav-tree-contents container is empty in the static markup. --> |
| <div id="side-nav" class="ui-resizable side-nav-resizable"> |
| <div id="nav-tree"> |
| <div id="nav-tree-contents"> |
| </div> |
| </div> |
| <!-- Split bar element (class ui-resizable-handle); text selection on it is disabled for Mozilla browsers. --> |
| <div id="splitbar" style="-moz-user-select:none;" |
| class="ui-resizable-handle"> |
| </div> |
| </div> |
| <!-- Calls initNavTree for this file's documentation page. NOTE(review): initNavTree is not defined in this file, presumably it comes from navtree.js; confirm. --> |
| <script type="text/javascript"> |
| initNavTree('viterbi_8sql__in.html',''); |
| </script> |
| <div id="doc-content"> |
| <div class="header"> |
| <div class="headertitle"> |
| <div class="title">viterbi.sql_in</div> </div> |
| </div> |
| <div class="contents"> |
| <a href="viterbi_8sql__in.html">Go to the documentation of this file.</a><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">/* ----------------------------------------------------------------------- */</span><span class="comment">/**</span> |
| <a name="l00002"></a>00002 <span class="comment"> *</span> |
| <a name="l00003"></a>00003 <span class="comment"> * @file viterbi.sql_in</span> |
| <a name="l00004"></a>00004 <span class="comment"> * @brief concatenate a set of input values into arrays to feed into viterbi c </span> |
| <a name="l00005"></a>00005 <span class="comment"> * function and create a human readable view of the output</span> |
| <a name="l00006"></a>00006 <span class="comment"> * @date February 2012</span> |
| <a name="l00007"></a>00007 <span class="comment"> *</span> |
| <a name="l00008"></a>00008 <span class="comment"> *</span> |
| <a name="l00009"></a>00009 <span class="comment"> */</span><span class="comment">/* ----------------------------------------------------------------------- */</span> |
| <a name="l00010"></a>00010 |
| <a name="l00011"></a>00011 m4_include(`SQLCommon.m4<span class="stringliteral">')</span> |
| <a name="l00012"></a>00012 <span class="stringliteral"></span><span class="comment"></span> |
| <a name="l00013"></a>00013 <span class="comment">/**</span> |
| <a name="l00014"></a>00014 <span class="comment"> * @brief This function creates a human readable view of the results of Viterbi function</span> |
| <a name="l00015"></a>00015 <span class="comment"> * @param segtbl Name of table containing all the testing sentences.</span> |
| <a name="l00016"></a>00016 <span class="comment"> * @param labeltbl Name of table containing all the labels in the label space.</span> |
| <a name="l00017"></a>00017 <span class="comment"> * @param result_tbl Name of table storing the best label sequence and the conditional probability.</span> |
| <a name="l00018"></a>00018 <span class="comment"> * @param vw Name of the human readable view of output.</span> |
| <a name="l00019"></a>00019 <span class="comment">*/</span> |
| <a name="l00020"></a>00020 |
| <a name="l00021"></a>00021 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.vcrf_top1_view (segtbl text, labeltbl text, result_tbl text, vw text) returns text AS |
| <a name="l00022"></a>00022 $$ |
| <a name="l00023"></a>00023 rv = plpy.execute('SELECT COUNT(*) AS total FROM <span class="stringliteral">' + labeltbl);</span> |
| <a name="l00024"></a>00024 <span class="stringliteral">nlabel = rv[0]['</span>total<span class="stringliteral">']</span> |
| <a name="l00025"></a>00025 <span class="stringliteral">query = """create view """ + vw + """ AS</span> |
| <a name="l00026"></a>00026 <span class="stringliteral"> select segs.doc_id, start_pos, seg_text, L.label, (L.id+1) as id, (result.label[max_pos+2]::float/1000000) as prob</span> |
| <a name="l00027"></a>00027 <span class="stringliteral"> from """ + segtbl + """ segs, """ + labeltbl + """ L, """ + result_tbl + """ result</span> |
| <a name="l00028"></a>00028 <span class="stringliteral"> where result.label[segs.start_pos+1]=L.id and segs.doc_id=result.doc_id</span> |
| <a name="l00029"></a>00029 <span class="stringliteral"> order by doc_id, start_pos;"""</span> |
| <a name="l00030"></a>00030 <span class="stringliteral">plpy.execute(query)</span> |
| <a name="l00031"></a>00031 <span class="stringliteral">return vw</span> |
| <a name="l00032"></a>00032 <span class="stringliteral">$$ language plpythonu strict;</span> |
| <a name="l00033"></a>00033 <span class="stringliteral"></span><span class="comment"></span> |
| <a name="l00034"></a>00034 <span class="comment">/**</span> |
| <a name="l00035"></a>00035 <span class="comment"> * @brief This function implements the Viterbi algorithm which takes the sentence to be label as input and return the top1 labeling for that sentence </span> |
| <a name="l00036"></a>00036 <span class="comment"> * @param marray Name of arrays containing m factors</span> |
| <a name="l00037"></a>00037 <span class="comment"> * @param rarray Name of arrays containing r factors</span> |
| <a name="l00038"></a>00038 <span class="comment"> * @param nlabel Total number of labels in the label space</span> |
| <a name="l00039"></a>00039 <span class="comment"> * @returns the top1 label sequence, the last two elements in the array is used to calculate the top1 probability </span> |
| <a name="l00040"></a>00040 <span class="comment"> */</span> |
| <a name="l00041"></a>00041 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.vcrf_top1_label(mArray int[], rArray int[], nlabel int) |
| <a name="l00042"></a>00042 returns int[] as 'MODULE_PATHNAME<span class="stringliteral">' language c strict;</span> |
| <a name="l00043"></a><a class="code" href="viterbi_8sql__in.html#a5949d70b666badace9f7b0c28b77f9de">00043</a> <span class="stringliteral"></span> |
| <a name="l00044"></a>00044 <span class="stringliteral"></span><span class="comment"></span> |
| <a name="l00045"></a>00045 <span class="comment">/**</span> |
| <a name="l00046"></a>00046 <span class="comment"> * @brief This function prepares the inputs for the c function 'vcrf_top1_label' and invoke the c function. </span> |
| <a name="l00047"></a>00047 <span class="comment"> * @param segtbl Name of table containing all the testing sentences.</span> |
| <a name="l00048"></a>00048 <span class="comment"> * @param factor_mtbl Name of table containing all the m factors.</span> |
| <a name="l00049"></a>00049 <span class="comment"> * @param factor_rtbl Name of table containing all the r factors.</span> |
| <a name="l00050"></a>00050 <span class="comment"> * @param labeltbl Name of table containing all the labels in the label space.</span> |
| <a name="l00051"></a>00051 <span class="comment"> * @param resulttbl Name of table to store the output</span> |
| <a name="l00052"></a>00052 <span class="comment"> * @returns the top1 label sequence, the last two elements in the array is used to calculate the top1 probability </span> |
| <a name="l00053"></a>00053 <span class="comment"> */</span> |
| <a name="l00054"></a>00054 |
| <a name="l00055"></a>00055 CREATE OR REPLACE FUNCTION |
| <a name="l00056"></a>00056 MADLIB_SCHEMA.vcrf_label(segtbl text, factor_mtbl text, factor_rtbl text, labeltbl text, resulttbl text) RETURNS text AS |
| <a name="l00057"></a>00057 $$ |
| <a name="l00058"></a>00058 origClientMinMessages = plpy.execute("SELECT setting AS setting FROM pg_settings WHERE name = \'client_min_messages\';") |
| <a name="l00059"></a>00059 plpy.execute("SET client_min_messages TO warning;") |
| <a name="l00060"></a>00060 |
| <a name="l00061"></a>00061 plpy.execute("SELECT MADLIB_SCHEMA.create_schema_pg_temp();"); |
| <a name="l00062"></a>00062 |
| <a name="l00063"></a><a class="code" href="viterbi_8sql__in.html#a6b53e65f31c716966daf7596e449863a">00063</a> m_factors = "pg_temp._madlib_m_factors" |
| <a name="l00064"></a>00064 r_factors = "pg_temp._madlib_r_factors" |
| <a name="l00065"></a>00065 segtbl_digits = "pg_temp._madlib_segtbl_digits" |
| <a name="l00066"></a>00066 resulttbl_raw = "pg_temp._madlib_" + resulttbl + "_raw" |
| <a name="l00067"></a>00067 |
| <a name="l00068"></a>00068 plpy.execute("""DROP TABLE IF EXISTS """ + m_factors + """,""" + r_factors + """,""" + segtbl_digits + """,""" + resulttbl_raw + """;""") |
| <a name="l00069"></a>00069 plpy.execute("""CREATE TABLE """ + resulttbl_raw + """(doc_id integer, label integer[]);""") |
| <a name="l00070"></a>00070 |
| <a name="l00071"></a>00071 plpy.execute("""SET client_min_messages TO """ + str(origClientMinMessages[0]['setting<span class="stringliteral">']) + """;""")</span> |
| <a name="l00072"></a>00072 <span class="stringliteral"></span> |
| <a name="l00073"></a>00073 <span class="stringliteral"> # replace digits with "DIGIT" keyword</span> |
| <a name="l00074"></a>00074 <span class="stringliteral"> plpy.execute("""SELECT start_pos,doc_id,seg_text,max_pos INTO """ + segtbl_digits + """ FROM """ + segtbl + """ WHERE</span> |
| <a name="l00075"></a>00075 <span class="stringliteral"> NOT (seg_text ~ E'</span>^[-+]?([0-9]{1,3}[,]?)*[0-9]{1,3}$<span class="stringliteral">' OR seg_text ~ E'</span>^[-+]?[0-9]*[.][0-9]+$<span class="stringliteral">');""")</span> |
| <a name="l00076"></a>00076 <span class="stringliteral"> plpy.execute("""INSERT INTO """ + segtbl_digits + """ SELECT start_pos,doc_id,'</span>DIGIT<span class="stringliteral">',max_pos FROM """ + segtbl + """ WHERE </span> |
| <a name="l00077"></a><a class="code" href="viterbi_8sql__in.html#a1356ba1427d1f466975b40e76c431abc">00077</a> <span class="stringliteral"> seg_text ~ E'</span>^[-+]?([0-9]{1,3}[,]?)*[0-9]{1,3}$<span class="stringliteral">' OR seg_text ~E'</span>^[-+]?[0-9]*[.][0-9]+$<span class="stringliteral">';""")</span> |
| <a name="l00078"></a>00078 <span class="stringliteral"></span> |
| <a name="l00079"></a>00079 <span class="stringliteral"> query = """</span> |
| <a name="l00080"></a>00080 <span class="stringliteral"> -- for each sentence, store array representation of r_factors</span> |
| <a name="l00081"></a>00081 <span class="stringliteral">m4_ifdef(`__HAS_ORDERED_AGGREGATES__'</span>, ` |
| <a name="l00082"></a>00082 select doc_id, <a class="code" href="array__ops_8sql__in.html#af9f60293134ba4ce05b1f97f6faba822" title="ARRAY_AGG aggregate for compatibility with GPDB &lt; 4.1 and Postgres &lt; 9.0 This is a slower solution th...">array_agg</a>(score order by start_pos, label) as score |
| <a name="l00083"></a>00083 ', ` |
| <a name="l00084"></a>00084 select doc_id, array( |
| <a name="l00085"></a>00085 select score |
| <a name="l00086"></a>00086 from """ + factor_rtbl + """ factors, |
| <a name="l00087"></a>00087 """ + segtbl_digits + """ seg |
| <a name="l00088"></a>00088 where factors.seg_text = seg.seg_text |
| <a name="l00089"></a>00089 and doc_id = ss.doc_id |
| <a name="l00090"></a>00090 order by start_pos, label |
| <a name="l00091"></a>00091 ) as score |
| <a name="l00092"></a>00092 ') |
| <a name="l00093"></a>00093 into """ + r_factors + """ |
| <a name="l00094"></a>00094 from (select doc_id, start_pos, label, score |
| <a name="l00095"></a>00095 from """ + factor_rtbl + """ factors, |
| <a name="l00096"></a>00096 """ + segtbl_digits + """ seg |
| <a name="l00097"></a>00097 where factors.seg_text=seg.seg_text) as ss |
| <a name="l00098"></a>00098 group by doc_id |
| <a name="l00099"></a>00099 order by doc_id;""" |
| <a name="l00100"></a>00100 plpy.execute(query) |
| <a name="l00101"></a>00101 plpy.execute("analyze " + r_factors + ";") |
| <a name="l00102"></a>00102 |
| <a name="l00103"></a>00103 query = """ |
| <a name="l00104"></a>00104 -- array representation of m_factor |
| <a name="l00105"></a>00105 select score |
| <a name="l00106"></a>00106 into """ + m_factors + """ |
| <a name="l00107"></a>00107 from (select score |
| <a name="l00108"></a>00108 from """ + factor_mtbl + """ factors) as ss; """ |
| <a name="l00109"></a>00109 plpy.execute(query) |
| <a name="l00110"></a>00110 |
| <a name="l00111"></a>00111 rv = plpy.execute('SELECT COUNT(*) AS total FROM ' + labeltbl); |
| <a name="l00112"></a>00112 nlabel = rv[0]['total'] |
| <a name="l00113"></a>00113 |
| <a name="l00114"></a>00114 query = (""" INSERT INTO """ + resulttbl_raw + """ |
| <a name="l00115"></a>00115 SELECT doc_id, MADLIB_SCHEMA.<a class="code" href="viterbi_8sql__in.html#a6b53e65f31c716966daf7596e449863a" title="This function implements the Viterbi algorithm which takes the sentence to be label as input and retu...">vcrf_top1_label</a>(mfactors.score, rfactors.score, """ + str(nlabel) + """ ) |
| <a name="l00116"></a>00116 FROM """ + m_factors + """ mfactors, """ + r_factors + """ rfactors;""") |
| <a name="l00117"></a>00117 |
| <a name="l00118"></a>00118 plpy.execute(query); |
| <a name="l00119"></a>00119 |
| <a name="l00120"></a>00120 query = "SELECT * FROM MADLIB_SCHEMA.<a class="code" href="viterbi_8sql__in.html#a5949d70b666badace9f7b0c28b77f9de" title="This function creates a human readable view of the results of Viterbi function.">vcrf_top1_view</a>(\'" + segtbl + "\', \'" + labeltbl + "\', \'" + resulttbl_raw + "\', \'" + resulttbl + "\');" |
| <a name="l00121"></a>00121 plpy.execute(query); |
| <a name="l00122"></a>00122 |
| <a name="l00123"></a>00123 $$ LANGUAGE plpythonu STRICT; |
| </pre></div></div> |
| </div> |
| <!-- Page footer: breadcrumb for the current file and the generator credit, followed by the search pop-ups. --> |
| <div id="nav-path" class="navpath"> |
| <!-- The list holds only list items; the search pop-ups live after it because a div is not allowed as a direct child of ul. --> |
| <ul> |
| <li class="navelem"><a class="el" href="viterbi_8sql__in.html">viterbi.sql_in</a> </li> |
| <li class="footer">Generated on Fri May 10 2013 01:37:13 for MADlib by |
| <a href="http://www.doxygen.org/index.html"> |
| <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.7.5.1 </li> |
| </ul> |
| <!-- window showing the filter options --> |
| <div id="MSearchSelectWindow" onmouseover="return searchBox.OnSearchSelectShow()" onmouseout="return searchBox.OnSearchSelectHide()" onkeydown="return searchBox.OnSearchSelectKey(event)"> |
| <a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Files</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Functions</a></div> |
| |
| <!-- iframe showing the search results (closed by default) --> |
| <div id="MSearchResultsWindow"> |
| <iframe src="javascript:void(0)" frameborder="0" name="MSearchResults" id="MSearchResults" title="Search results"> |
| </iframe> |
| </div> |
| </div> |
| |
| |
| </body> |
| </html> |