blob: 64f2b26e86c119ede923ebc4973b30b8721d0b93 [file] [log] [blame]
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<!-- "text/xhtml" is not a registered media type; text/html is the type this
     XHTML 1.0 Transitional page is meant to be delivered as. -->
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8"/>
<title>MADlib: viterbi.sql_in Source File</title>
<!-- Doxygen stylesheets: tab rows, general page styling, navigation tree. -->
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<link href="doxygen.css" rel="stylesheet" type="text/css" />
<link href="navtree.css" rel="stylesheet" type="text/css"/>
<!-- jquery.js is loaded first because the inline scripts below call $(). -->
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="resize.js"></script>
<script type="text/javascript" src="navtree.js"></script>
<script type="text/javascript">
$(document).ready(initResizable);
</script>
<link href="search/search.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="search/search.js"></script>
<!-- searchBox is created by an inline script in the body; the ready callback
     only runs once the document has been parsed, so it exists by then. -->
<script type="text/javascript">
$(document).ready(function() { searchBox.OnSelectItem(0); });
</script>
<!-- MathJax configuration is carried inside the loader's own script element.
     The object literal must not end with a trailing comma: older JScript
     engines reject it as a syntax error. -->
<script type="text/javascript" src="../mathjax/MathJax.js">
MathJax.Hub.Config({
extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
jax: ["input/TeX","output/HTML-CSS"]
});
</script>
</head>
<body>
<div id="top"><!-- do not remove this div! -->
<!-- Title banner: project name, version number and tagline. -->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
<tbody>
<tr style="height: 56px;">
<td style="padding-left: 0.5em;">
<div id="projectname">MADlib
&#160;<span id="projectnumber">0.7</span> <span style="font-size:10pt; font-style:italic"><a href="../latest/./viterbi_8sql__in_source.html"> A newer version is available</a></span>
</div>
<div id="projectbrief">User Documentation</div>
</td>
</tr>
</tbody>
</table>
</div>
<!-- Generated by Doxygen 1.7.5.1 -->
<!-- Creates the global searchBox object that the inline handlers below call. -->
<script type="text/javascript">
var searchBox = new SearchBox("searchBox", "search",false,'Search');
</script>
<script type="text/javascript" src="dynsections.js"></script>
<!-- Primary tab row; the last list item hosts the search box. -->
<div id="navrow1" class="tabs">
<ul class="tablist">
<li><a href="index.html"><span>Main&#160;Page</span></a></li>
<li><a href="modules.html"><span>Modules</span></a></li>
<li class="current"><a href="files.html"><span>Files</span></a></li>
<li>
<div id="MSearchBox" class="MSearchBoxInactive">
<span class="left">
<!-- The scope selector image and the close link below contain no text of
     their own, so each image carries a text alternative; the field gets a
     title because the markup provides no label element for it. -->
<img id="MSearchSelect" src="search/mag_sel.png"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
alt="Select search scope"/>
<input type="text" id="MSearchField" value="Search" title="Search" accesskey="S"
onfocus="searchBox.OnSearchFieldFocus(true)"
onblur="searchBox.OnSearchFieldFocus(false)"
onkeyup="searchBox.OnSearchFieldChange(event)"/>
</span><span class="right">
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt="Close search results"/></a>
</span>
</div>
</li>
</ul>
</div>
<!-- Secondary tab row for the "Files" section. -->
<div id="navrow2" class="tabs2">
<ul class="tablist">
<li><a href="files.html"><span>File&#160;List</span></a></li>
<li><a href="globals.html"><span>File&#160;Members</span></a></li>
</ul>
</div>
</div>
<!-- Side panel that hosts the navigation tree. -->
<div id="side-nav" class="ui-resizable side-nav-resizable">
<div id="nav-tree">
<!-- Empty in the static markup. NOTE(review): presumably populated by the
     initNavTree() call below; confirm in navtree.js. -->
<div id="nav-tree-contents">
</div>
</div>
<!-- Handle between the side panel and the content area; the inline style
     disables text selection on it in Gecko-based browsers only. -->
<div id="splitbar" style="-moz-user-select:none;"
class="ui-resizable-handle">
</div>
</div>
<script type="text/javascript">
// Start the navigation tree with this file's documentation page as the first
// argument; the second argument is passed as an empty string.
initNavTree('viterbi_8sql__in.html','');
</script>
<div id="doc-content">
<div class="header">
<div class="headertitle">
<div class="title">viterbi.sql_in</div> </div>
</div>
<div class="contents">
<a href="viterbi_8sql__in.html">Go to the documentation of this file.</a><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">/* ----------------------------------------------------------------------- */</span><span class="comment">/**</span>
<a name="l00002"></a>00002 <span class="comment"> *</span>
<a name="l00003"></a>00003 <span class="comment"> * @file viterbi.sql_in</span>
<a name="l00004"></a>00004 <span class="comment"> * @brief concatenate a set of input values into arrays to feed into the Viterbi C </span>
<a name="l00005"></a>00005 <span class="comment"> * function and create a human readable view of the output</span>
<a name="l00006"></a>00006 <span class="comment"> * @date February 2012</span>
<a name="l00007"></a>00007 <span class="comment"> *</span>
<a name="l00008"></a>00008 <span class="comment"> *</span>
<a name="l00009"></a>00009 <span class="comment"> */</span><span class="comment">/* ----------------------------------------------------------------------- */</span>
<a name="l00010"></a>00010
<a name="l00011"></a>00011 m4_include(`SQLCommon.m4<span class="stringliteral">&#39;)</span>
<a name="l00012"></a>00012 <span class="stringliteral"></span><span class="comment"></span>
<a name="l00013"></a>00013 <span class="comment">/**</span>
<a name="l00014"></a>00014 <span class="comment"> * @brief This function creates a human readable view of the results of Viterbi function</span>
<a name="l00015"></a>00015 <span class="comment"> * @param segtbl Name of table containing all the testing sentences.</span>
<a name="l00016"></a>00016 <span class="comment"> * @param labeltbl Name of table containing all the labels in the label space.</span>
<a name="l00017"></a>00017 <span class="comment"> * @param result_tbl Name of table storing the best label sequence and the conditional probability.</span>
<a name="l00018"></a>00018 <span class="comment"> * @param vw Name of the human readable view of output.</span>
<a name="l00019"></a>00019 <span class="comment">*/</span>
<a name="l00020"></a>00020
<a name="l00021"></a>00021 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.vcrf_top1_view (segtbl text, labeltbl text, result_tbl text, vw text) returns text AS
<a name="l00022"></a>00022 $$
<a name="l00023"></a>00023 rv = plpy.execute(&#39;SELECT COUNT(*) AS total FROM <span class="stringliteral">&#39; + labeltbl);</span>
<a name="l00024"></a>00024 <span class="stringliteral">nlabel = rv[0][&#39;</span>total<span class="stringliteral">&#39;]</span>
<a name="l00025"></a>00025 <span class="stringliteral">query = &quot;&quot;&quot;create view &quot;&quot;&quot; + vw + &quot;&quot;&quot; AS</span>
<a name="l00026"></a>00026 <span class="stringliteral"> select segs.doc_id, start_pos, seg_text, L.label, (L.id+1) as id, (result.label[max_pos+2]::float/1000000) as prob</span>
<a name="l00027"></a>00027 <span class="stringliteral"> from &quot;&quot;&quot; + segtbl + &quot;&quot;&quot; segs, &quot;&quot;&quot; + labeltbl + &quot;&quot;&quot; L, &quot;&quot;&quot; + result_tbl + &quot;&quot;&quot; result</span>
<a name="l00028"></a>00028 <span class="stringliteral"> where result.label[segs.start_pos+1]=L.id and segs.doc_id=result.doc_id</span>
<a name="l00029"></a>00029 <span class="stringliteral"> order by doc_id, start_pos;&quot;&quot;&quot;</span>
<a name="l00030"></a>00030 <span class="stringliteral">plpy.execute(query)</span>
<a name="l00031"></a>00031 <span class="stringliteral">return vw</span>
<a name="l00032"></a>00032 <span class="stringliteral">$$ language plpythonu strict;</span>
<a name="l00033"></a>00033 <span class="stringliteral"></span><span class="comment"></span>
<a name="l00034"></a>00034 <span class="comment">/**</span>
<a name="l00035"></a>00035 <span class="comment"> * @brief This function implements the Viterbi algorithm, which takes the sentence to be labeled as input and returns the top1 labeling for that sentence </span>
<a name="l00036"></a>00036 <span class="comment"> * @param marray Name of arrays containing m factors</span>
<a name="l00037"></a>00037 <span class="comment"> * @param rarray Name of arrays containing r factors</span>
<a name="l00038"></a>00038 <span class="comment"> * @param nlabel Total number of labels in the label space</span>
<a name="l00039"></a>00039 <span class="comment"> * @returns the top1 label sequence; the last two elements in the array are used to calculate the top1 probability </span>
<a name="l00040"></a>00040 <span class="comment"> */</span>
<a name="l00041"></a>00041 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.vcrf_top1_label(mArray int[], rArray int[], nlabel int)
<a name="l00042"></a>00042 returns int[] as &#39;MODULE_PATHNAME<span class="stringliteral">&#39; language c strict;</span>
<a name="l00043"></a><a class="code" href="viterbi_8sql__in.html#a5949d70b666badace9f7b0c28b77f9de">00043</a> <span class="stringliteral"></span>
<a name="l00044"></a>00044 <span class="stringliteral"></span><span class="comment"></span>
<a name="l00045"></a>00045 <span class="comment">/**</span>
<a name="l00046"></a>00046 <span class="comment"> * @brief This function prepares the inputs for the C function &#39;vcrf_top1_label&#39; and invokes the C function. </span>
<a name="l00047"></a>00047 <span class="comment"> * @param segtbl Name of table containing all the testing sentences.</span>
<a name="l00048"></a>00048 <span class="comment"> * @param factor_mtbl Name of table containing all the m factors.</span>
<a name="l00049"></a>00049 <span class="comment"> * @param factor_rtbl Name of table containing all the r factors.</span>
<a name="l00050"></a>00050 <span class="comment"> * @param labeltbl Name of table containing all the labels in the label space.</span>
<a name="l00051"></a>00051 <span class="comment"> * @param resulttbl Name of table to store the output</span>
<a name="l00052"></a>00052 <span class="comment"> * @returns the top1 label sequence; the last two elements in the array are used to calculate the top1 probability </span>
<a name="l00053"></a>00053 <span class="comment"> */</span>
<a name="l00054"></a>00054
<a name="l00055"></a>00055 CREATE OR REPLACE FUNCTION
<a name="l00056"></a>00056 MADLIB_SCHEMA.vcrf_label(segtbl text, factor_mtbl text, factor_rtbl text, labeltbl text, resulttbl text) RETURNS text AS
<a name="l00057"></a>00057 $$
<a name="l00058"></a>00058 origClientMinMessages = plpy.execute(&quot;SELECT setting AS setting FROM pg_settings WHERE name = \&#39;client_min_messages\&#39;;&quot;)
<a name="l00059"></a>00059 plpy.execute(&quot;SET client_min_messages TO warning;&quot;)
<a name="l00060"></a>00060
<a name="l00061"></a>00061 plpy.execute(&quot;SELECT MADLIB_SCHEMA.create_schema_pg_temp();&quot;);
<a name="l00062"></a>00062
<a name="l00063"></a><a class="code" href="viterbi_8sql__in.html#a6b53e65f31c716966daf7596e449863a">00063</a> m_factors = &quot;pg_temp._madlib_m_factors&quot;
<a name="l00064"></a>00064 r_factors = &quot;pg_temp._madlib_r_factors&quot;
<a name="l00065"></a>00065 segtbl_digits = &quot;pg_temp._madlib_segtbl_digits&quot;
<a name="l00066"></a>00066 resulttbl_raw = &quot;pg_temp._madlib_&quot; + resulttbl + &quot;_raw&quot;
<a name="l00067"></a>00067
<a name="l00068"></a>00068 plpy.execute(&quot;&quot;&quot;DROP TABLE IF EXISTS &quot;&quot;&quot; + m_factors + &quot;&quot;&quot;,&quot;&quot;&quot; + r_factors + &quot;&quot;&quot;,&quot;&quot;&quot; + segtbl_digits + &quot;&quot;&quot;,&quot;&quot;&quot; + resulttbl_raw + &quot;&quot;&quot;;&quot;&quot;&quot;)
<a name="l00069"></a>00069 plpy.execute(&quot;&quot;&quot;CREATE TABLE &quot;&quot;&quot; + resulttbl_raw + &quot;&quot;&quot;(doc_id integer, label integer[]);&quot;&quot;&quot;)
<a name="l00070"></a>00070
<a name="l00071"></a>00071 plpy.execute(&quot;&quot;&quot;SET client_min_messages TO &quot;&quot;&quot; + str(origClientMinMessages[0][&#39;setting<span class="stringliteral">&#39;]) + &quot;&quot;&quot;;&quot;&quot;&quot;)</span>
<a name="l00072"></a>00072 <span class="stringliteral"></span>
<a name="l00073"></a>00073 <span class="stringliteral"> # replace digits with &quot;DIGIT&quot; keyword</span>
<a name="l00074"></a>00074 <span class="stringliteral"> plpy.execute(&quot;&quot;&quot;SELECT start_pos,doc_id,seg_text,max_pos INTO &quot;&quot;&quot; + segtbl_digits + &quot;&quot;&quot; FROM &quot;&quot;&quot; + segtbl + &quot;&quot;&quot; WHERE</span>
<a name="l00075"></a>00075 <span class="stringliteral"> NOT (seg_text ~ E&#39;</span>^[-+]?([0-9]{1,3}[,]?)*[0-9]{1,3}$<span class="stringliteral">&#39; OR seg_text ~ E&#39;</span>^[-+]?[0-9]*[.][0-9]+$<span class="stringliteral">&#39;);&quot;&quot;&quot;)</span>
<a name="l00076"></a>00076 <span class="stringliteral"> plpy.execute(&quot;&quot;&quot;INSERT INTO &quot;&quot;&quot; + segtbl_digits + &quot;&quot;&quot; SELECT start_pos,doc_id,&#39;</span>DIGIT<span class="stringliteral">&#39;,max_pos FROM &quot;&quot;&quot; + segtbl + &quot;&quot;&quot; WHERE </span>
<a name="l00077"></a><a class="code" href="viterbi_8sql__in.html#a1356ba1427d1f466975b40e76c431abc">00077</a> <span class="stringliteral"> seg_text ~ E&#39;</span>^[-+]?([0-9]{1,3}[,]?)*[0-9]{1,3}$<span class="stringliteral">&#39; OR seg_text ~E&#39;</span>^[-+]?[0-9]*[.][0-9]+$<span class="stringliteral">&#39;;&quot;&quot;&quot;)</span>
<a name="l00078"></a>00078 <span class="stringliteral"></span>
<a name="l00079"></a>00079 <span class="stringliteral"> query = &quot;&quot;&quot;</span>
<a name="l00080"></a>00080 <span class="stringliteral"> -- for each sentence, store array representation of r_factors</span>
<a name="l00081"></a>00081 <span class="stringliteral">m4_ifdef(`__HAS_ORDERED_AGGREGATES__&#39;</span>, `
<a name="l00082"></a>00082 select doc_id, <a class="code" href="array__ops_8sql__in.html#af9f60293134ba4ce05b1f97f6faba822" title="ARRAY_AGG aggregate for compatibility with GPDB &lt; 4.1 and Postgres &lt; 9.0 This is a slower solution th...">array_agg</a>(score order by start_pos, label) as score
<a name="l00083"></a>00083 &#39;, `
<a name="l00084"></a>00084 select doc_id, array(
<a name="l00085"></a>00085 select score
<a name="l00086"></a>00086 from &quot;&quot;&quot; + factor_rtbl + &quot;&quot;&quot; factors,
<a name="l00087"></a>00087 &quot;&quot;&quot; + segtbl_digits + &quot;&quot;&quot; seg
<a name="l00088"></a>00088 where factors.seg_text = seg.seg_text
<a name="l00089"></a>00089 and doc_id = ss.doc_id
<a name="l00090"></a>00090 order by start_pos, label
<a name="l00091"></a>00091 ) as score
<a name="l00092"></a>00092 &#39;)
<a name="l00093"></a>00093 into &quot;&quot;&quot; + r_factors + &quot;&quot;&quot;
<a name="l00094"></a>00094 from (select doc_id, start_pos, label, score
<a name="l00095"></a>00095 from &quot;&quot;&quot; + factor_rtbl + &quot;&quot;&quot; factors,
<a name="l00096"></a>00096 &quot;&quot;&quot; + segtbl_digits + &quot;&quot;&quot; seg
<a name="l00097"></a>00097 where factors.seg_text=seg.seg_text) as ss
<a name="l00098"></a>00098 group by doc_id
<a name="l00099"></a>00099 order by doc_id;&quot;&quot;&quot;
<a name="l00100"></a>00100 plpy.execute(query)
<a name="l00101"></a>00101 plpy.execute(&quot;analyze &quot; + r_factors + &quot;;&quot;)
<a name="l00102"></a>00102
<a name="l00103"></a>00103 query = &quot;&quot;&quot;
<a name="l00104"></a>00104 -- array representation of m_factor
<a name="l00105"></a>00105 select score
<a name="l00106"></a>00106 into &quot;&quot;&quot; + m_factors + &quot;&quot;&quot;
<a name="l00107"></a>00107 from (select score
<a name="l00108"></a>00108 from &quot;&quot;&quot; + factor_mtbl + &quot;&quot;&quot; factors) as ss; &quot;&quot;&quot;
<a name="l00109"></a>00109 plpy.execute(query)
<a name="l00110"></a>00110
<a name="l00111"></a>00111 rv = plpy.execute(&#39;SELECT COUNT(*) AS total FROM &#39; + labeltbl);
<a name="l00112"></a>00112 nlabel = rv[0][&#39;total&#39;]
<a name="l00113"></a>00113
<a name="l00114"></a>00114 query = (&quot;&quot;&quot; INSERT INTO &quot;&quot;&quot; + resulttbl_raw + &quot;&quot;&quot;
<a name="l00115"></a>00115 SELECT doc_id, MADLIB_SCHEMA.<a class="code" href="viterbi_8sql__in.html#a6b53e65f31c716966daf7596e449863a" title="This function implements the Viterbi algorithm which takes the sentence to be label as input and retu...">vcrf_top1_label</a>(mfactors.score, rfactors.score, &quot;&quot;&quot; + str(nlabel) + &quot;&quot;&quot; )
<a name="l00116"></a>00116 FROM &quot;&quot;&quot; + m_factors + &quot;&quot;&quot; mfactors, &quot;&quot;&quot; + r_factors + &quot;&quot;&quot; rfactors;&quot;&quot;&quot;)
<a name="l00117"></a>00117
<a name="l00118"></a>00118 plpy.execute(query);
<a name="l00119"></a>00119
<a name="l00120"></a>00120 query = &quot;SELECT * FROM MADLIB_SCHEMA.<a class="code" href="viterbi_8sql__in.html#a5949d70b666badace9f7b0c28b77f9de" title="This function creates a human readable view of the results of Viterbi function.">vcrf_top1_view</a>(\&#39;&quot; + segtbl + &quot;\&#39;, \&#39;&quot; + labeltbl + &quot;\&#39;, \&#39;&quot; + resulttbl_raw + &quot;\&#39;, \&#39;&quot; + resulttbl + &quot;\&#39;);&quot;
<a name="l00121"></a>00121 plpy.execute(query);
<a name="l00122"></a>00122
<a name="l00123"></a>00123 $$ LANGUAGE plpythonu STRICT;
</pre></div></div>
</div>
<!-- The two search popups are siblings of the footer rather than children of
     its list: a ul may only contain li elements. search.js finds both
     popups by id. NOTE(review): assumes neither popup relies on being inside
     #nav-path for positioning; confirm against search.css and resize.js. -->
<!-- window showing the filter options -->
<div id="MSearchSelectWindow"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
onkeydown="return searchBox.OnSearchSelectKey(event)">
<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Files</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Functions</a></div>
<!-- iframe showing the search results (closed by default); the title gives
     the frame a name for assistive technology. -->
<div id="MSearchResultsWindow">
<iframe src="javascript:void(0)" frameborder="0" title="Search results"
name="MSearchResults" id="MSearchResults">
</iframe>
</div>
<!-- Footer bar: breadcrumb for the current file plus the generator credit. -->
<div id="nav-path" class="navpath">
<ul>
<li class="navelem"><a class="el" href="viterbi_8sql__in.html">viterbi.sql_in</a> </li>
<li class="footer">Generated on Fri May 10 2013 01:37:13 for MADlib by
<a href="http://www.doxygen.org/index.html">
<img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.7.5.1 </li>
</ul>
</div>
</body>
</html>