blob: cf9da0ce68cba021b6a587d0dfc9795b7e6d9689 [file] [log] [blame]
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (1.8.0_121) on Fri Apr 14 22:11:37 PDT 2017 -->
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>CollocMapper (Mahout Map-Reduce 0.13.0 API)</title>
<meta name="date" content="2017-04-14">
<link rel="stylesheet" type="text/css" href="../../../../../../stylesheet.css" title="Style">
<script type="text/javascript" src="../../../../../../script.js"></script>
</head>
<body>
<script type="text/javascript"><!--
try {
if (location.href.indexOf('is-external=true') == -1) {
parent.document.title="CollocMapper (Mahout Map-Reduce 0.13.0 API)";
}
}
catch(err) {
}
//-->
var methods = {"i0":10,"i1":10};
var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"]};
var altColor = "altColor";
var rowColor = "rowColor";
var tableTab = "tableTab";
var activeTableTab = "activeTableTab";
</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
<!-- ========= START OF TOP NAVBAR ======= -->
<div class="topNav"><a name="navbar.top">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.top.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="class-use/CollocMapper.html">Use</a></li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../../../index-all.html">Index</a></li>
<li><a href="../../../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li><a href="../../../../../../org/apache/mahout/vectorizer/collocations/llr/CollocDriver.html" title="class in org.apache.mahout.vectorizer.collocations.llr"><span class="typeNameLink">Prev&nbsp;Class</span></a></li>
<li><a href="../../../../../../org/apache/mahout/vectorizer/collocations/llr/CollocMapper.Count.html" title="enum in org.apache.mahout.vectorizer.collocations.llr"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../../../../../index.html?org/apache/mahout/vectorizer/collocations/llr/CollocMapper.html" target="_top">Frames</a></li>
<li><a href="CollocMapper.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_top">
<li><a href="../../../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
allClassesLink = document.getElementById("allclasses_navbar_top");
if(window==top) {
allClassesLink.style.display = "block";
}
else {
allClassesLink.style.display = "none";
}
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li><a href="#nested.class.summary">Nested</a>&nbsp;|&nbsp;</li>
<li><a href="#field.summary">Field</a>&nbsp;|&nbsp;</li>
<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li><a href="#field.detail">Field</a>&nbsp;|&nbsp;</li>
<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a name="skip.navbar.top">
<!-- -->
</a></div>
<!-- ========= END OF TOP NAVBAR ========= -->
<!-- ======== START OF CLASS DATA ======== -->
<div class="header">
<div class="subTitle">org.apache.mahout.vectorizer.collocations.llr</div>
<h2 title="Class CollocMapper" class="title">Class CollocMapper</h2>
</div>
<div class="contentContainer">
<ul class="inheritance">
<li><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true" title="class or interface in java.lang">java.lang.Object</a></li>
<li>
<ul class="inheritance">
<li>org.apache.hadoop.mapreduce.Mapper&lt;org.apache.hadoop.io.Text,<a href="../../../../../../org/apache/mahout/common/StringTuple.html" title="class in org.apache.mahout.common">StringTuple</a>,<a href="../../../../../../org/apache/mahout/vectorizer/collocations/llr/GramKey.html" title="class in org.apache.mahout.vectorizer.collocations.llr">GramKey</a>,<a href="../../../../../../org/apache/mahout/vectorizer/collocations/llr/Gram.html" title="class in org.apache.mahout.vectorizer.collocations.llr">Gram</a>&gt;</li>
<li>
<ul class="inheritance">
<li>org.apache.mahout.vectorizer.collocations.llr.CollocMapper</li>
</ul>
</li>
</ul>
</li>
</ul>
<div class="description">
<ul class="blockList">
<li class="blockList">
<hr>
<br>
<pre>public class <span class="typeNameLabel">CollocMapper</span>
extends org.apache.hadoop.mapreduce.Mapper&lt;org.apache.hadoop.io.Text,<a href="../../../../../../org/apache/mahout/common/StringTuple.html" title="class in org.apache.mahout.common">StringTuple</a>,<a href="../../../../../../org/apache/mahout/vectorizer/collocations/llr/GramKey.html" title="class in org.apache.mahout.vectorizer.collocations.llr">GramKey</a>,<a href="../../../../../../org/apache/mahout/vectorizer/collocations/llr/Gram.html" title="class in org.apache.mahout.vectorizer.collocations.llr">Gram</a>&gt;</pre>
<div class="block">Pass 1 of the Collocation discovery job, which generates ngrams and emits the ngrams and their component (n-1)grams.
Input is a SequenceFile&lt;Text,StringTuple&gt;, where the key is a document id and the value is the tokenized document.
</div>
</li>
</ul>
</div>
<div class="summary">
<ul class="blockList">
<li class="blockList">
<!-- ======== NESTED CLASS SUMMARY ======== -->
<ul class="blockList">
<li class="blockList"><a name="nested.class.summary">
<!-- -->
</a>
<h3>Nested Class Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Nested Class Summary table, listing nested classes, and an explanation">
<caption><span>Nested Classes</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colLast" scope="col">Class and Description</th>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static class&nbsp;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/mahout/vectorizer/collocations/llr/CollocMapper.Count.html" title="enum in org.apache.mahout.vectorizer.collocations.llr">CollocMapper.Count</a></span></code>&nbsp;</td>
</tr>
</table>
<ul class="blockList">
<li class="blockList"><a name="nested.classes.inherited.from.class.org.apache.hadoop.mapreduce.Mapper">
<!-- -->
</a>
<h3>Nested classes/interfaces inherited from class&nbsp;org.apache.hadoop.mapreduce.Mapper</h3>
<code>org.apache.hadoop.mapreduce.Mapper.Context</code></li>
</ul>
</li>
</ul>
<!-- =========== FIELD SUMMARY =========== -->
<ul class="blockList">
<li class="blockList"><a name="field.summary">
<!-- -->
</a>
<h3>Field Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Field Summary table, listing fields, and an explanation">
<caption><span>Fields</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colLast" scope="col">Field and Description</th>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/mahout/vectorizer/collocations/llr/CollocMapper.html#MAX_SHINGLE_SIZE">MAX_SHINGLE_SIZE</a></span></code>&nbsp;</td>
</tr>
</table>
</li>
</ul>
<!-- ======== CONSTRUCTOR SUMMARY ======== -->
<ul class="blockList">
<li class="blockList"><a name="constructor.summary">
<!-- -->
</a>
<h3>Constructor Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation">
<caption><span>Constructors</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colOne" scope="col">Constructor and Description</th>
</tr>
<tr class="altColor">
<td class="colOne"><code><span class="memberNameLink"><a href="../../../../../../org/apache/mahout/vectorizer/collocations/llr/CollocMapper.html#CollocMapper--">CollocMapper</a></span>()</code>&nbsp;</td>
</tr>
</table>
</li>
</ul>
<!-- ========== METHOD SUMMARY =========== -->
<ul class="blockList">
<li class="blockList"><a name="method.summary">
<!-- -->
</a>
<h3>Method Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
<caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd">&nbsp;</span></span><span id="t2" class="tableTab"><span><a href="javascript:show(2);">Instance Methods</a></span><span class="tabEnd">&nbsp;</span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd">&nbsp;</span></span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colLast" scope="col">Method and Description</th>
</tr>
<tr id="i0" class="altColor">
<td class="colFirst"><code>protected void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/mahout/vectorizer/collocations/llr/CollocMapper.html#map-org.apache.hadoop.io.Text-org.apache.mahout.common.StringTuple-org.apache.hadoop.mapreduce.Mapper.Context-">map</a></span>(org.apache.hadoop.io.Text&nbsp;key,
<a href="../../../../../../org/apache/mahout/common/StringTuple.html" title="class in org.apache.mahout.common">StringTuple</a>&nbsp;value,
org.apache.hadoop.mapreduce.Mapper.Context&nbsp;context)</code>
<div class="block">Collocation finder: pass 1 map phase.</div>
</td>
</tr>
<tr id="i1" class="rowColor">
<td class="colFirst"><code>protected void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/mahout/vectorizer/collocations/llr/CollocMapper.html#setup-org.apache.hadoop.mapreduce.Mapper.Context-">setup</a></span>(org.apache.hadoop.mapreduce.Mapper.Context&nbsp;context)</code>&nbsp;</td>
</tr>
</table>
<ul class="blockList">
<li class="blockList"><a name="methods.inherited.from.class.org.apache.hadoop.mapreduce.Mapper">
<!-- -->
</a>
<h3>Methods inherited from class&nbsp;org.apache.hadoop.mapreduce.Mapper</h3>
<code>cleanup, run</code></li>
</ul>
<ul class="blockList">
<li class="blockList"><a name="methods.inherited.from.class.java.lang.Object">
<!-- -->
</a>
<h3>Methods inherited from class&nbsp;java.lang.<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true" title="class or interface in java.lang">Object</a></h3>
<code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#clone--" title="class or interface in java.lang">clone</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#equals-java.lang.Object-" title="class or interface in java.lang">equals</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#finalize--" title="class or interface in java.lang">finalize</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#getClass--" title="class or interface in java.lang">getClass</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#hashCode--" title="class or interface in java.lang">hashCode</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#notify--" title="class or interface in java.lang">notify</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#notifyAll--" title="class or interface in java.lang">notifyAll</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#toString--" title="class or interface in java.lang">toString</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#wait--" title="class or interface in java.lang">wait</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#wait-long-" title="class or interface in java.lang">wait</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#wait-long-int-" title="class or interface in java.lang">wait</a></code></li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
<div class="details">
<ul class="blockList">
<li class="blockList">
<!-- ============ FIELD DETAIL =========== -->
<ul class="blockList">
<li class="blockList"><a name="field.detail">
<!-- -->
</a>
<h3>Field Detail</h3>
<a name="MAX_SHINGLE_SIZE">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>MAX_SHINGLE_SIZE</h4>
<pre>public static final&nbsp;<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a> MAX_SHINGLE_SIZE</pre>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../../../constant-values.html#org.apache.mahout.vectorizer.collocations.llr.CollocMapper.MAX_SHINGLE_SIZE">Constant Field Values</a></dd>
</dl>
</li>
</ul>
</li>
</ul>
<!-- ========= CONSTRUCTOR DETAIL ======== -->
<ul class="blockList">
<li class="blockList"><a name="constructor.detail">
<!-- -->
</a>
<h3>Constructor Detail</h3>
<a name="CollocMapper--">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>CollocMapper</h4>
<pre>public&nbsp;CollocMapper()</pre>
</li>
</ul>
</li>
</ul>
<!-- ============ METHOD DETAIL ========== -->
<ul class="blockList">
<li class="blockList"><a name="method.detail">
<!-- -->
</a>
<h3>Method Detail</h3>
<a name="map-org.apache.hadoop.io.Text-org.apache.mahout.common.StringTuple-org.apache.hadoop.mapreduce.Mapper.Context-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>map</h4>
<pre>protected&nbsp;void&nbsp;map(org.apache.hadoop.io.Text&nbsp;key,
<a href="../../../../../../org/apache/mahout/common/StringTuple.html" title="class in org.apache.mahout.common">StringTuple</a>&nbsp;value,
org.apache.hadoop.mapreduce.Mapper.Context&nbsp;context)
throws <a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a>,
<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/InterruptedException.html?is-external=true" title="class or interface in java.lang">InterruptedException</a></pre>
<div class="block">Collocation finder: pass 1 map phase.
<p>
Receives a token stream which gets passed through a Lucene ShingleFilter. The ShingleFilter delivers ngrams of
the appropriate size, which are then decomposed into head and tail subgrams that are collected in the
following manner:
</p>
<pre>
k:head_key, v:head_subgram
k:head_key,ngram_key, v:ngram
k:tail_key, v:tail_subgram
k:tail_key,ngram_key, v:ngram
</pre>
<p>
The 'head' or 'tail' prefix is used to specify whether the subgram in question is the head or tail of the
ngram. In this implementation the head of the ngram is an (n-1)gram, and the tail is a (1)gram.
</p>
<p>
For example, given 'click and clack' and an ngram length of 3:
</p>
<pre>
k: head_'click and', v:head_'click and'
k: head_'click and',ngram_'click and clack', v:ngram_'click and clack'
k: tail_'clack', v:tail_'clack'
k: tail_'clack',ngram_'click and clack', v:ngram_'click and clack'
</pre>
<p>
Also counts the total number of ngrams encountered and adds it to the counter
CollocDriver.Count.NGRAM_TOTAL.
</p></div>
<dl>
<dt><span class="overrideSpecifyLabel">Overrides:</span></dt>
<dd><code>map</code>&nbsp;in class&nbsp;<code>org.apache.hadoop.mapreduce.Mapper&lt;org.apache.hadoop.io.Text,<a href="../../../../../../org/apache/mahout/common/StringTuple.html" title="class in org.apache.mahout.common">StringTuple</a>,<a href="../../../../../../org/apache/mahout/vectorizer/collocations/llr/GramKey.html" title="class in org.apache.mahout.vectorizer.collocations.llr">GramKey</a>,<a href="../../../../../../org/apache/mahout/vectorizer/collocations/llr/Gram.html" title="class in org.apache.mahout.vectorizer.collocations.llr">Gram</a>&gt;</code></dd>
<dt><span class="throwsLabel">Throws:</span></dt>
<dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></code> - if there's a problem with the ShingleFilter reading data or the collector collecting output.</dd>
<dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/InterruptedException.html?is-external=true" title="class or interface in java.lang">InterruptedException</a></code></dd>
</dl>
</li>
</ul>
<a name="setup-org.apache.hadoop.mapreduce.Mapper.Context-">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>setup</h4>
<pre>protected&nbsp;void&nbsp;setup(org.apache.hadoop.mapreduce.Mapper.Context&nbsp;context)
throws <a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a>,
<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/InterruptedException.html?is-external=true" title="class or interface in java.lang">InterruptedException</a></pre>
<dl>
<dt><span class="overrideSpecifyLabel">Overrides:</span></dt>
<dd><code>setup</code>&nbsp;in class&nbsp;<code>org.apache.hadoop.mapreduce.Mapper&lt;org.apache.hadoop.io.Text,<a href="../../../../../../org/apache/mahout/common/StringTuple.html" title="class in org.apache.mahout.common">StringTuple</a>,<a href="../../../../../../org/apache/mahout/vectorizer/collocations/llr/GramKey.html" title="class in org.apache.mahout.vectorizer.collocations.llr">GramKey</a>,<a href="../../../../../../org/apache/mahout/vectorizer/collocations/llr/Gram.html" title="class in org.apache.mahout.vectorizer.collocations.llr">Gram</a>&gt;</code></dd>
<dt><span class="throwsLabel">Throws:</span></dt>
<dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></code></dd>
<dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/InterruptedException.html?is-external=true" title="class or interface in java.lang">InterruptedException</a></code></dd>
</dl>
</li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
</div>
<!-- ========= END OF CLASS DATA ========= -->
<!-- ======= START OF BOTTOM NAVBAR ====== -->
<div class="bottomNav"><a name="navbar.bottom">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.bottom.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="class-use/CollocMapper.html">Use</a></li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../../../index-all.html">Index</a></li>
<li><a href="../../../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li><a href="../../../../../../org/apache/mahout/vectorizer/collocations/llr/CollocDriver.html" title="class in org.apache.mahout.vectorizer.collocations.llr"><span class="typeNameLink">Prev&nbsp;Class</span></a></li>
<li><a href="../../../../../../org/apache/mahout/vectorizer/collocations/llr/CollocMapper.Count.html" title="enum in org.apache.mahout.vectorizer.collocations.llr"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../../../../../index.html?org/apache/mahout/vectorizer/collocations/llr/CollocMapper.html" target="_top">Frames</a></li>
<li><a href="CollocMapper.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_bottom">
<li><a href="../../../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
allClassesLink = document.getElementById("allclasses_navbar_bottom");
if(window==top) {
allClassesLink.style.display = "block";
}
else {
allClassesLink.style.display = "none";
}
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li><a href="#nested.class.summary">Nested</a>&nbsp;|&nbsp;</li>
<li><a href="#field.summary">Field</a>&nbsp;|&nbsp;</li>
<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li><a href="#field.detail">Field</a>&nbsp;|&nbsp;</li>
<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a name="skip.navbar.bottom">
<!-- -->
</a></div>
<!-- ======== END OF BOTTOM NAVBAR ======= -->
<p class="legalCopy"><small>Copyright &#169; 2008&#x2013;2017 <a href="http://www.apache.org/">The Apache Software Foundation</a>. All rights reserved.</small></p>
</body>
</html>