<!-- blob: f8583b66690ca0a5ff9dafc7a7ffdb393ed21806 [file] [log] [blame] -->
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (1.8.0_121) on Fri Apr 14 22:11:37 PDT 2017 -->
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>TFIDFConverter (Mahout Map-Reduce 0.13.0 API)</title>
<meta name="date" content="2017-04-14">
<link rel="stylesheet" type="text/css" href="../../../../../stylesheet.css" title="Style">
<script type="text/javascript" src="../../../../../script.js"></script>
</head>
<body>
<script type="text/javascript"><!--
// Copy this page's title onto the parent window's document, unless the page
// URL contains the 'is-external=true' marker.
try {
    if (location.href.indexOf('is-external=true') < 0) {
        parent.document.title = "TFIDFConverter (Mahout Map-Reduce 0.13.0 API)";
    }
} catch (ignored) {
    // Best effort: any error raised while updating the parent title is dropped.
}
//-->
// Data describing the Method Summary table. Nothing on this page reads these
// variables directly; presumably the shared script.js does (TODO confirm).

// Keys are the ids of the method rows in the summary table; values are
// numeric codes (presumably method-category bit flags; verify in script.js).
var methods = {
    "i0": 9,
    "i1": 9
};

// Keys are numeric codes; each value is [id of the tab element, tab label].
var tabs = {
    65535: ["t0", "All Methods"],
    1: ["t1", "Static Methods"],
    8: ["t4", "Concrete Methods"]
};

// CSS class names used for the striped table rows.
var altColor = "altColor",
    rowColor = "rowColor";

// CSS class names used for the inactive and active table tabs.
var tableTab = "tableTab",
    activeTableTab = "activeTableTab";
</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
<!-- ========= START OF TOP NAVBAR ======= -->
<div class="topNav"><a name="navbar.top">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.top.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="class-use/TFIDFConverter.html">Use</a></li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../../index-all.html">Index</a></li>
<li><a href="../../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li>Prev&nbsp;Class</li>
<li><a href="../../../../../org/apache/mahout/vectorizer/tfidf/TFIDFPartialVectorReducer.html" title="class in org.apache.mahout.vectorizer.tfidf"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../../../../index.html?org/apache/mahout/vectorizer/tfidf/TFIDFConverter.html" target="_top">Frames</a></li>
<li><a href="TFIDFConverter.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_top">
<li><a href="../../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
// Show the top navbar's "All Classes" list only when this page is the
// top-level window; hide it when the page is nested inside another window.
allClassesLink = document.getElementById("allclasses_navbar_top");
allClassesLink.style.display = (window == top) ? "block" : "none";
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li>Nested&nbsp;|&nbsp;</li>
<li><a href="#field.summary">Field</a>&nbsp;|&nbsp;</li>
<li>Constr&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li><a href="#field.detail">Field</a>&nbsp;|&nbsp;</li>
<li>Constr&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a name="skip.navbar.top">
<!-- -->
</a></div>
<!-- ========= END OF TOP NAVBAR ========= -->
<!-- ======== START OF CLASS DATA ======== -->
<div class="header">
<div class="subTitle">org.apache.mahout.vectorizer.tfidf</div>
<h2 title="Class TFIDFConverter" class="title">Class TFIDFConverter</h2>
</div>
<div class="contentContainer">
<ul class="inheritance">
<li><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true" title="class or interface in java.lang">java.lang.Object</a></li>
<li>
<ul class="inheritance">
<li>org.apache.mahout.vectorizer.tfidf.TFIDFConverter</li>
</ul>
</li>
</ul>
<div class="description">
<ul class="blockList">
<li class="blockList">
<hr>
<br>
<pre>public final class <span class="typeNameLabel">TFIDFConverter</span>
extends <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true" title="class or interface in java.lang">Object</a></pre>
<div class="block">This class converts a set of input vectors with term frequencies to TfIdf vectors. The Sequence file input
should have a <code>WritableComparable</code> key and a
<a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/math/VectorWritable.html?is-external=true" title="class or interface in org.apache.mahout.math"><code>VectorWritable</code></a> value containing the
term frequency vector. This conversion class uses multiple map/reduce jobs to convert the vectors to TfIdf
format.</div>
</li>
</ul>
</div>
<div class="summary">
<ul class="blockList">
<li class="blockList">
<!-- =========== FIELD SUMMARY =========== -->
<ul class="blockList">
<li class="blockList"><a name="field.summary">
<!-- -->
</a>
<h3>Field Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Field Summary table, listing fields, and an explanation">
<caption><span>Fields</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colLast" scope="col">Field and Description</th>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/mahout/vectorizer/tfidf/TFIDFConverter.html#FEATURE_COUNT">FEATURE_COUNT</a></span></code>&nbsp;</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>static <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/mahout/vectorizer/tfidf/TFIDFConverter.html#FREQUENCY_FILE">FREQUENCY_FILE</a></span></code>&nbsp;</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/mahout/vectorizer/tfidf/TFIDFConverter.html#MAX_DF">MAX_DF</a></span></code>&nbsp;</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>static <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/mahout/vectorizer/tfidf/TFIDFConverter.html#MIN_DF">MIN_DF</a></span></code>&nbsp;</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/mahout/vectorizer/tfidf/TFIDFConverter.html#VECTOR_COUNT">VECTOR_COUNT</a></span></code>&nbsp;</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>static <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/mahout/vectorizer/tfidf/TFIDFConverter.html#WORDCOUNT_OUTPUT_FOLDER">WORDCOUNT_OUTPUT_FOLDER</a></span></code>&nbsp;</td>
</tr>
</table>
</li>
</ul>
<!-- ========== METHOD SUMMARY =========== -->
<ul class="blockList">
<li class="blockList"><a name="method.summary">
<!-- -->
</a>
<h3>Method Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
<caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd">&nbsp;</span></span><span id="t1" class="tableTab"><span><a href="javascript:show(1);">Static Methods</a></span><span class="tabEnd">&nbsp;</span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd">&nbsp;</span></span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colLast" scope="col">Method and Description</th>
</tr>
<tr id="i0" class="altColor">
<td class="colFirst"><code>static <a href="../../../../../org/apache/mahout/common/Pair.html" title="class in org.apache.mahout.common">Pair</a>&lt;<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Long.html?is-external=true" title="class or interface in java.lang">Long</a>[],<a href="http://docs.oracle.com/javase/7/docs/api/java/util/List.html?is-external=true" title="class or interface in java.util">List</a>&lt;org.apache.hadoop.fs.Path&gt;&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/mahout/vectorizer/tfidf/TFIDFConverter.html#calculateDF-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-org.apache.hadoop.conf.Configuration-int-">calculateDF</a></span>(org.apache.hadoop.fs.Path&nbsp;input,
org.apache.hadoop.fs.Path&nbsp;output,
org.apache.hadoop.conf.Configuration&nbsp;baseConf,
int&nbsp;chunkSizeInMegabytes)</code>
<div class="block">Calculates the document frequencies of all terms from the input set of vectors in
<code>SequenceFile</code> format.</div>
</td>
</tr>
<tr id="i1" class="rowColor">
<td class="colFirst"><code>static void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/mahout/vectorizer/tfidf/TFIDFConverter.html#processTfIdf-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-org.apache.hadoop.conf.Configuration-org.apache.mahout.common.Pair-int-long-float-boolean-boolean-boolean-int-">processTfIdf</a></span>(org.apache.hadoop.fs.Path&nbsp;input,
org.apache.hadoop.fs.Path&nbsp;output,
org.apache.hadoop.conf.Configuration&nbsp;baseConf,
<a href="../../../../../org/apache/mahout/common/Pair.html" title="class in org.apache.mahout.common">Pair</a>&lt;<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Long.html?is-external=true" title="class or interface in java.lang">Long</a>[],<a href="http://docs.oracle.com/javase/7/docs/api/java/util/List.html?is-external=true" title="class or interface in java.util">List</a>&lt;org.apache.hadoop.fs.Path&gt;&gt;&nbsp;datasetFeatures,
int&nbsp;minDf,
long&nbsp;maxDF,
float&nbsp;normPower,
boolean&nbsp;logNormalize,
boolean&nbsp;sequentialAccessOutput,
boolean&nbsp;namedVector,
int&nbsp;numReducers)</code>
<div class="block">Create Term Frequency-Inverse Document Frequency (Tf-Idf) Vectors from the input set of vectors in
<code>SequenceFile</code> format.</div>
</td>
</tr>
</table>
<ul class="blockList">
<li class="blockList"><a name="methods.inherited.from.class.java.lang.Object">
<!-- -->
</a>
<h3>Methods inherited from class&nbsp;java.lang.<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true" title="class or interface in java.lang">Object</a></h3>
<code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#clone--" title="class or interface in java.lang">clone</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#equals-java.lang.Object-" title="class or interface in java.lang">equals</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#finalize--" title="class or interface in java.lang">finalize</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#getClass--" title="class or interface in java.lang">getClass</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#hashCode--" title="class or interface in java.lang">hashCode</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#notify--" title="class or interface in java.lang">notify</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#notifyAll--" title="class or interface in java.lang">notifyAll</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#toString--" title="class or interface in java.lang">toString</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#wait--" title="class or interface in java.lang">wait</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#wait-long-" title="class or interface in java.lang">wait</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#wait-long-int-" title="class or interface in java.lang">wait</a></code></li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
<div class="details">
<ul class="blockList">
<li class="blockList">
<!-- ============ FIELD DETAIL =========== -->
<ul class="blockList">
<li class="blockList"><a name="field.detail">
<!-- -->
</a>
<h3>Field Detail</h3>
<a name="VECTOR_COUNT">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>VECTOR_COUNT</h4>
<pre>public static final&nbsp;<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a> VECTOR_COUNT</pre>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../../constant-values.html#org.apache.mahout.vectorizer.tfidf.TFIDFConverter.VECTOR_COUNT">Constant Field Values</a></dd>
</dl>
</li>
</ul>
<a name="FEATURE_COUNT">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>FEATURE_COUNT</h4>
<pre>public static final&nbsp;<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a> FEATURE_COUNT</pre>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../../constant-values.html#org.apache.mahout.vectorizer.tfidf.TFIDFConverter.FEATURE_COUNT">Constant Field Values</a></dd>
</dl>
</li>
</ul>
<a name="MIN_DF">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>MIN_DF</h4>
<pre>public static final&nbsp;<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a> MIN_DF</pre>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../../constant-values.html#org.apache.mahout.vectorizer.tfidf.TFIDFConverter.MIN_DF">Constant Field Values</a></dd>
</dl>
</li>
</ul>
<a name="MAX_DF">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>MAX_DF</h4>
<pre>public static final&nbsp;<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a> MAX_DF</pre>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../../constant-values.html#org.apache.mahout.vectorizer.tfidf.TFIDFConverter.MAX_DF">Constant Field Values</a></dd>
</dl>
</li>
</ul>
<a name="FREQUENCY_FILE">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>FREQUENCY_FILE</h4>
<pre>public static final&nbsp;<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a> FREQUENCY_FILE</pre>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../../constant-values.html#org.apache.mahout.vectorizer.tfidf.TFIDFConverter.FREQUENCY_FILE">Constant Field Values</a></dd>
</dl>
</li>
</ul>
<a name="WORDCOUNT_OUTPUT_FOLDER">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>WORDCOUNT_OUTPUT_FOLDER</h4>
<pre>public static final&nbsp;<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a> WORDCOUNT_OUTPUT_FOLDER</pre>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../../constant-values.html#org.apache.mahout.vectorizer.tfidf.TFIDFConverter.WORDCOUNT_OUTPUT_FOLDER">Constant Field Values</a></dd>
</dl>
</li>
</ul>
</li>
</ul>
<!-- ============ METHOD DETAIL ========== -->
<ul class="blockList">
<li class="blockList"><a name="method.detail">
<!-- -->
</a>
<h3>Method Detail</h3>
<a name="processTfIdf-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-org.apache.hadoop.conf.Configuration-org.apache.mahout.common.Pair-int-long-float-boolean-boolean-boolean-int-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>processTfIdf</h4>
<pre>public static&nbsp;void&nbsp;processTfIdf(org.apache.hadoop.fs.Path&nbsp;input,
org.apache.hadoop.fs.Path&nbsp;output,
org.apache.hadoop.conf.Configuration&nbsp;baseConf,
<a href="../../../../../org/apache/mahout/common/Pair.html" title="class in org.apache.mahout.common">Pair</a>&lt;<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Long.html?is-external=true" title="class or interface in java.lang">Long</a>[],<a href="http://docs.oracle.com/javase/7/docs/api/java/util/List.html?is-external=true" title="class or interface in java.util">List</a>&lt;org.apache.hadoop.fs.Path&gt;&gt;&nbsp;datasetFeatures,
int&nbsp;minDf,
long&nbsp;maxDF,
float&nbsp;normPower,
boolean&nbsp;logNormalize,
boolean&nbsp;sequentialAccessOutput,
boolean&nbsp;namedVector,
int&nbsp;numReducers)
throws <a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a>,
<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/InterruptedException.html?is-external=true" title="class or interface in java.lang">InterruptedException</a>,
<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/ClassNotFoundException.html?is-external=true" title="class or interface in java.lang">ClassNotFoundException</a></pre>
<div class="block">Create Term Frequency-Inverse Document Frequency (Tf-Idf) Vectors from the input set of vectors in
<code>SequenceFile</code> format. This job uses a fixed limit on the maximum memory used by the feature chunk
per node thereby splitting the process across multiple map/reduces.
Before using this method, calculateDF should be called.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>input</code> - input directory of the vectors in <code>SequenceFile</code> format</dd>
<dd><code>output</code> - output directory where <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/math/RandomAccessSparseVector.html?is-external=true" title="class or interface in org.apache.mahout.math"><code>RandomAccessSparseVector</code></a>'s of the document
are generated</dd>
<dd><code>datasetFeatures</code> - Document frequencies information calculated by calculateDF</dd>
<dd><code>minDf</code> - The minimum document frequency. Default 1</dd>
<dd><code>maxDF</code> - The max percentage of vectors for the DF. Can be used to remove really high frequency features.
Expressed as an integer between 0 and 100. Default 99</dd>
<dd><code>numReducers</code> - The number of reducers to spawn. This also affects the possible parallelism since each reducer
will typically produce a single output file containing tf-idf vectors for a subset of the
documents in the corpus.</dd>
<dt><span class="throwsLabel">Throws:</span></dt>
<dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></code></dd>
<dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/InterruptedException.html?is-external=true" title="class or interface in java.lang">InterruptedException</a></code></dd>
<dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/ClassNotFoundException.html?is-external=true" title="class or interface in java.lang">ClassNotFoundException</a></code></dd>
</dl>
</li>
</ul>
<a name="calculateDF-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-org.apache.hadoop.conf.Configuration-int-">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>calculateDF</h4>
<pre>public static&nbsp;<a href="../../../../../org/apache/mahout/common/Pair.html" title="class in org.apache.mahout.common">Pair</a>&lt;<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Long.html?is-external=true" title="class or interface in java.lang">Long</a>[],<a href="http://docs.oracle.com/javase/7/docs/api/java/util/List.html?is-external=true" title="class or interface in java.util">List</a>&lt;org.apache.hadoop.fs.Path&gt;&gt;&nbsp;calculateDF(org.apache.hadoop.fs.Path&nbsp;input,
org.apache.hadoop.fs.Path&nbsp;output,
org.apache.hadoop.conf.Configuration&nbsp;baseConf,
int&nbsp;chunkSizeInMegabytes)
throws <a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a>,
<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/InterruptedException.html?is-external=true" title="class or interface in java.lang">InterruptedException</a>,
<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/ClassNotFoundException.html?is-external=true" title="class or interface in java.lang">ClassNotFoundException</a></pre>
<div class="block">Calculates the document frequencies of all terms from the input set of vectors in
<code>SequenceFile</code> format. This job uses a fixed limit on the maximum memory used by the feature chunk
per node thereby splitting the process across multiple map/reduces.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>input</code> - input directory of the vectors in <code>SequenceFile</code> format</dd>
<dd><code>output</code> - output directory where document frequencies will be stored</dd>
<dd><code>chunkSizeInMegabytes</code> - the size in MB of the feature =&gt; id chunk to be kept in memory at each node during the Map/Reduce
stage. It is recommended that you calculate this based on the number of cores and the free memory
available to you per node. For example, if you have 2 cores and around 1GB of extra memory to spare, we
recommend you use a split size of around 400-500MB so that two simultaneous reducers can create
partial vectors without thrashing the system due to increased swapping.</dd>
<dt><span class="throwsLabel">Throws:</span></dt>
<dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></code></dd>
<dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/InterruptedException.html?is-external=true" title="class or interface in java.lang">InterruptedException</a></code></dd>
<dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/ClassNotFoundException.html?is-external=true" title="class or interface in java.lang">ClassNotFoundException</a></code></dd>
</dl>
</li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
</div>
<!-- ========= END OF CLASS DATA ========= -->
<!-- ======= START OF BOTTOM NAVBAR ====== -->
<div class="bottomNav"><a name="navbar.bottom">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.bottom.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="class-use/TFIDFConverter.html">Use</a></li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../../index-all.html">Index</a></li>
<li><a href="../../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li>Prev&nbsp;Class</li>
<li><a href="../../../../../org/apache/mahout/vectorizer/tfidf/TFIDFPartialVectorReducer.html" title="class in org.apache.mahout.vectorizer.tfidf"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../../../../index.html?org/apache/mahout/vectorizer/tfidf/TFIDFConverter.html" target="_top">Frames</a></li>
<li><a href="TFIDFConverter.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_bottom">
<li><a href="../../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
// Show the bottom navbar's "All Classes" list only when this page is the
// top-level window; hide it when the page is nested inside another window.
allClassesLink = document.getElementById("allclasses_navbar_bottom");
allClassesLink.style.display = (window == top) ? "block" : "none";
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li>Nested&nbsp;|&nbsp;</li>
<li><a href="#field.summary">Field</a>&nbsp;|&nbsp;</li>
<li>Constr&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li><a href="#field.detail">Field</a>&nbsp;|&nbsp;</li>
<li>Constr&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a name="skip.navbar.bottom">
<!-- -->
</a></div>
<!-- ======== END OF BOTTOM NAVBAR ======= -->
<p class="legalCopy"><small>Copyright &#169; 2008&#x2013;2017 <a href="http://www.apache.org/">The Apache Software Foundation</a>. All rights reserved.</small></p>
</body>
</html>