| <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> |
| <!-- NewPage --> |
| <html lang="en"> |
| <head> |
| <!-- Generated by javadoc (1.8.0_121) on Fri Apr 14 22:11:37 PDT 2017 --> |
| <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> |
| <title>TFIDFConverter (Mahout Map-Reduce 0.13.0 API)</title> |
| <meta name="date" content="2017-04-14"> |
| <link rel="stylesheet" type="text/css" href="../../../../../stylesheet.css" title="Style"> |
| <script type="text/javascript" src="../../../../../script.js"></script> |
| </head> |
| <body> |
| <script type="text/javascript"><!-- |
| try { |
| if (location.href.indexOf('is-external=true') == -1) { |
| parent.document.title="TFIDFConverter (Mahout Map-Reduce 0.13.0 API)"; |
| } |
| } |
| catch(err) { |
| } |
| //--> |
| var methods = {"i0":9,"i1":9}; |
| var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],8:["t4","Concrete Methods"]}; |
| var altColor = "altColor"; |
| var rowColor = "rowColor"; |
| var tableTab = "tableTab"; |
| var activeTableTab = "activeTableTab"; |
| </script> |
| <noscript> |
| <div>JavaScript is disabled on your browser.</div> |
| </noscript> |
| <!-- ========= START OF TOP NAVBAR ======= --> |
| <div class="topNav"><a name="navbar.top"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div> |
| <a name="navbar.top.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../../overview-summary.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="class-use/TFIDFConverter.html">Use</a></li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList"> |
| <li>Prev Class</li> |
| <li><a href="../../../../../org/apache/mahout/vectorizer/tfidf/TFIDFPartialVectorReducer.html" title="class in org.apache.mahout.vectorizer.tfidf"><span class="typeNameLink">Next Class</span></a></li> |
| </ul> |
| <ul class="navList"> |
| <li><a href="../../../../../index.html?org/apache/mahout/vectorizer/tfidf/TFIDFConverter.html" target="_top">Frames</a></li> |
| <li><a href="TFIDFConverter.html" target="_top">No Frames</a></li> |
| </ul> |
| <ul class="navList" id="allclasses_navbar_top"> |
| <li><a href="../../../../../allclasses-noframe.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| allClassesLink = document.getElementById("allclasses_navbar_top"); |
| if(window==top) { |
| allClassesLink.style.display = "block"; |
| } |
| else { |
| allClassesLink.style.display = "none"; |
| } |
| //--> |
| </script> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li>Nested | </li> |
| <li><a href="#field.summary">Field</a> | </li> |
| <li>Constr | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li><a href="#field.detail">Field</a> | </li> |
| <li>Constr | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a name="skip.navbar.top"> |
| <!-- --> |
| </a></div> |
| <!-- ========= END OF TOP NAVBAR ========= --> |
| <!-- ======== START OF CLASS DATA ======== --> |
| <div class="header"> |
| <div class="subTitle">org.apache.mahout.vectorizer.tfidf</div> |
| <h2 title="Class TFIDFConverter" class="title">Class TFIDFConverter</h2> |
| </div> |
| <div class="contentContainer"> |
| <ul class="inheritance"> |
| <li><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true" title="class or interface in java.lang">java.lang.Object</a></li> |
| <li> |
| <ul class="inheritance"> |
| <li>org.apache.mahout.vectorizer.tfidf.TFIDFConverter</li> |
| </ul> |
| </li> |
| </ul> |
| <div class="description"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <hr> |
| <br> |
| <pre>public final class <span class="typeNameLabel">TFIDFConverter</span> |
| extends <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true" title="class or interface in java.lang">Object</a></pre> |
| <div class="block">This class converts a set of input vectors with term frequencies to TfIdf vectors. The Sequence file input |
| should have a <code>WritableComparable</code> key and a |
| <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/math/VectorWritable.html?is-external=true" title="class or interface in org.apache.mahout.math"><code>VectorWritable</code></a> value containing the |
| term frequency vector. This conversion class uses multiple map/reduces to convert the vectors to TfIdf |
| format.</div> |
| </li> |
| </ul> |
| </div> |
| <div class="summary"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- =========== FIELD SUMMARY =========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="field.summary"> |
| <!-- --> |
| </a> |
| <h3>Field Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Field Summary table, listing fields, and an explanation"> |
| <caption><span>Fields</span><span class="tabEnd"> </span></caption> |
| <tr> |
| <th class="colFirst" scope="col">Modifier and Type</th> |
| <th class="colLast" scope="col">Field and Description</th> |
| </tr> |
| <tr class="altColor"> |
| <td class="colFirst"><code>static <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/mahout/vectorizer/tfidf/TFIDFConverter.html#FEATURE_COUNT">FEATURE_COUNT</a></span></code> </td> |
| </tr> |
| <tr class="rowColor"> |
| <td class="colFirst"><code>static <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/mahout/vectorizer/tfidf/TFIDFConverter.html#FREQUENCY_FILE">FREQUENCY_FILE</a></span></code> </td> |
| </tr> |
| <tr class="altColor"> |
| <td class="colFirst"><code>static <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/mahout/vectorizer/tfidf/TFIDFConverter.html#MAX_DF">MAX_DF</a></span></code> </td> |
| </tr> |
| <tr class="rowColor"> |
| <td class="colFirst"><code>static <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/mahout/vectorizer/tfidf/TFIDFConverter.html#MIN_DF">MIN_DF</a></span></code> </td> |
| </tr> |
| <tr class="altColor"> |
| <td class="colFirst"><code>static <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/mahout/vectorizer/tfidf/TFIDFConverter.html#VECTOR_COUNT">VECTOR_COUNT</a></span></code> </td> |
| </tr> |
| <tr class="rowColor"> |
| <td class="colFirst"><code>static <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/mahout/vectorizer/tfidf/TFIDFConverter.html#WORDCOUNT_OUTPUT_FOLDER">WORDCOUNT_OUTPUT_FOLDER</a></span></code> </td> |
| </tr> |
| </table> |
| </li> |
| </ul> |
| <!-- ========== METHOD SUMMARY =========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.summary"> |
| <!-- --> |
| </a> |
| <h3>Method Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation"> |
| <caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd"> </span></span><span id="t1" class="tableTab"><span><a href="javascript:show(1);">Static Methods</a></span><span class="tabEnd"> </span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd"> </span></span></caption> |
| <tr> |
| <th class="colFirst" scope="col">Modifier and Type</th> |
| <th class="colLast" scope="col">Method and Description</th> |
| </tr> |
| <tr id="i0" class="altColor"> |
| <td class="colFirst"><code>static <a href="../../../../../org/apache/mahout/common/Pair.html" title="class in org.apache.mahout.common">Pair</a>&lt;<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Long.html?is-external=true" title="class or interface in java.lang">Long</a>[],<a href="http://docs.oracle.com/javase/7/docs/api/java/util/List.html?is-external=true" title="class or interface in java.util">List</a>&lt;org.apache.hadoop.fs.Path&gt;&gt;</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/mahout/vectorizer/tfidf/TFIDFConverter.html#calculateDF-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-org.apache.hadoop.conf.Configuration-int-">calculateDF</a></span>(org.apache.hadoop.fs.Path input, |
| org.apache.hadoop.fs.Path output, |
| org.apache.hadoop.conf.Configuration baseConf, |
| int chunkSizeInMegabytes)</code> |
| <div class="block">Calculates the document frequencies of all terms from the input set of vectors in |
| <code>SequenceFile</code> format.</div> |
| </td> |
| </tr> |
| <tr id="i1" class="rowColor"> |
| <td class="colFirst"><code>static void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/mahout/vectorizer/tfidf/TFIDFConverter.html#processTfIdf-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-org.apache.hadoop.conf.Configuration-org.apache.mahout.common.Pair-int-long-float-boolean-boolean-boolean-int-">processTfIdf</a></span>(org.apache.hadoop.fs.Path input, |
| org.apache.hadoop.fs.Path output, |
| org.apache.hadoop.conf.Configuration baseConf, |
| <a href="../../../../../org/apache/mahout/common/Pair.html" title="class in org.apache.mahout.common">Pair</a>&lt;<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Long.html?is-external=true" title="class or interface in java.lang">Long</a>[],<a href="http://docs.oracle.com/javase/7/docs/api/java/util/List.html?is-external=true" title="class or interface in java.util">List</a>&lt;org.apache.hadoop.fs.Path&gt;&gt; datasetFeatures, |
| int minDf, |
| long maxDF, |
| float normPower, |
| boolean logNormalize, |
| boolean sequentialAccessOutput, |
| boolean namedVector, |
| int numReducers)</code> |
| <div class="block">Create Term Frequency-Inverse Document Frequency (Tf-Idf) Vectors from the input set of vectors in |
| <code>SequenceFile</code> format.</div> |
| </td> |
| </tr> |
| </table> |
| <ul class="blockList"> |
| <li class="blockList"><a name="methods.inherited.from.class.java.lang.Object"> |
| <!-- --> |
| </a> |
| <h3>Methods inherited from class java.lang.<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true" title="class or interface in java.lang">Object</a></h3> |
| <code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#clone--" title="class or interface in java.lang">clone</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#equals-java.lang.Object-" title="class or interface in java.lang">equals</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#finalize--" title="class or interface in java.lang">finalize</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#getClass--" title="class or interface in java.lang">getClass</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#hashCode--" title="class or interface in java.lang">hashCode</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#notify--" title="class or interface in java.lang">notify</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#notifyAll--" title="class or interface in java.lang">notifyAll</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#toString--" title="class or interface in java.lang">toString</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#wait--" title="class or interface in java.lang">wait</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#wait-long-" title="class or interface in java.lang">wait</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#wait-long-int-" title="class or interface in java.lang">wait</a></code></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| <div class="details"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- ============ FIELD DETAIL =========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="field.detail"> |
| <!-- --> |
| </a> |
| <h3>Field Detail</h3> |
| <a name="VECTOR_COUNT"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>VECTOR_COUNT</h4> |
| <pre>public static final <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a> VECTOR_COUNT</pre> |
| <dl> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../../constant-values.html#org.apache.mahout.vectorizer.tfidf.TFIDFConverter.VECTOR_COUNT">Constant Field Values</a></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="FEATURE_COUNT"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>FEATURE_COUNT</h4> |
| <pre>public static final <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a> FEATURE_COUNT</pre> |
| <dl> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../../constant-values.html#org.apache.mahout.vectorizer.tfidf.TFIDFConverter.FEATURE_COUNT">Constant Field Values</a></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="MIN_DF"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>MIN_DF</h4> |
| <pre>public static final <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a> MIN_DF</pre> |
| <dl> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../../constant-values.html#org.apache.mahout.vectorizer.tfidf.TFIDFConverter.MIN_DF">Constant Field Values</a></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="MAX_DF"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>MAX_DF</h4> |
| <pre>public static final <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a> MAX_DF</pre> |
| <dl> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../../constant-values.html#org.apache.mahout.vectorizer.tfidf.TFIDFConverter.MAX_DF">Constant Field Values</a></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="FREQUENCY_FILE"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>FREQUENCY_FILE</h4> |
| <pre>public static final <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a> FREQUENCY_FILE</pre> |
| <dl> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../../constant-values.html#org.apache.mahout.vectorizer.tfidf.TFIDFConverter.FREQUENCY_FILE">Constant Field Values</a></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="WORDCOUNT_OUTPUT_FOLDER"> |
| <!-- --> |
| </a> |
| <ul class="blockListLast"> |
| <li class="blockList"> |
| <h4>WORDCOUNT_OUTPUT_FOLDER</h4> |
| <pre>public static final <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a> WORDCOUNT_OUTPUT_FOLDER</pre> |
| <dl> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../../constant-values.html#org.apache.mahout.vectorizer.tfidf.TFIDFConverter.WORDCOUNT_OUTPUT_FOLDER">Constant Field Values</a></dd> |
| </dl> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| <!-- ============ METHOD DETAIL ========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.detail"> |
| <!-- --> |
| </a> |
| <h3>Method Detail</h3> |
| <a name="processTfIdf-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-org.apache.hadoop.conf.Configuration-org.apache.mahout.common.Pair-int-long-float-boolean-boolean-boolean-int-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>processTfIdf</h4> |
| <pre>public static void processTfIdf(org.apache.hadoop.fs.Path input, |
| org.apache.hadoop.fs.Path output, |
| org.apache.hadoop.conf.Configuration baseConf, |
| <a href="../../../../../org/apache/mahout/common/Pair.html" title="class in org.apache.mahout.common">Pair</a>&lt;<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Long.html?is-external=true" title="class or interface in java.lang">Long</a>[],<a href="http://docs.oracle.com/javase/7/docs/api/java/util/List.html?is-external=true" title="class or interface in java.util">List</a>&lt;org.apache.hadoop.fs.Path&gt;&gt; datasetFeatures, |
| int minDf, |
| long maxDF, |
| float normPower, |
| boolean logNormalize, |
| boolean sequentialAccessOutput, |
| boolean namedVector, |
| int numReducers) |
| throws <a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a>, |
| <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/InterruptedException.html?is-external=true" title="class or interface in java.lang">InterruptedException</a>, |
| <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/ClassNotFoundException.html?is-external=true" title="class or interface in java.lang">ClassNotFoundException</a></pre> |
| <div class="block">Create Term Frequency-Inverse Document Frequency (Tf-Idf) Vectors from the input set of vectors in |
| <code>SequenceFile</code> format. This job uses a fixed limit on the maximum memory used by the feature chunk |
| per node thereby splitting the process across multiple map/reduces. |
| Before using this method, calculateDF should be called.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>input</code> - input directory of the vectors in <code>SequenceFile</code> format</dd> |
| <dd><code>output</code> - output directory where the <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/math/RandomAccessSparseVector.html?is-external=true" title="class or interface in org.apache.mahout.math"><code>RandomAccessSparseVector</code></a>s of the documents |
| are generated</dd> |
| <dd><code>datasetFeatures</code> - Document frequencies information calculated by calculateDF</dd> |
| <dd><code>minDf</code> - The minimum document frequency. Default 1</dd> |
| <dd><code>maxDF</code> - The max percentage of vectors for the DF. Can be used to remove really high frequency features. |
| Expressed as an integer between 0 and 100. Default 99</dd> |
| <dd><code>numReducers</code> - The number of reducers to spawn. This also affects the possible parallelism since each reducer |
| will typically produce a single output file containing tf-idf vectors for a subset of the |
| documents in the corpus.</dd> |
| <dt><span class="throwsLabel">Throws:</span></dt> |
| <dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></code></dd> |
| <dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/InterruptedException.html?is-external=true" title="class or interface in java.lang">InterruptedException</a></code></dd> |
| <dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/ClassNotFoundException.html?is-external=true" title="class or interface in java.lang">ClassNotFoundException</a></code></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="calculateDF-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-org.apache.hadoop.conf.Configuration-int-"> |
| <!-- --> |
| </a> |
| <ul class="blockListLast"> |
| <li class="blockList"> |
| <h4>calculateDF</h4> |
| <pre>public static <a href="../../../../../org/apache/mahout/common/Pair.html" title="class in org.apache.mahout.common">Pair</a>&lt;<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Long.html?is-external=true" title="class or interface in java.lang">Long</a>[],<a href="http://docs.oracle.com/javase/7/docs/api/java/util/List.html?is-external=true" title="class or interface in java.util">List</a>&lt;org.apache.hadoop.fs.Path&gt;&gt; calculateDF(org.apache.hadoop.fs.Path input, |
| org.apache.hadoop.fs.Path output, |
| org.apache.hadoop.conf.Configuration baseConf, |
| int chunkSizeInMegabytes) |
| throws <a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a>, |
| <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/InterruptedException.html?is-external=true" title="class or interface in java.lang">InterruptedException</a>, |
| <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/ClassNotFoundException.html?is-external=true" title="class or interface in java.lang">ClassNotFoundException</a></pre> |
| <div class="block">Calculates the document frequencies of all terms from the input set of vectors in |
| <code>SequenceFile</code> format. This job uses a fixed limit on the maximum memory used by the feature chunk |
| per node thereby splitting the process across multiple map/reduces.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>input</code> - input directory of the vectors in <code>SequenceFile</code> format</dd> |
| <dd><code>output</code> - output directory where document frequencies will be stored</dd> |
| <dd><code>chunkSizeInMegabytes</code> - the size in MB of the feature =&gt; id chunk to be kept in memory at each node during Map/Reduce |
| stage. It is recommended that you calculate this based on the number of cores and the free memory |
| available to you per node. Say you have 2 cores and around 1GB extra memory to spare: we |
| recommend you use a split size of around 400-500MB so that two simultaneous reducers can create |
| partial vectors without thrashing the system due to increased swapping.</dd> |
| <dt><span class="throwsLabel">Throws:</span></dt> |
| <dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></code></dd> |
| <dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/InterruptedException.html?is-external=true" title="class or interface in java.lang">InterruptedException</a></code></dd> |
| <dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/ClassNotFoundException.html?is-external=true" title="class or interface in java.lang">ClassNotFoundException</a></code></dd> |
| </dl> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| </div> |
| <!-- ========= END OF CLASS DATA ========= --> |
| <!-- ======= START OF BOTTOM NAVBAR ====== --> |
| <div class="bottomNav"><a name="navbar.bottom"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div> |
| <a name="navbar.bottom.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../../overview-summary.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="class-use/TFIDFConverter.html">Use</a></li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList"> |
| <li>Prev Class</li> |
| <li><a href="../../../../../org/apache/mahout/vectorizer/tfidf/TFIDFPartialVectorReducer.html" title="class in org.apache.mahout.vectorizer.tfidf"><span class="typeNameLink">Next Class</span></a></li> |
| </ul> |
| <ul class="navList"> |
| <li><a href="../../../../../index.html?org/apache/mahout/vectorizer/tfidf/TFIDFConverter.html" target="_top">Frames</a></li> |
| <li><a href="TFIDFConverter.html" target="_top">No Frames</a></li> |
| </ul> |
| <ul class="navList" id="allclasses_navbar_bottom"> |
| <li><a href="../../../../../allclasses-noframe.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| allClassesLink = document.getElementById("allclasses_navbar_bottom"); |
| if(window==top) { |
| allClassesLink.style.display = "block"; |
| } |
| else { |
| allClassesLink.style.display = "none"; |
| } |
| //--> |
| </script> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li>Nested | </li> |
| <li><a href="#field.summary">Field</a> | </li> |
| <li>Constr | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li><a href="#field.detail">Field</a> | </li> |
| <li>Constr | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a name="skip.navbar.bottom"> |
| <!-- --> |
| </a></div> |
| <!-- ======== END OF BOTTOM NAVBAR ======= --> |
| <p class="legalCopy"><small>Copyright © 2008–2017 <a href="http://www.apache.org/">The Apache Software Foundation</a>. All rights reserved.</small></p> |
| </body> |
| </html> |