| <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> |
| <!-- NewPage --> |
| <html lang="en"> |
| <head> |
| <!-- Generated by javadoc (1.8.0_121) on Fri Apr 14 22:11:36 PDT 2017 --> |
| <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> |
| <title>DictionaryVectorizer (Mahout Map-Reduce 0.13.0 API)</title> |
| <meta name="date" content="2017-04-14"> |
| <link rel="stylesheet" type="text/css" href="../../../../stylesheet.css" title="Style"> |
| <script type="text/javascript" src="../../../../script.js"></script> |
| </head> |
| <body> |
| <script type="text/javascript"><!-- |
| try { |
| if (location.href.indexOf('is-external=true') == -1) { |
| parent.document.title="DictionaryVectorizer (Mahout Map-Reduce 0.13.0 API)"; |
| } |
| } |
| catch(err) { |
| } |
| //--> |
| var methods = {"i0":9,"i1":10,"i2":9,"i3":10}; |
| var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"]}; |
| var altColor = "altColor"; |
| var rowColor = "rowColor"; |
| var tableTab = "tableTab"; |
| var activeTableTab = "activeTableTab"; |
| </script> |
| <noscript> |
| <div>JavaScript is disabled on your browser.</div> |
| </noscript> |
| <!-- ========= START OF TOP NAVBAR ======= --> |
| <div class="topNav"><a name="navbar.top"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div> |
| <a name="navbar.top.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../overview-summary.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="class-use/DictionaryVectorizer.html">Use</a></li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList"> |
| <li>Prev Class</li> |
| <li><a href="../../../../org/apache/mahout/vectorizer/DocumentProcessor.html" title="class in org.apache.mahout.vectorizer"><span class="typeNameLink">Next Class</span></a></li> |
| </ul> |
| <ul class="navList"> |
| <li><a href="../../../../index.html?org/apache/mahout/vectorizer/DictionaryVectorizer.html" target="_top">Frames</a></li> |
| <li><a href="DictionaryVectorizer.html" target="_top">No Frames</a></li> |
| </ul> |
| <ul class="navList" id="allclasses_navbar_top"> |
| <li><a href="../../../../allclasses-noframe.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| allClassesLink = document.getElementById("allclasses_navbar_top"); |
| if(window==top) { |
| allClassesLink.style.display = "block"; |
| } |
| else { |
| allClassesLink.style.display = "none"; |
| } |
| //--> |
| </script> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li>Nested | </li> |
| <li><a href="#field.summary">Field</a> | </li> |
| <li>Constr | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li><a href="#field.detail">Field</a> | </li> |
| <li>Constr | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a name="skip.navbar.top"> |
| <!-- --> |
| </a></div> |
| <!-- ========= END OF TOP NAVBAR ========= --> |
| <!-- ======== START OF CLASS DATA ======== --> |
| <div class="header"> |
| <div class="subTitle">org.apache.mahout.vectorizer</div> |
| <h2 title="Class DictionaryVectorizer" class="title">Class DictionaryVectorizer</h2> |
| </div> |
| <div class="contentContainer"> |
| <ul class="inheritance"> |
| <li><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true" title="class or interface in java.lang">java.lang.Object</a></li> |
| <li> |
| <ul class="inheritance"> |
| <li>org.apache.hadoop.conf.Configured</li> |
| <li> |
| <ul class="inheritance"> |
| <li><a href="../../../../org/apache/mahout/common/AbstractJob.html" title="class in org.apache.mahout.common">org.apache.mahout.common.AbstractJob</a></li> |
| <li> |
| <ul class="inheritance"> |
| <li>org.apache.mahout.vectorizer.DictionaryVectorizer</li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| <div class="description"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <dl> |
| <dt>All Implemented Interfaces:</dt> |
| <dd>org.apache.hadoop.conf.Configurable, org.apache.hadoop.util.Tool, <a href="../../../../org/apache/mahout/vectorizer/Vectorizer.html" title="interface in org.apache.mahout.vectorizer">Vectorizer</a></dd> |
| </dl> |
| <hr> |
| <br> |
| <pre>public final class <span class="typeNameLabel">DictionaryVectorizer</span> |
| extends <a href="../../../../org/apache/mahout/common/AbstractJob.html" title="class in org.apache.mahout.common">AbstractJob</a> |
| implements <a href="../../../../org/apache/mahout/vectorizer/Vectorizer.html" title="interface in org.apache.mahout.vectorizer">Vectorizer</a></pre> |
| <div class="block">This class converts a set of input documents in the sequence file format to vectors. The Sequence file |
| input should have a <code>Text</code> key containing the unique document identifier and a <a href="../../../../org/apache/mahout/common/StringTuple.html" title="class in org.apache.mahout.common"><code>StringTuple</code></a> |
| value containing the tokenized document. You may use <a href="../../../../org/apache/mahout/vectorizer/DocumentProcessor.html" title="class in org.apache.mahout.vectorizer"><code>DocumentProcessor</code></a> to tokenize the document. |
| This is a dictionary-based Vectorizer.</div> |
| </li> |
| </ul> |
| </div> |
| <div class="summary"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- =========== FIELD SUMMARY =========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="field.summary"> |
| <!-- --> |
| </a> |
| <h3>Field Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Field Summary table, listing fields, and an explanation"> |
| <caption><span>Fields</span><span class="tabEnd"> </span></caption> |
| <tr> |
| <th class="colFirst" scope="col">Modifier and Type</th> |
| <th class="colLast" scope="col">Field and Description</th> |
| </tr> |
| <tr class="altColor"> |
| <td class="colFirst"><code>static int</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/vectorizer/DictionaryVectorizer.html#DEFAULT_MIN_SUPPORT">DEFAULT_MIN_SUPPORT</a></span></code> </td> |
| </tr> |
| <tr class="rowColor"> |
| <td class="colFirst"><code>static <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/vectorizer/DictionaryVectorizer.html#DICTIONARY_FILE">DICTIONARY_FILE</a></span></code> </td> |
| </tr> |
| <tr class="altColor"> |
| <td class="colFirst"><code>static <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/vectorizer/DictionaryVectorizer.html#DOCUMENT_VECTOR_OUTPUT_FOLDER">DOCUMENT_VECTOR_OUTPUT_FOLDER</a></span></code> </td> |
| </tr> |
| <tr class="rowColor"> |
| <td class="colFirst"><code>static <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/vectorizer/DictionaryVectorizer.html#MAX_NGRAMS">MAX_NGRAMS</a></span></code> </td> |
| </tr> |
| <tr class="altColor"> |
| <td class="colFirst"><code>static <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/vectorizer/DictionaryVectorizer.html#MIN_SUPPORT">MIN_SUPPORT</a></span></code> </td> |
| </tr> |
| </table> |
| <ul class="blockList"> |
| <li class="blockList"><a name="fields.inherited.from.class.org.apache.mahout.common.AbstractJob"> |
| <!-- --> |
| </a> |
| <h3>Fields inherited from class org.apache.mahout.common.<a href="../../../../org/apache/mahout/common/AbstractJob.html" title="class in org.apache.mahout.common">AbstractJob</a></h3> |
| <code><a href="../../../../org/apache/mahout/common/AbstractJob.html#argMap">argMap</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#inputFile">inputFile</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#inputPath">inputPath</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#outputFile">outputFile</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#outputPath">outputPath</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#tempPath">tempPath</a></code></li> |
| </ul> |
| </li> |
| </ul> |
| <!-- ========== METHOD SUMMARY =========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.summary"> |
| <!-- --> |
| </a> |
| <h3>Method Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation"> |
| <caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd"> </span></span><span id="t1" class="tableTab"><span><a href="javascript:show(1);">Static Methods</a></span><span class="tabEnd"> </span></span><span id="t2" class="tableTab"><span><a href="javascript:show(2);">Instance Methods</a></span><span class="tabEnd"> </span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd"> </span></span></caption> |
| <tr> |
| <th class="colFirst" scope="col">Modifier and Type</th> |
| <th class="colLast" scope="col">Method and Description</th> |
| </tr> |
| <tr id="i0" class="altColor"> |
| <td class="colFirst"><code>static void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/vectorizer/DictionaryVectorizer.html#createTermFrequencyVectors-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-java.lang.String-org.apache.hadoop.conf.Configuration-int-int-float-float-boolean-int-int-boolean-boolean-">createTermFrequencyVectors</a></span>(org.apache.hadoop.fs.Path input, |
| org.apache.hadoop.fs.Path output, |
| <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a> tfVectorsFolderName, |
| org.apache.hadoop.conf.Configuration baseConf, |
| int minSupport, |
| int maxNGramSize, |
| float minLLRValue, |
| float normPower, |
| boolean logNormalize, |
| int numReducers, |
| int chunkSizeInMegabytes, |
| boolean sequentialAccess, |
| boolean namedVectors)</code> |
| <div class="block">Create Term Frequency (Tf) Vectors from the input set of documents in <code>SequenceFile</code> format.</div> |
| </td> |
| </tr> |
| <tr id="i1" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/vectorizer/DictionaryVectorizer.html#createVectors-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-org.apache.mahout.vectorizer.VectorizerConfig-">createVectors</a></span>(org.apache.hadoop.fs.Path input, |
| org.apache.hadoop.fs.Path output, |
| <a href="../../../../org/apache/mahout/vectorizer/VectorizerConfig.html" title="class in org.apache.mahout.vectorizer">VectorizerConfig</a> config)</code> </td> |
| </tr> |
| <tr id="i2" class="altColor"> |
| <td class="colFirst"><code>static void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/vectorizer/DictionaryVectorizer.html#main-java.lang.String:A-">main</a></span>(<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a>[] args)</code> </td> |
| </tr> |
| <tr id="i3" class="rowColor"> |
| <td class="colFirst"><code>int</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/vectorizer/DictionaryVectorizer.html#run-java.lang.String:A-">run</a></span>(<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a>[] args)</code> </td> |
| </tr> |
| </table> |
| <ul class="blockList"> |
| <li class="blockList"><a name="methods.inherited.from.class.org.apache.mahout.common.AbstractJob"> |
| <!-- --> |
| </a> |
| <h3>Methods inherited from class org.apache.mahout.common.<a href="../../../../org/apache/mahout/common/AbstractJob.html" title="class in org.apache.mahout.common">AbstractJob</a></h3> |
| <code><a href="../../../../org/apache/mahout/common/AbstractJob.html#addFlag-java.lang.String-java.lang.String-java.lang.String-">addFlag</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#addInputOption--">addInputOption</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#addOption-org.apache.commons.cli2.Option-">addOption</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#addOption-java.lang.String-java.lang.String-java.lang.String-">addOption</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#addOption-java.lang.String-java.lang.String-java.lang.String-boolean-">addOption</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#addOption-java.lang.String-java.lang.String-java.lang.String-java.lang.String-">addOption</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#addOutputOption--">addOutputOption</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#buildOption-java.lang.String-java.lang.String-java.lang.String-boolean-boolean-java.lang.String-">buildOption</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#buildOption-java.lang.String-java.lang.String-java.lang.String-boolean-int-int-boolean-java.lang.String-">buildOption</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#getAnalyzerClassFromOption--">getAnalyzerClassFromOption</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#getCLIOption-java.lang.String-">getCLIOption</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#getConf--">getConf</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#getDimensions-org.apache.hadoop.fs.Path-">getDimensions</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#getFloat-java.lang.String-">getFloat</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#getFloat-java.lang.String-float-">getFloat</a>, <a 
href="../../../../org/apache/mahout/common/AbstractJob.html#getGroup--">getGroup</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#getInputFile--">getInputFile</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#getInputPath--">getInputPath</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#getInt-java.lang.String-">getInt</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#getInt-java.lang.String-int-">getInt</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#getOption-java.util.Map-java.lang.String-">getOption</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#getOption-java.lang.String-">getOption</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#getOption-java.lang.String-java.lang.String-">getOption</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#getOptions-java.lang.String-">getOptions</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#getOutputFile--">getOutputFile</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#getOutputPath--">getOutputPath</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#getOutputPath-java.lang.String-">getOutputPath</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#getTempPath--">getTempPath</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#getTempPath-java.lang.String-">getTempPath</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#hasOption-java.lang.String-">hasOption</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#keyFor-java.lang.String-">keyFor</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#maybePut-java.util.Map-org.apache.commons.cli2.CommandLine-org.apache.commons.cli2.Option...-">maybePut</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#parseArguments-java.lang.String:A-">parseArguments</a>, <a 
href="../../../../org/apache/mahout/common/AbstractJob.html#parseArguments-java.lang.String:A-boolean-boolean-">parseArguments</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#parseDirectories-org.apache.commons.cli2.CommandLine-boolean-boolean-">parseDirectories</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#prepareJob-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-">prepareJob</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#prepareJob-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.String-">prepareJob</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#prepareJob-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-">prepareJob</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#prepareJob-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-">prepareJob</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#setConf-org.apache.hadoop.conf.Configuration-">setConf</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#setS3SafeCombinedInputPath-org.apache.hadoop.mapreduce.Job-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-">setS3SafeCombinedInputPath</a>, <a href="../../../../org/apache/mahout/common/AbstractJob.html#shouldRunNextPhase-java.util.Map-java.util.concurrent.atomic.AtomicInteger-">shouldRunNextPhase</a></code></li> |
| </ul> |
| <ul class="blockList"> |
| <li class="blockList"><a name="methods.inherited.from.class.java.lang.Object"> |
| <!-- --> |
| </a> |
| <h3>Methods inherited from class java.lang.<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true" title="class or interface in java.lang">Object</a></h3> |
| <code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#clone--" title="class or interface in java.lang">clone</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#equals-java.lang.Object-" title="class or interface in java.lang">equals</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#finalize--" title="class or interface in java.lang">finalize</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#getClass--" title="class or interface in java.lang">getClass</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#hashCode--" title="class or interface in java.lang">hashCode</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#notify--" title="class or interface in java.lang">notify</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#notifyAll--" title="class or interface in java.lang">notifyAll</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#toString--" title="class or interface in java.lang">toString</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#wait--" title="class or interface in java.lang">wait</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#wait-long-" title="class or interface in java.lang">wait</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#wait-long-int-" title="class or interface in java.lang">wait</a></code></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| <div class="details"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- ============ FIELD DETAIL =========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="field.detail"> |
| <!-- --> |
| </a> |
| <h3>Field Detail</h3> |
| <a name="DOCUMENT_VECTOR_OUTPUT_FOLDER"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>DOCUMENT_VECTOR_OUTPUT_FOLDER</h4> |
| <pre>public static final <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a> DOCUMENT_VECTOR_OUTPUT_FOLDER</pre> |
| <dl> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../constant-values.html#org.apache.mahout.vectorizer.DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER">Constant Field Values</a></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="MIN_SUPPORT"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>MIN_SUPPORT</h4> |
| <pre>public static final <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a> MIN_SUPPORT</pre> |
| <dl> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../constant-values.html#org.apache.mahout.vectorizer.DictionaryVectorizer.MIN_SUPPORT">Constant Field Values</a></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="MAX_NGRAMS"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>MAX_NGRAMS</h4> |
| <pre>public static final <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a> MAX_NGRAMS</pre> |
| <dl> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../constant-values.html#org.apache.mahout.vectorizer.DictionaryVectorizer.MAX_NGRAMS">Constant Field Values</a></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="DEFAULT_MIN_SUPPORT"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>DEFAULT_MIN_SUPPORT</h4> |
| <pre>public static final int DEFAULT_MIN_SUPPORT</pre> |
| <dl> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../constant-values.html#org.apache.mahout.vectorizer.DictionaryVectorizer.DEFAULT_MIN_SUPPORT">Constant Field Values</a></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="DICTIONARY_FILE"> |
| <!-- --> |
| </a> |
| <ul class="blockListLast"> |
| <li class="blockList"> |
| <h4>DICTIONARY_FILE</h4> |
| <pre>public static final <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a> DICTIONARY_FILE</pre> |
| <dl> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../constant-values.html#org.apache.mahout.vectorizer.DictionaryVectorizer.DICTIONARY_FILE">Constant Field Values</a></dd> |
| </dl> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| <!-- ============ METHOD DETAIL ========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.detail"> |
| <!-- --> |
| </a> |
| <h3>Method Detail</h3> |
| <a name="createVectors-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-org.apache.mahout.vectorizer.VectorizerConfig-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>createVectors</h4> |
| <pre>public void createVectors(org.apache.hadoop.fs.Path input, |
| org.apache.hadoop.fs.Path output, |
| <a href="../../../../org/apache/mahout/vectorizer/VectorizerConfig.html" title="class in org.apache.mahout.vectorizer">VectorizerConfig</a> config) |
| throws <a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a>, |
| <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/ClassNotFoundException.html?is-external=true" title="class or interface in java.lang">ClassNotFoundException</a>, |
| <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/InterruptedException.html?is-external=true" title="class or interface in java.lang">InterruptedException</a></pre> |
| <dl> |
| <dt><span class="overrideSpecifyLabel">Specified by:</span></dt> |
| <dd><code><a href="../../../../org/apache/mahout/vectorizer/Vectorizer.html#createVectors-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-org.apache.mahout.vectorizer.VectorizerConfig-">createVectors</a></code> in interface <code><a href="../../../../org/apache/mahout/vectorizer/Vectorizer.html" title="interface in org.apache.mahout.vectorizer">Vectorizer</a></code></dd> |
| <dt><span class="throwsLabel">Throws:</span></dt> |
| <dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></code></dd> |
| <dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/ClassNotFoundException.html?is-external=true" title="class or interface in java.lang">ClassNotFoundException</a></code></dd> |
| <dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/InterruptedException.html?is-external=true" title="class or interface in java.lang">InterruptedException</a></code></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="createTermFrequencyVectors-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-java.lang.String-org.apache.hadoop.conf.Configuration-int-int-float-float-boolean-int-int-boolean-boolean-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>createTermFrequencyVectors</h4> |
| <pre>public static void createTermFrequencyVectors(org.apache.hadoop.fs.Path input, |
| org.apache.hadoop.fs.Path output, |
| <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a> tfVectorsFolderName, |
| org.apache.hadoop.conf.Configuration baseConf, |
| int minSupport, |
| int maxNGramSize, |
| float minLLRValue, |
| float normPower, |
| boolean logNormalize, |
| int numReducers, |
| int chunkSizeInMegabytes, |
| boolean sequentialAccess, |
| boolean namedVectors) |
| throws <a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a>, |
| <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/InterruptedException.html?is-external=true" title="class or interface in java.lang">InterruptedException</a>, |
| <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/ClassNotFoundException.html?is-external=true" title="class or interface in java.lang">ClassNotFoundException</a></pre> |
| <div class="block">Create Term Frequency (Tf) Vectors from the input set of documents in <code>SequenceFile</code> format. This |
| tries to fix the maximum memory used by the feature chunk per node, thereby splitting the process across |
| multiple Map/Reduce passes.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>input</code> - input directory of the documents in <code>SequenceFile</code> format</dd> |
| <dd><code>output</code> - output directory where the <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/math/RandomAccessSparseVector.html?is-external=true" title="class or interface in org.apache.mahout.math"><code>RandomAccessSparseVector</code></a>s of the documents |
| are generated</dd> |
| <dd><code>tfVectorsFolderName</code> - The name of the folder in which the final output vectors will be stored</dd> |
| <dd><code>baseConf</code> - job configuration</dd> |
| <dd><code>minSupport</code> - the minimum frequency of the feature in the entire corpus to be considered for inclusion in the |
| sparse vector</dd> |
| <dd><code>maxNGramSize</code> - 1 = unigram, 2 = unigram and bigram, 3 = unigram, bigram and trigram</dd> |
| <dd><code>minLLRValue</code> - minimum value of the log-likelihood ratio used to prune ngrams</dd> |
| <dd><code>normPower</code> - L_p norm to be computed</dd> |
| <dd><code>logNormalize</code> - whether to use log normalization</dd> |
| <dd><code>numReducers</code> - </dd> |
| <dd><code>chunkSizeInMegabytes</code> - the size in MB of the feature =&gt; id chunk to be kept in memory at each node during the Map/Reduce |
| stage. It is recommended that you calculate this based on the number of cores and the free memory |
| available to you per node. For example, if you have 2 cores and around 1GB of extra memory to spare, we |
| recommend you use a split size of around 400-500MB so that two simultaneous reducers can create |
| partial vectors without thrashing the system due to increased swapping.</dd> |
| <dd><code>sequentialAccess</code> - </dd> |
| <dd><code>namedVectors</code> - </dd> |
| <dt><span class="throwsLabel">Throws:</span></dt> |
| <dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></code></dd> |
| <dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/InterruptedException.html?is-external=true" title="class or interface in java.lang">InterruptedException</a></code></dd> |
| <dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/ClassNotFoundException.html?is-external=true" title="class or interface in java.lang">ClassNotFoundException</a></code></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="run-java.lang.String:A-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>run</h4> |
| <pre>public int run(<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a>[] args) |
| throws <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Exception.html?is-external=true" title="class or interface in java.lang">Exception</a></pre> |
| <dl> |
| <dt><span class="overrideSpecifyLabel">Specified by:</span></dt> |
| <dd><code>run</code> in interface <code>org.apache.hadoop.util.Tool</code></dd> |
| <dt><span class="throwsLabel">Throws:</span></dt> |
| <dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Exception.html?is-external=true" title="class or interface in java.lang">Exception</a></code></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="main-java.lang.String:A-"> |
| <!-- --> |
| </a> |
| <ul class="blockListLast"> |
| <li class="blockList"> |
| <h4>main</h4> |
| <pre>public static void main(<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a>[] args) |
| throws <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Exception.html?is-external=true" title="class or interface in java.lang">Exception</a></pre> |
| <dl> |
| <dt><span class="throwsLabel">Throws:</span></dt> |
| <dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Exception.html?is-external=true" title="class or interface in java.lang">Exception</a></code></dd> |
| </dl> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| </div> |
| <!-- ========= END OF CLASS DATA ========= --> |
| <!-- ======= START OF BOTTOM NAVBAR ====== --> |
| <div class="bottomNav"><a name="navbar.bottom"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div> |
| <a name="navbar.bottom.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../overview-summary.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="class-use/DictionaryVectorizer.html">Use</a></li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList"> |
| <li>Prev Class</li> |
| <li><a href="../../../../org/apache/mahout/vectorizer/DocumentProcessor.html" title="class in org.apache.mahout.vectorizer"><span class="typeNameLink">Next Class</span></a></li> |
| </ul> |
| <ul class="navList"> |
| <li><a href="../../../../index.html?org/apache/mahout/vectorizer/DictionaryVectorizer.html" target="_top">Frames</a></li> |
| <li><a href="DictionaryVectorizer.html" target="_top">No Frames</a></li> |
| </ul> |
| <ul class="navList" id="allclasses_navbar_bottom"> |
| <li><a href="../../../../allclasses-noframe.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| allClassesLink = document.getElementById("allclasses_navbar_bottom"); |
| if(window==top) { |
| allClassesLink.style.display = "block"; |
| } |
| else { |
| allClassesLink.style.display = "none"; |
| } |
| //--> |
| </script> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li>Nested | </li> |
| <li><a href="#field.summary">Field</a> | </li> |
| <li>Constr | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li><a href="#field.detail">Field</a> | </li> |
| <li>Constr | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a name="skip.navbar.bottom"> |
| <!-- --> |
| </a></div> |
| <!-- ======== END OF BOTTOM NAVBAR ======= --> |
| <p class="legalCopy"><small>Copyright © 2008–2017 <a href="http://www.apache.org/">The Apache Software Foundation</a>. All rights reserved.</small></p> |
| </body> |
| </html> |