| <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> |
| <!-- NewPage --> |
| <html lang="en"> |
| <head> |
| <!-- Generated by javadoc (1.8.0_121) on Fri Apr 14 22:11:37 PDT 2017 --> |
| <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> |
| <title>TextValueEncoder (Mahout Map-Reduce 0.13.0 API)</title> |
| <meta name="date" content="2017-04-14"> |
| <link rel="stylesheet" type="text/css" href="../../../../../stylesheet.css" title="Style"> |
| <script type="text/javascript" src="../../../../../script.js"></script> |
| </head> |
| <body> |
| <script type="text/javascript"><!-- |
| try { |
| if (location.href.indexOf('is-external=true') == -1) { |
| parent.document.title="TextValueEncoder (Mahout Map-Reduce 0.13.0 API)"; |
| } |
| } |
| catch(err) { |
| } |
| //--> |
| var methods = {"i0":10,"i1":10,"i2":10,"i3":10,"i4":10,"i5":10,"i6":10,"i7":10,"i8":10}; |
| var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"]}; |
| var altColor = "altColor"; |
| var rowColor = "rowColor"; |
| var tableTab = "tableTab"; |
| var activeTableTab = "activeTableTab"; |
| </script> |
| <noscript> |
| <div>JavaScript is disabled on your browser.</div> |
| </noscript> |
| <!-- ========= START OF TOP NAVBAR ======= --> |
| <div class="topNav"><a name="navbar.top"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div> |
| <a name="navbar.top.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../../overview-summary.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="class-use/TextValueEncoder.html">Use</a></li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList"> |
| <li><a href="../../../../../org/apache/mahout/vectorizer/encoders/StaticWordValueEncoder.html" title="class in org.apache.mahout.vectorizer.encoders"><span class="typeNameLink">Prev Class</span></a></li> |
| <li><a href="../../../../../org/apache/mahout/vectorizer/encoders/WordValueEncoder.html" title="class in org.apache.mahout.vectorizer.encoders"><span class="typeNameLink">Next Class</span></a></li> |
| </ul> |
| <ul class="navList"> |
| <li><a href="../../../../../index.html?org/apache/mahout/vectorizer/encoders/TextValueEncoder.html" target="_top">Frames</a></li> |
| <li><a href="TextValueEncoder.html" target="_top">No Frames</a></li> |
| </ul> |
| <ul class="navList" id="allclasses_navbar_top"> |
| <li><a href="../../../../../allclasses-noframe.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| allClassesLink = document.getElementById("allclasses_navbar_top"); |
| if(window==top) { |
| allClassesLink.style.display = "block"; |
| } |
| else { |
| allClassesLink.style.display = "none"; |
| } |
| //--> |
| </script> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li>Nested | </li> |
| <li><a href="#fields.inherited.from.class.org.apache.mahout.vectorizer.encoders.FeatureVectorEncoder">Field</a> | </li> |
| <li><a href="#constructor.summary">Constr</a> | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li>Field | </li> |
| <li><a href="#constructor.detail">Constr</a> | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a name="skip.navbar.top"> |
| <!-- --> |
| </a></div> |
| <!-- ========= END OF TOP NAVBAR ========= --> |
| <!-- ======== START OF CLASS DATA ======== --> |
| <div class="header"> |
| <div class="subTitle">org.apache.mahout.vectorizer.encoders</div> |
| <h2 title="Class TextValueEncoder" class="title">Class TextValueEncoder</h2> |
| </div> |
| <div class="contentContainer"> |
| <ul class="inheritance"> |
| <li><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true" title="class or interface in java.lang">java.lang.Object</a></li> |
| <li> |
| <ul class="inheritance"> |
| <li><a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html" title="class in org.apache.mahout.vectorizer.encoders">org.apache.mahout.vectorizer.encoders.FeatureVectorEncoder</a></li> |
| <li> |
| <ul class="inheritance"> |
| <li>org.apache.mahout.vectorizer.encoders.TextValueEncoder</li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| <div class="description"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <dl> |
| <dt>Direct Known Subclasses:</dt> |
| <dd><a href="../../../../../org/apache/mahout/vectorizer/encoders/CachingTextValueEncoder.html" title="class in org.apache.mahout.vectorizer.encoders">CachingTextValueEncoder</a>, <a href="../../../../../org/apache/mahout/vectorizer/encoders/LuceneTextValueEncoder.html" title="class in org.apache.mahout.vectorizer.encoders">LuceneTextValueEncoder</a></dd> |
| </dl> |
| <hr> |
| <br> |
| <pre>public class <span class="typeNameLabel">TextValueEncoder</span> |
| extends <a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html" title="class in org.apache.mahout.vectorizer.encoders">FeatureVectorEncoder</a></pre> |
| <div class="block">Encodes text that is tokenized on non-alphanumeric separators. Each word is encoded using a |
| settable encoder, which by default is a StaticWordValueEncoder that gives all |
| words the same weight.</div> |
| <dl> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../../org/apache/mahout/vectorizer/encoders/LuceneTextValueEncoder.html" title="class in org.apache.mahout.vectorizer.encoders"><code>LuceneTextValueEncoder</code></a></dd> |
| </dl> |
| </li> |
| </ul> |
| </div> |
| <div class="summary"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- =========== FIELD SUMMARY =========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="field.summary"> |
| <!-- --> |
| </a> |
| <h3>Field Summary</h3> |
| <ul class="blockList"> |
| <li class="blockList"><a name="fields.inherited.from.class.org.apache.mahout.vectorizer.encoders.FeatureVectorEncoder"> |
| <!-- --> |
| </a> |
| <h3>Fields inherited from class org.apache.mahout.vectorizer.encoders.<a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html" title="class in org.apache.mahout.vectorizer.encoders">FeatureVectorEncoder</a></h3> |
| <code><a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html#CONTINUOUS_VALUE_HASH_SEED">CONTINUOUS_VALUE_HASH_SEED</a>, <a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html#WORD_LIKE_VALUE_HASH_SEED">WORD_LIKE_VALUE_HASH_SEED</a></code></li> |
| </ul> |
| </li> |
| </ul> |
| <!-- ======== CONSTRUCTOR SUMMARY ======== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="constructor.summary"> |
| <!-- --> |
| </a> |
| <h3>Constructor Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation"> |
| <caption><span>Constructors</span><span class="tabEnd"> </span></caption> |
| <tr> |
| <th class="colOne" scope="col">Constructor and Description</th> |
| </tr> |
| <tr class="altColor"> |
| <td class="colOne"><code><span class="memberNameLink"><a href="../../../../../org/apache/mahout/vectorizer/encoders/TextValueEncoder.html#TextValueEncoder-java.lang.String-">TextValueEncoder</a></span>(<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a> name)</code> </td> |
| </tr> |
| </table> |
| </li> |
| </ul> |
| <!-- ========== METHOD SUMMARY =========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.summary"> |
| <!-- --> |
| </a> |
| <h3>Method Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation"> |
| <caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd"> </span></span><span id="t2" class="tableTab"><span><a href="javascript:show(2);">Instance Methods</a></span><span class="tabEnd"> </span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd"> </span></span></caption> |
| <tr> |
| <th class="colFirst" scope="col">Modifier and Type</th> |
| <th class="colLast" scope="col">Method and Description</th> |
| </tr> |
| <tr id="i0" class="altColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/mahout/vectorizer/encoders/TextValueEncoder.html#addText-byte:A-">addText</a></span>(byte[] originalForm)</code> |
| <div class="block">Adds text to the internal word counter, but delays converting it to vector |
| form until flush is called.</div> |
| </td> |
| </tr> |
| <tr id="i1" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/mahout/vectorizer/encoders/TextValueEncoder.html#addText-java.lang.CharSequence-">addText</a></span>(<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/CharSequence.html?is-external=true" title="class or interface in java.lang">CharSequence</a> text)</code> |
| <div class="block">Adds text to the internal word counter, but delays converting it to vector |
| form until flush is called.</div> |
| </td> |
| </tr> |
| <tr id="i2" class="altColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/mahout/vectorizer/encoders/TextValueEncoder.html#addToVector-byte:A-double-org.apache.mahout.math.Vector-">addToVector</a></span>(byte[] originalForm, |
| double weight, |
| <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/math/Vector.html?is-external=true" title="class or interface in org.apache.mahout.math">Vector</a> data)</code> |
| <div class="block">Adds a value to a vector after tokenizing it by splitting on non-alphanum characters.</div> |
| </td> |
| </tr> |
| <tr id="i3" class="rowColor"> |
| <td class="colFirst"><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/mahout/vectorizer/encoders/TextValueEncoder.html#asString-java.lang.String-">asString</a></span>(<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a> originalForm)</code> |
| <div class="block">Converts a value into a form that would help a human understand the internals of how the value |
| is being interpreted.</div> |
| </td> |
| </tr> |
| <tr id="i4" class="altColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/mahout/vectorizer/encoders/TextValueEncoder.html#flush-double-org.apache.mahout.math.Vector-">flush</a></span>(double weight, |
| <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/math/Vector.html?is-external=true" title="class or interface in org.apache.mahout.math">Vector</a> data)</code> |
| <div class="block">Adds all of the tokens that we counted up to a vector.</div> |
| </td> |
| </tr> |
| <tr id="i5" class="rowColor"> |
| <td class="colFirst"><code>protected <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Iterable.html?is-external=true" title="class or interface in java.lang">Iterable</a>&lt;<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Integer.html?is-external=true" title="class or interface in java.lang">Integer</a>&gt;</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/mahout/vectorizer/encoders/TextValueEncoder.html#hashesForProbe-byte:A-int-java.lang.String-int-">hashesForProbe</a></span>(byte[] originalForm, |
| int dataSize, |
| <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a> name, |
| int probe)</code> |
| <div class="block">Returns all of the hashes for this probe.</div> |
| </td> |
| </tr> |
| <tr id="i6" class="altColor"> |
| <td class="colFirst"><code>protected int</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/mahout/vectorizer/encoders/TextValueEncoder.html#hashForProbe-byte:A-int-java.lang.String-int-">hashForProbe</a></span>(byte[] originalForm, |
| int dataSize, |
| <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a> name, |
| int probe)</code> |
| <div class="block">Provides the unique hash for a particular probe.</div> |
| </td> |
| </tr> |
| <tr id="i7" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/mahout/vectorizer/encoders/TextValueEncoder.html#setWordEncoder-org.apache.mahout.vectorizer.encoders.FeatureVectorEncoder-">setWordEncoder</a></span>(<a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html" title="class in org.apache.mahout.vectorizer.encoders">FeatureVectorEncoder</a> wordEncoder)</code> </td> |
| </tr> |
| <tr id="i8" class="altColor"> |
| <td class="colFirst"><code>protected <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Iterable.html?is-external=true" title="class or interface in java.lang">Iterable</a>&lt;<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a>&gt;</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/mahout/vectorizer/encoders/TextValueEncoder.html#tokenize-java.lang.CharSequence-">tokenize</a></span>(<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/CharSequence.html?is-external=true" title="class or interface in java.lang">CharSequence</a> originalForm)</code> |
| <div class="block">Tokenizes a string using the simplest method.</div> |
| </td> |
| </tr> |
| </table> |
| <ul class="blockList"> |
| <li class="blockList"><a name="methods.inherited.from.class.org.apache.mahout.vectorizer.encoders.FeatureVectorEncoder"> |
| <!-- --> |
| </a> |
| <h3>Methods inherited from class org.apache.mahout.vectorizer.encoders.<a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html" title="class in org.apache.mahout.vectorizer.encoders">FeatureVectorEncoder</a></h3> |
| <code><a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html#addToVector-byte:A-org.apache.mahout.math.Vector-">addToVector</a>, <a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html#addToVector-java.lang.String-double-org.apache.mahout.math.Vector-">addToVector</a>, <a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html#addToVector-java.lang.String-org.apache.mahout.math.Vector-">addToVector</a>, <a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html#bytesForString-java.lang.String-">bytesForString</a>, <a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html#getName--">getName</a>, <a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html#getProbes--">getProbes</a>, <a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html#getWeight-byte:A-double-">getWeight</a>, <a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html#hash-byte:A-byte:A-int-int-">hash</a>, <a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html#hash-byte:A-int-int-">hash</a>, <a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html#hash-java.lang.String-int-int-">hash</a>, <a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html#hash-java.lang.String-java.lang.String-int-int-">hash</a>, <a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html#hash-java.lang.String-java.lang.String-java.lang.String-java.lang.String-int-int-">hash</a>, <a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html#isTraceEnabled--">isTraceEnabled</a>, <a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html#setProbes-int-">setProbes</a>, <a 
href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html#setTraceDictionary-java.util.Map-">setTraceDictionary</a>, <a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html#trace-byte:A-int-">trace</a>, <a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html#trace-java.lang.String-int-">trace</a></code></li> |
| </ul> |
| <ul class="blockList"> |
| <li class="blockList"><a name="methods.inherited.from.class.java.lang.Object"> |
| <!-- --> |
| </a> |
| <h3>Methods inherited from class java.lang.<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true" title="class or interface in java.lang">Object</a></h3> |
| <code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#clone--" title="class or interface in java.lang">clone</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#equals-java.lang.Object-" title="class or interface in java.lang">equals</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#finalize--" title="class or interface in java.lang">finalize</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#getClass--" title="class or interface in java.lang">getClass</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#hashCode--" title="class or interface in java.lang">hashCode</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#notify--" title="class or interface in java.lang">notify</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#notifyAll--" title="class or interface in java.lang">notifyAll</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#toString--" title="class or interface in java.lang">toString</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#wait--" title="class or interface in java.lang">wait</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#wait-long-" title="class or interface in java.lang">wait</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#wait-long-int-" title="class or interface in java.lang">wait</a></code></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| <div class="details"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- ========= CONSTRUCTOR DETAIL ======== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="constructor.detail"> |
| <!-- --> |
| </a> |
| <h3>Constructor Detail</h3> |
| <a name="TextValueEncoder-java.lang.String-"> |
| <!-- --> |
| </a> |
| <ul class="blockListLast"> |
| <li class="blockList"> |
| <h4>TextValueEncoder</h4> |
| <pre>public TextValueEncoder(<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a> name)</pre> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| <!-- ============ METHOD DETAIL ========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.detail"> |
| <!-- --> |
| </a> |
| <h3>Method Detail</h3> |
| <a name="addToVector-byte:A-double-org.apache.mahout.math.Vector-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>addToVector</h4> |
| <pre>public void addToVector(byte[] originalForm, |
| double weight, |
| <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/math/Vector.html?is-external=true" title="class or interface in org.apache.mahout.math">Vector</a> data)</pre> |
| <div class="block">Adds a value to a vector after tokenizing it by splitting on non-alphanum characters.</div> |
| <dl> |
| <dt><span class="overrideSpecifyLabel">Specified by:</span></dt> |
| <dd><code><a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html#addToVector-byte:A-double-org.apache.mahout.math.Vector-">addToVector</a></code> in class <code><a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html" title="class in org.apache.mahout.vectorizer.encoders">FeatureVectorEncoder</a></code></dd> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>originalForm</code> - The original form of the value as a byte array.</dd> |
| <dd><code>data</code> - The vector to which the value should be added.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="addText-byte:A-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>addText</h4> |
| <pre>public void addText(byte[] originalForm)</pre> |
| <div class="block">Adds text to the internal word counter, but delays converting it to vector |
| form until flush is called.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>originalForm</code> - The original text encoded as UTF-8</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="addText-java.lang.CharSequence-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>addText</h4> |
| <pre>public void addText(<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/CharSequence.html?is-external=true" title="class or interface in java.lang">CharSequence</a> text)</pre> |
| <div class="block">Adds text to the internal word counter, but delays converting it to vector |
| form until flush is called.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>text</code> - The original text as a character sequence.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="flush-double-org.apache.mahout.math.Vector-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>flush</h4> |
| <pre>public void flush(double weight, |
| <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/math/Vector.html?is-external=true" title="class or interface in org.apache.mahout.math">Vector</a> data)</pre> |
| <div class="block">Adds all of the tokens that we counted up to a vector.</div> |
| </li> |
| </ul> |
| <a name="hashForProbe-byte:A-int-java.lang.String-int-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>hashForProbe</h4> |
| <pre>protected int hashForProbe(byte[] originalForm, |
| int dataSize, |
| <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a> name, |
| int probe)</pre> |
| <div class="block"><span class="descfrmTypeLabel">Description copied from class: <code><a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html#hashForProbe-byte:A-int-java.lang.String-int-">FeatureVectorEncoder</a></code></span></div> |
| <div class="block">Provides the unique hash for a particular probe. For all encoders except text, this |
| is all that is needed and the default implementation of hashesForProbe will do the right |
| thing. For text and similar values, hashesForProbe should be overridden and this method |
| should not be used.</div> |
| <dl> |
| <dt><span class="overrideSpecifyLabel">Specified by:</span></dt> |
| <dd><code><a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html#hashForProbe-byte:A-int-java.lang.String-int-">hashForProbe</a></code> in class <code><a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html" title="class in org.apache.mahout.vectorizer.encoders">FeatureVectorEncoder</a></code></dd> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>originalForm</code> - The original byte array value</dd> |
| <dd><code>dataSize</code> - The length of the vector being encoded</dd> |
| <dd><code>name</code> - The name of the variable being encoded</dd> |
| <dd><code>probe</code> - The probe number</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>The hash of the current probe</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="hashesForProbe-byte:A-int-java.lang.String-int-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>hashesForProbe</h4> |
| <pre>protected <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Iterable.html?is-external=true" title="class or interface in java.lang">Iterable</a>&lt;<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Integer.html?is-external=true" title="class or interface in java.lang">Integer</a>&gt; hashesForProbe(byte[] originalForm, |
| int dataSize, |
| <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a> name, |
| int probe)</pre> |
| <div class="block"><span class="descfrmTypeLabel">Description copied from class: <code><a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html#hashesForProbe-byte:A-int-java.lang.String-int-">FeatureVectorEncoder</a></code></span></div> |
| <div class="block">Returns all of the hashes for this probe. For most encoders, this is a singleton, but |
| for text, many hashes are returned, one for each word (unique or not). Most implementations |
| should only implement hashForProbe for simplicity.</div> |
| <dl> |
| <dt><span class="overrideSpecifyLabel">Overrides:</span></dt> |
| <dd><code><a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html#hashesForProbe-byte:A-int-java.lang.String-int-">hashesForProbe</a></code> in class <code><a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html" title="class in org.apache.mahout.vectorizer.encoders">FeatureVectorEncoder</a></code></dd> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>originalForm</code> - The original byte array value.</dd> |
| <dd><code>dataSize</code> - The length of the vector being encoded</dd> |
| <dd><code>name</code> - The name of the variable being encoded</dd> |
| <dd><code>probe</code> - The probe number</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>an Iterable of the hashes</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="tokenize-java.lang.CharSequence-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>tokenize</h4> |
| <pre>protected <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Iterable.html?is-external=true" title="class or interface in java.lang">Iterable</a>&lt;<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a>&gt; tokenize(<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/CharSequence.html?is-external=true" title="class or interface in java.lang">CharSequence</a> originalForm)</pre> |
| <div class="block">Tokenizes a string using the simplest method. This should be overridden for more subtle |
| tokenization.</div> |
| <dl> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../../org/apache/mahout/vectorizer/encoders/LuceneTextValueEncoder.html" title="class in org.apache.mahout.vectorizer.encoders"><code>LuceneTextValueEncoder</code></a></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="asString-java.lang.String-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>asString</h4> |
| <pre>public <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a> asString(<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a> originalForm)</pre> |
| <div class="block">Converts a value into a form that would help a human understand the internals of how the value |
| is being interpreted. For text-like things, this is likely to be a list of the terms found with |
| associated weights (if any).</div> |
| <dl> |
| <dt><span class="overrideSpecifyLabel">Specified by:</span></dt> |
| <dd><code><a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html#asString-java.lang.String-">asString</a></code> in class <code><a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html" title="class in org.apache.mahout.vectorizer.encoders">FeatureVectorEncoder</a></code></dd> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>originalForm</code> - The original form of the value as a string.</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>A string that a human can read.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="setWordEncoder-org.apache.mahout.vectorizer.encoders.FeatureVectorEncoder-"> |
| <!-- --> |
| </a> |
| <ul class="blockListLast"> |
| <li class="blockList"> |
| <h4>setWordEncoder</h4> |
| <pre>public final void setWordEncoder(<a href="../../../../../org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.html" title="class in org.apache.mahout.vectorizer.encoders">FeatureVectorEncoder</a> wordEncoder)</pre> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| </div> |
| <!-- ========= END OF CLASS DATA ========= --> |
| <!-- ======= START OF BOTTOM NAVBAR ====== --> |
| <div class="bottomNav"><a name="navbar.bottom"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div> |
| <a name="navbar.bottom.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../../overview-summary.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="class-use/TextValueEncoder.html">Use</a></li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList"> |
| <li><a href="../../../../../org/apache/mahout/vectorizer/encoders/StaticWordValueEncoder.html" title="class in org.apache.mahout.vectorizer.encoders"><span class="typeNameLink">Prev Class</span></a></li> |
| <li><a href="../../../../../org/apache/mahout/vectorizer/encoders/WordValueEncoder.html" title="class in org.apache.mahout.vectorizer.encoders"><span class="typeNameLink">Next Class</span></a></li> |
| </ul> |
| <ul class="navList"> |
| <li><a href="../../../../../index.html?org/apache/mahout/vectorizer/encoders/TextValueEncoder.html" target="_top">Frames</a></li> |
| <li><a href="TextValueEncoder.html" target="_top">No Frames</a></li> |
| </ul> |
| <ul class="navList" id="allclasses_navbar_bottom"> |
| <li><a href="../../../../../allclasses-noframe.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| allClassesLink = document.getElementById("allclasses_navbar_bottom"); |
| if(window==top) { |
| allClassesLink.style.display = "block"; |
| } |
| else { |
| allClassesLink.style.display = "none"; |
| } |
| //--> |
| </script> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li>Nested | </li> |
| <li><a href="#fields.inherited.from.class.org.apache.mahout.vectorizer.encoders.FeatureVectorEncoder">Field</a> | </li> |
| <li><a href="#constructor.summary">Constr</a> | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li>Field | </li> |
| <li><a href="#constructor.detail">Constr</a> | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a name="skip.navbar.bottom"> |
| <!-- --> |
| </a></div> |
| <!-- ======== END OF BOTTOM NAVBAR ======= --> |
| <p class="legalCopy"><small>Copyright © 2008–2017 <a href="http://www.apache.org/">The Apache Software Foundation</a>. All rights reserved.</small></p> |
| </body> |
| </html> |