blob: 2607893e63e49362e9a0b4ff99bd8e89a13a68fa [file] [log] [blame]
<html xmlns:MSHelp="http://msdn.microsoft.com/mshelp" xmlns:mshelp="http://msdn.microsoft.com/mshelp" xmlns:ddue="http://ddue.schemas.microsoft.com/authoring/2003/5" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:msxsl="urn:schemas-microsoft-com:xslt"><head><META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8" /><META NAME="save" CONTENT="history" /><title>Token Class</title><meta name="Language" content="en-us" /><meta name="System.Keywords" content="Token class" /><meta name="System.Keywords" content="Lucene.Net.Analysis.Token class" /><meta name="System.Keywords" content="Token class, about Token class" /><meta name="Microsoft.Help.F1" content="Lucene.Net.Analysis.Token" /><meta name="Microsoft.Help.Id" content="T:Lucene.Net.Analysis.Token" /><meta name="Description" content="A Token is an occurrence of a term from the text of a field. It consists of a term's text, the start and end offset of the term in the text of the field, and a type string." /><meta name="Microsoft.Help.ContentType" content="Reference" /><link rel="stylesheet" type="text/css" href="../styles/Presentation.css" /><link rel="stylesheet" type="text/css" href="ms-help://Hx/HxRuntime/HxLink.css" /><script type="text/javascript" src="../scripts/EventUtilities.js"> </script><script type="text/javascript" src="../scripts/SplitScreen.js"> </script><script type="text/javascript" src="../scripts/Dropdown.js"> </script><script type="text/javascript" src="../scripts/script_manifold.js"> </script><script type="text/javascript" src="../scripts/script_feedBack.js"> </script><script type="text/javascript" src="../scripts/CheckboxMenu.js"> </script><script type="text/javascript" src="../scripts/CommonUtilities.js"> </script><meta name="container" content="Lucene.Net.Analysis" /><meta name="file" content="443b195e-d4ba-187d-6b5d-8af7f0554135" /><meta name="guid" content="443b195e-d4ba-187d-6b5d-8af7f0554135" /><xml><MSHelp:Attr Name="AssetID" Value="T:Lucene.Net.Analysis.Token" 
/><MSHelp:TOCTitle Title="Token Class" /><MSHelp:RLTitle Title="Token Class (Lucene.Net.Analysis)" /><MSHelp:Keyword Index="A" Term="T:Lucene.Net.Analysis.Token" /><MSHelp:Keyword Index="A" Term="frlrfLuceneNetAnalysisTokenClassTopic" /><MSHelp:Keyword Index="K" Term="Token class" /><MSHelp:Keyword Index="K" Term="Lucene.Net.Analysis.Token class" /><MSHelp:Keyword Index="K" Term="Token class, about Token class" /><MSHelp:Keyword Index="F" Term="Lucene.Net.Analysis.Token" /><MSHelp:Attr Name="HelpPriority" Value="2" /><MSHelp:Attr Name="APIType" Value="Managed" /><MSHelp:Attr Name="APILocation" Value="Lucene.Net.dll" /><MSHelp:Attr Name="APIName" Value="Lucene.Net.Analysis.Token" /><MSHelp:Attr Name="DevLang" Value="CSharp" /><MSHelp:Attr Name="DevLang" Value="VB" /><MSHelp:Attr Name="Locale" Value="en-us" /><MSHelp:Attr Name="TopicType" Value="kbSyntax" /><MSHelp:Attr Name="TopicType" Value="apiref" /><MSHelp:Attr Name="Abstract" Value="A Token is an occurrence of a term from the text of a field. It consists of a term's text, the start and end offset of the term in the text of the field, and a type string. The start and end offse ..." 
/><MSHelp:Attr Name="AssemblyVersion" Value="2.9.4.1" /></xml><link type="text/css" rel="stylesheet" href="../styles/highlight.css" /><script type="text/javascript" src="../scripts/highlight.js"> </script></head><body><input type="hidden" id="userDataCache" class="userDataStyle" /><input type="hidden" id="hiddenScrollOffset" /><img id="collapseImage" style="display:none; height:0; width:0;" src="../icons/collapse_all.gif" alt="Collapse image" title="Collapse image" /><img id="expandImage" style="display:none; height:0; width:0;" src="../icons/expand_all.gif" alt="Expand Image" title="Expand Image" /><img id="collapseAllImage" style="display:none; height:0; width:0;" src="../icons/collapse_all.gif" /><img id="expandAllImage" style="display:none; height:0; width:0;" src="../icons/expand_all.gif" /><img id="dropDownImage" style="display:none; height:0; width:0;" src="../icons/dropdown.gif" /><img id="dropDownHoverImage" style="display:none; height:0; width:0;" src="../icons/dropdownHover.gif" /><img id="copyImage" style="display:none; height:0; width:0;" src="../icons/copycode.gif" alt="Copy image" title="Copy image" /><img id="copyHoverImage" style="display:none; height:0; width:0;" src="../icons/copycodeHighlight.gif" alt="CopyHover image" title="CopyHover image" /><div id="header"><table id="topTable" cellspacing="0" cellpadding="0"><tr><td><span onclick="ExpandCollapseAll(toggleAllImage)" style="cursor:default;" onkeypress="ExpandCollapseAll_CheckKey(toggleAllImage, event)" tabindex="0"><img ID="toggleAllImage" class="toggleAll" src="../icons/collapse_all.gif" /> <label id="collapseAllLabel" for="toggleAllImage" style="display: none;">Collapse All</label><label id="expandAllLabel" for="toggleAllImage" style="display: none;">Expand All</label> </span><span>    </span><span id="devlangsDropdown" class="filter" tabindex="0"><img id="devlangsDropdownImage" src="../icons/dropdown.gif" /> <label id="devlangsMenuAllLabel" for="devlangsDropdownImage" style="display: 
none;"><nobr>Code: All </nobr></label><label id="devlangsMenuMultipleLabel" for="devlangsDropdownImage" style="display: none;"><nobr>Code: Multiple </nobr></label><label id="devlangsMenuCSharpLabel" for="devlangsDropdownImage" style="display: none;"><nobr>Code: C# </nobr></label><label id="devlangsMenuVisualBasicLabel" for="devlangsDropdownImage" style="display: none;"><nobr>Code: Visual Basic </nobr></label><label id="devlangsMenuManagedCPlusPlusLabel" for="devlangsDropdownImage" style="display: none;"><nobr>Code: Visual C++ </nobr></label></span></td></tr></table><div id="devlangsMenu"><input id="CSharpCheckbox" type="checkbox" data="CSharp,cs,'persist'" value="on" onClick="SetLanguage(this)" /><label class="checkboxLabel" for="CSharpCheckbox">C#</label><br /><input id="VisualBasicCheckbox" type="checkbox" data="VisualBasic,vb,'persist'" value="on" onClick="SetLanguage(this)" /><label class="checkboxLabel" for="VisualBasicCheckbox">Visual Basic</label><br /><input id="ManagedCPlusPlusCheckbox" type="checkbox" data="ManagedCPlusPlus,cpp,'persist'" value="on" onClick="SetLanguage(this)" /><label class="checkboxLabel" for="ManagedCPlusPlusCheckbox">Visual C++</label><br /></div><table id="bottomTable" cellpadding="0" cellspacing="0"><tr id="headerTableRow1"><td align="left"><span id="runningHeaderText">Lucene.Net Class Libraries</span></td></tr><tr id="headerTableRow2"><td align="left"><span id="nsrTitle">Token Class</span></td></tr><tr id="headerTableRow3"><td align="left"><a href="4c5b4c37-d072-000c-5ae0-07b3594658ff.htm">Members</a> <a href="#seeAlsoToggle" onclick="OpenSection(seeAlsoToggle)">See Also</a> <span id="headfeedbackarea" class="feedbackhead"><a href="javascript:SubmitFeedback('','Lucene.Net Class 
Libraries','','','','%0\dYour%20feedback%20is%20used%20to%20improve%20the%20documentation%20and%20the%20product.%20Your%20e-mail%20address%20will%20not%20be%20used%20for%20any%20other%20purpose%20and%20is%20disposed%20of%20after%20the%20issue%20you%20report%20is%20resolved.%20%20While%20working%20to%20resolve%20the%20issue%20that%20you%20report,%20you%20may%20be%20contacted%20via%20e-mail%20to%20get%20further%20details%20or%20clarification%20on%20the%20feedback%20you%20sent.%20After%20the%20issue%20you%20report%20has%20been%20addressed,%20you%20may%20receive%20an%20e-mail%20to%20let%20you%20know%20that%20your%20feedback%20has%20been%20addressed.%0\A%0\d','Customer%20Feedback');">Send Feedback</a></span></td></tr></table><table id="gradientTable"><tr><td class="nsrBottom" background="../icons/gradient.gif" /></tr></table></div><div id="mainSection"><div id="mainBody"><div id="allHistory" class="saveHistory" onsave="saveAll()" onload="loadAll()" /><span style="color: DarkGray"></span><div class="summary">A Token is an occurrence of a term from the text of a field. It consists of
a term's text, the start and end offset of the term in the text of the field,
and a type string.
<p />
The start and end offsets permit applications to re-associate a token with
its source text, e.g., to display highlighted query terms in a document
browser, or to show matching text fragments in a <abbr title="KeyWord In Context">KWIC</abbr> display, etc.
<p />
The type is a string, assigned by a lexical analyzer
(a.k.a. tokenizer), naming the lexical or syntactic class that the token
belongs to. For example, an end-of-sentence marker token might be implemented
with type "eos". The default token type is "word".
<p />
A Token can optionally have metadata (a.k.a. Payload) in the form of a variable
length byte array. Use <code>TermPositions.GetPayloadLength()</code> and
<code>TermPositions.GetPayload(byte[], int)</code> to retrieve the payloads from the index.
</div><div class="summary"><br /><br /></div><div class="summary"><p /><b>NOTE:</b> As of 2.9, Token implements all <code>Attribute</code> interfaces
that are part of core Lucene and can be found in the <code>tokenattributes</code> subpackage.
Even though it is not necessary to use Token anymore, with the new TokenStream API it can
be used as a convenience class that implements all <code>Attribute</code>s, which is especially useful
to easily switch from the old to the new TokenStream API.
</div><div class="summary"><br /><br /><p /><b>NOTE:</b> As of 2.3, Token stores the term text
internally as a malleable char[] termBuffer instead of
String termText. The indexing code and core tokenizers
have been changed to re-use a single Token instance, changing
its buffer and other fields in-place as the Token is
processed. This provides substantially better indexing
performance as it saves the GC cost of new'ing a Token and
String for every term. The APIs that accept String
termText are still available but a warning about the
associated performance cost has been added (below). The
<code>TermText()</code> method has been deprecated.<p /></div><div class="summary"><p />Tokenizers and TokenFilters should try to re-use a Token instance when
possible for best performance, by implementing the
<code>TokenStream.IncrementToken()</code> API.
Failing that, to create a new Token you should first use
one of the constructors that starts with null text. To load
the token from a char[] use <code>SetTermBuffer(char[], int, int)</code>.
To load from a String use <code>SetTermBuffer(String)</code> or <code>SetTermBuffer(String, int, int)</code>.
Alternatively, you can get the Token's termBuffer by calling either <code>TermBuffer()</code>,
if you know that your text is shorter than the capacity of the termBuffer,
or <code>ResizeTermBuffer(int)</code>, if there is any possibility
that you may need to grow the buffer. Fill in the characters of your term into this
buffer, with <code>String.getChars(int, int, char[], int)</code> if loading from a string,
or with <code>System.arraycopy(Object, int, Object, int, int)</code>, and finally call <code>SetTermLength(int)</code> to
set the length of the term text. See <a target="_top" href="https://issues.apache.org/jira/browse/LUCENE-969">LUCENE-969</a>
for details.<p /><p />Typical Token reuse patterns:
<ul><li> Copying text from a string (type is reset to <code>DEFAULT_TYPE</code> if not
specified):<br /><pre>
return reusableToken.reinit(string, startOffset, endOffset[, type]);
</pre></li><li> Copying some text from a string (type is reset to <code>DEFAULT_TYPE</code>
if not specified):<br /><pre>
return reusableToken.reinit(string, 0, string.length(), startOffset, endOffset[, type]);
</pre></li><li> Copying text from a char[] buffer (type is reset to <code>DEFAULT_TYPE</code>
if not specified):<br /><pre>
return reusableToken.reinit(buffer, 0, buffer.length, startOffset, endOffset[, type]);
</pre></li><li> Copying some text from a char[] buffer (type is reset to
<code>DEFAULT_TYPE</code> if not specified):<br /><pre>
return reusableToken.reinit(buffer, start, end - start, startOffset, endOffset[, type]);
</pre></li><li> Copying from one Token to another (type is reset to
<code>DEFAULT_TYPE</code> if not specified):<br /><pre>
return reusableToken.reinit(source.termBuffer(), 0, source.termLength(), source.startOffset(), source.endOffset()[, source.type()]);
</pre></li></ul>
A few things to note:
<ul><li>clear() initializes all of the fields to default values. This is a change from Lucene 2.4, but should affect no one.</li><li>Because <code>TokenStreams</code> can be chained, one cannot assume that the <code>Token's</code> current type is correct.</li><li>The startOffset and endOffset represent the start and end offsets in the
source text, so be careful in adjusting them.</li><li>When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again.</li></ul><p /></div><p /><b>Namespace:</b> <a href="b449a860-f0af-3961-ccd7-4e000f2f2811.htm">Lucene.Net.Analysis</a><br /><b>Assembly:</b> <span sdata="assembly">Lucene.Net</span> (in Lucene.Net.dll) Version: 2.9.4.1<h1 class="heading"><span onclick="ExpandCollapse(syntaxToggle)" style="cursor:default;" onkeypress="ExpandCollapse_CheckKey(syntaxToggle, event)" tabindex="0"><img id="syntaxToggle" class="toggle" name="toggleSwitch" src="../icons/collapse_all.gif" />Syntax</span></h1><div id="syntaxSection" class="section" name="collapseableSection" style=""><div id="syntaxCodeBlocks" class="code"><span codeLanguage="CSharp"><table><tr><th>C#</th></tr><tr><td><pre xml:space="preserve">[<a href="http://msdn2.microsoft.com/en-us/library/bcfsa90a" target="_blank">SerializableAttribute</a>]
<span class="keyword">public</span> <span class="keyword">class</span> <span class="identifier">Token</span> : <a href="5be1c034-4241-eedc-bb26-f61f8eaa18f7.htm">AttributeImpl</a>, <a href="http://msdn2.microsoft.com/en-us/library/hdf3zaf2" target="_blank">ICloneable</a>,
<a href="1cc64e78-a135-673a-693f-a4c394e1e0a7.htm">TermAttribute</a>, <a href="4d94dfde-27cb-82cd-01f0-6a861b7b261c.htm">TypeAttribute</a>, <a href="61267fe9-198d-ed16-26ae-32a056f003a9.htm">PositionIncrementAttribute</a>, <a href="c1053f49-09ee-22ff-9c40-89a9639b982b.htm">FlagsAttribute</a>, <a href="98427d3a-a6ec-d524-8377-9247caf85fd3.htm">OffsetAttribute</a>,
<a href="1d0e481a-2eb3-39f2-3a28-4f32c180c76b.htm">PayloadAttribute</a>, <a href="57230ee6-d042-f90f-169e-d598df2cc692.htm">Attribute</a></pre></td></tr></table></span><span codeLanguage="VisualBasicDeclaration"><table><tr><th>Visual Basic</th></tr><tr><td><pre xml:space="preserve">&lt;<a href="http://msdn2.microsoft.com/en-us/library/bcfsa90a" target="_blank">SerializableAttribute</a>&gt; _
<span class="keyword">Public</span> <span class="keyword">Class</span> <span class="identifier">Token</span> _
<span class="keyword">Inherits</span> <a href="5be1c034-4241-eedc-bb26-f61f8eaa18f7.htm">AttributeImpl</a> _
<span class="keyword">Implements</span> <a href="http://msdn2.microsoft.com/en-us/library/hdf3zaf2" target="_blank">ICloneable</a>, <a href="1cc64e78-a135-673a-693f-a4c394e1e0a7.htm">TermAttribute</a>, <a href="4d94dfde-27cb-82cd-01f0-6a861b7b261c.htm">TypeAttribute</a>, <a href="61267fe9-198d-ed16-26ae-32a056f003a9.htm">PositionIncrementAttribute</a>, _
<a href="c1053f49-09ee-22ff-9c40-89a9639b982b.htm">FlagsAttribute</a>, <a href="98427d3a-a6ec-d524-8377-9247caf85fd3.htm">OffsetAttribute</a>, <a href="1d0e481a-2eb3-39f2-3a28-4f32c180c76b.htm">PayloadAttribute</a>, <a href="57230ee6-d042-f90f-169e-d598df2cc692.htm">Attribute</a></pre></td></tr></table></span><span codeLanguage="ManagedCPlusPlus"><table><tr><th>Visual C++</th></tr><tr><td><pre xml:space="preserve">[<a href="http://msdn2.microsoft.com/en-us/library/bcfsa90a" target="_blank">SerializableAttribute</a>]
<span class="keyword">public</span> <span class="keyword">ref class</span> <span class="identifier">Token</span> : <span class="keyword">public</span> <a href="5be1c034-4241-eedc-bb26-f61f8eaa18f7.htm">AttributeImpl</a>,
<a href="http://msdn2.microsoft.com/en-us/library/hdf3zaf2" target="_blank">ICloneable</a>, <a href="1cc64e78-a135-673a-693f-a4c394e1e0a7.htm">TermAttribute</a>, <a href="4d94dfde-27cb-82cd-01f0-6a861b7b261c.htm">TypeAttribute</a>, <a href="61267fe9-198d-ed16-26ae-32a056f003a9.htm">PositionIncrementAttribute</a>, <a href="c1053f49-09ee-22ff-9c40-89a9639b982b.htm">FlagsAttribute</a>,
<a href="98427d3a-a6ec-d524-8377-9247caf85fd3.htm">OffsetAttribute</a>, <a href="1d0e481a-2eb3-39f2-3a28-4f32c180c76b.htm">PayloadAttribute</a>, <a href="57230ee6-d042-f90f-169e-d598df2cc692.htm">Attribute</a></pre></td></tr></table></span></div></div><h1 class="heading"><span onclick="ExpandCollapse(familyToggle)" style="cursor:default;" onkeypress="ExpandCollapse_CheckKey(familyToggle, event)" tabindex="0"><img id="familyToggle" class="toggle" name="toggleSwitch" src="../icons/collapse_all.gif" />Inheritance Hierarchy</span></h1><div id="familySection" class="section" name="collapseableSection" style=""><a href="http://msdn2.microsoft.com/en-us/library/e5kfa45b" target="_blank">System<span class="languageSpecificText"><span class="cs">.</span><span class="vb">.</span><span class="cpp">::</span><span class="nu">.</span><span class="fs">.</span></span>Object</a><br />  <a href="5be1c034-4241-eedc-bb26-f61f8eaa18f7.htm">Lucene.Net.Util<span class="languageSpecificText"><span class="cs">.</span><span class="vb">.</span><span class="cpp">::</span><span class="nu">.</span><span class="fs">.</span></span>AttributeImpl</a><br />    <span class="selflink">Lucene.Net.Analysis<span class="languageSpecificText"><span class="cs">.</span><span class="vb">.</span><span class="cpp">::</span><span class="nu">.</span><span class="fs">.</span></span>Token</span><br /></div><h1 class="heading"><span onclick="ExpandCollapse(seeAlsoToggle)" style="cursor:default;" onkeypress="ExpandCollapse_CheckKey(seeAlsoToggle, event)" tabindex="0"><img id="seeAlsoToggle" class="toggle" name="toggleSwitch" src="../icons/collapse_all.gif" />See Also</span></h1><div id="seeAlsoSection" class="section" name="collapseableSection" style=""><div class="seeAlsoStyle"><a href="4c5b4c37-d072-000c-5ae0-07b3594658ff.htm">Token Members</a></div><div class="seeAlsoStyle"><a href="b449a860-f0af-3961-ccd7-4e000f2f2811.htm">Lucene.Net.Analysis Namespace</a></div><div class="seeAlsoStyle"><a 
href="0d8539fc-7d2e-c827-3b52-74be7bee88be.htm">Lucene.Net.Index<span class="languageSpecificText"><span class="cs">.</span><span class="vb">.</span><span class="cpp">::</span><span class="nu">.</span><span class="fs">.</span></span>Payload</a></div></div></div><div id="footer"><div class="footerLine"><img width="100%" height="3px" src="../icons/footer.gif" alt="Footer image" title="Footer image" /></div><A NAME="feedback"></A><span id="fb" class="feedbackcss"></span></div></div></body></html>