<!-- blob: 3b349686dd1d8d5fb5f88b929aa8382ace2013c2 [file] [log] [blame] -->
<!DOCTYPE HTML>
<html lang="en">
<head>
<!-- Generated by javadoc (17) -->
<title>TokenizerME (Apache OpenNLP Tools 2.5.6 API)</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta name="description" content="declaration: package: opennlp.tools.tokenize, class: TokenizerME">
<meta name="generator" content="javadoc/ClassWriterImpl">
<link rel="stylesheet" type="text/css" href="../../../stylesheet.css" title="Style">
<link rel="stylesheet" type="text/css" href="../../../script-dir/jquery-ui.min.css" title="Style">
<link rel="stylesheet" type="text/css" href="../../../jquery-ui.overrides.css" title="Style">
<script type="text/javascript" src="../../../script.js"></script>
<script type="text/javascript" src="../../../script-dir/jquery-3.7.1.min.js"></script>
<script type="text/javascript" src="../../../script-dir/jquery-ui.min.js"></script>
</head>
<body class="class-declaration-page">
<script type="text/javascript">var evenRowColor = "even-row-color";
// evenRowColor / oddRowColor hold the CSS class names that the alternating summary-table rows on this page carry.
var oddRowColor = "odd-row-color";
// Class names for an inactive and the active table tab (compare the class attributes of the method-summary tab buttons).
var tableTab = "table-tab";
var activeTableTab = "active-table-tab";
// Relative path from this page up to the documentation root; the stylesheet and script links of this page use the same prefix.
var pathtoroot = "../../../";
// NOTE(review): loadScripts is not defined in this page; presumably it is provided by the external script.js included earlier - confirm.
loadScripts(document, 'script');</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
<div class="flex-box">
<header role="banner" class="flex-header">
<nav role="navigation">
<!-- ========= START OF TOP NAVBAR ======= -->
<div class="top-nav" id="navbar-top">
<div class="skip-nav"><a href="#skip-navbar-top" title="Skip navigation links">Skip navigation links</a></div>
<ul id="navbar-top-firstrow" class="nav-list" title="Navigation">
<li><a href="../../../index.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="nav-bar-cell1-rev">Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../index-all.html">Index</a></li>
<li><a href="../../../help-doc.html#class">Help</a></li>
</ul>
</div>
<div class="sub-nav">
<div>
<ul class="sub-nav-list">
<li>Summary:&nbsp;</li>
<li>Nested&nbsp;|&nbsp;</li>
<li><a href="#field-summary">Field</a>&nbsp;|&nbsp;</li>
<li><a href="#constructor-summary">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method-summary">Method</a></li>
</ul>
<ul class="sub-nav-list">
<li>Detail:&nbsp;</li>
<li><a href="#field-detail">Field</a>&nbsp;|&nbsp;</li>
<li><a href="#constructor-detail">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method-detail">Method</a></li>
</ul>
</div>
<!-- Search controls: both inputs are disabled in the static markup. -->
<div class="nav-list-search"><label for="search-input">SEARCH:</label>
<input type="text" id="search-input" value="search" disabled>
<input type="reset" id="reset-button" value="reset" disabled>
</div>
</div>
<!-- ========= END OF TOP NAVBAR ========= -->
<span class="skip-nav" id="skip-navbar-top"></span></nav>
</header>
<div class="flex-content">
<main role="main">
<!-- ======== START OF CLASS DATA ======== -->
<div class="header">
<div class="sub-title"><span class="package-label-in-type">Package</span>&nbsp;<a href="package-summary.html">opennlp.tools.tokenize</a></div>
<h1 title="Class TokenizerME" class="title">Class TokenizerME</h1>
</div>
<div class="inheritance" title="Inheritance Tree"><a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Object.html" title="class or interface in java.lang" class="external-link">java.lang.Object</a>
<div class="inheritance">opennlp.tools.tokenize.TokenizerME</div>
</div>
<section class="class-description" id="class-description">
<dl class="notes">
<dt>All Implemented Interfaces:</dt>
<dd><code><a href="../ml/Probabilistic.html" title="interface in opennlp.tools.ml">Probabilistic</a></code>, <code><a href="Tokenizer.html" title="interface in opennlp.tools.tokenize">Tokenizer</a></code></dd>
</dl>
<hr>
<div class="type-signature"><span class="modifiers">public class </span><span class="element-name type-name-label">TokenizerME</span>
<span class="extends-implements">extends <a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Object.html" title="class or interface in java.lang" class="external-link">Object</a>
implements <a href="../ml/Probabilistic.html" title="interface in opennlp.tools.ml">Probabilistic</a></span></div>
<div class="block">A <a href="Tokenizer.html" title="interface in opennlp.tools.tokenize"><code>Tokenizer</code></a> for converting raw text into separated tokens. It uses
Maximum Entropy to make its decisions. The features are loosely
based on Jeff Reynar's UPenn thesis "Topic Segmentation:
Algorithms and Applications", which is available from his
homepage: <a href="http://www.cis.upenn.edu/~jcreynar">http://www.cis.upenn.edu/~jcreynar</a>.
<p>
This implementation needs a statistical model to tokenize a text which reproduces
the tokenization observed in the training data used to create the model.
The <a href="TokenizerModel.html" title="class in opennlp.tools.tokenize"><code>TokenizerModel</code></a> class encapsulates that model and provides
methods to create it from the binary representation.
<p>
A tokenizer instance is not thread-safe. For each thread, one tokenizer
must be instantiated which can share one <a href="TokenizerModel.html" title="class in opennlp.tools.tokenize"><code>TokenizerModel</code></a> instance
to save memory.
<p>
To train a new model, the <a href="#train(opennlp.tools.util.ObjectStream,opennlp.tools.tokenize.TokenizerFactory,opennlp.tools.util.TrainingParameters)"><code>train(ObjectStream, TokenizerFactory, TrainingParameters)</code></a> method
can be used.
<p>
Sample usage:
<p>
<code>
InputStream modelIn;<br>
<br>
...<br>
<br>
TokenizerModel model = new TokenizerModel(modelIn);<br>
<br>
Tokenizer tokenizer = new TokenizerME(model);<br>
<br>
String tokens[] = tokenizer.tokenize("A sentence to be tokenized.");
</code></div>
<dl class="notes">
<dt>See Also:</dt>
<dd>
<ul class="see-list">
<li><a href="Tokenizer.html" title="interface in opennlp.tools.tokenize"><code>Tokenizer</code></a></li>
<li><a href="TokenizerModel.html" title="class in opennlp.tools.tokenize"><code>TokenizerModel</code></a></li>
<li><a href="TokenSample.html" title="class in opennlp.tools.tokenize"><code>TokenSample</code></a></li>
<li><a href="../ml/Probabilistic.html" title="interface in opennlp.tools.ml"><code>Probabilistic</code></a></li>
</ul>
</dd>
</dl>
</section>
<section class="summary">
<ul class="summary-list">
<!-- =========== FIELD SUMMARY =========== -->
<li>
<section class="field-summary" id="field-summary">
<h2>Field Summary</h2>
<div class="caption"><span>Fields</span></div>
<div class="summary-table three-column-summary">
<div class="table-header col-first">Modifier and Type</div>
<div class="table-header col-second">Field</div>
<div class="table-header col-last">Description</div>
<div class="col-first even-row-color"><code>static final <a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/String.html" title="class or interface in java.lang" class="external-link">String</a></code></div>
<div class="col-second even-row-color"><code><a href="#NO_SPLIT" class="member-name-link">NO_SPLIT</a></code></div>
<div class="col-last even-row-color">
<div class="block">Constant indicates no token split.</div>
</div>
<div class="col-first odd-row-color"><code>static final <a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/String.html" title="class or interface in java.lang" class="external-link">String</a></code></div>
<div class="col-second odd-row-color"><code><a href="#SPLIT" class="member-name-link">SPLIT</a></code></div>
<div class="col-last odd-row-color">
<div class="block">Constant indicates a token split.</div>
</div>
</div>
</section>
</li>
<!-- ======== CONSTRUCTOR SUMMARY ======== -->
<li>
<section class="constructor-summary" id="constructor-summary">
<h2>Constructor Summary</h2>
<div class="caption"><span>Constructors</span></div>
<div class="summary-table two-column-summary">
<div class="table-header col-first">Constructor</div>
<div class="table-header col-last">Description</div>
<div class="col-constructor-name even-row-color"><code><a href="#%3Cinit%3E(java.lang.String)" class="member-name-link">TokenizerME</a><wbr>(<a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/String.html" title="class or interface in java.lang" class="external-link">String</a>&nbsp;language)</code></div>
<div class="col-last even-row-color">
<div class="block">Initializes a <a href="TokenizerME.html" title="class in opennlp.tools.tokenize"><code>TokenizerME</code></a> by downloading a default model.</div>
</div>
<div class="col-constructor-name odd-row-color"><code><a href="#%3Cinit%3E(opennlp.tools.tokenize.TokenizerModel)" class="member-name-link">TokenizerME</a><wbr>(<a href="TokenizerModel.html" title="class in opennlp.tools.tokenize">TokenizerModel</a>&nbsp;model)</code></div>
<div class="col-last odd-row-color">
<div class="block">Instantiates a <a href="TokenizerME.html" title="class in opennlp.tools.tokenize"><code>TokenizerME</code></a> with an existing <a href="TokenizerModel.html" title="class in opennlp.tools.tokenize"><code>TokenizerModel</code></a>.</div>
</div>
<div class="col-constructor-name even-row-color"><code><a href="#%3Cinit%3E(opennlp.tools.tokenize.TokenizerModel,opennlp.tools.dictionary.Dictionary)" class="member-name-link">TokenizerME</a><wbr>(<a href="TokenizerModel.html" title="class in opennlp.tools.tokenize">TokenizerModel</a>&nbsp;model,
<a href="../dictionary/Dictionary.html" title="class in opennlp.tools.dictionary">Dictionary</a>&nbsp;abbDict)</code></div>
<div class="col-last even-row-color">
<div class="block">Instantiates a <a href="TokenizerME.html" title="class in opennlp.tools.tokenize"><code>TokenizerME</code></a> with an existing <a href="TokenizerModel.html" title="class in opennlp.tools.tokenize"><code>TokenizerModel</code></a>.</div>
</div>
</div>
</section>
</li>
<!-- ========== METHOD SUMMARY =========== -->
<li>
<section class="method-summary" id="method-summary">
<h2>Method Summary</h2>
<div id="method-summary-table">
<div class="table-tabs" role="tablist" aria-orientation="horizontal"><button id="method-summary-table-tab0" role="tab" aria-selected="true" aria-controls="method-summary-table.tabpanel" tabindex="0" onkeydown="switchTab(event)" onclick="show('method-summary-table', 'method-summary-table', 3)" class="active-table-tab">All Methods</button><button id="method-summary-table-tab1" role="tab" aria-selected="false" aria-controls="method-summary-table.tabpanel" tabindex="-1" onkeydown="switchTab(event)" onclick="show('method-summary-table', 'method-summary-table-tab1', 3)" class="table-tab">Static Methods</button><button id="method-summary-table-tab2" role="tab" aria-selected="false" aria-controls="method-summary-table.tabpanel" tabindex="-1" onkeydown="switchTab(event)" onclick="show('method-summary-table', 'method-summary-table-tab2', 3)" class="table-tab">Instance Methods</button><button id="method-summary-table-tab4" role="tab" aria-selected="false" aria-controls="method-summary-table.tabpanel" tabindex="-1" onkeydown="switchTab(event)" onclick="show('method-summary-table', 'method-summary-table-tab4', 3)" class="table-tab">Concrete Methods</button><button id="method-summary-table-tab6" role="tab" aria-selected="false" aria-controls="method-summary-table.tabpanel" tabindex="-1" onkeydown="switchTab(event)" onclick="show('method-summary-table', 'method-summary-table-tab6', 3)" class="table-tab">Deprecated Methods</button></div>
<div id="method-summary-table.tabpanel" role="tabpanel" aria-labelledby="method-summary-table-tab0">
<div class="summary-table three-column-summary">
<div class="table-header col-first">Modifier and Type</div>
<div class="table-header col-second">Method</div>
<div class="table-header col-last">Description</div>
<div class="col-first even-row-color method-summary-table method-summary-table-tab2 method-summary-table-tab4 method-summary-table-tab6"><code>double[]</code></div>
<div class="col-second even-row-color method-summary-table method-summary-table-tab2 method-summary-table-tab4 method-summary-table-tab6"><code><a href="#getTokenProbabilities()" class="member-name-link">getTokenProbabilities</a>()</code></div>
<div class="col-last even-row-color method-summary-table method-summary-table-tab2 method-summary-table-tab4 method-summary-table-tab6">
<div class="block"><span class="deprecated-label">Deprecated, for removal: This API element is subject to removal in a future version.</span>
<div class="deprecation-comment">Use <a href="#probs()"><code>probs()</code></a> instead.</div>
</div>
</div>
<div class="col-first odd-row-color method-summary-table method-summary-table-tab2 method-summary-table-tab4"><code>double[]</code></div>
<div class="col-second odd-row-color method-summary-table method-summary-table-tab2 method-summary-table-tab4"><code><a href="#probs()" class="member-name-link">probs</a>()</code></div>
<div class="col-last odd-row-color method-summary-table method-summary-table-tab2 method-summary-table-tab4">
<div class="block">Retrieves the probabilities of the last decoded sequence.</div>
</div>
<div class="col-first even-row-color method-summary-table method-summary-table-tab2 method-summary-table-tab4"><code>void</code></div>
<div class="col-second even-row-color method-summary-table method-summary-table-tab2 method-summary-table-tab4"><code><a href="#setKeepNewLines(boolean)" class="member-name-link">setKeepNewLines</a><wbr>(boolean&nbsp;keepNewLines)</code></div>
<div class="col-last even-row-color method-summary-table method-summary-table-tab2 method-summary-table-tab4">
<div class="block">Switches whether to keep new lines or not.</div>
</div>
<div class="col-first odd-row-color method-summary-table method-summary-table-tab2 method-summary-table-tab4"><code><a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/String.html" title="class or interface in java.lang" class="external-link">String</a>[]</code></div>
<div class="col-second odd-row-color method-summary-table method-summary-table-tab2 method-summary-table-tab4"><code><a href="#tokenize(java.lang.String)" class="member-name-link">tokenize</a><wbr>(<a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/String.html" title="class or interface in java.lang" class="external-link">String</a>&nbsp;s)</code></div>
<div class="col-last odd-row-color method-summary-table method-summary-table-tab2 method-summary-table-tab4">
<div class="block">Splits a string into its atomic parts.</div>
</div>
<div class="col-first even-row-color method-summary-table method-summary-table-tab2 method-summary-table-tab4"><code><a href="../util/Span.html" title="class in opennlp.tools.util">Span</a>[]</code></div>
<div class="col-second even-row-color method-summary-table method-summary-table-tab2 method-summary-table-tab4"><code><a href="#tokenizePos(java.lang.String)" class="member-name-link">tokenizePos</a><wbr>(<a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/String.html" title="class or interface in java.lang" class="external-link">String</a>&nbsp;d)</code></div>
<div class="col-last even-row-color method-summary-table method-summary-table-tab2 method-summary-table-tab4">
<div class="block">Tokenizes the string.</div>
</div>
<div class="col-first odd-row-color method-summary-table method-summary-table-tab1 method-summary-table-tab4"><code>static <a href="TokenizerModel.html" title="class in opennlp.tools.tokenize">TokenizerModel</a></code></div>
<div class="col-second odd-row-color method-summary-table method-summary-table-tab1 method-summary-table-tab4"><code><a href="#train(opennlp.tools.util.ObjectStream,opennlp.tools.tokenize.TokenizerFactory,opennlp.tools.util.TrainingParameters)" class="member-name-link">train</a><wbr>(<a href="../util/ObjectStream.html" title="interface in opennlp.tools.util">ObjectStream</a>&lt;<a href="TokenSample.html" title="class in opennlp.tools.tokenize">TokenSample</a>&gt;&nbsp;samples,
<a href="TokenizerFactory.html" title="class in opennlp.tools.tokenize">TokenizerFactory</a>&nbsp;factory,
<a href="../util/TrainingParameters.html" title="class in opennlp.tools.util">TrainingParameters</a>&nbsp;mlParams)</code></div>
<div class="col-last odd-row-color method-summary-table method-summary-table-tab1 method-summary-table-tab4">
<div class="block">Trains a model for the <a href="TokenizerME.html" title="class in opennlp.tools.tokenize"><code>TokenizerME</code></a>.</div>
</div>
<div class="col-first even-row-color method-summary-table method-summary-table-tab2 method-summary-table-tab4"><code>boolean</code></div>
<div class="col-second even-row-color method-summary-table method-summary-table-tab2 method-summary-table-tab4"><code><a href="#useAlphaNumericOptimization()" class="member-name-link">useAlphaNumericOptimization</a>()</code></div>
<div class="col-last even-row-color method-summary-table method-summary-table-tab2 method-summary-table-tab4">&nbsp;</div>
</div>
</div>
</div>
<div class="inherited-list">
<h3 id="methods-inherited-from-class-java.lang.Object">Methods inherited from class&nbsp;java.lang.<a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Object.html" title="class or interface in java.lang" class="external-link">Object</a></h3>
<code><a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Object.html#equals(java.lang.Object)" title="class or interface in java.lang" class="external-link">equals</a>, <a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Object.html#getClass()" title="class or interface in java.lang" class="external-link">getClass</a>, <a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Object.html#hashCode()" title="class or interface in java.lang" class="external-link">hashCode</a>, <a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Object.html#notify()" title="class or interface in java.lang" class="external-link">notify</a>, <a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Object.html#notifyAll()" title="class or interface in java.lang" class="external-link">notifyAll</a>, <a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Object.html#toString()" title="class or interface in java.lang" class="external-link">toString</a>, <a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Object.html#wait()" title="class or interface in java.lang" class="external-link">wait</a>, <a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Object.html#wait(long)" title="class or interface in java.lang" class="external-link">wait</a>, <a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Object.html#wait(long,int)" title="class or interface in java.lang" class="external-link">wait</a></code></div>
</section>
</li>
</ul>
</section>
<section class="details">
<ul class="details-list">
<!-- ============ FIELD DETAIL =========== -->
<li>
<section class="field-details" id="field-detail">
<h2>Field Details</h2>
<ul class="member-list">
<li>
<section class="detail" id="SPLIT">
<h3>SPLIT</h3>
<div class="member-signature"><span class="modifiers">public static final</span>&nbsp;<span class="return-type"><a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/String.html" title="class or interface in java.lang" class="external-link">String</a></span>&nbsp;<span class="element-name">SPLIT</span></div>
<div class="block">Constant indicates a token split.</div>
<dl class="notes">
<dt>See Also:</dt>
<dd>
<ul class="see-list">
<li><a href="../../../constant-values.html#opennlp.tools.tokenize.TokenizerME.SPLIT">Constant Field Values</a></li>
</ul>
</dd>
</dl>
</section>
</li>
<li>
<section class="detail" id="NO_SPLIT">
<h3>NO_SPLIT</h3>
<div class="member-signature"><span class="modifiers">public static final</span>&nbsp;<span class="return-type"><a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/String.html" title="class or interface in java.lang" class="external-link">String</a></span>&nbsp;<span class="element-name">NO_SPLIT</span></div>
<div class="block">Constant indicates no token split.</div>
<dl class="notes">
<dt>See Also:</dt>
<dd>
<ul class="see-list">
<li><a href="../../../constant-values.html#opennlp.tools.tokenize.TokenizerME.NO_SPLIT">Constant Field Values</a></li>
</ul>
</dd>
</dl>
</section>
</li>
</ul>
</section>
</li>
<!-- ========= CONSTRUCTOR DETAIL ======== -->
<li>
<section class="constructor-details" id="constructor-detail">
<h2>Constructor Details</h2>
<ul class="member-list">
<li>
<section class="detail" id="&lt;init&gt;(java.lang.String)">
<h3>TokenizerME</h3>
<div class="member-signature"><span class="modifiers">public</span>&nbsp;<span class="element-name">TokenizerME</span><wbr><span class="parameters">(<a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/String.html" title="class or interface in java.lang" class="external-link">String</a>&nbsp;language)</span>
throws <span class="exceptions"><a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/io/IOException.html" title="class or interface in java.io" class="external-link">IOException</a></span></div>
<div class="block">Initializes a <a href="TokenizerME.html" title="class in opennlp.tools.tokenize"><code>TokenizerME</code></a> by downloading a default model.</div>
<dl class="notes">
<dt>Parameters:</dt>
<dd><code>language</code> - The language of the tokenizer.</dd>
<dt>Throws:</dt>
<dd><code><a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/io/IOException.html" title="class or interface in java.io" class="external-link">IOException</a></code> - Thrown if the model cannot be downloaded or saved.</dd>
</dl>
</section>
</li>
<li>
<section class="detail" id="&lt;init&gt;(opennlp.tools.tokenize.TokenizerModel)">
<h3>TokenizerME</h3>
<div class="member-signature"><span class="modifiers">public</span>&nbsp;<span class="element-name">TokenizerME</span><wbr><span class="parameters">(<a href="TokenizerModel.html" title="class in opennlp.tools.tokenize">TokenizerModel</a>&nbsp;model)</span></div>
<div class="block">Instantiates a <a href="TokenizerME.html" title="class in opennlp.tools.tokenize"><code>TokenizerME</code></a> with an existing <a href="TokenizerModel.html" title="class in opennlp.tools.tokenize"><code>TokenizerModel</code></a>.</div>
<dl class="notes">
<dt>Parameters:</dt>
<dd><code>model</code> - The <a href="TokenizerModel.html" title="class in opennlp.tools.tokenize"><code>TokenizerModel</code></a> to be used.</dd>
</dl>
</section>
</li>
<li>
<section class="detail" id="&lt;init&gt;(opennlp.tools.tokenize.TokenizerModel,opennlp.tools.dictionary.Dictionary)">
<h3>TokenizerME</h3>
<div class="member-signature"><span class="modifiers">public</span>&nbsp;<span class="element-name">TokenizerME</span><wbr><span class="parameters">(<a href="TokenizerModel.html" title="class in opennlp.tools.tokenize">TokenizerModel</a>&nbsp;model,
<a href="../dictionary/Dictionary.html" title="class in opennlp.tools.dictionary">Dictionary</a>&nbsp;abbDict)</span></div>
<div class="block">Instantiates a <a href="TokenizerME.html" title="class in opennlp.tools.tokenize"><code>TokenizerME</code></a> with an existing <a href="TokenizerModel.html" title="class in opennlp.tools.tokenize"><code>TokenizerModel</code></a>.</div>
<dl class="notes">
<dt>Parameters:</dt>
<dd><code>model</code> - The <a href="TokenizerModel.html" title="class in opennlp.tools.tokenize"><code>TokenizerModel</code></a> to be used.</dd>
<dd><code>abbDict</code> - The <a href="../dictionary/Dictionary.html" title="class in opennlp.tools.dictionary"><code>Dictionary</code></a> to be used. It must fit the language of the <code>model</code>.</dd>
</dl>
</section>
</li>
</ul>
</section>
</li>
<!-- ============ METHOD DETAIL ========== -->
<li>
<section class="method-details" id="method-detail">
<h2>Method Details</h2>
<ul class="member-list">
<li>
<section class="detail" id="probs()">
<h3>probs</h3>
<div class="member-signature"><span class="modifiers">public</span>&nbsp;<span class="return-type">double[]</span>&nbsp;<span class="element-name">probs</span>()</div>
<div class="block">Retrieves the probabilities of the last decoded sequence.
The sequence was determined based on the previous call to <a href="#tokenizePos(java.lang.String)"><code>tokenizePos(String)</code></a>.</div>
<dl class="notes">
<dt>Specified by:</dt>
<dd><code><a href="../ml/Probabilistic.html#probs()">probs</a></code>&nbsp;in interface&nbsp;<code><a href="../ml/Probabilistic.html" title="interface in opennlp.tools.ml">Probabilistic</a></code></dd>
<dt>Returns:</dt>
<dd>An array with the same number of probabilities as tokens were sent to
the computational method when <a href="#tokenizePos(java.lang.String)"><code>tokenizePos(String)</code></a> was last called.
If not applicable an empty array is returned.</dd>
</dl>
</section>
</li>
<li>
<section class="detail" id="getTokenProbabilities()">
<h3>getTokenProbabilities</h3>
<div class="member-signature"><span class="annotations"><a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Deprecated.html" title="class or interface in java.lang" class="external-link">@Deprecated</a>(<a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Deprecated.html#forRemoval()" title="class or interface in java.lang" class="external-link">forRemoval</a>=true,
<a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Deprecated.html#since()" title="class or interface in java.lang" class="external-link">since</a>="2.5.5")
</span><span class="modifiers">public</span>&nbsp;<span class="return-type">double[]</span>&nbsp;<span class="element-name">getTokenProbabilities</span>()</div>
<div class="deprecation-block"><span class="deprecated-label">Deprecated, for removal: This API element is subject to removal in a future version.</span>
<div class="deprecation-comment">Use <a href="#probs()"><code>probs()</code></a> instead.</div>
</div>
<dl class="notes">
<dt>Returns:</dt>
<dd>The probabilities associated with the most recent call to
<a href="#tokenizePos(java.lang.String)"><code>tokenizePos(String)</code></a>.
If not applicable an empty array is returned.</dd>
</dl>
</section>
</li>
<li>
<section class="detail" id="tokenizePos(java.lang.String)">
<h3>tokenizePos</h3>
<div class="member-signature"><span class="modifiers">public</span>&nbsp;<span class="return-type"><a href="../util/Span.html" title="class in opennlp.tools.util">Span</a>[]</span>&nbsp;<span class="element-name">tokenizePos</span><wbr><span class="parameters">(<a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/String.html" title="class or interface in java.lang" class="external-link">String</a>&nbsp;d)</span></div>
<div class="block">Tokenizes the string.</div>
<dl class="notes">
<dt>Specified by:</dt>
<dd><code><a href="Tokenizer.html#tokenizePos(java.lang.String)">tokenizePos</a></code>&nbsp;in interface&nbsp;<code><a href="Tokenizer.html" title="interface in opennlp.tools.tokenize">Tokenizer</a></code></dd>
<dt>Parameters:</dt>
<dd><code>d</code> - The string to be tokenized.</dd>
<dt>Returns:</dt>
<dd>A <a href="../util/Span.html" title="class in opennlp.tools.util"><code>Span</code></a> array containing individual tokens as elements.</dd>
</dl>
</section>
</li>
<li>
<section class="detail" id="train(opennlp.tools.util.ObjectStream,opennlp.tools.tokenize.TokenizerFactory,opennlp.tools.util.TrainingParameters)">
<h3>train</h3>
<div class="member-signature"><span class="modifiers">public static</span>&nbsp;<span class="return-type"><a href="TokenizerModel.html" title="class in opennlp.tools.tokenize">TokenizerModel</a></span>&nbsp;<span class="element-name">train</span><wbr><span class="parameters">(<a href="../util/ObjectStream.html" title="interface in opennlp.tools.util">ObjectStream</a>&lt;<a href="TokenSample.html" title="class in opennlp.tools.tokenize">TokenSample</a>&gt;&nbsp;samples,
<a href="TokenizerFactory.html" title="class in opennlp.tools.tokenize">TokenizerFactory</a>&nbsp;factory,
<a href="../util/TrainingParameters.html" title="class in opennlp.tools.util">TrainingParameters</a>&nbsp;mlParams)</span>
throws <span class="exceptions"><a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/io/IOException.html" title="class or interface in java.io" class="external-link">IOException</a></span></div>
<div class="block">Trains a model for the <a href="TokenizerME.html" title="class in opennlp.tools.tokenize"><code>TokenizerME</code></a>.</div>
<dl class="notes">
<dt>Parameters:</dt>
<dd><code>samples</code> - The samples used for the training.</dd>
<dd><code>factory</code> - A <a href="TokenizerFactory.html" title="class in opennlp.tools.tokenize"><code>TokenizerFactory</code></a> to get resources from.</dd>
<dd><code>mlParams</code> - The machine learning <a href="../util/TrainingParameters.html" title="class in opennlp.tools.util"><code>train parameters</code></a>.</dd>
<dt>Returns:</dt>
<dd>A trained <a href="TokenizerModel.html" title="class in opennlp.tools.tokenize"><code>TokenizerModel</code></a>.</dd>
<dt>Throws:</dt>
<dd><code><a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/io/IOException.html" title="class or interface in java.io" class="external-link">IOException</a></code> - Thrown during IO operations on a temp file which is created
during training. Or if reading from the <a href="../util/ObjectStream.html" title="interface in opennlp.tools.util"><code>ObjectStream</code></a> fails.</dd>
</dl>
</section>
</li>
<li>
<section class="detail" id="useAlphaNumericOptimization()">
<h3>useAlphaNumericOptimization</h3>
<div class="member-signature"><span class="modifiers">public</span>&nbsp;<span class="return-type">boolean</span>&nbsp;<span class="element-name">useAlphaNumericOptimization</span>()</div>
<dl class="notes">
<dt>Returns:</dt>
<dd><code>true</code> if the tokenizer uses alphanumeric optimization, <code>false</code> otherwise.</dd>
</dl>
</section>
</li>
<li>
<section class="detail" id="tokenize(java.lang.String)">
<h3>tokenize</h3>
<div class="member-signature"><span class="modifiers">public</span>&nbsp;<span class="return-type"><a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/String.html" title="class or interface in java.lang" class="external-link">String</a>[]</span>&nbsp;<span class="element-name">tokenize</span><wbr><span class="parameters">(<a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/String.html" title="class or interface in java.lang" class="external-link">String</a>&nbsp;s)</span></div>
<div class="block"><span class="descfrm-type-label">Description copied from interface:&nbsp;<code><a href="Tokenizer.html#tokenize(java.lang.String)">Tokenizer</a></code></span></div>
<div class="block">Splits a string into its atomic parts.</div>
<dl class="notes">
<dt>Specified by:</dt>
<dd><code><a href="Tokenizer.html#tokenize(java.lang.String)">tokenize</a></code>&nbsp;in interface&nbsp;<code><a href="Tokenizer.html" title="interface in opennlp.tools.tokenize">Tokenizer</a></code></dd>
<dt>Parameters:</dt>
<dd><code>s</code> - The string to be tokenized.</dd>
<dt>Returns:</dt>
<dd>The String[] with the individual tokens as the array elements.</dd>
</dl>
</section>
</li>
<li>
<section class="detail" id="setKeepNewLines(boolean)">
<h3>setKeepNewLines</h3>
<div class="member-signature"><span class="modifiers">public</span>&nbsp;<span class="return-type">void</span>&nbsp;<span class="element-name">setKeepNewLines</span><wbr><span class="parameters">(boolean&nbsp;keepNewLines)</span></div>
<div class="block">Switches whether to keep new lines or not.</div>
<dl class="notes">
<dt>Parameters:</dt>
<dd><code>keepNewLines</code> - <code>true</code> if new lines are kept, <code>false</code> otherwise.</dd>
</dl>
</section>
</li>
</ul>
</section>
</li>
</ul>
</section>
<!-- ========= END OF CLASS DATA ========= -->
</main>
<footer role="contentinfo">
<hr>
<p class="legal-copy"><small>Copyright &#169; 2025 <a href="https://www.apache.org/">The Apache Software Foundation</a>. All rights reserved.</small></p>
</footer>
</div>
</div>
</body>
</html>