<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<!--
  UIMA descriptor for the primitive analysis engine implemented by the class
  org.apache.uima.examples.tagger.HMMModelTrainer.

  It declares three mandatory, single-valued String parameters (View,
  ModelExportFile, FeaturePathPOS), supplies a default value for each of
  them, and marks the engine as not modifying the CAS, not emitting new
  CASes and not allowing multiple deployment.
-->
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <primitive>true</primitive>
  <annotatorImplementationName>org.apache.uima.examples.tagger.HMMModelTrainer</annotatorImplementationName>
  <analysisEngineMetaData>
    <name>HMMModelTrainer</name>
    <!--
      The description carries HTML-style formatting tags (ul, li, b, i).
      They are written with escaped angle brackets so that they stay part of
      the description text instead of being parsed as child elements in the
      descriptor namespace.
      The continuation lines are deliberately flush-left: whitespace inside
      text content is data, so indenting them would change the description.
    -->
    <description>This analysis engine trains an N-gram model for the HMM tagger. It uses a training corpus as reference. This corpus must contain annotations on words with an attribute corresponding of the POS value to be learned.
The configuration of this analysis engine is done through several parameters:
&lt;ul&gt;
&lt;li&gt;View: - the view from which the tokens will be extracted&lt;/li&gt;
&lt;li&gt;ModelExportFile: - the path where the model will be written&lt;/li&gt;
&lt;li&gt;FeaturePathPOS: - feature path to the value of the POS to be learned. The annotation should exactly cover a "word".&lt;/li&gt;
&lt;/ul&gt;
&lt;b&gt;BEWARE: this analysis engine does not allow multiple deployment !&lt;/b&gt;
&lt;i&gt;NB. At the moment: both bi and trigram statistics are saved in one model file.&lt;/i&gt;</description>
    <version>1.0</version>
    <vendor/>

    <!-- Parameter declarations: all three are mandatory, single-valued Strings. -->
    <configurationParameters>
      <configurationParameter>
        <name>View</name>
        <description>The view from which the tokens will be extracted.</description>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>ModelExportFile</name>
        <description>The path where the model will be written.</description>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>FeaturePathPOS</name>
        <description>Feature path to the value of the POS to be learnt. The annotation should exactly cover a "word".</description>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
      </configurationParameter>
    </configurationParameters>

    <!-- Default values for the parameters declared above. -->
    <configurationParameterSettings>
      <nameValuePair>
        <name>View</name>
        <value>
          <string>_InitialView</string>
        </value>
      </nameValuePair>
      <nameValuePair>
        <name>ModelExportFile</name>
        <!--
          NOTE(review): this is a relative path; presumably it is resolved
          against the working directory of the running process. Confirm in
          HMMModelTrainer.
        -->
        <value>
          <string>hmmtagger_model.dat</string>
        </value>
      </nameValuePair>
      <nameValuePair>
        <name>FeaturePathPOS</name>
        <!--
          NOTE(review): the value looks like "type name : feature name".
          The type org.apache.uima.TokenAnnotation is not declared in this
          descriptor (typeSystemDescription below is empty), so it presumably
          has to be supplied by the enclosing pipeline. Confirm.
        -->
        <value>
          <string>org.apache.uima.TokenAnnotation:posTag</string>
        </value>
      </nameValuePair>
    </configurationParameterSettings>

    <!-- No types, type priorities, indexes or capabilities are declared here. -->
    <typeSystemDescription/>
    <typePriorities/>
    <fsIndexCollection/>
    <capabilities>
      <capability>
        <inputs/>
        <outputs/>
        <languagesSupported/>
      </capability>
    </capabilities>

    <!--
      multipleDeploymentAllowed is false, which matches the BEWARE note in
      the description above.
    -->
    <operationalProperties>
      <modifiesCas>false</modifiesCas>
      <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
      <outputsNewCASes>false</outputsNewCASes>
    </operationalProperties>
  </analysisEngineMetaData>
  <resourceManagerConfiguration/>
</analysisEngineDescription>