Tagger/desc/HmmModelTrainer.xml - uima-sandbox - Git at Google

 <?xml version="1.0" encoding="UTF-8"?>
 <!--
   Licensed to the Apache Software Foundation (ASF) under one
   or more contributor license agreements.  See the NOTICE file
   distributed with this work for additional information
   regarding copyright ownership.  The ASF licenses this file
   to you under the Apache License, Version 2.0 (the
   "License"); you may not use this file except in compliance
   with the License.  You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing,
   software distributed under the License is distributed on an
   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
   KIND, either express or implied.  See the License for the
   specific language governing permissions and limitations
   under the License.
 -->
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <!-- Primitive engine on the org.apache.uima.java framework; the class named below provides the implementation. -->
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <primitive>true</primitive>
     <annotatorImplementationName>org.apache.uima.examples.tagger.HMMModelTrainer</annotatorImplementationName>
     <analysisEngineMetaData>
         <name>HMMModelTrainer</name>
         <!-- The description carries entity-escaped HTML markup. Whitespace inside it is part of the value, so its lines are intentionally not re-indented. -->
         <description>This analysis engine trains an N-gram model for the HMM tagger. It uses a training corpus as reference. This corpus must contain annotations on words with an attribute corresponding of the POS value to be learned.

 The configuration of this analysis engine is done through several parameters:
 &lt;ul&gt;
 &lt;li&gt;View: - the view from which the tokens will be extracted&lt;/li&gt;
 &lt;li&gt;ModelExportFile: - the path where the model will be written&lt;/li&gt;
 &lt;li&gt;FeaturePathPOS: - feature path to the value of the POS to be learned. The annotation should exactly cover a "word".&lt;/li&gt;
 &lt;/ul&gt;

 &lt;b&gt;BEWARE: this analysis engine does not allow multiple deployment !&lt;/b&gt;

 &lt;i&gt;NB. At the moment: both bi and trigram statistics are saved in one model file.&lt;/i&gt;</description>
         <version>1.0</version>
         <vendor/>

         <!-- Parameter declarations: three mandatory, single-valued String parameters. -->
         <configurationParameters>
             <configurationParameter>
                 <name>View</name>
                 <description>The view from which the tokens will be extracted.</description>
                 <type>String</type>
                 <multiValued>false</multiValued>
                 <mandatory>true</mandatory>
             </configurationParameter>
             <configurationParameter>
                 <name>ModelExportFile</name>
                 <description>The path where the model will be written.</description>
                 <type>String</type>
                 <multiValued>false</multiValued>
                 <mandatory>true</mandatory>
             </configurationParameter>
             <configurationParameter>
                 <name>FeaturePathPOS</name>
                 <description>Feature path to the value of the POS to be learnt. The annotation should exactly cover a "word".</description>
                 <type>String</type>
                 <multiValued>false</multiValued>
                 <mandatory>true</mandatory>
             </configurationParameter>
         </configurationParameters>

         <!-- Values assigned to the three parameters declared above. -->
         <configurationParameterSettings>
             <nameValuePair>
                 <name>View</name>
                 <value><string>_InitialView</string></value>
             </nameValuePair>
             <nameValuePair>
                 <name>ModelExportFile</name>
                 <value><string>hmmtagger_model.dat</string></value>
             </nameValuePair>
             <nameValuePair>
                 <name>FeaturePathPOS</name>
                 <value><string>org.apache.uima.TokenAnnotation:posTag</string></value>
             </nameValuePair>
         </configurationParameterSettings>

         <!-- No type system, type priorities, or indexes are declared in this descriptor. -->
         <typeSystemDescription/>
         <typePriorities/>
         <fsIndexCollection/>

         <!-- A single capability with no declared inputs, outputs, or languages. -->
         <capabilities>
             <capability>
                 <inputs/>
                 <outputs/>
                 <languagesSupported/>
             </capability>
         </capabilities>

         <!-- multipleDeploymentAllowed=false matches the BEWARE note in the description above. -->
         <operationalProperties>
             <modifiesCas>false</modifiesCas>
             <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
             <outputsNewCASes>false</outputsNewCASes>
         </operationalProperties>
     </analysisEngineMetaData>
     <resourceManagerConfiguration/>
 </analysisEngineDescription>
	<?xml version="1.0" encoding="UTF-8"?>
	<!--
	Licensed to the Apache Software Foundation (ASF) under one
	or more contributor license agreements. See the NOTICE file
	distributed with this work for additional information
	regarding copyright ownership. The ASF licenses this file
	to you under the Apache License, Version 2.0 (the
	"License"); you may not use this file except in compliance
	with the License. You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

	Unless required by applicable law or agreed to in writing,
	software distributed under the License is distributed on an
	"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	KIND, either express or implied. See the License for the
	specific language governing permissions and limitations
	under the License.
	-->
	<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
		<!-- Primitive engine on the org.apache.uima.java framework; the class named below provides the implementation. -->
		<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
		<primitive>true</primitive>
		<annotatorImplementationName>org.apache.uima.examples.tagger.HMMModelTrainer</annotatorImplementationName>
		<analysisEngineMetaData>
			<name>HMMModelTrainer</name>
			<!--
				The HTML formatting inside the description is entity-escaped so that it stays
				plain text content of this element. Raw tags here would be parsed as child
				elements in the resourceSpecifier namespace instead of as description text.
				Whitespace inside the description is part of the value, so its lines are left
				exactly as they were and are not re-indented.
			-->
			<description>This analysis engine trains an N-gram model for the HMM tagger. It uses a training corpus as reference. This corpus must contain annotations on words with an attribute corresponding of the POS value to be learned.

	The configuration of this analysis engine is done through several parameters:
	&lt;ul&gt;
	&lt;li&gt;View: - the view from which the tokens will be extracted&lt;/li&gt;
	&lt;li&gt;ModelExportFile: - the path where the model will be written&lt;/li&gt;
	&lt;li&gt;FeaturePathPOS: - feature path to the value of the POS to be learned. The annotation should exactly cover a "word".&lt;/li&gt;
	&lt;/ul&gt;

	&lt;b&gt;BEWARE: this analysis engine does not allow multiple deployment !&lt;/b&gt;

	&lt;i&gt;NB. At the moment: both bi and trigram statistics are saved in one model file.&lt;/i&gt;</description>
			<version>1.0</version>
			<vendor/>
			<!-- Parameter declarations: three mandatory, single-valued String parameters. -->
			<configurationParameters>
				<configurationParameter>
					<name>View</name>
					<description>The view from which the tokens will be extracted.</description>
					<type>String</type>
					<multiValued>false</multiValued>
					<mandatory>true</mandatory>
				</configurationParameter>
				<configurationParameter>
					<name>ModelExportFile</name>
					<description>The path where the model will be written.</description>
					<type>String</type>
					<multiValued>false</multiValued>
					<mandatory>true</mandatory>
				</configurationParameter>
				<configurationParameter>
					<name>FeaturePathPOS</name>
					<description>Feature path to the value of the POS to be learnt. The annotation should exactly cover a "word".</description>
					<type>String</type>
					<multiValued>false</multiValued>
					<mandatory>true</mandatory>
				</configurationParameter>
			</configurationParameters>
			<!-- Values assigned to the three parameters declared above. -->
			<configurationParameterSettings>
				<nameValuePair>
					<name>View</name>
					<value>
						<string>_InitialView</string>
					</value>
				</nameValuePair>
				<nameValuePair>
					<name>ModelExportFile</name>
					<value>
						<string>hmmtagger_model.dat</string>
					</value>
				</nameValuePair>
				<nameValuePair>
					<name>FeaturePathPOS</name>
					<value>
						<string>org.apache.uima.TokenAnnotation:posTag</string>
					</value>
				</nameValuePair>
			</configurationParameterSettings>
			<!-- No type system, type priorities, or indexes are declared in this descriptor. -->
			<typeSystemDescription/>
			<typePriorities/>
			<fsIndexCollection/>
			<!-- A single capability with no declared inputs, outputs, or languages. -->
			<capabilities>
				<capability>
					<inputs/>
					<outputs/>
					<languagesSupported/>
				</capability>
			</capabilities>
			<!-- multipleDeploymentAllowed=false matches the BEWARE note in the description above. -->
			<operationalProperties>
				<modifiesCas>false</modifiesCas>
				<multipleDeploymentAllowed>false</multipleDeploymentAllowed>
				<outputsNewCASes>false</outputsNewCASes>
			</operationalProperties>
		</analysisEngineMetaData>
		<resourceManagerConfiguration/>
	</analysisEngineDescription>