| <?xml version="1.0" encoding="UTF-8"?> |
| <!-- |
| Licensed to the Apache Software Foundation (ASF) under one |
| or more contributor license agreements. See the NOTICE file |
| distributed with this work for additional information |
| regarding copyright ownership. The ASF licenses this file |
| to you under the Apache License, Version 2.0 (the |
| "License"); you may not use this file except in compliance |
| with the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, |
| software distributed under the License is distributed on an |
| "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| KIND, either express or implied. See the License for the |
| specific language governing permissions and limitations |
| under the License. |
| --> |
| <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier"> |
| <frameworkImplementation>org.apache.uima.java</frameworkImplementation> <!-- Targets the UIMA Java framework implementation. --> |
| <primitive>true</primitive> <!-- Primitive engine: the single annotator class named on the next line does the work; this descriptor lists no delegate engines. --> |
| <annotatorImplementationName>org.apache.uima.examples.tagger.HMMModelTrainer</annotatorImplementationName> <!-- Fully qualified name of the Java class that implements the trainer. --> |
| <analysisEngineMetaData> |
| <name>HMMModelTrainer</name> |
| <description>This analysis engine trains an N-gram model for the HMM tagger. It uses a training corpus as reference. This corpus must contain annotations on words with an attribute corresponding to the POS value to be learned. |
| |
| The configuration of this analysis engine is done through several parameters: |
| &lt;ul&gt; |
| &lt;li&gt;View: - the view from which the tokens will be extracted&lt;/li&gt; |
| &lt;li&gt;ModelExportFile: - the path where the model will be written&lt;/li&gt; |
| &lt;li&gt;FeaturePathPOS: - feature path to the value of the POS to be learned. The annotation should exactly cover a "word".&lt;/li&gt; |
| &lt;/ul&gt; |
| |
| &lt;b&gt;BEWARE: this analysis engine does not allow multiple deployment!&lt;/b&gt; |
| |
| &lt;i&gt;NB: at the moment, both bigram and trigram statistics are saved in one model file.&lt;/i&gt;</description> |
| <version>1.0</version> |
| <vendor/> |
| <configurationParameters> <!-- Declares the three parameters this engine accepts. Each one is a mandatory, single-valued String; values are assigned in configurationParameterSettings. --> |
| <configurationParameter> <!-- View: selects the view the tokens are read from. --> |
| <name>View</name> |
| <description>The view from which the tokens will be extracted.</description> |
| <type>String</type> |
| <multiValued>false</multiValued> |
| <mandatory>true</mandatory> |
| </configurationParameter> |
| <configurationParameter> <!-- ModelExportFile: output location for the trained model. --> |
| <name>ModelExportFile</name> |
| <description>The path where the model will be written.</description> |
| <type>String</type> |
| <multiValued>false</multiValued> |
| <mandatory>true</mandatory> |
| </configurationParameter> |
| <configurationParameter> <!-- FeaturePathPOS: locates the POS value on each word annotation of the training corpus. --> |
| <name>FeaturePathPOS</name> |
| <description>Feature path to the value of the POS to be learnt. The annotation should exactly cover a "word".</description> |
| <type>String</type> |
| <multiValued>false</multiValued> |
| <mandatory>true</mandatory> |
| </configurationParameter> |
| </configurationParameters> |
| <configurationParameterSettings> <!-- Assigns a value to each of the three mandatory parameters declared in configurationParameters. --> |
| <nameValuePair> <!-- View is set to _InitialView. NOTE(review): presumably the name of UIMA's default view; confirm. --> |
| <name>View</name> |
| <value> |
| <string>_InitialView</string> |
| </value> |
| </nameValuePair> |
| <nameValuePair> <!-- ModelExportFile is a bare file name. NOTE(review): the directory it resolves against is not stated in this descriptor; check HMMModelTrainer. --> |
| <name>ModelExportFile</name> |
| <value> |
| <string>hmmtagger_model.dat</string> |
| </value> |
| </nameValuePair> |
| <nameValuePair> <!-- FeaturePathPOS appears to use the form typeName:featureName. NOTE(review): org.apache.uima.TokenAnnotation is not declared in this descriptor's typeSystemDescription; confirm it is supplied elsewhere. --> |
| <name>FeaturePathPOS</name> |
| <value> |
| <string>org.apache.uima.TokenAnnotation:posTag</string> |
| </value> |
| </nameValuePair> |
| </configurationParameterSettings> |
| <typeSystemDescription/> <!-- Empty: this descriptor declares no types of its own. --> |
| <typePriorities/> <!-- Empty: no type priority ordering is defined. --> |
| <fsIndexCollection/> <!-- Empty: no feature structure indexes are defined. --> |
| <capabilities> <!-- One capability entry with no declared inputs, outputs, or supported languages. --> |
| <capability> |
| <inputs/> <!-- NOTE(review): the engine description says it reads word annotations carrying a POS attribute, yet no input type is declared here; confirm this is intentional. --> |
| <outputs/> |
| <languagesSupported/> |
| </capability> |
| </capabilities> |
| <operationalProperties> <!-- Operational flags this engine advertises. --> |
| <modifiesCas>false</modifiesCas> <!-- Declares that the engine does not modify the CAS it is given. --> |
| <multipleDeploymentAllowed>false</multipleDeploymentAllowed> <!-- Consistent with the BEWARE note in the engine description: multiple deployment is not allowed. --> |
| <outputsNewCASes>false</outputsNewCASes> <!-- Declares that the engine produces no new CASes. --> |
| </operationalProperties> |
| </analysisEngineMetaData> |
| <resourceManagerConfiguration/> |
| </analysisEngineDescription> |