| <?xml version="1.0" encoding="UTF-8"?> |
| <!-- |
| Licensed to the Apache Software Foundation (ASF) under one |
| or more contributor license agreements. See the NOTICE file |
| distributed with this work for additional information |
| regarding copyright ownership. The ASF licenses this file |
| to you under the Apache License, Version 2.0 (the |
| "License"); you may not use this file except in compliance |
| with the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, |
| software distributed under the License is distributed on an |
| "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| KIND, either express or implied. See the License for the |
| specific language governing permissions and limitations |
| under the License. |
| --> |
| <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier"> |
| <!-- Primitive (non-aggregate) Java analysis engine implemented by the HMMTagger class. --> |
| <frameworkImplementation>org.apache.uima.java</frameworkImplementation> |
| <primitive>true</primitive> |
| <annotatorImplementationName>org.apache.uima.examples.tagger.HMMTagger</annotatorImplementationName> |
| <analysisEngineMetaData> |
| <!-- Human-readable identification of this engine. The description names the annotator after its implementation class (HMMTagger). --> |
| <name>Hidden Markov Model - Part of Speech Tagger</name> |
| <description>A configuration of the HMMTagger annotator that looks for |
| parts of speech of identified tokens within existing |
| Sentence and Token annotations. See also |
| WhitespaceTokenizer.xml.</description> |
| <version>1.0</version> |
| <vendor>The Apache Software Foundation</vendor> |
| <configurationParameters> |
| <!-- Mandatory single-valued integer parameter; this descriptor assigns it the value 3 in configurationParameterSettings. --> |
| <configurationParameter> |
|   <name>NGRAM_SIZE</name> |
|   <description>Size of the n-grams used by the tagger (for example, 3 for trigrams). This parameter is mandatory.</description> |
|   <type>Integer</type> |
|   <multiValued>false</multiValued> |
|   <mandatory>true</mandatory> |
| </configurationParameter> |
| <!-- Optional single-valued string parameter selecting the input view. --> |
| <configurationParameter> |
|   <name>InputView</name> |
|   <description>Name of the parameter for the input view (by default it is set to _InitialView)</description> |
|   <type>String</type> |
|   <multiValued>false</multiValued> |
|   <mandatory>false</mandatory> |
| </configurationParameter> |
| <!-- Optional single-valued string parameter naming the sentence annotation type. --> |
| <configurationParameter> |
|   <name>SentenceType</name> |
|   <description>Sentence annotation type which covers token annotations (by default it is set to org.apache.uima.SentenceAnnotation)</description> |
|   <type>String</type> |
|   <multiValued>false</multiValued> |
|   <mandatory>false</mandatory> |
| </configurationParameter> |
| <!-- Optional single-valued string parameter giving the token type and feature, written as type:feature. --> |
| <configurationParameter> |
|   <name>TokenFeaturePath</name> |
|   <description>Token feature path to set. It should be like "type.name:attribute" (set by default to "org.apache.uima.TokenAnnotation:posTag")</description> |
|   <type>String</type> |
|   <multiValued>false</multiValued> |
|   <mandatory>false</mandatory> |
| </configurationParameter> |
| <!-- Optional single-valued string parameter described as a fallback for the model resource. --> |
| <!-- NOTE(review): the external resource in this descriptor is also named ModelFile, and the Model resource dependency is declared with optional=false; confirm against HMMTagger that this fallback can actually be reached. --> |
| <configurationParameter> |
|   <name>ModelFile</name> |
|   <description>If the resource model file is not set, this parameter can be used. If both are empty, the system will throw an exception.</description> |
|   <type>String</type> |
|   <multiValued>false</multiValued> |
|   <mandatory>false</mandatory> |
| </configurationParameter> |
| </configurationParameters> |
| <!-- Values assigned by this descriptor; only NGRAM_SIZE is set here (to 3). --> |
| <configurationParameterSettings> |
|   <nameValuePair> |
|     <name>NGRAM_SIZE</name> |
|     <value> |
|       <integer>3</integer> |
|     </value> |
|   </nameValuePair> |
| </configurationParameterSettings> |
| <!-- Type system declared by this engine: a token type with a posTag string feature, and a sentence type. --> |
| <typeSystemDescription> |
|   <types> |
|     <!-- Token annotation; its posTag feature holds the part of speech as a string. --> |
|     <typeDescription> |
|       <name>org.apache.uima.TokenAnnotation</name> |
|       <description>Single token annotation</description> |
|       <supertypeName>uima.tcas.Annotation</supertypeName> |
|       <features> |
|         <featureDescription> |
|           <name>posTag</name> |
|           <description>contains part-of-speech of a corresponding token</description> |
|           <rangeTypeName>uima.cas.String</rangeTypeName> |
|         </featureDescription> |
|       </features> |
|     </typeDescription> |
|     <!-- Sentence annotation; declares no features of its own. --> |
|     <typeDescription> |
|       <name>org.apache.uima.SentenceAnnotation</name> |
|       <description>sentence annotation</description> |
|       <supertypeName>uima.tcas.Annotation</supertypeName> |
|     </typeDescription> |
|   </types> |
| </typeSystemDescription> |
| <!-- No type priorities and no feature-structure index definitions are declared by this descriptor. --> |
| <typePriorities/> |
| <fsIndexCollection/> |
| <!-- Declared inputs and outputs of the engine; no language restriction is listed. --> |
| <capabilities> |
|   <capability> |
|     <!-- Inputs: token annotations with their begin/end features, and sentence annotations with all features. --> |
|     <inputs> |
|       <type>org.apache.uima.TokenAnnotation</type> |
|       <type allAnnotatorFeatures="true">org.apache.uima.SentenceAnnotation</type> |
|       <feature>org.apache.uima.TokenAnnotation:end</feature> |
|       <feature>org.apache.uima.TokenAnnotation:begin</feature> |
|     </inputs> |
|     <!-- Outputs: token annotations with the posTag, end and begin features. --> |
|     <outputs> |
|       <type>org.apache.uima.TokenAnnotation</type> |
|       <feature>org.apache.uima.TokenAnnotation:posTag</feature> |
|       <feature>org.apache.uima.TokenAnnotation:end</feature> |
|       <feature>org.apache.uima.TokenAnnotation:begin</feature> |
|     </outputs> |
|     <languagesSupported/> |
|   </capability> |
| </capabilities> |
| <!-- The engine modifies the CAS, allows multiple deployment, and does not output new CASes. --> |
| <operationalProperties> |
|   <modifiesCas>true</modifiesCas> |
|   <multipleDeploymentAllowed>true</multipleDeploymentAllowed> |
|   <outputsNewCASes>false</outputsNewCASes> |
| </operationalProperties> |
| </analysisEngineMetaData> |
| <!-- The engine requires (optional=false) a resource under the key Model that implements IModelResource. --> |
| <externalResourceDependencies> |
|   <externalResourceDependency> |
|     <key>Model</key> |
|     <description>HMM Tagger model file</description> |
|     <interfaceName>org.apache.uima.examples.tagger.IModelResource</interfaceName> |
|     <optional>false</optional> |
|   </externalResourceDependency> |
| </externalResourceDependencies> |
| <!-- Defines the ModelFile resource and binds it to the Model dependency key. --> |
| <resourceManagerConfiguration> |
|   <externalResources> |
|     <!-- Resource backed by english/BrownModel.dat and implemented by ModelResource. --> |
|     <!-- NOTE(review): the file URL is relative; presumably it is resolved against the UIMA data path or classpath. Confirm for the deployment. --> |
|     <externalResource> |
|       <name>ModelFile</name> |
|       <description>HMM Tagger model file</description> |
|       <fileResourceSpecifier> |
|         <fileUrl>file:english/BrownModel.dat</fileUrl> |
|       </fileResourceSpecifier> |
|       <implementationName>org.apache.uima.examples.tagger.ModelResource</implementationName> |
|     </externalResource> |
|   </externalResources> |
|   <externalResourceBindings> |
|     <!-- Binds the dependency key Model to the ModelFile resource defined in this configuration. --> |
|     <externalResourceBinding> |
|       <key>Model</key> |
|       <resourceName>ModelFile</resourceName> |
|     </externalResourceBinding> |
|   </externalResourceBindings> |
| </resourceManagerConfiguration> |
| </analysisEngineDescription> |