<?xml version="1.0" encoding="UTF-8"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one
  or more contributor license agreements.  See the NOTICE file
  distributed with this work for additional information
  regarding copyright ownership.  The ASF licenses this file
  to you under the Apache License, Version 2.0 (the
  "License"); you may not use this file except in compliance
  with the License.  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing,
  software distributed under the License is distributed on an
  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  KIND, either express or implied.  See the License for the
  specific language governing permissions and limitations
  under the License.
-->
<!--
  UIMA analysis engine descriptor for the ConceptMapper offset tokenizer.
  It names the implementing annotator class, declares two configuration
  parameters together with their settings, defines the token annotation
  types, and lists the engine's capabilities and operational properties.
-->
<taeDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <primitive>true</primitive>
  <annotatorImplementationName>org.apache.uima.conceptMapper.support.tokenizer.OffsetTokenizer</annotatorImplementationName>
  <analysisEngineMetaData>
    <!-- NOTE(review): "OffsetTokenenizer" looks like a misspelling of
         "OffsetTokenizer". Left unchanged in case logs or monitoring key on
         this name; confirm before renaming. -->
    <name>OffsetTokenenizer</name>

    <!-- Parameters the annotator accepts. -->
    <configurationParameters>
      <!-- Mandatory, single-valued String. -->
      <configurationParameter>
        <name>caseMatch</name>
        <description>matching case sensitive or case insensitive</description>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
      </configurationParameter>
      <!-- Optional, single-valued String. -->
      <configurationParameter>
        <name>tokenDelimiters</name>
        <description>String of characters that separate tokens</description>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
    </configurationParameters>

    <!-- Values assigned to the parameters declared above. -->
    <configurationParameterSettings>
      <nameValuePair>
        <name>caseMatch</name>
        <value>
          <!-- NOTE(review): presumably selects case-insensitive matching;
               confirm the accepted values against the annotator class. -->
          <string>ignoreall</string>
        </value>
      </nameValuePair>
      <nameValuePair>
        <name>tokenDelimiters</name>
        <value>
          <!-- The ampersand and angle brackets are written as entities so the
               document stays well-formed. The value is kept on one line
               because any whitespace inside the string element would become
               part of the delimiter set. -->
          <string>/-*&amp;@(){}|[]&gt;&lt;\'`":;,$%+.?!</string>
        </value>
      </nameValuePair>
    </configurationParameterSettings>

    <typeSystemDescription>
      <types>
        <!-- ConceptMapper token type: extends uima.tt.TokenAnnotation and adds
             a String feature named "text". -->
        <typeDescription>
          <name>org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation</name>
          <description/>
          <supertypeName>uima.tt.TokenAnnotation</supertypeName>
          <features>
            <featureDescription>
              <name>text</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
          </features>
        </typeDescription>
        <!-- Base token type: extends uima.tcas.Annotation and declares no
             features of its own. -->
        <typeDescription>
          <name>uima.tt.TokenAnnotation</name>
          <description/>
          <supertypeName>uima.tcas.Annotation</supertypeName>
        </typeDescription>
      </types>
    </typeSystemDescription>

    <!-- No inputs are declared; three annotation types are declared as
         outputs, and no language restriction is given. -->
    <capabilities>
      <capability>
        <inputs/>
        <outputs>
          <type allAnnotatorFeatures="true">org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation</type>
          <type allAnnotatorFeatures="true">uima.tt.TokenAnnotation</type>
          <type allAnnotatorFeatures="true">uima.tcas.DocumentAnnotation</type>
        </outputs>
        <languagesSupported/>
      </capability>
    </capabilities>

    <!-- The engine modifies the CAS it is given, allows multiple deployment,
         and does not output new CASes. -->
    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
      <outputsNewCASes>false</outputsNewCASes>
    </operationalProperties>
  </analysisEngineMetaData>
</taeDescription>