<?xml version="1.0" encoding="UTF-8" ?>
<!--
***************************************************************
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
***************************************************************
-->
<!--
  Analysis engine descriptor for the "StopwordList" annotator.
  Per its own description, the component marks tokens as stopwords by list
  lookup. It declares four optional configuration parameters, imports its
  type system from tt_typesystem.xml, and lists its supported languages.
-->
<taeDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <!-- Selects the org.apache.uima.cpp framework implementation. -->
  <frameworkImplementation>org.apache.uima.cpp</frameworkImplementation>
  <!-- Primitive engine: a single annotator, not an aggregate of delegates. -->
  <primitive>true</primitive>
  <!-- NOTE(review): presumably resolved by the framework to a native library
       named "libstw"; the lookup rules are not visible in this file. -->
  <annotatorImplementationName>libstw</annotatorImplementationName>
  <analysisEngineMetaData>
    <name>StopwordList</name>
    <description>This annotates tokens as stopwords by list lookup.</description>
    <version>1.0</version>
    <vendor>IBM Corporation</vendor>

    <!-- All parameters below are single-valued and optional
         (multiValued=false, mandatory=false). -->
    <configurationParameters>
      <!-- Integer length threshold applied to all tokens. -->
      <configurationParameter>
        <name>MinTokenLength</name>
        <description>All tokens with less length are considered stopwords</description>
        <type>Integer</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
      <!-- Integer length threshold applied to uppercase tokens. -->
      <configurationParameter>
        <name>MinUpperCaseTokenLength</name>
        <description>All uppercase tokens with less length are considered stopwords</description>
        <type>Integer</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
      <!-- Boolean switch for loading stopword resources of alternate territories. -->
      <configurationParameter>
        <name>UseAlternateTerritories</name>
        <description>If true, stopword resources for alternate territories are loaded</description>
        <type>Boolean</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
      <!-- Name of the feature that receives the stopword flag.
           NOTE(review): the capabilities section declares
           uima.tt.TokenAnnotation:stopwordToken as output; whether that is the
           feature used when this parameter is unset is not visible here. -->
      <configurationParameter>
        <name>FeatureName</name>
        <description>Fully qualified name of the feature to be set if a token is a stopword. Feature must have int range.</description>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
    </configurationParameters>

    <!-- Intentionally empty: this descriptor assigns no parameter values. -->
    <configurationParameterSettings>
    </configurationParameterSettings>

    <!-- The type system is not defined inline; it is imported by location. -->
    <typeSystemDescription>
      <imports>
        <import location="tt_typesystem.xml"/>
      </imports>
    </typeSystemDescription>

    <!-- No feature structure indexes are declared by this descriptor. -->
    <fsIndexes>
    </fsIndexes>

    <capabilities>
      <capability>
        <!-- Consumes token annotations ... -->
        <inputs>
          <type>uima.tt.TokenAnnotation</type>
        </inputs>
        <!-- ... and declares the stopwordToken feature on them as output. -->
        <outputs>
          <feature>uima.tt.TokenAnnotation:stopwordToken</feature>
        </outputs>
        <!-- Explicit language list; the generic x-unspecified entry is
             disabled below. -->
        <languagesSupported>
          <!--<language>x-unspecified</language> -->
          <language>ar</language>
          <language>ca</language>
          <language>da</language>
          <language>de</language>
          <language>de-CH</language>
          <language>el</language>
          <language>en-US</language>
          <language>en-GB</language>
          <language>es</language>
          <language>fi</language>
          <language>fr</language>
          <language>fr-CA</language>
          <language>he</language>
          <language>is</language>
          <language>it</language>
          <language>nb</language>
          <language>nl</language>
          <language>no</language>
          <language>pt-BR</language>
          <language>pt</language>
          <language>ru</language>
          <language>sv</language>
          <language>zh-CN</language>
          <language>zh-TW</language>
        </languagesSupported>
      </capability>
    </capabilities>
  </analysisEngineMetaData>
</taeDescription>