| <?xml version="1.0" encoding="UTF-8" ?> |
| |
| <!-- |
| *************************************************************** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| *************************************************************** |
| --> |
| |
| <taeDescription |
| xmlns="http://uima.apache.org/resourceSpecifier" > |
| <!-- Implementation binding: runs under the org.apache.uima.cpp framework as a primitive engine. --> |
| <!-- NOTE(review): libstw is presumably the annotator's shared library name without a platform suffix; confirm. --> |
| <frameworkImplementation>org.apache.uima.cpp</frameworkImplementation> |
| <primitive>true</primitive> |
| <annotatorImplementationName>libstw</annotatorImplementationName> |
| |
| <analysisEngineMetaData> |
| <!-- Identifying metadata for this analysis engine: display name, summary, version and vendor. --> |
| <name>StopwordList</name> |
| <description>This annotates tokens as stopwords by list lookup.</description> |
| <version>1.0</version> |
| <vendor>IBM Corporation</vendor> |
| |
| <!-- Parameter declarations for the stopword annotator. All four are optional (mandatory=false) and single-valued. --> |
| <configurationParameters> |
| <!-- Integer length threshold: tokens shorter than the configured value are treated as stopwords. --> |
| <configurationParameter> |
| <name>MinTokenLength</name> |
| <description>All tokens with less length are considered stopwords</description> |
| <type>Integer</type> |
| <multiValued>false</multiValued> |
| <mandatory>false</mandatory> |
| </configurationParameter> |
| <!-- Same kind of length threshold, applied to uppercase tokens only. --> |
| <configurationParameter> |
| <name>MinUpperCaseTokenLength</name> |
| <description>All uppercase tokens with less length are considered stopwords</description> |
| <type>Integer</type> |
| <multiValued>false</multiValued> |
| <mandatory>false</mandatory> |
| </configurationParameter> |
| <!-- Boolean switch controlling whether stopword resources for alternate territories are loaded as well. --> |
| <configurationParameter> |
| <name>UseAlternateTerritories</name> |
| <description>If true, stopword resources for alternate territories are loaded</description> |
| <type>Boolean</type> |
| <multiValued>false</multiValued> |
| <mandatory>false</mandatory> |
| </configurationParameter> |
| <!-- Fully qualified name of the int-ranged feature that is set when a token is a stopword. --> |
| <!-- NOTE(review): the capabilities section declares uima.tt.TokenAnnotation:stopwordToken as output; presumably that is the feature used when this parameter is unset. Confirm against the annotator. --> |
| <configurationParameter> |
| <name>FeatureName</name> |
| <description>Fully qualified name of the feature to be set if a token is a stopword. Feature must have int range.</description> |
| <type>String</type> |
| <multiValued>false</multiValued> |
| <mandatory>false</mandatory> |
| </configurationParameter> |
| </configurationParameters> |
| |
| <!-- Intentionally empty: this descriptor assigns no values to the optional parameters declared above. --> |
| <!-- NOTE(review): the annotator presumably falls back to built-in defaults when a parameter is unset; confirm. --> |
| <configurationParameterSettings> |
| </configurationParameterSettings> |
| |
| <!-- The type system is not defined inline; it is imported by location from tt_typesystem.xml. --> |
| <!-- NOTE(review): the relative location is presumably resolved against this descriptor's own location, and the imported file presumably declares uima.tt.TokenAnnotation; confirm. --> |
| <typeSystemDescription> |
| <imports> |
| <import location="tt_typesystem.xml"/> |
| </imports> |
| </typeSystemDescription> |
| |
| <!-- No feature structure indexes are declared by this descriptor. --> |
| <fsIndexes> |
| </fsIndexes> |
| |
| <!-- Single capability set: what the annotator consumes, what it produces, and the languages it is declared for. --> |
| <capabilities> |
| <capability> |
| <!-- Input: token annotations of type uima.tt.TokenAnnotation. --> |
| <inputs> |
| <type>uima.tt.TokenAnnotation</type> |
| </inputs> |
| |
| <!-- Output: the stopwordToken feature on uima.tt.TokenAnnotation (feature-level output, no new type). --> |
| <outputs> |
| <feature>uima.tt.TokenAnnotation:stopwordToken</feature> |
| </outputs> |
| |
| <!-- Languages declared as supported. The x-unspecified entry is commented out, so it is not declared here. --> |
| <!-- NOTE(review): English is listed only as en-US and en-GB, with no plain en entry; confirm that this is intended. --> |
| <languagesSupported> |
| <!--<language>x-unspecified</language> --> |
| <language>ar</language> |
| <language>ca</language> |
| <language>da</language> |
| <language>de</language> |
| <language>de-CH</language> |
| <language>el</language> |
| <language>en-US</language> |
| <language>en-GB</language> |
| <language>es</language> |
| <language>fi</language> |
| <language>fr</language> |
| <language>fr-CA</language> |
| <language>he</language> |
| <language>is</language> |
| <language>it</language> |
| <language>nb</language> |
| <language>nl</language> |
| <language>no</language> |
| <language>pt-BR</language> |
| <language>pt</language> |
| <language>ru</language> |
| <language>sv</language> |
| <language>zh-CN</language> |
| <language>zh-TW</language> |
| </languagesSupported> |
| </capability> |
| </capabilities> |
| |
| </analysisEngineMetaData> |
| </taeDescription> |
| |