| <?xml version="1.0" encoding="UTF-8" ?> |
| <!-- |
| *************************************************************** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| *************************************************************** |
| --> |
| |
| <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier"> |
| <frameworkImplementation>org.apache.uima.cpp</frameworkImplementation> |
| <primitive>true</primitive> |
| <annotatorImplementationName>libSimpleTextMerger</annotatorImplementationName> |
| |
| <analysisEngineMetaData> |
| <name>Simple Text Merger</name> |
| <description>Merges text documents into larger ones, producing a merged CAS. Also copies |
| selected annotation types to the merged CAS. The input CAS requires an instance of |
| the type uima.tt.SourceDocumentInformation (which is produced by SimpleTextSegmenter). |
| Input CASes will be merged until a CAS is encountered whose SourceDocumentInformation FS has its |
| lastSegment feature set to true. At that point a merged CAS will be output which |
| includes the content up to and including that CAS. If additional CASes are then |
| received, a second merged CAS will be built, including all content from that point until |
| the next CAS with lastSegment = true, and so on.</description> |
| <version>1.0</version> |
| <vendor>The Apache Software Foundation</vendor> |
| |
| <configurationParameters> |
| <configurationParameter> <!-- Declares the required, multi-valued String parameter that lists which annotation types are carried over into the merged CAS. --> |
| <name>AnnotationTypesToCopy</name> |
| <description> |
| Names of annotation types to be copied from source CASes into the merged CAS. |
| Type uima.tt.SourceDocumentInformation will specifically be ignored. |
| </description> |
| <type>String</type> |
| <multiValued>true</multiValued> |
| <mandatory>true</mandatory> <!-- A value for this parameter is supplied in the configurationParameterSettings section of this descriptor. --> |
| </configurationParameter> |
| |
| |
| <configurationParameter> |
| <name>OutputFrequency</name> |
| <description> |
| How frequently this merger will output a new CAS. If no value or a value of 0 is specified, it |
| will only output a CAS at the end. |
| For example: if there are a total of 5 inputs and OutputFrequency is set to 2, this merger will |
| produce 3 CASes: (1 + 2), (3 + 4), (5) |
| </description> |
| <type>Integer</type> |
| <multiValued>false</multiValued> |
| <mandatory>false</mandatory> |
| </configurationParameter> |
| </configurationParameters> |
| <configurationParameterSettings> <!-- Values shipped with this descriptor. Only AnnotationTypesToCopy is set; OutputFrequency has no entry here, which its description says results in a CAS being output only at the end. --> |
| <nameValuePair> |
| <name>AnnotationTypesToCopy</name> |
| <value> |
| <array> <!-- The parameter is declared multiValued, so the value is an array even though it holds a single type name. --> |
| <string>uima.tt.TokenAnnotation</string> |
| </array> |
| </value> |
| </nameValuePair> |
| </configurationParameterSettings> |
| |
| <typeSystemDescription> <!-- No types are declared inline; the type system is pulled in from an external file. --> |
| <imports> |
| <import location="tt_typesystem.xml"/> <!-- NOTE(review): presumably this file defines the uima.tt.* types named elsewhere in this descriptor; confirm, and confirm how the location path is resolved at deployment. --> |
| </imports> |
| </typeSystemDescription> |
| |
| <capabilities> <!-- Declares what this engine reads from incoming CASes and what it produces. --> |
| <capability> |
| <inputs> <!-- Matches the engine description: each input CAS needs a SourceDocumentInformation FS, and its lastSegment feature marks the end of a merge. --> |
| <type>uima.tt.SourceDocumentInformation</type> |
| <feature>uima.tt.SourceDocumentInformation:lastSegment</feature> |
| </inputs> |
| <outputs/> <!-- No output types or features are declared. NOTE(review): the merged CAS receives the types listed in AnnotationTypesToCopy; confirm whether those should be declared here. --> |
| </capability> |
| </capabilities> |
| |
| <operationalProperties> <!-- Runtime flags describing how this engine treats input CASes and whether it creates new ones. --> |
| <modifiesCas>false</modifiesCas> <!-- Input CASes are declared as not modified; per the engine description, merged content goes into a separate merged CAS. --> |
| <multipleDeploymentAllowed>true</multipleDeploymentAllowed> <!-- NOTE(review): the description says content is accumulated across consecutive input CASes until lastSegment is true; confirm that deploying multiple instances cannot split one document's segments across instances. --> |
| <outputsNewCASes>true</outputsNewCASes> <!-- Declares that this engine emits new CASes, i.e. the merged CASes described in the engine description. --> |
| </operationalProperties> |
| |
| </analysisEngineMetaData> |
| </analysisEngineDescription> |