<?xml version="1.0" encoding="UTF-8"?>
<!--
***************************************************************
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
***************************************************************
-->
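<!-- A simple collection reader that reads text files from a directory in the
filesystem. The Java class for this collection reader is
org.apache.uima.examples.cpe.FileSystemCollectionReader. NOTE(review): this
comment originally placed the class in uima_core.jar, but the package name
suggests the UIMA examples jar; confirm before relying on either. -->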
<collectionReaderDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <!-- Java-based component; implementationName is the fully qualified name of
       the implementing class. -->
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <implementationName>org.apache.uima.examples.cpe.FileSystemCollectionReader</implementationName>
  <processingResourceMetaData>
    <name>File System Collection Reader</name>
    <!-- Continuation lines of multi-line text are left unindented on purpose:
         leading whitespace inside text content would become part of the value. -->
    <description>Reads files from the filesystem. This CollectionReader may be used
with or without a CAS Initializer. If a CAS Initializer is supplied, it will
be passed an InputStream to the file and must populate the CAS from that
InputStream. If no CAS Initializer is supplied, this CollectionReader will
read the file itself and treat the entire contents of the file as the
document to be inserted into the CAS.</description>
    <version>1.0</version>
    <vendor>The Apache Software Foundation</vendor>

    <!-- Parameters this reader declares: one mandatory (InputDirectory) and
         two optional (Encoding, Language). All are single-valued strings. -->
    <configurationParameters>
      <configurationParameter>
        <name>InputDirectory</name>
        <description>Directory containing input files</description>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>Encoding</name>
        <description>Character encoding for the documents. If not specified,
the default system encoding will be used. Note that this parameter
only applies if there is no CAS Initializer provided; otherwise,
it is the CAS Initializer's responsibility to deal with character
encoding issues. </description>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>Language</name>
        <description>ISO language code for the documents</description>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
    </configurationParameters>

    <!-- Default values. Only InputDirectory is set here; Encoding and Language
         are left unset. The path below is a Windows-style absolute path and
         must be adjusted to match the local installation. -->
    <configurationParameterSettings>
      <nameValuePair>
        <name>InputDirectory</name>
        <value>
          <string>C:/Program Files/apache-uima/examples/data</string>
        </value>
      </nameValuePair>
    </configurationParameterSettings>

    <!-- Type System of CASes returned by this Collection Reader. The import
         uses a relative location, so SourceDocumentInformation.xml is expected
         alongside this descriptor. -->
    <typeSystemDescription>
      <imports>
        <import location="SourceDocumentInformation.xml"/>
      </imports>
    </typeSystemDescription>

    <!-- Declares no input types and a single output type, with all of its
         features. -->
    <capabilities>
      <capability>
        <inputs/>
        <outputs>
          <type allAnnotatorFeatures="true">org.apache.uima.examples.SourceDocumentInformation</type>
        </outputs>
      </capability>
    </capabilities>

    <!-- Deployment flags: the reader modifies the CAS, must not be deployed in
         multiple instances, and outputs new CASes. -->
    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
      <outputsNewCASes>true</outputsNewCASes>
    </operationalProperties>
  </processingResourceMetaData>
</collectionReaderDescription>