<?xml version="1.0" encoding="UTF-8"?>
<!--
 ***************************************************************
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 ***************************************************************
-->
<!-- A simple collection reader that reads text files from a directory in the
     filesystem. The Java class for this collection reader is in uima_core.jar. -->
<collectionReaderDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <!-- Implementation binding: framework identifier and the reader class to instantiate. -->
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <implementationName>org.apache.uima.examples.cpe.FileSystemCollectionReader</implementationName>

  <processingResourceMetaData>
    <name>File System Collection Reader</name>
    <description>Reads files from the filesystem. This CollectionReader may be used
      with or without a CAS Initializer. If a CAS Initializer is supplied, it will
      be passed an InputStream to the file and must populate the CAS from that
      InputStream. If no CAS Initializer is supplied, this CollectionReader will
      read the file itself and treat the entire contents of the file as the
      document to be inserted into the CAS.</description>
    <version>1.0</version>
    <vendor>The Apache Software Foundation</vendor>

    <!-- Parameter declarations. Only InputDirectory is mandatory;
         Encoding and Language are optional single-valued strings. -->
    <configurationParameters>
      <configurationParameter>
        <name>InputDirectory</name>
        <description>Directory containing input files</description>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>Encoding</name>
        <description>Character encoding for the documents. If not specified,
          the default system encoding will be used. Note that this parameter
          only applies if there is no CAS Initializer provided; otherwise,
          it is the CAS Initializer's responsibility to deal with character
          encoding issues.</description>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>Language</name>
        <description>ISO language code for the documents</description>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
    </configurationParameters>

    <!-- Parameter values shipped with this descriptor. No value is given for
         Encoding, so the fallback described in its declaration applies. -->
    <configurationParameterSettings>
      <nameValuePair>
        <name>InputDirectory</name>
        <value>
          <!-- NOTE: absolute, installation-specific Windows path; adjust it to
               the location of the input data on the target machine. -->
          <string>C:/Program Files/apache-uima-as/examples/data</string>
        </value>
      </nameValuePair>
      <nameValuePair>
        <name>Language</name>
        <value>
          <string>en</string>
        </value>
      </nameValuePair>
    </configurationParameterSettings>

    <!-- Type System of CASes returned by this Collection Reader -->
    <typeSystemDescription>
      <imports>
        <import location="SourceDocumentInformation.xml"/>
      </imports>
    </typeSystemDescription>

    <!-- Declared capabilities: no inputs; the only declared output type is
         SourceDocumentInformation (with all of its features). -->
    <capabilities>
      <capability>
        <inputs/>
        <outputs>
          <type allAnnotatorFeatures="true">org.apache.uima.examples.SourceDocumentInformation</type>
        </outputs>
      </capability>
    </capabilities>

    <!-- Operational flags declared for this reader. -->
    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
      <outputsNewCASes>true</outputsNewCASes>
    </operationalProperties>
  </processingResourceMetaData>
</collectionReaderDescription>