| <?xml version="1.0" encoding="UTF-8"?> |
| |
| <!-- |
| *************************************************************** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| *************************************************************** |
| --> |
| |
| <!-- A simple collection reader that reads text files from a directory in the |
| filesystem. The Java class for this collection reader is in uima_core.jar. --> |
| |
| <!-- Descriptor for the file system collection reader. It names the Java |
| implementation class and declares the reader's metadata: configuration |
| parameters, their preset values, the type system, capabilities and |
| operational properties. --> |
| <collectionReaderDescription xmlns="http://uima.apache.org/resourceSpecifier"> |
| <frameworkImplementation>org.apache.uima.java</frameworkImplementation> |
| <implementationName>org.apache.uima.examples.cpe.FileSystemCollectionReader</implementationName> |
| <processingResourceMetaData> |
| <name>File System Collection Reader</name> |
| <description>Reads files from the filesystem. This CollectionReader may be used |
| with or without a CAS Initializer. If a CAS Initializer is supplied, it will |
| be passed an InputStream to the file and must populate the CAS from that |
| InputStream. If no CAS Initializer is supplied, this CollectionReader will |
| read the file itself and treat the entire contents of the file as the |
| document to be inserted into the CAS.</description> |
| <version>1.0</version> |
| <vendor>The Apache Software Foundation</vendor> |
| <!-- Parameter declarations. All three parameters are single-valued strings. |
| InputDirectory is the only mandatory one; Encoding and Language are |
| optional. --> |
| <configurationParameters> |
| <configurationParameter> |
| <name>InputDirectory</name> |
| <description>Directory containing input files</description> |
| <type>String</type> |
| <multiValued>false</multiValued> |
| <mandatory>true</mandatory> |
| </configurationParameter> |
| <configurationParameter> |
| <name>Encoding</name> |
| <description>Character encoding for the documents. If not specified, |
| the default system encoding will be used. Note that this parameter |
| only applies if there is no CAS Initializer provided; otherwise, |
| it is the CAS Initializer's responsibility to deal with character |
| encoding issues. </description> |
| <type>String</type> |
| <multiValued>false</multiValued> |
| <mandatory>false</mandatory> |
| </configurationParameter> |
| <configurationParameter> |
| <name>Language</name> |
| <description>ISO language code for the documents</description> |
| <type>String</type> |
| <multiValued>false</multiValued> |
| <mandatory>false</mandatory> |
| </configurationParameter> |
| </configurationParameters> |
| <!-- Preset values for the parameters declared above. Only InputDirectory and |
| Language are set here; Encoding is left unset, so the system default |
| encoding applies as stated in its description. |
| NOTE(review): the InputDirectory value is a relative path; presumably it is |
| resolved against the working directory of the running process. Confirm |
| before reusing this descriptor outside the test setup. --> |
| <configurationParameterSettings> |
| <nameValuePair> |
| <name>InputDirectory</name> |
| <value> |
| <string>src/test/resources/data</string> |
| </value> |
| </nameValuePair> |
| <nameValuePair> |
| <name>Language</name> |
| <value> |
| <string>en</string> |
| </value> |
| </nameValuePair> |
| </configurationParameterSettings> |
| |
| <!-- Type System of CASes returned by this Collection Reader --> |
| |
| <!-- The type system is pulled in through an import by name rather than being |
| defined inline. |
| NOTE(review): a by-name import has to be resolvable at load time, |
| presumably via the classpath or datapath. Confirm that the |
| SourceDocumentInformation descriptor is available there. --> |
| <typeSystemDescription> |
| <imports> |
| <import name="org.apache.uima.examples.SourceDocumentInformation"/> |
| </imports> |
| </typeSystemDescription> |
| |
| <!-- Capabilities: no input types are declared. The single declared output is |
| the SourceDocumentInformation type with allAnnotatorFeatures="true". --> |
| <capabilities> |
| <capability> |
| <inputs/> |
| <outputs> |
| <type allAnnotatorFeatures="true">org.apache.uima.examples.SourceDocumentInformation</type> |
| </outputs> |
| </capability> |
| </capabilities> |
| <!-- Operational properties: the reader is declared as modifying the CAS and as |
| outputting new CASes; multiple deployment is not allowed. --> |
| <operationalProperties> |
| <modifiesCas>true</modifiesCas> |
| <multipleDeploymentAllowed>false</multipleDeploymentAllowed> |
| <outputsNewCASes>true</outputsNewCASes> |
| </operationalProperties> |
| </processingResourceMetaData> |
| </collectionReaderDescription> |