// blob: 4fc71468e5b4d8550fa66fd23c810e1b2b1099c6 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.examples.cpe;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Iterator;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CasConsumer_ImplBase;
import org.apache.uima.collection.base_cpm.CasObjectProcessor;
import org.apache.uima.examples.SourceDocumentInformation;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceConfigurationException;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.ResourceProcessException;
import org.apache.uima.util.ProcessTrace;
/**
 * An example of a CAS Consumer. <br>
 * AnnotationPrinter prints all annotations in the CAS to an output file. <br>
 * Parameters needed by the AnnotationPrinter are
 * <ol>
 * <li> "outputFile" : file to which the output should be written.</li>
 * </ol>
 * <br>
 * The parameter is read in {@link #initialize()} and re-read in {@link #reconfigure()} from the
 * UIMA context.
 * <br>
 * Only {@link #processCas(CAS)} is synchronized; the other lifecycle methods are not.
 */
public class AnnotationPrinter extends CasConsumer_ImplBase implements CasObjectProcessor {
  /** File that {@link #fileWriter} currently writes to. */
  File outFile;

  /** Writer on {@link #outFile}; opened in initialize()/reconfigure(). */
  FileWriter fileWriter;

  public AnnotationPrinter() {
  }

  /**
   * Initializes this CAS Consumer with the parameters specified in the descriptor.
   * Reads the "outputFile" parameter, creates the parent directory if it does not exist, and
   * opens a writer on the file.
   *
   * @throws ResourceInitializationException
   *           if "outputFile" is not set, the parent directory cannot be created, or the file
   *           cannot be opened for writing
   */
  public void initialize() throws ResourceInitializationException {
    // extract configuration parameter settings
    String oPath = (String) getUimaContext().getConfigParameterValue("outputFile");

    // Output file should be specified in the descriptor
    if (oPath == null) {
      throw new ResourceInitializationException(
              ResourceInitializationException.CONFIG_SETTING_ABSENT, new Object[] { "outputFile" });
    }

    // If specified output directory does not exist, try to create it
    outFile = new File(oPath.trim());
    File parentDir = outFile.getParentFile();
    if (parentDir != null && !parentDir.exists() && !parentDir.mkdirs()) {
      throw new ResourceInitializationException(
              ResourceInitializationException.RESOURCE_DATA_NOT_VALID, new Object[] { oPath,
                  "outputFile" });
    }

    try {
      fileWriter = new FileWriter(outFile);
    } catch (IOException e) {
      throw new ResourceInitializationException(e);
    }
  }

  /**
   * Writes every annotation of the given CAS to the output file. <br>
   * If the CAS contains at least one annotation, a "NEW DOCUMENT" header is written first,
   * followed by the document URI when a SourceDocumentInformation annotation is present. Each
   * annotation is then written on its own line as its type name and covered text, with line
   * breaks in the text replaced by spaces.
   *
   * @param aCAS
   *          CAS whose annotations are printed
   *
   * @throws ResourceProcessException
   *           if the JCas cannot be obtained or writing to the output file fails
   *
   * @see org.apache.uima.collection.base_cpm.CasObjectProcessor#processCas(CAS)
   */
  public synchronized void processCas(CAS aCAS) throws ResourceProcessException {
    JCas jcas;
    try {
      jcas = aCAS.getJCas();
    } catch (CASException e) {
      throw new ResourceProcessException(e);
    }

    // The first SourceDocumentInformation annotation, if any, supplies the document URI
    String docUri = null;
    Iterator it = jcas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
    if (it.hasNext()) {
      SourceDocumentInformation srcDocInfo = (SourceDocumentInformation) it.next();
      docUri = srcDocInfo.getUri();
    }

    Iterator annotationIter = jcas.getAnnotationIndex().iterator();
    if (!annotationIter.hasNext()) {
      // no annotations: nothing is written, not even the document header
      return;
    }

    try {
      fileWriter.write("\n\n<++++NEW DOCUMENT++++>\n");
      if (docUri != null) {
        fileWriter.write("DOCUMENT URI:" + docUri + "\n");
      }
      fileWriter.write("\n");

      // iterate and print annotations
      while (annotationIter.hasNext()) {
        Annotation annot = (Annotation) annotationIter.next();

        // get the text that is enclosed within the annotation in the CAS;
        // NOTE(review): getCoveredText() is documented to return null when no text is
        // available, so treat that as empty text instead of failing on replace()
        String aText = annot.getCoveredText();
        if (aText == null) {
          aText = "";
        }
        // keep each annotation on a single output line
        aText = aText.replace('\n', ' ').replace('\r', ' ');

        fileWriter.write(annot.getType().getName() + " " + aText + "\n");
      }

      // one flush per CAS makes the output visible without flushing on every line
      fileWriter.flush();
    } catch (IOException e) {
      throw new ResourceProcessException(e);
    }
  }

  /**
   * Called when a batch of processing is completed.
   *
   * @param aTrace
   *          ProcessTrace object that will log events in this method.
   * @throws ResourceProcessException
   *           if there is an error in processing the Resource
   * @throws IOException
   *           if there is an IO Error
   *
   * @see org.apache.uima.collection.CasConsumer#batchProcessComplete(ProcessTrace)
   */
  public void batchProcessComplete(ProcessTrace aTrace) throws ResourceProcessException,
          IOException {
    // nothing to do in this case as AnnotationPrinter does not do
    // anything cumulatively
  }

  /**
   * Called when the entire collection is completed. Closes the output writer if one is open.
   *
   * @param aTrace
   *          ProcessTrace object that will log events in this method.
   * @throws ResourceProcessException
   *           if there is an error in processing the Resource
   * @throws IOException
   *           if closing the output file fails
   * @see org.apache.uima.collection.CasConsumer#collectionProcessComplete(ProcessTrace)
   */
  public void collectionProcessComplete(ProcessTrace aTrace) throws ResourceProcessException,
          IOException {
    if (fileWriter != null) {
      fileWriter.close();
    }
  }

  /**
   * Reconfigures the parameters of this Consumer. <br>
   * Re-reads "outputFile" from the UIMA context. If it names a different file than the current
   * one, a writer on the new file is opened and the previous writer is closed. The new file is
   * opened before the old writer is released, so a failure to open it leaves the consumer
   * writing to the previous file.
   *
   * @throws ResourceConfigurationException
   *           if "outputFile" is not set, the parent directory cannot be created, the new file
   *           cannot be opened, or the previous writer cannot be closed
   *
   * @see org.apache.uima.resource.ConfigurableResource#reconfigure()
   */
  public void reconfigure() throws ResourceConfigurationException {
    super.reconfigure();

    // extract configuration parameter settings
    String oPath = (String) getUimaContext().getConfigParameterValue("outputFile");
    if (oPath == null) {
      // same message key as initialize() uses for a missing setting
      throw new ResourceConfigurationException(
              ResourceInitializationException.CONFIG_SETTING_ABSENT, new Object[] { "outputFile" });
    }

    File oFile = new File(oPath.trim());
    if (oFile.equals(this.outFile)) {
      // output file unchanged: keep writing to the current writer
      return;
    }

    // If specified output directory does not exist, try to create it
    File parentDir = oFile.getParentFile();
    if (parentDir != null && !parentDir.exists() && !parentDir.mkdirs()) {
      throw new ResourceConfigurationException(
              ResourceInitializationException.RESOURCE_DATA_NOT_VALID, new Object[] { oPath,
                  "outputFile" });
    }

    // Open the new target first; state is only swapped once this has succeeded
    FileWriter newWriter;
    try {
      newWriter = new FileWriter(oFile);
    } catch (IOException e) {
      throw new ResourceConfigurationException(e);
    }

    FileWriter previousWriter = this.fileWriter;
    this.outFile = oFile;
    this.fileWriter = newWriter;

    // close the previous file, if there was one
    if (previousWriter != null) {
      try {
        previousWriter.close();
      } catch (IOException e) {
        throw new ResourceConfigurationException(e);
      }
    }
  }

  /**
   * Called if clean up is needed in case of exit under error conditions. Closes the output
   * writer if one is open; a failure to close is ignored.
   *
   * @see org.apache.uima.resource.Resource#destroy()
   */
  public void destroy() {
    if (fileWriter != null) {
      try {
        fileWriter.close();
      } catch (IOException ignored) {
        // ignore IOException on destroy
      }
    }
  }
}