| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.uima.examples.cpe; |
| |
| import java.io.File; |
| import java.io.FileOutputStream; |
| import java.io.IOException; |
| import java.net.MalformedURLException; |
| import java.net.URL; |
| |
| import org.apache.uima.cas.CAS; |
| import org.apache.uima.cas.CASException; |
| import org.apache.uima.cas.FSIterator; |
| import org.apache.uima.collection.CasConsumer_ImplBase; |
| import org.apache.uima.examples.SourceDocumentInformation; |
| import org.apache.uima.jcas.JCas; |
| import org.apache.uima.resource.ResourceInitializationException; |
| import org.apache.uima.resource.ResourceProcessException; |
| import org.apache.uima.util.CasToInlineXml; |
| |
| /** |
| * A simple CAS consumer that generates inline XML and writes it to a file. UTF-8 encoding is used. |
| * <p> |
| * This CAS Consumer takes one parameter: |
| * <ul> |
| * <li><code>OutputDirectory</code> - path to directory into which output files will be written</li> |
| * </ul> |
| * |
| * |
| */ |
| public class InlineXmlCasConsumer extends CasConsumer_ImplBase { |
| /** |
| * Name of configuration parameter that must be set to the path of a directory into which the |
| * output files will be written. |
| */ |
| public static final String PARAM_OUTPUTDIR = "OutputDirectory"; |
| |
| private File mOutputDir; |
| |
| private CasToInlineXml cas2xml; |
| |
| private int mDocNum; |
| |
| public void initialize() throws ResourceInitializationException { |
| mDocNum = 0; |
| mOutputDir = new File(((String) getConfigParameterValue(PARAM_OUTPUTDIR)).trim()); |
| if (!mOutputDir.exists()) { |
| mOutputDir.mkdirs(); |
| } |
| cas2xml = new CasToInlineXml(); |
| } |
| |
| /** |
| * Processes the CasContainer which was populated by the TextAnalysisEngines. <br> |
| * In this case, the CAS is converted to XML and written into the output file . |
| * |
| * @param aCAS |
| * CasContainer which has been populated by the TAEs |
| * |
| * @throws ResourceProcessException |
| * if there is an error in processing the Resource |
| * |
| * @see org.apache.uima.collection.base_cpm.CasObjectProcessor#processCas(org.apache.uima.cas.CAS) |
| */ |
| public void processCas(CAS aCAS) throws ResourceProcessException { |
| JCas jcas; |
| try { |
| jcas = aCAS.getJCas(); |
| } catch (CASException e) { |
| throw new ResourceProcessException(e); |
| } |
| |
| // retreive the filename of the input file from the CAS |
| FSIterator it = jcas.getAnnotationIndex(SourceDocumentInformation.type).iterator(); |
| File outFile = null; |
| if (it.hasNext()) { |
| SourceDocumentInformation fileLoc = (SourceDocumentInformation) it.next(); |
| File inFile; |
| try { |
| inFile = new File(new URL(fileLoc.getUri()).getPath()); |
| outFile = new File(mOutputDir, inFile.getName()); |
| } catch (MalformedURLException e1) { |
| // invalid URL, use default processing below |
| } |
| } |
| if (outFile == null) { |
| outFile = new File(mOutputDir, "doc" + mDocNum++); |
| } |
| // convert CAS to xml format and write to output file in UTF-8 |
| try { |
| String xmlAnnotations = cas2xml.generateXML(aCAS); |
| FileOutputStream outStream = new FileOutputStream(outFile); |
| outStream.write(xmlAnnotations.getBytes("UTF-8")); |
| outStream.close(); |
| } catch (CASException e) { |
| throw new ResourceProcessException(e); |
| } catch (IOException e) { |
| throw new ResourceProcessException(e); |
| } |
| } |
| |
| } |