| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| package org.apache.uima.test.junit_extension; |
| |
| import java.io.File; |
| import java.io.FileOutputStream; |
| import java.io.IOException; |
| import java.io.OutputStreamWriter; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.HashMap; |
| import java.util.Iterator; |
| |
| import org.apache.uima.cas.CAS; |
| import org.apache.uima.cas.FSIterator; |
| import org.apache.uima.cas.Feature; |
| import org.apache.uima.cas.FeatureStructure; |
| import org.apache.uima.cas.Type; |
| import org.apache.uima.cas.impl.LowLevelCAS; |
| import org.apache.uima.cas.impl.LowLevelTypeSystem; |
| import org.apache.uima.cas.text.AnnotationFS; |
| import org.apache.uima.collection.CasConsumer; |
| import org.apache.uima.collection.CasConsumer_ImplBase; |
| import org.apache.uima.resource.ResourceConfigurationException; |
| import org.apache.uima.resource.ResourceInitializationException; |
| import org.apache.uima.resource.ResourceProcessException; |
| import org.apache.uima.util.ProcessTrace; |
| |
| |
| /** |
| * The AnnotationWriter class writes specified annotations to an output file. |
| * The encoding of the output file is UTF-8 |
| */ |
| |
| public class AnnotationWriter extends CasConsumer_ImplBase implements CasConsumer |
| { |
| |
| /** The out file. */ |
| //output file |
| private File outFile; |
| |
| /** The file writer. */ |
| //output file writer |
| private OutputStreamWriter fileWriter; |
| |
| /** The tofs. */ |
| //respected annotations |
| private String[] tofs; |
| |
| /** The reconfig. */ |
| //check if reconfigure must be called |
| private boolean reconfig = false; |
| |
| /** The Constant featureOnlyKey. */ |
| private final static String featureOnlyKey = "feature"; |
| |
| /** |
| * Initializes this CAS Consumer with the parameters specified in the |
| * descriptor. |
| * |
| * @throws ResourceInitializationException if there is error in |
| * initializing the resources |
| */ |
| @Override |
| public void initialize() throws ResourceInitializationException |
| { |
| |
| // extract configuration parameter settings |
| String oPath = (String) getUimaContext().getConfigParameterValue("outputFile"); |
| |
| //Output file should be specified in the descriptor |
| if (oPath == null) |
| { |
| //set reconfiguration - reconfig() must be called |
| this.reconfig = true; |
| } |
| else |
| { |
| // If specified output directory does not exist, try to create it |
| this.outFile = new File(oPath); |
| if (this.outFile.getParentFile() != null && !this.outFile.getParentFile().exists()) |
| { |
| if (!this.outFile.getParentFile().mkdirs()) |
| throw new ResourceInitializationException( |
| ResourceInitializationException.RESOURCE_DATA_NOT_VALID, |
| new Object[] { oPath, "outputFile" }); |
| } |
| try |
| { |
| this.fileWriter = new OutputStreamWriter(new FileOutputStream(this.outFile, false), "UTF-8"); |
| } |
| catch (IOException e) |
| { |
| throw new ResourceInitializationException(e); |
| } |
| } |
| |
| //extract annotation types |
| this.tofs = (String[]) getUimaContext().getConfigParameterValue("AnnotationTypes"); |
| //sort array |
| if (this.tofs != null) |
| Arrays.sort(this.tofs); |
| |
| } |
| |
| /** |
| * processTofs() writes als specified types an features to a HashMap. |
| * |
| * @param aCAS a CAS with a TypeSystem |
| * @param someTofs the some tofs |
| * @return HashMap - Map with all types an features. |
| */ |
| private HashMap processTofs(CAS aCAS, String[] someTofs) |
| { |
| HashMap types = new HashMap(10); |
| |
| for (int i = 0; i < someTofs.length; i++) |
| { |
| Type type = aCAS.getTypeSystem().getType(someTofs[i]); |
| |
| if (type == null) //maybe a feature |
| { |
| int index = someTofs[i].indexOf(":"); |
| |
| if (index != -1) |
| { |
| String typename = someTofs[i].substring(0, index); |
| Type typeKey = aCAS.getTypeSystem().getType(typename); |
| |
| //get feature object (Vector) for the current type |
| Object obj = types.get(typeKey); |
| |
| //if type is not included in the typelist create type and add feature |
| if (obj == null) |
| { |
| ArrayList list = new ArrayList(10); |
| Feature fs = aCAS.getTypeSystem().getFeatureByFullName(someTofs[i]); |
| list.add(0, featureOnlyKey); |
| list.add(fs); |
| types.put(typeKey, list); |
| } |
| else //add feature to type |
| { |
| //cast feature vector for the current type |
| ArrayList vec = (ArrayList) obj; |
| Feature fs = aCAS.getTypeSystem().getFeatureByFullName(someTofs[i]); |
| vec.add(fs); |
| } |
| |
| } |
| } |
| else |
| { |
| //add type as key and a Vector as Feature container |
| if(types.containsKey(type)){ |
| ArrayList featureList = (ArrayList) types.get(type); |
| if(featureList.size() >0 && featureList.get(0).equals(featureOnlyKey)){ |
| featureList.remove(0); |
| } |
| // type is already in the list do not overwrite it! |
| }else{ |
| types.put(type, new ArrayList(10)); |
| } |
| } |
| } |
| |
| return types; |
| } |
| |
| /* (non-Javadoc) |
| * @see org.apache.uima.collection.base_cpm.CasObjectProcessor#processCas(org.apache.uima.cas.CAS) |
| */ |
| @Override |
| public synchronized void processCas(CAS aCAS) throws ResourceProcessException |
| { |
| if (this.reconfig == true) |
| { |
| throw new ResourceProcessException( |
| ResourceInitializationException.CONFIG_SETTING_ABSENT, |
| new Object[] { "outputFile" }); |
| } |
| |
| //get low level CAS |
| LowLevelCAS ll_cas = aCAS.getLowLevelCAS(); |
| |
| //get low level TypeSystem |
| LowLevelTypeSystem ll_typeSystem = ll_cas.ll_getTypeSystem(); |
| |
| //get types and feature interessted in |
| HashMap types = processTofs(aCAS, this.tofs); |
| |
| try |
| { |
| //iterate and print annotations |
| FSIterator typeIterator = aCAS.getAnnotationIndex().iterator(); |
| |
| for (typeIterator.moveToFirst(); typeIterator.isValid(); typeIterator.moveToNext()) |
| { |
| Iterator it = types.keySet().iterator(); |
| |
| while (it.hasNext()) |
| { |
| //get current type and features |
| Type currentType = (Type) it.next(); |
| boolean isFeatureOnly = false; |
| |
| ArrayList featureList = (ArrayList) types.get(currentType); |
| if(featureList.size() >0 && featureList.get(0).equals(featureOnlyKey)){ |
| featureList.remove(0); |
| isFeatureOnly = true; |
| } |
| Feature[] features = (Feature[]) featureList.toArray(new Feature[] { |
| }); |
| |
| AnnotationFS annot = (AnnotationFS) typeIterator.get(); |
| |
| if (annot.getType().getName() == currentType.getName()) |
| { |
| //only for formatting necessary |
| boolean firstFeature = true; |
| |
| String span = annot.getCoveredText(); |
| if(!isFeatureOnly){ |
| this.fileWriter.write( |
| annot.getType().getShortName() |
| + "(" + annot.getBegin() + "," + annot.getEnd() + "): " + span); |
| }else{ |
| this.fileWriter.write( |
| annot.getType().getShortName() |
| + ": "); |
| } |
| |
| for (int f = 0; f < features.length; f++) |
| { |
| if (firstFeature) |
| { |
| this.fileWriter.write(" { "); |
| firstFeature = false; |
| } |
| else |
| { |
| this.fileWriter.write(", "); |
| } |
| |
| Feature fs = features[f]; |
| int typeClass = ll_cas.ll_getTypeClass(ll_typeSystem.ll_getCodeForType(fs.getRange())); |
| this.fileWriter.write(fs.getShortName() + "="); |
| |
| switch (typeClass) |
| { |
| case LowLevelCAS.TYPE_CLASS_FLOAT : |
| this.fileWriter.write(Float.toString(annot.getFloatValue(fs))); |
| break; |
| |
| case LowLevelCAS.TYPE_CLASS_INT : |
| this.fileWriter.write(Integer.toString(annot.getIntValue(fs))); |
| break; |
| |
| case LowLevelCAS.TYPE_CLASS_STRING : |
| String value = annot.getStringValue(fs); |
| if(value != null) { |
| this.fileWriter.write(value); |
| } else { |
| this.fileWriter.write("null"); |
| } |
| break; |
| |
| case LowLevelCAS.TYPE_CLASS_FS: |
| |
| FeatureStructure fStruct = annot.getFeatureValue(fs); |
| if(fStruct != null) { |
| this.fileWriter.write(fStruct.toString()); |
| } else { |
| this.fileWriter.write("null"); |
| } |
| break; |
| } |
| } |
| |
| if (firstFeature == false) |
| { |
| this.fileWriter.write(" }"); |
| } |
| |
| this.fileWriter.write(System.getProperty("line.separator")); |
| } |
| |
| } |
| } |
| |
| this.fileWriter.flush(); |
| } |
| catch (Exception ex) |
| { |
| throw new ResourceProcessException(ex); |
| } |
| |
| } |
| |
| /* (non-Javadoc) |
| * @see org.apache.uima.collection.CasConsumer_ImplBase#batchProcessComplete(org.apache.uima.util.ProcessTrace) |
| */ |
| @Override |
| public void batchProcessComplete(ProcessTrace aTrace) throws ResourceProcessException, IOException |
| { |
| // nothing to do here |
| } |
| |
| /* (non-Javadoc) |
| * @see org.apache.uima.collection.CasConsumer_ImplBase#collectionProcessComplete(org.apache.uima.util.ProcessTrace) |
| */ |
| @Override |
| public void collectionProcessComplete(ProcessTrace aTrace) throws ResourceProcessException, IOException |
| { |
| if (this.fileWriter != null) |
| { |
| this.fileWriter.close(); |
| } |
| } |
| |
| /* (non-Javadoc) |
| * @see org.apache.uima.collection.CasConsumer_ImplBase#reconfigure() |
| */ |
| @Override |
| public void reconfigure() throws ResourceConfigurationException |
| { |
| //reset reconfiguration - is done |
| this.reconfig = false; |
| |
| super.reconfigure(); |
| // extract configuration parameter settings |
| String oPath = (String) getUimaContext().getConfigParameterValue("outputFile"); |
| File oFile = new File(oPath); |
| //if output file has changed, close exiting file and open new |
| if (!oFile.equals(this.outFile)) |
| { |
| this.outFile = oFile; |
| try |
| { |
| if (this.fileWriter != null) |
| this.fileWriter.close(); |
| |
| // If specified output directory does not exist, try to create it |
| if (oFile.getParentFile() != null && !oFile.getParentFile().exists()) |
| { |
| if (!oFile.getParentFile().mkdirs()) |
| throw new ResourceConfigurationException( |
| ResourceInitializationException.RESOURCE_DATA_NOT_VALID, |
| new Object[] { oPath, "outputFile" }); |
| } |
| //write result specification to the output file |
| this.fileWriter = new OutputStreamWriter(new FileOutputStream(oFile, false), "UTF-8"); |
| } |
| catch (IOException e) |
| { |
| throw new ResourceConfigurationException(); |
| } |
| } |
| |
| //extract annotation types |
| this.tofs = (String[]) getUimaContext().getConfigParameterValue("AnnotationTypes"); |
| //sort array |
| if (this.tofs != null) |
| Arrays.sort(this.tofs); |
| |
| } |
| |
| /* (non-Javadoc) |
| * @see org.apache.uima.collection.CasConsumer_ImplBase#destroy() |
| */ |
| @Override |
| public void destroy() |
| { |
| if (this.fileWriter != null) |
| { |
| try |
| { |
| this.fileWriter.close(); |
| } |
| catch (IOException e) |
| { |
| // ignore IOException on destroy |
| } |
| } |
| } |
| |
| } |