// blob: cdb7eea7451c6331bc435fb2d50260d3acedb68b [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.examples;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.PrintStream;
import java.util.Iterator;
import java.util.List;
import org.apache.uima.UIMAFramework;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.ResultSpecification;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.FloatArrayFS;
import org.apache.uima.cas.IntArrayFS;
import org.apache.uima.cas.StringArrayFS;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.resource.ResourceSpecifier;
import org.apache.uima.util.FileUtils;
import org.apache.uima.util.XMLInputSource;
/**
 * A simple example of how to extract information from the CAS. This example retrieves all
 * annotations of a specified type from a CAS and prints them (along with all of their features) to
 * a PrintStream.
 */
public class PrintAnnotations {

  /** Maximum number of characters of covered text or of a String value that is printed. */
  private static final int MAX_TEXT_LENGTH = 64;

  /**
   * Prints all Annotations to a PrintStream.
   * 
   * @param aCAS
   *          the CAS containing the FeatureStructures to print
   * @param aOut
   *          the PrintStream to which output will be written
   */
  public static void printAnnotations(CAS aCAS, PrintStream aOut) {
    printAll(aCAS.getAnnotationIndex().iterator(), aCAS, aOut);
  }

  /**
   * Prints all Annotations of a specified Type to a PrintStream.
   * 
   * @param aCAS
   *          the CAS containing the FeatureStructures to print
   * @param aAnnotType
   *          the Type of Annotation to be printed
   * @param aOut
   *          the PrintStream to which output will be written
   */
  public static void printAnnotations(CAS aCAS, Type aAnnotType, PrintStream aOut) {
    printAll(aCAS.getAnnotationIndex(aAnnotType).iterator(), aCAS, aOut);
  }

  /**
   * Prints every FeatureStructure the iterator yields, starting from its current position, at
   * nesting level 0.
   * 
   * @param aIter
   *          the iterator to walk; it is advanced until it is no longer valid
   * @param aCAS
   *          the CAS containing the FeatureStructures
   * @param aOut
   *          the PrintStream to which output will be written
   */
  private static void printAll(FSIterator aIter, CAS aCAS, PrintStream aOut) {
    while (aIter.isValid()) {
      FeatureStructure fs = aIter.get();
      printFS(fs, aCAS, 0, aOut);
      aIter.moveToNext();
    }
  }

  /**
   * Prints a FeatureStructure to a PrintStream. The output consists of the type name, followed
   * (for annotations) by the first 64 characters of the covered text, followed by one line per
   * feature in the form <code>name = value</code>. Feature values that are themselves
   * FeatureStructures are printed recursively, one nesting level deeper.
   * <p>
   * Note: the recursion does not keep track of FeatureStructures it has already visited, so
   * FeatureStructures that reference each other cyclically are not detected.
   * 
   * @param aFS
   *          the FeatureStructure to print
   * @param aCAS
   *          the CAS containing the FeatureStructure
   * @param aNestingLevel
   *          number of tabs to print before each line
   * @param aOut
   *          the PrintStream to which output will be written
   */
  public static void printFS(FeatureStructure aFS, CAS aCAS, int aNestingLevel, PrintStream aOut) {
    Type stringType = aCAS.getTypeSystem().getType(CAS.TYPE_NAME_STRING);

    printTabs(aNestingLevel, aOut);
    aOut.println(aFS.getType().getName());

    // if it's an annotation, print the first 64 chars of its covered text
    if (aFS instanceof AnnotationFS) {
      String coveredText = ((AnnotationFS) aFS).getCoveredText();
      printTabs(aNestingLevel + 1, aOut);
      // a missing covered text is reported the same way as a missing String feature value
      printQuotedOrNull(coveredText, aOut);
    }

    // print all features
    List features = aFS.getType().getFeatures();
    Iterator iter = features.iterator();
    while (iter.hasNext()) {
      Feature feat = (Feature) iter.next();
      printTabs(aNestingLevel + 1, aOut);

      // print feature name
      aOut.print(feat.getShortName());
      aOut.print(" = ");

      // print feature value (how we get this depends on feature's range type)
      Type rangeType = feat.getRange();
      String rangeTypeName = rangeType.getName();
      if (aCAS.getTypeSystem().subsumes(stringType, rangeType)) {
        // subsumes() rather than a name comparison, so that subtypes of string are covered too
        printQuotedOrNull(aFS.getStringValue(feat), aOut);
      } else if (CAS.TYPE_NAME_INTEGER.equals(rangeTypeName)) {
        aOut.println(aFS.getIntValue(feat));
      } else if (CAS.TYPE_NAME_FLOAT.equals(rangeTypeName)) {
        aOut.println(aFS.getFloatValue(feat));
      } else if (CAS.TYPE_NAME_STRING_ARRAY.equals(rangeTypeName)) {
        StringArrayFS arrayFS = (StringArrayFS) aFS.getFeatureValue(feat);
        if (arrayFS == null) {
          aOut.println("null");
        } else {
          printArray(arrayFS.toArray(), aOut);
        }
      } else if (CAS.TYPE_NAME_INTEGER_ARRAY.equals(rangeTypeName)) {
        IntArrayFS arrayFS = (IntArrayFS) aFS.getFeatureValue(feat);
        if (arrayFS == null) {
          aOut.println("null");
        } else {
          int[] vals = arrayFS.toArray();
          String[] text = new String[vals.length];
          for (int i = 0; i < vals.length; i++) {
            text[i] = String.valueOf(vals[i]);
          }
          printArray(text, aOut);
        }
      } else if (CAS.TYPE_NAME_FLOAT_ARRAY.equals(rangeTypeName)) {
        FloatArrayFS arrayFS = (FloatArrayFS) aFS.getFeatureValue(feat);
        if (arrayFS == null) {
          aOut.println("null");
        } else {
          float[] vals = arrayFS.toArray();
          String[] text = new String[vals.length];
          for (int i = 0; i < vals.length; i++) {
            text[i] = String.valueOf(vals[i]);
          }
          printArray(text, aOut);
        }
      } else if (rangeType.isPrimitive()) {
        // remaining primitive ranges (e.g. boolean, long, double) hold no FeatureStructure, so
        // they must not be read with getFeatureValue(); print their string form instead
        aOut.println(aFS.getFeatureValueAsString(feat));
      } else {
        // non-primitive type: print the referenced FeatureStructure one level deeper
        FeatureStructure val = aFS.getFeatureValue(feat);
        if (val == null) {
          aOut.println("null");
        } else {
          printFS(val, aCAS, aNestingLevel + 1, aOut);
        }
      }
    }
  }

  /**
   * Prints a String surrounded by double quotes, shortened to at most 64 characters followed by
   * "..." when it is longer, and terminates the line. Prints <code>null</code> (without quotes)
   * if the String is null.
   * 
   * @param aText
   *          the text to print, may be null
   * @param aOut
   *          the PrintStream to which output will be written
   */
  private static void printQuotedOrNull(String aText, PrintStream aOut) {
    if (aText == null) {
      aOut.println("null");
      return;
    }
    String shown = aText;
    if (shown.length() > MAX_TEXT_LENGTH) {
      shown = shown.substring(0, MAX_TEXT_LENGTH) + "...";
    }
    aOut.print("\"");
    aOut.print(shown);
    aOut.println("\"");
  }

  /**
   * Prints array elements as <code>[a,b,c]</code> and terminates the line.
   * 
   * @param aVals
   *          the textual form of the array elements
   * @param aOut
   *          the PrintStream to which output will be written
   */
  private static void printArray(String[] aVals, PrintStream aOut) {
    aOut.print("[");
    for (int i = 0; i < aVals.length; i++) {
      if (i > 0) {
        aOut.print(',');
      }
      aOut.print(aVals[i]);
    }
    aOut.println("]");
  }

  /**
   * Prints tabs to a PrintStream.
   * 
   * @param aNumTabs
   *          number of tabs to print
   * @param aOut
   *          the PrintStream to which output will be written
   */
  private static void printTabs(int aNumTabs, PrintStream aOut) {
    for (int i = 0; i < aNumTabs; i++) {
      aOut.print("\t");
    }
  }

  /**
   * Main program for testing this class. There are two required arguments - the path to the XML
   * descriptor for the TAE to run and an input file. Additional arguments are Type or Feature names
   * to be included in the ResultSpecification passed to the TAE.
   * 
   * @param args
   *          descriptor path, input file path, then optional Type or Feature names
   */
  public static void main(String[] args) {
    if (args.length < 2) {
      // fail with a readable message instead of an ArrayIndexOutOfBoundsException stack trace
      System.err.println("Usage: java " + PrintAnnotations.class.getName()
              + " <descriptor file> <input file> [<Type or Feature name> ...]");
      return;
    }

    AnalysisEngine ae = null;
    try {
      File taeDescriptor = new File(args[0]);
      File inputFile = new File(args[1]);

      // get Resource Specifier from XML file
      XMLInputSource in = new XMLInputSource(taeDescriptor);
      ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);

      // create Analysis Engine
      ae = UIMAFramework.produceAnalysisEngine(specifier);

      // create a CAS
      CAS cas = ae.newCAS();

      // build ResultSpec if Type and Feature names were specified on commandline
      ResultSpecification resultSpec = null;
      if (args.length > 2) {
        resultSpec = ae.createResultSpecification();
        for (int i = 2; i < args.length; i++) {
          if (args[i].indexOf(':') > 0) {
            // a ':' after the first character marks the argument as a feature name
            resultSpec.addResultFeature(args[i]);
          } else {
            resultSpec.addResultType(args[i], false);
          }
        }
      }

      // read contents of file
      String document = FileUtils.file2String(inputFile);

      // send doc through the AE
      cas.setDocumentText(document);
      ae.process(cas, resultSpec);

      // print results
      Type annotationType = cas.getTypeSystem().getType(CAS.TYPE_NAME_ANNOTATION);
      PrintAnnotations.printAnnotations(cas, annotationType, System.out);
    } catch (Exception e) {
      e.printStackTrace();
    } finally {
      // destroy AE even when parsing or processing failed
      if (ae != null) {
        ae.destroy();
      }
    }
  }
}