blob: 14cc9b245748231b1549ef394e282f5258a55657 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.examples.casMultiplier;
import java.io.File;
import java.io.PrintStream;
import org.apache.uima.UIMAFramework;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.CasIterator;
import org.apache.uima.cas.CAS;
import org.apache.uima.examples.PrintAnnotations;
import org.apache.uima.resource.ResourceSpecifier;
import org.apache.uima.util.FileUtils;
import org.apache.uima.util.XMLInputSource;
/**
* An example application that shows how to interact with a CasMultiplier. A CasMultiplier is a type
* of Analysis Engine that outputs new CASes. One use of a CasMultiplier is to divide a large CAS
* into smaller pieces - a CasMultiplier that does this is called a "Segmenter".
* <p>
* This program takes two arguments -
* <ul>
* <li>The path to the Analysis Engine Descriptor for the CasMultiplier to run (such as
* descriptors/cas_multiplier/SimpleTextSegmenter.xml or
* descriptors/cas_multiplier/SegmenterAndTokenizerAE.xml)</li>
* <li>The file name of a text document to analyze (to see the effect of segmentation, choose a
* document larger than 100k characters, which is the default segment size produced by the
* SimpleTextSegmenter.</li>
* </ul>
*/
public class CasMultiplierExampleApplication {
static PrintStream outputStream;
/**
* Main program.
*
* @param args
* Command-line arguments - see class description
*/
public static void main(String[] args) {
try {
// get Resource Specifier from XML file
XMLInputSource in = new XMLInputSource(args[0]);
ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);
// create AnalysisEngine
AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(specifier);
// read input text file
File textFile = new File(args[1]);
String document = FileUtils.file2String(textFile, "UTF-8");
// create a new CAS and set the document text
CAS initialCas = ae.newCAS();
initialCas.setDocumentText(document);
// pass the CAS to the AnalysisEngine and get back
// a CasIterator for stepping over the output CASes that are produced.
CasIterator casIterator = ae.processAndOutputNewCASes(initialCas);
while (casIterator.hasNext()) {
CAS outCas = casIterator.next();
// dump the document text and annotations for this segment
System.out.println("********* NEW SEGMENT *********");
System.out.println(outCas.getDocumentText());
PrintAnnotations.printAnnotations(outCas, System.out);
// release the CAS (important)
outCas.release();
}
// If there's a CAS Consumer inside this aggregate and we want
// it's collectionProcessComplete method to be called, we need to
// call it ourselves. If run inside a CPE this would get called
// automatically.
ae.collectionProcessComplete();
} catch (Exception e) {
e.printStackTrace();
}
}
}