| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.uima.examples.cpe; |
| |
| import java.io.IOException; |
| import java.util.List; |
| |
| import org.apache.uima.UIMAException; |
| import org.apache.uima.UIMAFramework; |
| import org.apache.uima.analysis_engine.AnalysisEngine; |
| import org.apache.uima.cas.CAS; |
| import org.apache.uima.collection.CasConsumer; |
| import org.apache.uima.collection.CollectionProcessingManager; |
| import org.apache.uima.collection.CollectionReader; |
| import org.apache.uima.collection.EntityProcessStatus; |
| import org.apache.uima.collection.StatusCallbackListener; |
| import org.apache.uima.resource.ResourceSpecifier; |
| import org.apache.uima.util.XMLInputSource; |
| |
| /** |
| * Main Class that runs the Collection Processing Manager (CPM). This class reads descriptor files |
| * and initiailizes the following components: |
| * <ol> |
| * <li> CollectionReader </li> |
| * <li> Analysis Engine </li> |
| * <li> CAS Consumer </li> |
| * </ol> |
| * <br> |
| * It also registers a callback listener with the CPM, which will print progress and statistics to |
| * System.out. <br> |
| * Command lines arguments for the run are : |
| * <ol> |
| * <li> args[0] : CollectionReader descriptor file </li> |
| * <li> args[1] : CAS Consumer descriptor file. </li> |
| * <li> args[2] : AnnotationPrinter descriptor file </li> |
| * </ol> |
| * <br> |
| * Example : <br> |
| * java -cp < all jar files needed > org.apache.uima.example.cpe.SimpleRunCPE |
| * descriptors/collection_reader/FileSystemCollectionReader.xml |
| * descriptors/analysis_engine/PersonTitleAnnotator.xml |
| * descriptors/cas_consumer/XmiWrtierCasConsumer.xml |
| * |
| */ |
| public class SimpleRunCPM extends Thread { |
| /** |
| * The Collection Processing Manager instance that coordinates the processing. |
| */ |
| private CollectionProcessingManager mCPM; |
| |
| /** |
| * Start time of the processing - used to compute elapsed time. |
| */ |
| private long mStartTime; |
| |
| /** |
| * Constructor for the class. |
| * |
| * @param args |
| * command line arguments into the program - see class description |
| */ |
| public SimpleRunCPM(String args[]) throws UIMAException, IOException { |
| mStartTime = System.currentTimeMillis(); |
| |
| // check command line args |
| if (args.length < 3) { |
| printUsageMessage(); |
| System.exit(1); |
| } |
| |
| // create components from their descriptors |
| |
| // Collection Reader |
| System.out.println("Initializing Collection Reader"); |
| ResourceSpecifier colReaderSpecifier = UIMAFramework.getXMLParser() |
| .parseCollectionReaderDescription(new XMLInputSource(args[0])); |
| CollectionReader collectionReader = UIMAFramework.produceCollectionReader(colReaderSpecifier); |
| |
| // AnalysisEngine |
| System.out.println("Initializing AnalysisEngine"); |
| ResourceSpecifier aeSpecifier = UIMAFramework.getXMLParser().parseResourceSpecifier( |
| new XMLInputSource(args[1])); |
| AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(aeSpecifier); |
| |
| // CAS Consumer |
| System.out.println("Initializing CAS Consumer"); |
| ResourceSpecifier consumerSpecifier = UIMAFramework.getXMLParser().parseCasConsumerDescription( |
| new XMLInputSource(args[2])); |
| CasConsumer casConsumer = UIMAFramework.produceCasConsumer(consumerSpecifier); |
| |
| // create a new Collection Processing Manager |
| mCPM = UIMAFramework.newCollectionProcessingManager(); |
| |
| // Register AE and CAS Consumer with the CPM |
| mCPM.setAnalysisEngine(ae); |
| mCPM.addCasConsumer(casConsumer); |
| |
| // Create and register a Status Callback Listener |
| mCPM.addStatusCallbackListener(new StatusCallbackListenerImpl()); |
| |
| // Finish setup |
| mCPM.setPauseOnException(false); |
| |
| // Start Processing (in batches of 10, just for testing purposes) |
| mCPM.process(collectionReader, 10); |
| } |
| |
| /** |
| * |
| */ |
| private static void printUsageMessage() { |
| System.out.println(" Arguments to the program are as follows : \n" |
| + "args[0] : Collection Reader descriptor file \n " |
| + "args[1] : Analysis Engine descriptor file. \n" |
| + "args[2] : CAS Consumer descriptor file"); |
| } |
| |
| /** |
| * main class. |
| * |
| * @param args |
| * Command line arguments - see class description |
| */ |
| public static void main(String[] args) throws UIMAException, IOException { |
| new SimpleRunCPM(args); |
| } |
| |
| /** |
| * Callback Listener. Receives event notifications from CPM. |
| * |
| * |
| */ |
| class StatusCallbackListenerImpl implements StatusCallbackListener { |
| int entityCount = 0; |
| |
| long size = 0; |
| |
| /** |
| * Called when the initialization is completed. |
| * |
| * @see org.apache.uima.collection.processing.StatusCallbackListener#initializationComplete() |
| */ |
| public void initializationComplete() { |
| System.out.println("CPM Initialization Complete"); |
| } |
| |
| /** |
| * Called when the batchProcessing is completed. |
| * |
| * @see org.apache.uima.collection.processing.StatusCallbackListener#batchProcessComplete() |
| * |
| */ |
| public void batchProcessComplete() { |
| System.out.print("Completed " + entityCount + " documents"); |
| if (size > 0) { |
| System.out.print("; " + size + " characters"); |
| } |
| System.out.println(); |
| long elapsedTime = System.currentTimeMillis() - mStartTime; |
| System.out.println("Time Elapsed : " + elapsedTime + " ms "); |
| } |
| |
| /** |
| * Called when the collection processing is completed. |
| * |
| * @see org.apache.uima.collection.processing.StatusCallbackListener#collectionProcessComplete() |
| */ |
| public void collectionProcessComplete() { |
| System.out.print("Completed " + entityCount + " documents"); |
| if (size > 0) { |
| System.out.print("; " + size + " characters"); |
| } |
| System.out.println(); |
| long elapsedTime = System.currentTimeMillis() - mStartTime; |
| System.out.println("Time Elapsed : " + elapsedTime + " ms "); |
| System.out.println("\n\n ------------------ PERFORMANCE REPORT ------------------\n"); |
| System.out.println(mCPM.getPerformanceReport().toString()); |
| } |
| |
| /** |
| * Called when the CPM is paused. |
| * |
| * @see org.apache.uima.collection.processing.StatusCallbackListener#paused() |
| */ |
| public void paused() { |
| System.out.println("Paused"); |
| } |
| |
| /** |
| * Called when the CPM is resumed after a pause. |
| * |
| * @see org.apache.uima.collection.processing.StatusCallbackListener#resumed() |
| */ |
| public void resumed() { |
| System.out.println("Resumed"); |
| } |
| |
| /** |
| * Called when the CPM is stopped abruptly due to errors. |
| * |
| * @see org.apache.uima.collection.processing.StatusCallbackListener#aborted() |
| */ |
| public void aborted() { |
| System.out.println("Aborted"); |
| } |
| |
| /** |
| * Called when the processing of a Document is completed. <br> |
| * The process status can be looked at and corresponding actions taken. |
| * |
| * @param aCas |
| * CAS corresponding to the completed processing |
| * @param aStatus |
| * EntityProcessStatus that holds the status of all the events for aEntity |
| */ |
| public void entityProcessComplete(CAS aCas, EntityProcessStatus aStatus) { |
| if (aStatus.isException()) { |
| List exceptions = aStatus.getExceptions(); |
| for (int i = 0; i < exceptions.size(); i++) { |
| ((Throwable) exceptions.get(i)).printStackTrace(); |
| } |
| return; |
| } |
| entityCount++; |
| String docText = aCas.getDocumentText(); |
| if (docText != null) { |
| size += docText.length(); |
| } |
| } |
| } |
| |
| } |