blob: 5558a3392a1640da91c2a014b711ff9775b79b2c [file] [log] [blame]
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
package org.apache.uima.collection;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.resource.ResourceConfigurationException;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.ProcessTrace;
import org.apache.uima.util.Progress;
* A <code>CollectionProcessingManager</code> (CPM) manages the application of an
* {@link AnalysisEngine} to a collection of artifacts. For text analysis applications, this will be
* a collection of documents. The analysis results will then be delivered to one or more
* {@link CasConsumer}s.
* <p>
* The CPM is configured with an Analysis Engine and CAS Consumers by calling its
* {@link #setAnalysisEngine(AnalysisEngine)} and {@link #addCasConsumer(CasConsumer)} methods.
* Collection processing is then initiated by calling the {@link #process(CollectionReader)} or
* {@link #process(CollectionReader,int)} methods.
* <p>
* The <code>process</code> methods take a {@link CollectionReader} object as an argument. The
* Collection Reader retrieves each artifact from the collection as a
* {@link org.apache.uima.cas.CAS} object.
* <p>
* Listeners can register with the CPM by calling the
* {@link #addStatusCallbackListener(StatusCallbackListener)} method. These listeners receive status
* callbacks during the processing. At any time, performance and progress reports are available from
* the {@link #getPerformanceReport()} and {@link #getProgress()} methods.
* <p>
* A CPM implementation may choose to implement parallelization of the processing, but this is not a
* requirement of the architecture.
* <p>
* Note that a CPM only supports processing one collection at a time. Attempting to reconfigure a
* CPM or start a new processing job while a previous processing job is occurring will result in a
* {@link org.apache.uima.UIMA_IllegalStateException}. Processing multiple collections
* simultaneously is done by instantiating and configuring multiple instances of the CPM.
* <p>
* A <code>CollectionProcessingManager</code> instance can be obtained by calling
* {@link org.apache.uima.UIMAFramework#newCollectionProcessingManager()}.
public interface CollectionProcessingManager {
* Gets the <code>AnalysisEngine</code> that is assigned to this CPM.
* @return the <code>AnalysisEngine</code> that this CPM will use to analyze each CAS in the
* collection.
public AnalysisEngine getAnalysisEngine();
* Sets the <code>AnalysisEngine</code> that is assigned to this CPM.
* @param aAnalysisEngine
* the <code>AnalysisEngine</code> that this CPM will use to analyze each CAS in the
* collection.
* @throws org.apache.uima.UIMA_IllegalStateException
* if this CPM is currently processing
public void setAnalysisEngine(AnalysisEngine aAnalysisEngine)
throws ResourceConfigurationException;
* Gets the <code>CasConsumer</code>s assigned to this CPM.
* @return an array of <code>CasConsumer</code>s
public CasConsumer[] getCasConsumers();
* Adds a <code>CasConsumer</code> to this CPM.
* @param aCasConsumer
* a <code>CasConsumer</code> to add
* @throws org.apache.uima.UIMA_IllegalStateException
* if this CPM is currently processing
public void addCasConsumer(CasConsumer aCasConsumer) throws ResourceConfigurationException;
* Removes a <code>CasConsumer</code> from this CPM.
* @param aCasConsumer
* the <code>CasConsumer</code> to remove
* @throws org.apache.uima.UIMA_IllegalStateException
* if this CPM is currently processing
public void removeCasConsumer(CasConsumer aCasConsumer);
* Gets whether this CPM is required to process the collection's elements serially (as opposed to
* performing parallelization). Note that a value of <code>false</code> does not guarantee that
* parallelization is performed; this is left up to the CPM implementation.
* @return true if and only if serial processing is required
public boolean isSerialProcessingRequired();
* Sets whether this CPM is required to process the collection's elements serially (as opposed to
* performing parallelization). If this method is not called, the default is <code>false</code>.
* Note that a value of <code>false</code> does not guarantee that parallelization is performed;
* this is left up to the CPM implementation.
* @param aRequired
* true if and only if serial processing is required
* @throws org.apache.uima.UIMA_IllegalStateException
* if this CPM is currently processing
public void setSerialProcessingRequired(boolean aRequired);
* Gets whether this CPM will automatically pause processing if an exception occurs. If processing
* is paused it can be resumed by calling the {@link #resume(boolean)} method.
* @return true if and only if this CPM will pause on exception
public boolean isPauseOnException();
* Sets whether this CPM will automatically pause processing if an exception occurs. If processing
* is paused it can be resumed by calling the {@link #resume(boolean)} method.
* @param aPause
* true if and only if this CPM should pause on exception
* @throws org.apache.uima.UIMA_IllegalStateException
* if this CPM is currently processing
public void setPauseOnException(boolean aPause);
* Registers a listener to receive status callbacks.
* @param aListener
* the listener to add
public void addStatusCallbackListener(StatusCallbackListener aListener);
* Unregisters a status callback listener.
* @param aListener
* the listener to remove
public void removeStatusCallbackListener(StatusCallbackListener aListener);
* Initiates processing of a collection. The CollectionReader initializes the CAS with documents
* from the collection. This method starts the processing in another thread and returns immediately.
* Status of the processing can be obtained by registering a listener with the
* {@link #addStatusCallbackListener(StatusCallbackListener)} method.
* <p>
* A CPM can only process one collection at a time. If this method is called while a previous
* processing request has not yet completed, a <code>UIMA_IllegalStateException</code> will
* result. To find out whether a CPM is free to begin another processing request, call the
* {@link #isProcessing()} method.
* @param aCollectionReader
* the <code>CollectionReader</code> from which to obtain the Entities to be processed
* @throws ResourceInitializationException
* if an error occurs during initialization
* @throws org.apache.uima.UIMA_IllegalStateException
* if this CPM is currently processing
public void process(CollectionReader aCollectionReader) throws ResourceInitializationException;
* Initiates processing of a collection. This method works in the same way as
* {@link #process(CollectionReader)}, but it breaks the processing up into batches of a size
* determined by the <code>aBatchSize</code> parameter. Each {@link CasConsumer} will be
* notified at the end of each batch.
* @param aCollectionReader
* the <code>CollectionReader</code> from which to obtain the Entities to be processed
* @param aBatchSize
* the size of the batch.
* @throws ResourceInitializationException
* if an error occurs during initialization
* @throws org.apache.uima.UIMA_IllegalStateException
* if this CPM is currently processing
public void process(CollectionReader aCollectionReader, int aBatchSize)
throws ResourceInitializationException;
* Determines whether this CPM is currently processing. This means that a processing request has
* been submitted and has not yet completed or been stopped via {@link #stop()}. If processing is
* paused, this method will still return <code>true</code>.
* @return true if and only if this CPM is currently processing.
public boolean isProcessing();
* Pauses processing. Processing can later be resumed by calling the {@link #resume(boolean)}
* method.
* @throws org.apache.uima.UIMA_IllegalStateException
* if no processing is currently occurring
public void pause();
* Determines whether this CPM's processing is currently paused.
* @return true if and only if this CPM's processing is currently paused.
public boolean isPaused();
* Resumes processing that has been paused.
* @param aRetryFailed
* if processing was paused because an exception occurred (see
* {@link #setPauseOnException(boolean)}), setting a value of <code>true</code> for
* this parameter will cause the failed entity to be retried. A value of
* <code>false</code> (the default) will cause processing to continue with the next
* entity after the failure.
* @throws org.apache.uima.UIMA_IllegalStateException
* if processing is not currently paused
public void resume(boolean aRetryFailed);
* Resumes processing that has been paused.
* @throws org.apache.uima.UIMA_IllegalStateException
* if processing is not currently paused
public void resume();
* Stops processing.
* @throws org.apache.uima.UIMA_IllegalStateException
* if no processing is currently occurring
public void stop();
* Gets a performance report for the processing that is currently occurring or has just completed.
* @return an object containing performance statistics
public ProcessTrace getPerformanceReport();
* Gets a progress report for the processing that is currently occurring or has just completed.
* @return an array of <code>Progress</code> objects, each of which represents the progress in a
* different set of units (for example number of entities or bytes)
public Progress[] getProgress();