| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.uima.analysis_component; |
| |
| import org.apache.uima.UimaContext; |
| import org.apache.uima.analysis_engine.AnalysisEngineProcessException; |
| import org.apache.uima.analysis_engine.ResultSpecification; |
| import org.apache.uima.cas.AbstractCas; |
| import org.apache.uima.resource.ResourceConfigurationException; |
| import org.apache.uima.resource.ResourceInitializationException; |
| |
| /** |
| * Analysis Components are the primitive "building blocks" from which UIMA solutions are built. This |
| * is the common superinterface for all user-developed components that take a CAS as input and may |
| * produce CASes as output. |
| * <p> |
| * Typically, developers do not implement this interface directly. There are several abstract |
| * classes that you can inherit from depending on the function that your component performs and |
| * which CAS interface it uses: |
| * <ul> |
| * <li> Annotator: Receives an input CAS and updates it |
| * <ul> |
| * <li>{@link JCasAnnotator_ImplBase}: Uses JCas interface</li> |
| * <li>{@link CasAnnotator_ImplBase}: Uses CASinterface |
| * </ul> |
| * </li> |
| * <li>{@link org.apache.uima.collection.CasConsumer_ImplBase}: Receives an input CAS but does not |
| * update it. May update a data structure based on information in the CASes it receives.</li> |
| * <li> CasMultiplier: Receives an input CAS and, in addition to updating it, may output new CASes. |
| * One common use of this is to split a CAS into pieces, emitting each piece as a separate output |
| * CAS. |
| * <ul> |
| * <li>{@link JCasMultiplier_ImplBase}: Uses JCas interface</li> |
| * <li>{@link CasMultiplier_ImplBase}: Uses CAS interface</li> |
| * <li>{@link org.apache.uima.collection.CollectionReader_ImplBase}: A special type of |
| * CasMultiplier that, for historical reasons, does not take an input CAS.</li> |
| * </ul> |
| * </li> |
| * </ul> |
| * <p> |
| * The framework interacts with AnalysisComponents as follows: |
| * <ol> |
| * <li>The framework calls the AnalysisComponent's {@link #process(AbstractCas)} method with an |
| * input CAS.</li> |
| * <li>The framework then calls the AnalysisComponent's {@link #hasNext()} method, which should |
| * return <code>true</code> if the AnalysisComponent intends to produce new output CASes, or |
| * <code>false</code> if the AnalysisComponent will not produce new output CASes.</li> |
| * <li>If the AnalysisComponent returns <code>true</code>, the framework will then call the |
| * {@link #next()} method.</li> |
| * <li>The AnalysisComponent, in its <code>next</code> method, can create a new CAS by calling |
| * {@link UimaContext#getEmptyCas(Class)} (or instead, one of the helper methods in the ImplBase |
| * class that it extended). It then populates the empty CAS and returns it.</li> |
| * <li>Steps 2 & 3 continue for each subsequent output CAS, until <code>hasNext()</code> returns |
| * false.</li> |
| * </ul> |
| * </ol> |
| * |
| * From the time when <code>process</code> is called until the time when <code>hasNext</code> |
| * returns false, the AnalysisComponent "owns" the CAS that was passed to <code>process</code>. |
| * The AnalysisComponent is permitted to make changes to this CAS. Once <code>hasNext</code> |
| * returns false, the AnalysisComponent releases control of the initial CAS. This means that the |
| * AnalysisComponent must finish all updates to the initial CAS prior to returning false from |
| * <code>hasNext<code>. |
| * <p> |
| * However, if the <code>process</code> method is called a second time, before <code>hasNext</code> has returned |
| * false, this is a signal to the AnalysisComponent to cancel all processing of the previous CAS and begin |
| * processing the new CAS instead. |
| */ |
| public interface AnalysisComponent { |
| /** |
| * Performs any startup tasks required by this component. The framework calls this method only |
| * once, just after the AnalysisComponent has been instantiated. |
| * <p> |
| * The framework supplies this AnalysisComponent with a reference to the {@link UimaContext} that |
| * it will use, for example to access configuration settings or resources. This AnalysisComponent |
| * should store a reference to its the <code>UimaContext</code> for later use. |
| * |
| * @param aContext |
| * Provides access to services and resources managed by the framework. This includes |
| * configuration parameters, logging, and access to external resources. |
| * |
| * @throws ResourceInitializationException |
| * if this AnalysisComponent cannot initialize successfully. |
| */ |
| void initialize(UimaContext aContext) throws ResourceInitializationException; |
| |
| /** |
| * Alerts this AnalysisComponent that the values of its configuration parameters or external |
| * resources have changed. This AnalysisComponent should re-read its configuration from the |
| * {@link UimaContext} and take appropriate action to reconfigure itself. |
| * <p> |
| * In the abstract base classes provided by the framework, this is generally implemented by |
| * calling <code>destroy</code> followed by <code>initialize</code> and |
| * <code>typeSystemChanged</code>. If a more efficient implementation is needed, you can |
| * override that implementation. |
| * |
| * @throws ResourceConfigurationException |
| * if the configuration specified for this component is invalid. |
| * @throws ResourceInitializationException |
| * if this component fails to reinitialize itself based on the new configuration. |
| */ |
| void reconfigure() throws ResourceInitializationException, ResourceConfigurationException; |
| |
| /** |
| * Completes the processing of a batch of CASes. The size of a batch is determined based on |
| * configuration provided by the application that is using this component. The purpose of |
| * <code>batchProcessComplete</code> is to give this AnalysisComponent the change to flush |
| * information from memory to persistent storage. In the event of an error, this allows the |
| * processing to be restarted from the end of the last completed batch. |
| * <p> |
| * If this component's descriptor declares that it is <code>recoverable</code>, then this |
| * component is <i>required</i> to be restartable from the end of the last completed batch. |
| * |
| * @throws AnalysisEngineProcessException |
| * if this component encounters a problem in flushing its state to persistent storage |
| */ |
| void batchProcessComplete() throws AnalysisEngineProcessException; |
| |
| /** |
| * Notifies this AnalysisComponent that processing of an entire collection has been completed. In |
| * this method, this component should finish writing any output relating to the current |
| * collection. |
| * |
| * @throws AnalysisEngineProcessException |
| * if this component encounters a problem in its end-of-collection processing |
| */ |
| void collectionProcessComplete() throws AnalysisEngineProcessException; |
| |
| /** |
| * Frees all resources held by this AnalysisComponent. The framework calls this method only once, |
| * when it is finished using this component. |
| */ |
| void destroy(); |
| |
| /** |
| * Inputs a CAS to the AnalysisComponent. The AnalysisComponent "owns" this CAS until such time as |
| * {@link #hasNext()} is called and returns false or until <code>process</code> is called again |
| * (see class description). |
| * |
| * @param aCAS |
| * A CAS that this AnalysisComponent should process. The framework will ensure that aCAS |
| * implements the specific CAS interface specified by the |
| * {@link #getRequiredCasInterface()} method. |
| * |
| * @throws AnalysisEngineProcessException |
| * if a problem occurs during processing |
| */ |
| void process(AbstractCas aCAS) throws AnalysisEngineProcessException; |
| |
| /** |
| * Asks if this AnalysisComponent has another CAS to output. If this method returns true, then a |
| * call to {@link #next()} should retrieve the next output CAS. When this method returns false, |
| * the AnalysisComponent gives up control of the initial CAS that was passed to its |
| * {@link #process(AbstractCas)} method. |
| * |
| * @return true if this AnalysisComponent has another CAS to output, false if not. |
| * |
| * @throws AnalysisEngineProcessException |
| * if a problem occurs during processing |
| */ |
| boolean hasNext() throws AnalysisEngineProcessException; |
| |
| /** |
| * Gets the next output CAS. The framework will only call this method after first calling |
| * {@link #hasNext()} and checking that it returns true. |
| * <p> |
| * The AnalysisComponent can obtain a new CAS by calling {@link UimaContext#getEmptyCas(Class)} |
| * (or instead, one of the helper methods in the ImplBase class that it extended). |
| * |
| * @return the next output CAS. |
| * |
| * @throws AnalysisEngineProcessException |
| * if a problem occurs during processing |
| */ |
| AbstractCas next() throws AnalysisEngineProcessException; |
| |
| /** |
| * Returns the specific CAS interface that this AnalysisComponent requires the framework to pass |
| * to its {@link #process(AbstractCas)} method. |
| * |
| * @return the required CAS interface. This must specify a subtype of {@link AbstractCas}. |
| */ |
| Class<? extends AbstractCas> getRequiredCasInterface(); |
| |
| /** |
| * Returns the maximum number of CAS instances that this AnalysisComponent expects to use at the |
| * same time. This only applies to CasMultipliers. Most CasMultipliers will only need one CAS at a |
| * time. Only if there is a clear need should this be overridden to return something greater than |
| * 1. |
| * |
| * @return the number of CAS instances required by this AnalysisComponent. |
| */ |
| int getCasInstancesRequired(); |
| |
| /** |
| * Sets the ResultSpecification that this AnalysisComponent should use. The ResultSpecification is |
| * a set of types and features that this AnalysisComponent is asked to produce. An Analysis |
| * Component may (but is not required to) optimize its processing by omitting the generation of |
| * any types or features that are not part of the ResultSpecification. |
| * |
| * @param aResultSpec |
| * the ResultSpecification for this Analysis Component to use. |
| */ |
| void setResultSpecification(ResultSpecification aResultSpec); |
| } |