uimaj-2.3.0-incubating/uimaj-core/src/main/java/org/apache/uima/flow/impl/CapabilityLanguageFlowController.java - uima-uimaj - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 package org.apache.uima.flow.impl;

 import java.io.IOException;
 import java.net.URL;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;

 import org.apache.uima.UIMAFramework;
 import org.apache.uima.UIMARuntimeException;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.analysis_engine.ResultSpecification;
 import org.apache.uima.analysis_engine.TypeOrFeature;
 import org.apache.uima.analysis_engine.metadata.AnalysisEngineMetaData;
 import org.apache.uima.analysis_engine.metadata.CapabilityLanguageFlow;
 import org.apache.uima.cas.CAS;
 import org.apache.uima.cas.text.Language;
 import org.apache.uima.flow.CasFlowController_ImplBase;
 import org.apache.uima.flow.Flow;
 import org.apache.uima.flow.FlowControllerContext;
 import org.apache.uima.flow.FlowControllerDescription;
 import org.apache.uima.resource.ResourceInitializationException;
 import org.apache.uima.resource.metadata.Capability;
 import org.apache.uima.util.InvalidXMLException;
 import org.apache.uima.util.XMLInputSource;

 /**
  * FlowController for the CapabilityLanguageFlow, which uses a linear flow but may skip some of the
  * AEs in the flow if they do not handle the language of the current document or if their outputs
  * have already been produced by a previous AE in the flow.
  */
 public class CapabilityLanguageFlowController extends CasFlowController_ImplBase {
   private List<AnalysisSequenceCapabilityNode> mStaticSequence;

   private Map<String, AnalysisEngineMetaData> mComponentMetaDataMap;

   private Map<String, List<AnalysisSequenceCapabilityNode>> mFlowTable;

   private final Map<String, ResultSpecification> lastResultSpecForComponent =
     new HashMap<String, ResultSpecification>();

   /**
    * main language separator e.g 'en' and 'en-US'
    */
   private static final char LANGUAGE_SEPARATOR = '-';

   /*
    * (non-Javadoc)
    *
    * @see org.apache.uima.flow.FlowController#initialize(FlowControllerContext)
    */
   public void initialize(FlowControllerContext aContext) throws ResourceInitializationException {
     super.initialize(aContext);
     mComponentMetaDataMap = (Map<String, AnalysisEngineMetaData>)aContext.getAnalysisEngineMetaDataMap();

     // build a list of AnalysisSequenceNodes from the capabilityLanguageFlow
     mStaticSequence = new ArrayList<AnalysisSequenceCapabilityNode>();
     CapabilityLanguageFlow flowConstraints = (CapabilityLanguageFlow) aContext
             .getAggregateMetadata().getFlowConstraints();
     for (String aeKey : flowConstraints.getCapabilityLanguageFlow()) {
       mStaticSequence.add(
           new AnalysisSequenceCapabilityNode(
               aeKey,
               mComponentMetaDataMap.get(aeKey).getCapabilities(),
               null));
     }

     // compute flow table with the specified capabilities
     mFlowTable = computeFlowTable(aContext.getAggregateMetadata().getCapabilities());
   }

   /*
    * (non-Javadoc)
    *
    * @see org.apache.uima.flow.CasFlowController_ImplBase#computeFlow(org.apache.uima.cas.CAS)
    */
   public Flow computeFlow(CAS aCAS) throws AnalysisEngineProcessException {
     CapabilityLanguageFlowObject flow = new CapabilityLanguageFlowObject(mFlowTable, this);
     flow.setCas(aCAS);
     return flow;
   }

   /**
    * method computeFlowTable create the flow table for faster processing. The flow table includes
    * the corresponding flow sequence for all languages in the capabilities
    *
    * @param aCapabilities
    *          aggregate engine capabilities
    * @return Map - flow table includes all sequences for all languages
    */
   protected Map<String, List<AnalysisSequenceCapabilityNode>> computeFlowTable(Capability[] aCapabilities) {
     // create flowTable
     Map<String, List<AnalysisSequenceCapabilityNode>> flowTable =
       new HashMap<String, List<AnalysisSequenceCapabilityNode>>();

     // get all languages from the capabilities
     Set<String> languages = new HashSet<String>();
     for (Capability capability : aCapabilities) {
       for (String capabilityLanguage : capability.getLanguagesSupported()) {
         languages.add(capabilityLanguage);
       }
     }

     // create flow table with sequences for all languages
     for (String capabilityLanguage : languages) {
       flowTable.put(capabilityLanguage, computeSequence(capabilityLanguage, aCapabilities));
     }

     return flowTable;
   }

   /**
    * method computeSequence creates a capabilityLanguageAnalysisSequence for the given language
    *
    * @param language
    *          current language
    * @param aCapabilities
    *          output capabilities of the aggregate engine
    *
    * @return List - capabilityLanguageAnalysisSequence for the current language
    */
   protected List<AnalysisSequenceCapabilityNode> computeSequence(String language, Capability[] aCapabilities) {
     language = Language.normalize(language);  // lower-cases, replaces _ with -, changes null to x-unspecified

     // create resultSpec from the current aggregate capabilities
     ResultSpecification aggrResultsToProduce = UIMAFramework.getResourceSpecifierFactory()
             .createResultSpecification();

     if (aCapabilities != null) {
       aggrResultsToProduce.addCapabilities(aCapabilities);
     } else {
       return null;
     }

     // create array list for the current sequence
     List<AnalysisSequenceCapabilityNode> newSequence = new ArrayList<AnalysisSequenceCapabilityNode>();

     // loop over all annotators that should be called
     // In this loop we will gradually reduce the set of output capabilities
     for (int sequenceIndex = 0; sequenceIndex < mStaticSequence.size(); sequenceIndex++) {
       // get array of output capabilities for the current language from the current result spec
       TypeOrFeature[] tofsNeeded = aggrResultsToProduce.getResultTypesAndFeatures(language);

       // Augment these outputCapabilities if the language-spec is for a country, to
       // include the outputCapabilities for the language without the country-spec.

       // strip language extension if available
       int index = language.indexOf(LANGUAGE_SEPARATOR);

       // if country extension is available
       if (index >= 0) {
         // create Set for outputSpecs, so we can eliminate duplicates
         Set<TypeOrFeature> outputSpec = new HashSet<TypeOrFeature>();

         // add language with country extension removed,
         // to the existing output capabilities (or if non exist, just use
         // the capabilities for the language without the country extension)
         if (tofsNeeded.length > 0) {
           // copy all existing capabilities to the Set
           for (TypeOrFeature outputCapability : tofsNeeded) {
             outputSpec.add(outputCapability);
           }

           // get array of output capabilities only for the language without country extension
           tofsNeeded = aggrResultsToProduce.getResultTypesAndFeatures(language.substring(0, index));

           // add language output capabilities to the Set
           for (TypeOrFeature outputCapability : tofsNeeded) {
             outputSpec.add(outputCapability);
           }

           // convert all output capabilities to a outputCapabilities array
           tofsNeeded = new TypeOrFeature[outputSpec.size()];
           outputSpec.toArray(tofsNeeded);
         } else { // for language with country extension was noting found
           // get array of output capabilities with the new main language without country extension
           tofsNeeded = aggrResultsToProduce.getResultTypesAndFeatures(language.substring(0, index));
         }
       }

       // current analysis node which contains the current analysis engine
       AnalysisSequenceCapabilityNode node;

       // result spec for the current analysis engine
       ResultSpecification currentAnalysisResultSpec = null;

       // flag if current analysis engine should be called or not
       boolean shouldEngineBeCalled = false;

       // check output capabilities from the current result spec

       // get next analysis engine from the sequence node
       node = mStaticSequence.get(sequenceIndex);

       // get capability container from the current analysis engine
       ResultSpecification delegateProduces = node.getCapabilityContainer();

       // create current analysis result spec without any language information
       currentAnalysisResultSpec = UIMAFramework.getResourceSpecifierFactory()
               .createResultSpecification();

       // check if engine should be called -
       //   loop over all remaining output capabilities of the aggregate's result spec
       //     to see if this component of the aggregate produces that type or feature,
       //     for this language
       for (TypeOrFeature tof : tofsNeeded) {
         if ((tof.isType() && delegateProduces.containsType(tof.getName(), language)) ||
             (!tof.isType() && delegateProduces.containsFeature(tof.getName(), language))) {
 //        if (capabilityContainer.hasOutputTypeOrFeature(tof, language, true)) {
           currentAnalysisResultSpec.addResultTypeOrFeature(tof);
           shouldEngineBeCalled = true;
           // remove current ToF from the result spec
           aggrResultsToProduce.removeTypeOrFeature(tof);
         }
       }

       // skip engine if not output capability match

       // should be called is false if this engine produces none of the
       //   needed outputs of the aggregate
       if (shouldEngineBeCalled == true) {
         // tell this component which output types/features need to be produced
         //   note: As an exception to the way normal result-specifications are produced,
         //         here we *don't* add the types/features which are input to
         //         other delegates need to be produced.
         //         This is for backward compatibility.
         node.setResultSpec(currentAnalysisResultSpec);

         // add note to the current sequence
         newSequence.add((AnalysisSequenceCapabilityNode)node.clone());
       } else {
       // engine should not be called, but add null to the sequence to track that
       // engine should not be called
         newSequence.add(null);
       }
     } // loop over all delegates in the flow sequence

     return newSequence;
   }

   public static FlowControllerDescription getDescription() {
     URL descUrl = FixedFlowController.class
             .getResource("/org/apache/uima/flow/CapabilityLanguageFlowController.xml");
     FlowControllerDescription desc;
     try {
       desc = (FlowControllerDescription) UIMAFramework.getXMLParser().parse(
               new XMLInputSource(descUrl));
     } catch (InvalidXMLException e) {
       throw new UIMARuntimeException(e);
     } catch (IOException e) {
       throw new UIMARuntimeException(e);
     }
     return desc;
   }

   public Map<String, ResultSpecification> getLastResultSpecForComponent() {
     return lastResultSpecForComponent;
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/

	package org.apache.uima.flow.impl;

	import java.io.IOException;
	import java.net.URL;
	import java.util.ArrayList;
	import java.util.HashMap;
	import java.util.HashSet;
	import java.util.List;
	import java.util.Map;
	import java.util.Set;

	import org.apache.uima.UIMAFramework;
	import org.apache.uima.UIMARuntimeException;
	import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
	import org.apache.uima.analysis_engine.ResultSpecification;
	import org.apache.uima.analysis_engine.TypeOrFeature;
	import org.apache.uima.analysis_engine.metadata.AnalysisEngineMetaData;
	import org.apache.uima.analysis_engine.metadata.CapabilityLanguageFlow;
	import org.apache.uima.cas.CAS;
	import org.apache.uima.cas.text.Language;
	import org.apache.uima.flow.CasFlowController_ImplBase;
	import org.apache.uima.flow.Flow;
	import org.apache.uima.flow.FlowControllerContext;
	import org.apache.uima.flow.FlowControllerDescription;
	import org.apache.uima.resource.ResourceInitializationException;
	import org.apache.uima.resource.metadata.Capability;
	import org.apache.uima.util.InvalidXMLException;
	import org.apache.uima.util.XMLInputSource;

	/**
	* FlowController for the CapabilityLanguageFlow, which uses a linear flow but may skip some of the
	* AEs in the flow if they do not handle the language of the current document or if their outputs
	* have already been produced by a previous AE in the flow.
	*/
	public class CapabilityLanguageFlowController extends CasFlowController_ImplBase {
	private List<AnalysisSequenceCapabilityNode> mStaticSequence;

	private Map<String, AnalysisEngineMetaData> mComponentMetaDataMap;

	private Map<String, List<AnalysisSequenceCapabilityNode>> mFlowTable;

	private final Map<String, ResultSpecification> lastResultSpecForComponent =
	new HashMap<String, ResultSpecification>();

	/**
	* main language separator e.g 'en' and 'en-US'
	*/
	private static final char LANGUAGE_SEPARATOR = '-';

	/*
	* (non-Javadoc)
	*
	* @see org.apache.uima.flow.FlowController#initialize(FlowControllerContext)
	*/
	public void initialize(FlowControllerContext aContext) throws ResourceInitializationException {
	super.initialize(aContext);
	mComponentMetaDataMap = (Map<String, AnalysisEngineMetaData>)aContext.getAnalysisEngineMetaDataMap();

	// build a list of AnalysisSequenceNodes from the capabilityLanguageFlow
	mStaticSequence = new ArrayList<AnalysisSequenceCapabilityNode>();
	CapabilityLanguageFlow flowConstraints = (CapabilityLanguageFlow) aContext
	.getAggregateMetadata().getFlowConstraints();
	for (String aeKey : flowConstraints.getCapabilityLanguageFlow()) {
	mStaticSequence.add(
	new AnalysisSequenceCapabilityNode(
	aeKey,
	mComponentMetaDataMap.get(aeKey).getCapabilities(),
	null));
	}

	// compute flow table with the specified capabilities
	mFlowTable = computeFlowTable(aContext.getAggregateMetadata().getCapabilities());
	}

	/*
	* (non-Javadoc)
	*
	* @see org.apache.uima.flow.CasFlowController_ImplBase#computeFlow(org.apache.uima.cas.CAS)
	*/
	public Flow computeFlow(CAS aCAS) throws AnalysisEngineProcessException {
	CapabilityLanguageFlowObject flow = new CapabilityLanguageFlowObject(mFlowTable, this);
	flow.setCas(aCAS);
	return flow;
	}

	/**
	* method computeFlowTable create the flow table for faster processing. The flow table includes
	* the corresponding flow sequence for all languages in the capabilities
	*
	* @param aCapabilities
	* aggregate engine capabilities
	* @return Map - flow table includes all sequences for all languages
	*/
	protected Map<String, List<AnalysisSequenceCapabilityNode>> computeFlowTable(Capability[] aCapabilities) {
	// create flowTable
	Map<String, List<AnalysisSequenceCapabilityNode>> flowTable =
	new HashMap<String, List<AnalysisSequenceCapabilityNode>>();

	// get all languages from the capabilities
	Set<String> languages = new HashSet<String>();
	for (Capability capability : aCapabilities) {
	for (String capabilityLanguage : capability.getLanguagesSupported()) {
	languages.add(capabilityLanguage);
	}
	}

	// create flow table with sequences for all languages
	for (String capabilityLanguage : languages) {
	flowTable.put(capabilityLanguage, computeSequence(capabilityLanguage, aCapabilities));
	}

	return flowTable;
	}

	/**
	* method computeSequence creates a capabilityLanguageAnalysisSequence for the given language
	*
	* @param language
	* current language
	* @param aCapabilities
	* output capabilities of the aggregate engine
	*
	* @return List - capabilityLanguageAnalysisSequence for the current language
	*/
	protected List<AnalysisSequenceCapabilityNode> computeSequence(String language, Capability[] aCapabilities) {
	language = Language.normalize(language); // lower-cases, replaces _ with -, changes null to x-unspecified

	// create resultSpec from the current aggregate capabilities
	ResultSpecification aggrResultsToProduce = UIMAFramework.getResourceSpecifierFactory()
	.createResultSpecification();

	if (aCapabilities != null) {
	aggrResultsToProduce.addCapabilities(aCapabilities);
	} else {
	return null;
	}

	// create array list for the current sequence
	List<AnalysisSequenceCapabilityNode> newSequence = new ArrayList<AnalysisSequenceCapabilityNode>();

	// loop over all annotators that should be called
	// In this loop we will gradually reduce the set of output capabilities
	for (int sequenceIndex = 0; sequenceIndex < mStaticSequence.size(); sequenceIndex++) {
	// get array of output capabilities for the current language from the current result spec
	TypeOrFeature[] tofsNeeded = aggrResultsToProduce.getResultTypesAndFeatures(language);

	// Augment these outputCapabilities if the language-spec is for a country, to
	// include the outputCapabilities for the language without the country-spec.

	// strip language extension if available
	int index = language.indexOf(LANGUAGE_SEPARATOR);

	// if country extension is available
	if (index >= 0) {
	// create Set for outputSpecs, so we can eliminate duplicates
	Set<TypeOrFeature> outputSpec = new HashSet<TypeOrFeature>();

	// add language with country extension removed,
	// to the existing output capabilities (or if non exist, just use
	// the capabilities for the language without the country extension)
	if (tofsNeeded.length > 0) {
	// copy all existing capabilities to the Set
	for (TypeOrFeature outputCapability : tofsNeeded) {
	outputSpec.add(outputCapability);
	}

	// get array of output capabilities only for the language without country extension
	tofsNeeded = aggrResultsToProduce.getResultTypesAndFeatures(language.substring(0, index));

	// add language output capabilities to the Set
	for (TypeOrFeature outputCapability : tofsNeeded) {
	outputSpec.add(outputCapability);
	}

	// convert all output capabilities to a outputCapabilities array
	tofsNeeded = new TypeOrFeature[outputSpec.size()];
	outputSpec.toArray(tofsNeeded);
	} else { // for language with country extension was noting found
	// get array of output capabilities with the new main language without country extension
	tofsNeeded = aggrResultsToProduce.getResultTypesAndFeatures(language.substring(0, index));
	}
	}

	// current analysis node which contains the current analysis engine
	AnalysisSequenceCapabilityNode node;

	// result spec for the current analysis engine
	ResultSpecification currentAnalysisResultSpec = null;

	// flag if current analysis engine should be called or not
	boolean shouldEngineBeCalled = false;

	// check output capabilities from the current result spec

	// get next analysis engine from the sequence node
	node = mStaticSequence.get(sequenceIndex);

	// get capability container from the current analysis engine
	ResultSpecification delegateProduces = node.getCapabilityContainer();

	// create current analysis result spec without any language information
	currentAnalysisResultSpec = UIMAFramework.getResourceSpecifierFactory()
	.createResultSpecification();

	// check if engine should be called -
	// loop over all remaining output capabilities of the aggregate's result spec
	// to see if this component of the aggregate produces that type or feature,
	// for this language
	for (TypeOrFeature tof : tofsNeeded) {
	if ((tof.isType() && delegateProduces.containsType(tof.getName(), language)) \|\|
	(!tof.isType() && delegateProduces.containsFeature(tof.getName(), language))) {
	// if (capabilityContainer.hasOutputTypeOrFeature(tof, language, true)) {
	currentAnalysisResultSpec.addResultTypeOrFeature(tof);
	shouldEngineBeCalled = true;
	// remove current ToF from the result spec
	aggrResultsToProduce.removeTypeOrFeature(tof);
	}
	}

	// skip engine if not output capability match

	// should be called is false if this engine produces none of the
	// needed outputs of the aggregate
	if (shouldEngineBeCalled == true) {
	// tell this component which output types/features need to be produced
	// note: As an exception to the way normal result-specifications are produced,
	// here we don't add the types/features which are input to
	// other delegates need to be produced.
	// This is for backward compatibility.
	node.setResultSpec(currentAnalysisResultSpec);

	// add note to the current sequence
	newSequence.add((AnalysisSequenceCapabilityNode)node.clone());
	} else {
	// engine should not be called, but add null to the sequence to track that
	// engine should not be called
	newSequence.add(null);
	}
	} // loop over all delegates in the flow sequence

	return newSequence;
	}

	public static FlowControllerDescription getDescription() {
	URL descUrl = FixedFlowController.class
	.getResource("/org/apache/uima/flow/CapabilityLanguageFlowController.xml");
	FlowControllerDescription desc;
	try {
	desc = (FlowControllerDescription) UIMAFramework.getXMLParser().parse(
	new XMLInputSource(descUrl));
	} catch (InvalidXMLException e) {
	throw new UIMARuntimeException(e);
	} catch (IOException e) {
	throw new UIMARuntimeException(e);
	}
	return desc;
	}

	public Map<String, ResultSpecification> getLastResultSpecForComponent() {
	return lastResultSpecForComponent;
	}
	}