| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.uima.flow.impl; |
| |
| import java.io.IOException; |
| import java.net.URL; |
| import java.util.ArrayList; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| import java.util.List; |
| import java.util.Map; |
| |
| import org.apache.uima.UIMAFramework; |
| import org.apache.uima.UIMARuntimeException; |
| import org.apache.uima.analysis_engine.AnalysisEngineProcessException; |
| import org.apache.uima.analysis_engine.ResultSpecification; |
| import org.apache.uima.analysis_engine.TypeOrFeature; |
| import org.apache.uima.analysis_engine.metadata.AnalysisEngineMetaData; |
| import org.apache.uima.analysis_engine.metadata.CapabilityLanguageFlow; |
| import org.apache.uima.cas.CAS; |
| import org.apache.uima.cas.text.Language; |
| import org.apache.uima.flow.CasFlowController_ImplBase; |
| import org.apache.uima.flow.Flow; |
| import org.apache.uima.flow.FlowControllerContext; |
| import org.apache.uima.flow.FlowControllerDescription; |
| import org.apache.uima.resource.ResourceInitializationException; |
| import org.apache.uima.resource.metadata.Capability; |
| import org.apache.uima.util.InvalidXMLException; |
| import org.apache.uima.util.XMLInputSource; |
| |
| /** |
| * FlowController for the CapabilityLanguageFlow, which uses a linear fow but may skip some of the |
| * AEs in the flow if they do not handle the language of the current document or if their outputs |
| * have already been produced by a previous AE in the flow. |
| */ |
| public class CapabilityLanguageFlowController extends CasFlowController_ImplBase { |
| private ArrayList mStaticSequence; |
| |
| private Map mComponentMetaDataMap; |
| |
| private Map mFlowTable; |
| |
| /** |
| * main language separator e.g 'en' and 'en-US' |
| */ |
| private static final char LANGUAGE_SEPARATOR = '-'; |
| |
| /* |
| * (non-Javadoc) |
| * |
| * @see org.apache.uima.flow.FlowController#initialize(FlowControllerContext) |
| */ |
| public void initialize(FlowControllerContext aContext) throws ResourceInitializationException { |
| super.initialize(aContext); |
| mComponentMetaDataMap = aContext.getAnalysisEngineMetaDataMap(); |
| |
| // build a list of AnalysisSequenceNodes from the capabilityLanguageFlow |
| mStaticSequence = new ArrayList(); |
| CapabilityLanguageFlow flowConstraints = (CapabilityLanguageFlow) aContext |
| .getAggregateMetadata().getFlowConstraints(); |
| String[] flow = flowConstraints.getCapabilityLanguageFlow(); |
| for (int i = 0; i < flow.length; i++) { |
| AnalysisEngineMetaData md = (AnalysisEngineMetaData) mComponentMetaDataMap.get(flow[i]); |
| mStaticSequence.add(new AnalysisSequenceCapabilityNode(flow[i], md.getCapabilities(), null)); |
| } |
| |
| // compute flow table with the specified capabilities |
| mFlowTable = computeFlowTable(aContext.getAggregateMetadata().getCapabilities()); |
| } |
| |
| /* |
| * (non-Javadoc) |
| * |
| * @see org.apache.uima.flow.CasFlowController_ImplBase#computeFlow(org.apache.uima.cas.CAS) |
| */ |
| public Flow computeFlow(CAS aCAS) throws AnalysisEngineProcessException { |
| CapabilityLanguageFlowObject flow = new CapabilityLanguageFlowObject(mFlowTable); |
| flow.setCas(aCAS); |
| return flow; |
| } |
| |
| /** |
| * method computeFlowTable create the flow table for faster processing. The flow table inlcudes |
| * for all languages in the capabilities the coresponding flow sequence |
| * |
| * @param aCapabilities |
| * aggregate engine capabilities |
| * @return Map - flow table includes all sequences for all languages |
| */ |
| protected Map computeFlowTable(Capability[] aCapabilities) { |
| // create flowTable |
| Map flowTable = new HashMap(); |
| |
| // get all languages from the capabilities |
| HashSet languages = new HashSet(); |
| for (int i = 0; i < aCapabilities.length; i++) { |
| // get languages from current capability |
| aCapabilities[i].getLanguagesSupported(); |
| String language; |
| for (int y = 0; y < aCapabilities[i].getLanguagesSupported().length; y++) { |
| language = aCapabilities[i].getLanguagesSupported()[y]; |
| languages.add(language); |
| } |
| } |
| |
| // create flow table with sequences for all languages |
| Iterator it = languages.iterator(); |
| while (it.hasNext()) { |
| // add sequence for the current language |
| String language = (String) it.next(); |
| flowTable.put(language, computeSequence(language, aCapabilities)); |
| } |
| |
| return flowTable; |
| } |
| |
| /** |
| * method computeSequence creates a capabilityLanguageAnalysisSequence for the given language |
| * |
| * @param language |
| * current language |
| * @param aCapabilities |
| * output capabilities of the aggregate engine |
| * |
| * @return List - capabilityLanguageAnalysisSequence for the current language |
| */ |
| protected List computeSequence(String language, Capability[] aCapabilities) { |
| language = Language.normalize(language); |
| |
| // create resultSpec from the current aggregate capabilities |
| ResultSpecification resultSpec = UIMAFramework.getResourceSpecifierFactory() |
| .createResultSpecification(); |
| |
| if (aCapabilities != null) { |
| resultSpec.addCapabilities(aCapabilities); |
| } else { |
| return null; |
| } |
| |
| // create array list for the current sequence |
| List newSequence = new ArrayList(); |
| |
| // loop pver all annotators that should be called |
| for (int sequenceIndex = 0; sequenceIndex < mStaticSequence.size(); sequenceIndex++) { |
| // get array of ouput capabilities for the current languge from the current result spec |
| TypeOrFeature[] ouputCapabilities = resultSpec.getResultTypesAndFeatures(language); |
| |
| // strip language extension if available |
| int index = language.indexOf(LANGUAGE_SEPARATOR); |
| |
| // if country extension is available |
| if (index >= 0) { |
| // create HashSet for outputSpec |
| HashSet outputSpec = new HashSet(); |
| |
| // add language with country extension output capabilities to the outputSpec |
| if (ouputCapabilities.length > 0) { |
| for (int i = 0; i < ouputCapabilities.length; i++) { |
| outputSpec.add(ouputCapabilities[i]); |
| } |
| |
| // get array of output capabilities only for the language without country extension |
| ouputCapabilities = resultSpec.getResultTypesAndFeatures(language.substring(0, index)); |
| |
| // add language output capabilities to the outputSpec |
| for (int i = 0; i < ouputCapabilities.length; i++) { |
| outputSpec.add(ouputCapabilities[i]); |
| } |
| |
| // convert all output capabilities to a outputCapabilities array |
| ouputCapabilities = new TypeOrFeature[outputSpec.size()]; |
| outputSpec.toArray(ouputCapabilities); |
| } else |
| // for language with country extension was noting found |
| { |
| // get array of output capabilities with the new main language without country extension |
| ouputCapabilities = resultSpec.getResultTypesAndFeatures(language.substring(0, index)); |
| } |
| } |
| |
| // current analysis node which contains the current analysis engine |
| AnalysisSequenceCapabilityNode node; |
| |
| // result spec for the current analysis engine |
| ResultSpecification currentAnalysisResultSpec = null; |
| |
| // flag if current analysis engine should be called or not |
| boolean shouldEngineBeCalled = false; |
| |
| // check output capabilites from the current result spec |
| |
| // get next analysis engine from the sequence node |
| node = (AnalysisSequenceCapabilityNode) mStaticSequence.get(sequenceIndex); |
| |
| // get capability container from the current analysis engine |
| CapabilityContainer capabilityContainer = node.getCapabilityContainer(); |
| |
| // create current analysis result spec without any language information |
| currentAnalysisResultSpec = UIMAFramework.getResourceSpecifierFactory() |
| .createResultSpecification(); |
| |
| // check if engine should be called - loop over all ouput capabilities of the result spec |
| for (int i = 0; i < ouputCapabilities.length; i++) { |
| // check if current ToF can be produced by the current analysis engine |
| if (capabilityContainer.hasOutputTypeOrFeature(ouputCapabilities[i], language, true)) { |
| currentAnalysisResultSpec.addResultTypeOrFeature(ouputCapabilities[i]); |
| shouldEngineBeCalled = true; |
| |
| // remove current ToF from the result spec |
| resultSpec.removeTypeOrFeature(ouputCapabilities[i]); |
| } |
| } |
| // skip engine if not output capability match |
| |
| // check if current engine should be called |
| if (shouldEngineBeCalled == true) { |
| // set result spec for current analysis engine |
| node.setResultSpec(currentAnalysisResultSpec); |
| |
| // add note to the current sequence |
| newSequence.add(node.clone()); |
| } else |
| // engine should not be called, but add null to the sequence to track that |
| // engine should not be called |
| { |
| newSequence.add(null); |
| } |
| } |
| |
| return newSequence; |
| } |
| |
| public static FlowControllerDescription getDescription() { |
| URL descUrl = FixedFlowController.class |
| .getResource("/org/apache/uima/flow/CapabilityLanguageFlowController.xml"); |
| FlowControllerDescription desc; |
| try { |
| desc = (FlowControllerDescription) UIMAFramework.getXMLParser().parse( |
| new XMLInputSource(descUrl)); |
| } catch (InvalidXMLException e) { |
| throw new UIMARuntimeException(e); |
| } catch (IOException e) { |
| throw new UIMARuntimeException(e); |
| } |
| return desc; |
| } |
| } |