| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.uima.flow.impl; |
| |
| import java.util.HashSet; |
| import java.util.Iterator; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Set; |
| |
| import org.apache.uima.UIMAFramework; |
| import org.apache.uima.analysis_engine.ResultSpecification; |
| import org.apache.uima.analysis_engine.TypeOrFeature; |
| import org.apache.uima.cas.CAS; |
| import org.apache.uima.cas.TypeSystem; |
| import org.apache.uima.cas.text.Language; |
| import org.apache.uima.flow.CasFlow_ImplBase; |
| import org.apache.uima.flow.FinalStep; |
| import org.apache.uima.flow.SimpleStep; |
| import org.apache.uima.flow.SimpleStepWithResultSpec; |
| import org.apache.uima.flow.Step; |
| |
| /** |
| * The <code>CapabilityLanguageAnalysisSequence</code> is used for a |
| * <code>CapabilityLanguageFlow</code>. The sequence contains all analysis engines included in |
| * the <code>CapabilityLanguageFlow</code>. |
| * |
| * Within this sequence skipping of analysis engines is possible if the document language of the |
| * current document does not match to the analysis engine capabilities or the output capabilities |
| * are already done by another analysis engine. |
| * |
| */ |
| public class CapabilityLanguageFlowObject extends CasFlow_ImplBase implements Cloneable { |
| |
| private static final String UNSPECIFIED_LANGUAGE = "x-unspecified"; |
| |
| /** |
| * save the last type system |
| */ |
| private TypeSystem mLastTypeSystem; |
| |
| /** |
| * The static list of nodes. |
| */ |
| private List mNodeList; |
| |
| /** |
| * Current index in the sequence list. |
| */ |
| private int mIndex; |
| |
| /** |
| * mResultSpec provides the current result specification which has to be processed. After every |
| * analysis run, the processed ouput result are removed from the mResultSpec. |
| */ |
| private ResultSpecification mResultSpec; |
| |
| /** |
| * flowTable includes all languages with their flow sequence |
| */ |
| private Map mFlowTable; |
| |
| /** |
| * main language separator e.g 'en' and 'en-US' |
| */ |
| private static final char LANGUAGE_SEPARATOR = '-'; |
| |
| static final long serialVersionUID = -5879514955935785660L; |
| |
| /** |
| * Creates a new CapabilityLanguageAnalysisSequence. |
| * |
| * @param aNodeList |
| * a List of {@link AnalysisSequenceNode} objects. These will be returned in order by |
| * {@link #getNext(CAS)}. |
| * @param resultSpec |
| * result specification of the top level aggregate AE |
| */ |
| public CapabilityLanguageFlowObject(List aNodeList, ResultSpecification resultSpec) { |
| mNodeList = aNodeList; |
| mIndex = 0; |
| // clone result specification |
| mResultSpec = (ResultSpecification) resultSpec.clone(); |
| mFlowTable = null; |
| mLastTypeSystem = null; |
| |
| } |
| |
| /** |
| * Create a new CapabilityLangaugeAnalysisSequence with the flowTable |
| * |
| * @param aFlowTable |
| * a flow table |
| */ |
| public CapabilityLanguageFlowObject(Map aFlowTable) { |
| mNodeList = null; |
| mIndex = 0; |
| mResultSpec = null; |
| mFlowTable = aFlowTable; |
| mLastTypeSystem = null; |
| } |
| |
| public Step next() { |
| // check if CAS is set |
| CAS cas = getCas(); |
| assert cas != null; // CapabilityLanguageFlowController ensures this |
| |
| // if type system has changed, recompile flow table |
| if (mLastTypeSystem != cas.getTypeSystem()) { |
| // set new type system |
| mLastTypeSystem = cas.getTypeSystem(); |
| |
| // recompile all result specs |
| recompileFlowTable(); |
| } |
| |
| // get current document language from the CAS |
| String documentLanguage = Language.normalize(cas.getDocumentLanguage()); |
| |
| if (mNodeList != null) { |
| // check if another engine is available |
| if (mIndex >= mNodeList.size()) { |
| return new FinalStep(); |
| } else { |
| // get array of ouput capabilities for the current languge from the current result spec |
| TypeOrFeature[] ouputCapabilities = mResultSpec.getResultTypesAndFeatures(documentLanguage); |
| |
| // strip language extension if available |
| int index = documentLanguage.indexOf(LANGUAGE_SEPARATOR); |
| |
| // if country extension was available |
| if (index >= 0) { |
| // create HashSet for outputSpec |
| HashSet outputSpec = new HashSet(); |
| |
| // add language with country extension output capabilities to the outputSpec |
| if (ouputCapabilities.length > 0) { |
| for (int i = 0; i < ouputCapabilities.length; i++) { |
| outputSpec.add(ouputCapabilities[i]); |
| } |
| |
| // get array of output capabilities only for the language without country extension |
| ouputCapabilities = mResultSpec.getResultTypesAndFeatures(documentLanguage.substring(0, |
| index)); |
| |
| // add language output capabilities to the outputSpec |
| for (int i = 0; i < ouputCapabilities.length; i++) { |
| outputSpec.add(ouputCapabilities[i]); |
| } |
| |
| // convert all output capabilities to a outputCapabilities array |
| ouputCapabilities = new TypeOrFeature[outputSpec.size()]; |
| outputSpec.toArray(ouputCapabilities); |
| } else // for language with country extension was noting found |
| { |
| // get array of output capabilities with the new main language without country extension |
| ouputCapabilities = mResultSpec.getResultTypesAndFeatures(documentLanguage.substring(0, |
| index)); |
| } |
| } |
| |
| // current analysis node which contains the current analysis engine |
| AnalysisSequenceCapabilityNode node; |
| |
| // result spec for the current analysis engine |
| ResultSpecification currentAnalysisResultSpec = null; |
| |
| // flag if current analysis engine should be called or not |
| boolean shouldEngineBeCalled = false; |
| |
| // check output capabilites from the current result spec |
| do { |
| // get next analysis engine from the sequence node |
| node = (AnalysisSequenceCapabilityNode) mNodeList.get(mIndex++); |
| |
| // get capability container from the current analysis engine |
| CapabilityContainer capabilityContainer = node.getCapabilityContainer(); |
| |
| // create current analysis result spec without any language information |
| currentAnalysisResultSpec = UIMAFramework.getResourceSpecifierFactory() |
| .createResultSpecification(); |
| |
| // check if engine should be called - loop over all ouput capabilities of the result spec |
| for (int i = 0; i < ouputCapabilities.length; i++) { |
| // check if current ToF can be produced by the current analysis engine |
| if (capabilityContainer.hasOutputTypeOrFeature(ouputCapabilities[i], documentLanguage, |
| true)) { |
| currentAnalysisResultSpec.addResultTypeOrFeature(ouputCapabilities[i]); |
| shouldEngineBeCalled = true; |
| |
| // remove current ToF from the result spec |
| mResultSpec.removeTypeOrFeature(ouputCapabilities[i]); |
| } |
| |
| } |
| // skip engine if not output capability match |
| } while (shouldEngineBeCalled == false && mIndex < mNodeList.size()); |
| |
| // check if current engine should be called |
| if (shouldEngineBeCalled == true) { |
| // set result spec for current analysis engine |
| node.setResultSpec(currentAnalysisResultSpec); |
| |
| // return current analysis engine node |
| return new SimpleStepWithResultSpec(node.getCasProcessorKey(), currentAnalysisResultSpec); |
| } else // no engine left which can be called |
| { |
| return new FinalStep(); |
| } |
| } |
| } else if (mFlowTable != null) { |
| AnalysisSequenceCapabilityNode node = null; |
| |
| // check if document language is included in the flowTable |
| List flow = (List) mFlowTable.get(documentLanguage); |
| |
| if (flow == null) // try to get flow without language extension or with x-unspecified |
| { |
| // strip language extension if available |
| int index = documentLanguage.indexOf(LANGUAGE_SEPARATOR); |
| |
| // if country extension is available |
| if (index >= 0) { |
| // check if document language is included in the flowTable |
| flow = (List) mFlowTable.get(documentLanguage.substring(0, index)); |
| // If the language was not found, use flow for unspecified lang instead. |
| if (flow == null) { |
| flow = (List) mFlowTable.get(UNSPECIFIED_LANGUAGE); |
| } |
| } else // try to get flow for language x-unspecified |
| { |
| flow = (List) mFlowTable.get(UNSPECIFIED_LANGUAGE); |
| } |
| } |
| |
| // if flow is available get next node |
| if (flow != null) { |
| if (flow.size() > mIndex) { |
| node = (AnalysisSequenceCapabilityNode) flow.get(mIndex++); |
| while (node == null && flow.size() > mIndex) { |
| node = (AnalysisSequenceCapabilityNode) flow.get(mIndex++); |
| } |
| } |
| } |
| if (node != null) { |
| return new SimpleStepWithResultSpec(node.getCasProcessorKey(), node.getResultSpec()); |
| } |
| } |
| return new FinalStep(); |
| } |
| |
| /** |
| * Returns a clone of this <code>AnalysisSequence</code>. |
| * |
| * @return a new <code>AnalysisSequence</code> object that is an exact clone of this one. |
| */ |
| public Object clone() { |
| try { |
| return super.clone(); |
| } catch (CloneNotSupportedException e) { |
| return null; |
| } |
| } |
| |
| /** |
| * reset index of the sequence to 0 |
| */ |
| public void resetIndex() { |
| mIndex = 0; |
| } |
| |
| /** |
| * recompiles all result specs in the flow table with the current type system |
| */ |
| protected void recompileFlowTable() { |
| |
| if (mFlowTable != null) { |
| // get all language key from the table |
| Set keys = mFlowTable.keySet(); |
| |
| // loop over all languages |
| Iterator it = keys.iterator(); |
| while (it.hasNext()) { |
| |
| // get sequence for current language |
| List sequence = (List) mFlowTable.get(it.next()); |
| |
| // loop over all nodes in the sequence |
| for (int i = 0; i < sequence.size(); i++) { |
| // get current annotator node |
| AnalysisSequenceCapabilityNode node = (AnalysisSequenceCapabilityNode) sequence.get(i); |
| if (node != null) { |
| // recompile result spec |
| node.getResultSpec().compile(mLastTypeSystem); |
| } |
| } |
| } |
| } |
| } |
| } |