blob: fa84495025ee60229357c7f6ffb81c10dd2fbf05 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.enhancer.engines.uimalocal;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Dictionary;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.UUID;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.Properties;
import org.apache.felix.scr.annotations.Property;
import org.apache.felix.scr.annotations.Service;
import org.apache.lucene.analysis.uima.ae.AEProvider;
import org.apache.lucene.analysis.uima.ae.AEProviderFactory;
import org.apache.stanbol.commons.caslight.Feature;
import org.apache.stanbol.commons.caslight.FeatureStructure;
import org.apache.stanbol.commons.caslight.FeatureStructureListHolder;
import org.apache.stanbol.enhancer.servicesapi.Blob;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
import org.apache.stanbol.enhancer.servicesapi.NoSuchPartException;
import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Type;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.metadata.TypeDescription;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.component.ComponentContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @author Mihaly Heder
*/
@Component(immediate = true, metatype = true, inherit = true, label = "UIMA Local Enhancement Engine",
description = "Runs UIMA Analysis Engine and retuns values.")
@Service
@Properties(value = {
@Property(name = EnhancementEngine.PROPERTY_NAME, value = "uimalocal")
})
public class UIMALocal extends AbstractEnhancementEngine<RuntimeException, RuntimeException>
implements EnhancementEngine, ServiceProperties {
private final Logger logger = LoggerFactory.getLogger(getClass());
@Property(value = "sourceName", label = "UIMA source name",
description = "The name of this UIMA source which will be used for referring internally to the UIMA endpoint")
public static final String UIMA_SOURCENAME = "stanbol.engine.uimalocal.sourcename";
@Property(value = "/path/to/descriptor",
label = "UIMA descriptor file path",
description = "The file path to the UIMA descriptor XML to load")
public static final String UIMA_DESCRIPTOR_PATH = "stanbol.engine.uimalocal.descriptorpath";
@Property(value = "uima.apache.org", label = "Content Part URI reference",
description = "The URI Reference of the UIMA content part to be created. This content part will "
+ "contain Annotations from all the resources above.")
public static final String UIMA_CONTENTPART_URIREF = "stanbol.engine.uimalocal.contentpart.uriref";
@Property(cardinality = 1000, value = "text/plain", label = "Supported Mime Types",
description = "Mime Types supported by this client. This should be aligned to the capabilities of the UIMA Endpoints.")
public static final String UIMA_SUPPORTED_MIMETYPES = "stanbol.engine.uimalocal.contentpart.mimetypes";
public static final Integer defaultOrder = ServiceProperties.ORDERING_PRE_PROCESSING;
private AEProvider aeProvider;
private Set<String> SUPPORTED_MIMETYPES;
private String uimaUri;
private String uimaSourceName;
private String uimaDescriptorPath;
private List<String> uimaTypeNames;
@Override
protected void activate(ComponentContext ctx) throws ConfigurationException {
super.activate(ctx);
Dictionary<String, Object> props = ctx.getProperties();
this.uimaUri = (String) props.get(UIMA_CONTENTPART_URIREF);
this.uimaSourceName = (String) props.get(UIMA_SOURCENAME);
this.uimaDescriptorPath = (String) props.get(UIMA_DESCRIPTOR_PATH);
SUPPORTED_MIMETYPES = Collections.unmodifiableSet(new HashSet<String>(
Arrays.asList((String[]) props.get(UIMA_SUPPORTED_MIMETYPES))));
aeProvider = AEProviderFactory.getInstance().getAEProvider(uimaSourceName,
uimaDescriptorPath, new HashMap<String, Object>());
try {
AnalysisEngine ae = aeProvider.getAE();
TypeDescription[] aeTypes = ae.getAnalysisEngineMetaData().getTypeSystem().getTypes();
uimaTypeNames = new ArrayList<String>();
for (TypeDescription aeType : aeTypes) {
String aeTypeName = aeType.getName();
logger.info("Configuring Analysis Engine Type:" + aeTypeName);
uimaTypeNames.add(aeTypeName);
}
} catch (ResourceInitializationException ex) {
logger.error("Cannot retrieve AE from AEProvider. ", ex);
throw new ConfigurationException(uimaDescriptorPath, "Cannot retreive AE from AEProvider", ex);
}
}
@Override
protected void deactivate(ComponentContext ctx) {
super.deactivate(ctx);
}
@Override
public int canEnhance(ContentItem ci) throws EngineException {
if (ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES) != null) {
return ENHANCE_ASYNC;
}
return CANNOT_ENHANCE;
}
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES);
if (contentPart == null) {
throw new IllegalStateException("No ContentPart with an supported Mimetype '"
+ SUPPORTED_MIMETYPES + "' found for ContentItem " + ci.getUri()
+ ": This is also checked in the canEnhance method! -> This "
+ "indicated an Bug in the implementation of the "
+ "EnhancementJobManager!");
}
String text;
try {
text = ContentItemHelper.getText(contentPart.getValue());
} catch (IOException e) {
throw new InvalidContentException(this, ci, e);
}
JCas jcas;
try {
logger.info("Processing text with UIMA AE...");
jcas = processText(text);
} catch (ResourceInitializationException ex) {
logger.error("Error initializing UIMA AE", ex);
throw new EngineException("Error initializing UIMA AE", ex);
} catch (AnalysisEngineProcessException ex) {
logger.error("Error running UIMA AE", ex);
throw new EngineException("Error running UIMA AE", ex);
}
//just for being sure
if (jcas == null) {
return;
}
for (String typeName : uimaTypeNames) {
List<FeatureStructure> featureSetList = concertToCasLight(jcas, typeName);
IRI uimaIRI = new IRI(uimaUri);
FeatureStructureListHolder holder;
ci.getLock().writeLock().lock();
try {
holder = ci.getPart(uimaIRI, FeatureStructureListHolder.class);
} catch (NoSuchPartException e) {
holder = new FeatureStructureListHolder();
logger.info("Adding FeatureSet List Holder content part with uri:" + uimaUri);
ci.addPart(uimaIRI, holder);
logger.info(uimaUri + " content part added.");
} finally {
ci.getLock().writeLock().unlock();
}
ci.getLock().writeLock().lock();
try {
holder.addFeatureStructureList(uimaSourceName, featureSetList);
} finally {
ci.getLock().writeLock().unlock();
}
}
}
@Override
public Map<String, Object> getServiceProperties() {
return Collections.unmodifiableMap(Collections.singletonMap(
ENHANCEMENT_ENGINE_ORDERING,
(Object) defaultOrder));
}
/*
* process a field value executing UIMA the CAS containing it as document
* text - From SOLR.
*/
private JCas processText(String textFieldValue) throws ResourceInitializationException,
AnalysisEngineProcessException {
logger.info(new StringBuffer("Analazying text").toString());
/*
* get the UIMA analysis engine
*/
AnalysisEngine ae = aeProvider.getAE();
/*
* create a JCas which contain the text to analyze
*/
JCas jcas = ae.newJCas();
jcas.setDocumentText(textFieldValue);
/*
* perform analysis on text field
*/
ae.process(jcas);
logger.info(new StringBuilder("Text processing completed").toString());
return jcas;
}
private List<FeatureStructure> concertToCasLight(JCas jcas, String typeName) {
List<FeatureStructure> ret = new ArrayList<FeatureStructure>();
Type type = jcas.getTypeSystem().getType(typeName);
List<org.apache.uima.cas.Feature> featList = type.getFeatures();
for (FSIterator<org.apache.uima.cas.FeatureStructure> iterator = jcas.getFSIndexRepository().getAllIndexedFS(type); iterator.hasNext(); ) {
org.apache.uima.cas.FeatureStructure casFs = iterator.next();
logger.debug("Processing UIMA CAS FeatureSet:" + casFs.toString());
FeatureStructure newFs = new FeatureStructure(UUID.randomUUID().toString(), type.getShortName());
for (org.apache.uima.cas.Feature casF : featList) {
String fName = casF.getShortName();
logger.debug("Feature Name:" + fName);
if (casF.getRange().getName().equals("uima.cas.Sofa")) {
continue;
}
if (casF.getRange().isPrimitive()) {
logger.debug("Getting primitive value...");
if (casF.getRange().getName().equals("uima.cas.String")) {
String fVal = casFs.getStringValue(casF);
newFs.addFeature(new Feature<String>(fName, fVal));
} else if (casF.getRange().getName().equals("uima.cas.Integer")) {
int fVal = casFs.getIntValue(casF);
newFs.addFeature(new Feature<Integer>(fName, fVal));
} else if (casF.getRange().getName().equals("uima.cas.Short")) {
short fVal = casFs.getShortValue(casF);
newFs.addFeature(new Feature<Integer>(fName, (int) fVal));
} else if (casF.getRange().getName().equals("uima.cas.Byte")) {
byte fVal = casFs.getByteValue(casF);
newFs.addFeature(new Feature<Integer>(fName, (int) fVal));
} else if (casF.getRange().getName().equals("uima.cas.Double")) {
double fVal = casFs.getDoubleValue(casF);
newFs.addFeature(new Feature<Double>(fName, fVal));
} else if (casF.getRange().getName().equals("uima.cas.Float")) {
float fVal = casFs.getFloatValue(casF);
newFs.addFeature(new Feature<Double>(fName, (double) fVal));
} else {
Object fVal = casFs.clone();
newFs.addFeature(new Feature<Object>(fName, fVal));
}
} else {
logger.debug("Getting FeatureStructure value...");
throw new UnsupportedOperationException("This client cannot handle FeatureStructure features");
}
if (casFs instanceof Annotation && "coveredText".equals(fName)) {
newFs.setCoveredText(((Annotation) casFs).getCoveredText());
}
}
logger.debug("FeatureStructure:" + newFs);
ret.add(newFs);
}
return ret;
}
}