| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.stanbol.enhancer.engines.textannotationnewmodel.impl; |
| |
| import static org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper.getBlob; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTION_CONTEXT; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTION_HEAD; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTION_PREFIX; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTION_SUFFIX; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTION_TAIL; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION; |
| |
| import java.io.IOException; |
| import java.util.Collections; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| import java.util.Map; |
| import java.util.Map.Entry; |
| import java.util.Set; |
| import org.apache.clerezza.commons.rdf.BlankNodeOrIRI; |
| import org.apache.clerezza.commons.rdf.Graph; |
| import org.apache.clerezza.commons.rdf.IRI; |
| import org.apache.clerezza.commons.rdf.Language; |
| import org.apache.clerezza.commons.rdf.Triple; |
| import org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl; |
| import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl; |
| |
| |
| import org.apache.clerezza.rdf.core.LiteralFactory; |
| |
| import org.apache.felix.scr.annotations.Component; |
| import org.apache.felix.scr.annotations.ConfigurationPolicy; |
| import org.apache.felix.scr.annotations.Properties; |
| import org.apache.felix.scr.annotations.Property; |
| import org.apache.felix.scr.annotations.Service; |
| import org.apache.stanbol.enhancer.servicesapi.Blob; |
| import org.apache.stanbol.enhancer.servicesapi.ContentItem; |
| import org.apache.stanbol.enhancer.servicesapi.EngineException; |
| import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine; |
| import org.apache.stanbol.enhancer.servicesapi.ServiceProperties; |
| import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper; |
| import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper; |
| import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine; |
| import org.osgi.framework.Constants; |
| import org.osgi.service.cm.ConfigurationException; |
| import org.osgi.service.component.ComponentContext; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| |
| @Component(policy = ConfigurationPolicy.OPTIONAL, metatype = true, immediate = true) |
| @Service |
| @Properties(value = { |
| @Property(name = EnhancementEngine.PROPERTY_NAME, value="text-annotation-new-model"), |
| @Property(name = TextAnnotationsNewModelEngine.PROPERTY_PREFIX_SUFFIX_SIZE, |
| intValue=TextAnnotationsNewModelEngine.DEFAULT_PREFIX_SUFFIX_SIZE), |
| @Property(name = Constants.SERVICE_RANKING, intValue=0) |
| }) |
| public class TextAnnotationsNewModelEngine extends AbstractEnhancementEngine<RuntimeException,RuntimeException> |
| implements EnhancementEngine, ServiceProperties { |
| |
| private final Logger log = LoggerFactory.getLogger(TextAnnotationsNewModelEngine.class); |
| |
| public static final String PROPERTY_PREFIX_SUFFIX_SIZE = "enhancer.engines.textannotationnewmodel.prefixSuffixSize"; |
| public static final int DEFAULT_PREFIX_SUFFIX_SIZE = EnhancementEngineHelper.DEFAULT_PREFIX_SUFFIX_LENGTH; |
| |
| // the order in which this engine is executed. |
| public static final Integer ENGINE_ORDER = ServiceProperties.ORDERING_POST_PROCESSING - 20; |
| |
| private static final Set<String> supportedMimeTypes = Collections.singleton("text/plain"); |
| |
| private LiteralFactory lf = LiteralFactory.getInstance(); |
| |
| private int prefixSuffixSize; |
| |
| /** |
| * Get the service properties (basically the engine order). |
| */ |
| @Override |
| public Map<String,Object> getServiceProperties() { |
| return Collections.unmodifiableMap(Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING, |
| (Object) ENGINE_ORDER)); |
| } |
| |
| /** |
| * States whether can enhance the provided ContentItem. |
| */ |
| @Override |
| public int canEnhance(ContentItem contentItem) throws EngineException { |
| if(getBlob(contentItem, supportedMimeTypes) != null){ |
| return ENHANCE_ASYNC; |
| } else { |
| return CANNOT_ENHANCE; |
| } |
| } |
| |
| /** |
| * Computes the enhancements on the provided ContentItem. |
| */ |
| @Override |
| public void computeEnhancements(ContentItem contentItem) throws EngineException { |
| Entry<IRI,Blob> textBlob = getBlob(contentItem, supportedMimeTypes); |
| if(textBlob == null){ |
| return; |
| } |
| String language = EnhancementEngineHelper.getLanguage(contentItem); |
| Language lang = language == null ? null : new Language(language); |
| String text; |
| try { |
| text = ContentItemHelper.getText(textBlob.getValue()); |
| } catch (IOException e) { |
| throw new EngineException(this, contentItem, "Unable to read Plain Text Blob", e); |
| } |
| Set<Triple> addedTriples = new HashSet<Triple>(); |
| Graph metadata = contentItem.getMetadata(); |
| //extract all the necessary information within a read lock |
| contentItem.getLock().readLock().lock(); |
| try { |
| Iterator<Triple> it = metadata.filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION); |
| while(it.hasNext()){ |
| BlankNodeOrIRI ta = it.next().getSubject(); |
| boolean hasPrefix = metadata.filter(ta, ENHANCER_SELECTION_PREFIX, null).hasNext(); |
| boolean hasSuffix = metadata.filter(ta, ENHANCER_SELECTION_SUFFIX, null).hasNext(); |
| boolean hasSelected = metadata.filter(ta, ENHANCER_SELECTED_TEXT, null).hasNext(); |
| if(hasPrefix && hasSuffix && hasSelected){ |
| continue; //this TextAnnotation already uses the new model |
| } |
| Integer start; |
| if(!hasPrefix){ |
| start = EnhancementEngineHelper.get(metadata, ta, ENHANCER_START, Integer.class, lf); |
| if(start == null){ |
| log.debug("unable to add fise:selection-prefix to TextAnnotation {} " |
| + "because fise:start is not present",ta); |
| } else if(start < 0){ |
| log.warn("fise:start {} of TextAnnotation {} < 0! " |
| + "Will not transform this TextAnnotation", start, ta); |
| start = 0; |
| } |
| } else { |
| start = null; |
| } |
| Integer end; |
| if(!hasSuffix){ |
| end = EnhancementEngineHelper.get(metadata, ta, ENHANCER_END, Integer.class, lf); |
| if(end == null){ |
| log.debug("unable to add fise:selection-suffix to TextAnnotation {} " |
| + "because fise:end is not present",ta); |
| } else if(end > text.length()) { |
| log.warn("fise:end {} of TextAnnotation {} > as the content length {}! " |
| + "Will not transform this TextAnnotation", |
| end, ta, text.length()); |
| end = null; |
| } else if(start != null && end < start){ |
| log.warn("fise:end {} < fise:start {} of TextAnnotation {}! " |
| + "Will not transform this TextAnnotation", |
| end, start, ta); |
| end = null; |
| start = null; |
| } |
| } else { |
| end = null; |
| } |
| if(!hasPrefix && start != null){ |
| addedTriples.add(new TripleImpl(ta, ENHANCER_SELECTION_PREFIX, |
| new PlainLiteralImpl(text.substring(Math.max(0,start-prefixSuffixSize), start), lang))); |
| } |
| if(!hasSuffix && end != null){ |
| addedTriples.add(new TripleImpl(ta, ENHANCER_SELECTION_SUFFIX, |
| new PlainLiteralImpl(text.substring(end,Math.min(text.length(), end+prefixSuffixSize)),lang))); |
| } |
| if(!hasSelected && start != null && end != null){ |
| //This adds missing fise:selected or fise:head/fise:tail if the selected text is to long |
| int length = end - start; |
| if(length > 3*prefixSuffixSize){ //add prefix/suffix |
| addedTriples.add(new TripleImpl(ta, ENHANCER_SELECTION_HEAD, |
| new PlainLiteralImpl(text.substring(start, start+prefixSuffixSize), lang))); |
| addedTriples.add(new TripleImpl(ta, ENHANCER_SELECTION_TAIL, |
| new PlainLiteralImpl(text.substring(end-prefixSuffixSize,end),lang))); |
| } else { //add missing fise:selected |
| String selection = text.substring(start, end); |
| addedTriples.add(new TripleImpl(ta, ENHANCER_SELECTED_TEXT, |
| new PlainLiteralImpl(selection,lang))); |
| //check if we should also add an selection context |
| if(!metadata.filter(ta, ENHANCER_SELECTION_CONTEXT, null).hasNext()){ |
| addedTriples.add(new TripleImpl(ta, ENHANCER_SELECTION_CONTEXT, |
| new PlainLiteralImpl(EnhancementEngineHelper.getSelectionContext(text, selection, start),lang))); |
| } |
| } |
| } |
| } |
| } finally { |
| contentItem.getLock().readLock().unlock(); |
| } |
| //finally write the prefix/suffix triples within a write lock |
| if(!addedTriples.isEmpty()){ |
| contentItem.getLock().writeLock().lock(); |
| try { |
| metadata.addAll(addedTriples); |
| } finally { |
| contentItem.getLock().writeLock().unlock(); |
| } |
| } |
| } |
| |
| @Override |
| protected void activate(ComponentContext ctx) throws ConfigurationException, RuntimeException { |
| super.activate(ctx); |
| Object value = ctx.getProperties().get(PROPERTY_PREFIX_SUFFIX_SIZE); |
| if(value instanceof Number){ |
| prefixSuffixSize = ((Number)value).intValue(); |
| } else if (value != null){ |
| try { |
| prefixSuffixSize = Integer.parseInt(value.toString()); |
| } catch (NumberFormatException e) { |
| throw new ConfigurationException(PROPERTY_PREFIX_SUFFIX_SIZE, "The value MUST be an Integer", e); |
| } |
| } else { |
| prefixSuffixSize = DEFAULT_PREFIX_SUFFIX_SIZE; |
| } |
| if(prefixSuffixSize < EnhancementEngineHelper.MIN_PREFIX_SUFFIX_SIZE){ |
| throw new ConfigurationException(PROPERTY_PREFIX_SUFFIX_SIZE, |
| "The prefixSuffixSize MUST BE >= " + EnhancementEngineHelper.MIN_PREFIX_SUFFIX_SIZE); |
| } |
| } |
| |
| @Override |
| protected void deactivate(ComponentContext ctx) throws RuntimeException { |
| prefixSuffixSize = 0; |
| super.deactivate(ctx); |
| } |
| } |