| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.stanbol.enhancer.engines.zemanta.impl; |
| |
| import static org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.createTextEnhancement; |
| import static org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.createTopicEnhancement; |
| import static org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.getReferences; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses.SKOS_CONCEPT; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_LABEL; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_TYPE; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTION_CONTEXT; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_CATEGORY; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION; |
| |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.Collection; |
| import java.util.Collections; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| import java.util.Map; |
| import java.util.Map.Entry; |
| import java.util.Set; |
| |
| import org.apache.clerezza.rdf.core.Literal; |
| import org.apache.clerezza.rdf.core.LiteralFactory; |
| import org.apache.clerezza.rdf.core.MGraph; |
| import org.apache.clerezza.rdf.core.NonLiteral; |
| import org.apache.clerezza.rdf.core.PlainLiteral; |
| import org.apache.clerezza.rdf.core.Triple; |
| import org.apache.clerezza.rdf.core.TripleCollection; |
| import org.apache.clerezza.rdf.core.UriRef; |
| import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl; |
| import org.apache.clerezza.rdf.core.impl.SimpleMGraph; |
| import org.apache.clerezza.rdf.core.impl.TripleImpl; |
| import org.apache.commons.io.IOUtils; |
| import org.apache.felix.scr.annotations.Activate; |
| import org.apache.felix.scr.annotations.Component; |
| import org.apache.felix.scr.annotations.Deactivate; |
| import org.apache.felix.scr.annotations.Properties; |
| import org.apache.felix.scr.annotations.Property; |
| import org.apache.felix.scr.annotations.Reference; |
| import org.apache.felix.scr.annotations.Service; |
| import org.apache.stanbol.commons.stanboltools.offline.OnlineMode; |
| import org.apache.stanbol.enhancer.engines.zemanta.ZemantaOntologyEnum; |
| import org.apache.stanbol.enhancer.servicesapi.Blob; |
| import org.apache.stanbol.enhancer.servicesapi.ContentItem; |
| import org.apache.stanbol.enhancer.servicesapi.EngineException; |
| import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine; |
| import org.apache.stanbol.enhancer.servicesapi.InvalidContentException; |
| import org.apache.stanbol.enhancer.servicesapi.ServiceProperties; |
| import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper; |
| import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper; |
| import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine; |
| import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses; |
| import org.osgi.framework.BundleContext; |
| import org.osgi.service.cm.ConfigurationException; |
| import org.osgi.service.component.ComponentContext; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| |
| |
| /** |
| * Apache Stanbol Enhancer Zemanta enhancement engine. |
| * This enhancement engine uses the Zemanta API for enhancing content. |
| * See http://developer.zemanta.com |
| * To run this engine you need a Zemanta API key configured (see README) |
| * <p> |
| * For detailed information on the mappings of Zemanta annotations to Stanbol |
| * Enhancer enhancements see |
| * <a href="http://wiki.iks-project.eu/index.php/ZemantaEnhancementEngine">http://wiki.iks-project.eu/index.php/ZemantaEnhancementEngine</a> |
| * <p> |
| * This implementation currently only provides Stanbol Enhancer enhancements for |
| * Zemanta Recognitions. |
| * |
| * @author michaelmarth |
| * @author Rupert Westenthaler |
| */ |
| @Component(immediate = true, metatype = true, inherit = true) |
| @Service |
| @Properties(value={ |
| @Property(name=EnhancementEngine.PROPERTY_NAME,value="zemanta") |
| }) |
| public class ZemantaEnhancementEngine |
| extends AbstractEnhancementEngine<IOException,RuntimeException> |
| implements EnhancementEngine, ServiceProperties { |
| |
| @Property |
| public static final String API_KEY_PROPERTY = "org.apache.stanbol.enhancer.engines.zemanta.key"; |
| |
| public static final String DMOZ_BASE_URL = "http://www.dmoz.org/"; |
| public static final String ZEMANTA_DMOZ_PREFIX = "Top/"; |
| |
| protected static final Set<String> SUPPORTED_MIMETYPES = |
| Collections.unmodifiableSet(new HashSet<String>( |
| Arrays.asList("text/plain","text/html"))); |
| |
| /** |
| * The maximal prefix/suffix size used for the selection context. This is |
| * required, because Zemanta does only provide the Anchor text, but not the |
| * exact position within the text. So this engine creates a TextAnnotation |
| * for each occurrence of the Anchor within the text and uses the surrounding |
| * as context. |
| */ |
| private static final int SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE = 50; |
| |
| private static final Logger log = LoggerFactory.getLogger(ZemantaEnhancementEngine.class); |
| |
| /** |
| * The default value for the Execution of this Engine. Currently set to |
| * {@link ServiceProperties#ORDERING_EXTRACTION_ENHANCEMENT} + 10. It should run after Metaxa and LangId. |
| */ |
| public static final Integer defaultOrder = ServiceProperties.ORDERING_EXTRACTION_ENHANCEMENT + 10; |
| |
| private String key; |
| |
| public LiteralFactory literalFactory; |
| |
| protected BundleContext bundleContext; |
| /** |
| * Only activate this engine in online mode |
| */ |
| @SuppressWarnings("unused") |
| @Reference |
| private OnlineMode onlineMode; |
| |
| @Activate |
| protected void activate(ComponentContext ce) throws IOException,ConfigurationException { |
| super.activate(ce); |
| bundleContext = ce.getBundleContext(); |
| key = (String)ce.getProperties().get(API_KEY_PROPERTY); |
| checkConfig(); |
| //init the LiteralFactory |
| literalFactory = LiteralFactory.getInstance(); |
| } |
| |
| @Deactivate |
| protected void deactivate(ComponentContext ce) { |
| super.deactivate(ce); |
| literalFactory = null; |
| key = null; |
| bundleContext = null; |
| } |
| /** |
| * Checks the configuration of the {@link #API_KEY_PROPERTY} |
| * @throws ConfigurationException if the Zemanta key is not configured |
| */ |
| private void checkConfig() throws ConfigurationException { |
| if(key == null || key.trim().length() == 0) { |
| throw new ConfigurationException(API_KEY_PROPERTY,String.format( |
| "%s : please configure a Zemanta key to use this engine (e.g. by" + |
| "using the 'Configuration' tab of the Apache Felix Web Console).", |
| getClass().getSimpleName())); |
| } |
| } |
| |
| public int canEnhance(ContentItem ci) { |
| if(ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES) != null){ |
| return ENHANCE_ASYNC; //the ZEMANTA engine now supports async processing! |
| } else { |
| return CANNOT_ENHANCE; |
| } |
| } |
| |
| |
| public void computeEnhancements(ContentItem ci) throws EngineException { |
| Entry<UriRef,Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES); |
| if(contentPart == null){ |
| throw new IllegalStateException("No ContentPart with a supported Mime Type" |
| + "found for ContentItem "+ci.getUri()+"(supported: '" |
| + SUPPORTED_MIMETYPES+"') -> this indicates that canEnhance was" |
| + "NOT called and indicates a bug in the used EnhancementJobManager!"); |
| } |
| String text; |
| try { |
| text = ContentItemHelper.getText(contentPart.getValue()); |
| } catch (IOException e) { |
| throw new InvalidContentException(this, ci, e); |
| } |
| if (text.trim().length() == 0) { |
| log.warn("ContentPart {} of ContentItem {} does not contain any text to enhance", |
| contentPart.getKey(),ci.getUri()); |
| return; |
| } |
| MGraph graph = ci.getMetadata(); |
| UriRef ciId = ci.getUri(); |
| //we need to store the results of Zemanta in an temp graph |
| MGraph results = new SimpleMGraph(); |
| ZemantaAPIWrapper zemanta = new ZemantaAPIWrapper(key); |
| try { |
| results.addAll(zemanta.enhance(text)); |
| } catch (IOException e) { |
| throw new EngineException("Unable to get Enhancement from remote Zemanta Service",e); |
| } |
| //now we need to process the results and convert them into the Enhancer |
| //annotation structure |
| ci.getLock().writeLock().lock(); |
| try { |
| processRecognition(results, graph, text, ciId); |
| processCategories(results, graph, ciId); |
| } finally { |
| ci.getLock().writeLock().unlock(); |
| } |
| } |
| public Map<String, Object> getServiceProperties() { |
| // TODO Auto-generated method stub |
| return Collections.unmodifiableMap(Collections.singletonMap( |
| ENHANCEMENT_ENGINE_ORDERING, |
| (Object) defaultOrder)); |
| } |
| |
| protected void processCategories(MGraph results, MGraph enhancements, UriRef ciId) { |
| Iterator<Triple> categories = results.filter(null, RDF_TYPE, ZemantaOntologyEnum.Category.getUri()); |
| //add the root Text annotation as soon as the first TopicAnnotation is added. |
| UriRef textAnnotation = null; |
| while (categories.hasNext()) { |
| NonLiteral category = categories.next().getSubject(); |
| log.debug("process category " + category); |
| Double confidence = parseConfidence(results, category); |
| log.debug(" > confidence :" + confidence); |
| //now we need to follow the Target link |
| UriRef target = EnhancementEngineHelper.getReference(results, category, ZemantaOntologyEnum.target.getUri()); |
| if (target != null) { |
| //first check the used categorisation |
| UriRef categorisationScheme = EnhancementEngineHelper.getReference(results, target, ZemantaOntologyEnum.categorization.getUri()); |
| if (categorisationScheme != null && categorisationScheme.equals(ZemantaOntologyEnum.categorization_DMOZ.getUri())) { |
| String categoryTitle = EnhancementEngineHelper.getString(results, target, ZemantaOntologyEnum.title.getUri()); |
| if (categoryTitle != null) { |
| if(textAnnotation == null){ |
| //this is the first category ... create the TextAnnotation used |
| //to link all fise:TopicAnnotations |
| textAnnotation = createTextEnhancement(enhancements, this, ciId); |
| enhancements.add(new TripleImpl(textAnnotation,DC_TYPE,SKOS_CONCEPT)); |
| } |
| //now write the TopicAnnotation |
| UriRef categoryEnhancement = createTopicEnhancement(enhancements, this, ciId); |
| //make related to the EntityAnnotation |
| enhancements.add(new TripleImpl(categoryEnhancement, DC_RELATION, textAnnotation)); |
| //write the title |
| enhancements.add(new TripleImpl(categoryEnhancement, ENHANCER_ENTITY_LABEL, new PlainLiteralImpl(categoryTitle))); |
| //write the reference |
| if (categoryTitle.startsWith(ZEMANTA_DMOZ_PREFIX)) { |
| enhancements.add( |
| new TripleImpl(categoryEnhancement, ENHANCER_ENTITY_REFERENCE, new UriRef(DMOZ_BASE_URL + categoryTitle.substring(ZEMANTA_DMOZ_PREFIX.length())))); |
| } |
| //write the confidence |
| if (confidence != null) { |
| enhancements.add(new TripleImpl(categoryEnhancement, ENHANCER_CONFIDENCE, |
| literalFactory.createTypedLiteral(confidence))); |
| } |
| //we need to write the fise:entity-type |
| //as of STANBOL-617 we use now both the zemanta:Category AND the skos:Concept |
| //type. dc:type is no longer used as this is only used by fise:TextAnnotations |
| // see http://wiki.iks-project.eu/index.php/ZemantaEnhancementEngine#Mapping_of_Categories |
| // for more Information |
| enhancements.add(new TripleImpl(categoryEnhancement, ENHANCER_ENTITY_TYPE, SKOS_CONCEPT)); |
| //Use also Zemanta Category as type for the referred Entity |
| enhancements.add(new TripleImpl(categoryEnhancement, ENHANCER_ENTITY_TYPE, ZemantaOntologyEnum.Category.getUri())); |
| } else { |
| log.warn("Unable to process category " + category + " because no title is present"); |
| } |
| } else { |
| log.warn("Unable to process category " + category + " because categorisation scheme != DMOZ (" + categorisationScheme + " != " + ZemantaOntologyEnum.categorization_DMOZ.getUri() + ")"); |
| } |
| } else { |
| log.warn("Unable to process category " + category + " because no target node was found"); |
| } |
| } |
| } |
| |
| /** |
| * Processes all Zemanta Recognitions and converts them to the according |
| * FISE enhancements |
| * |
| * @param results the results of the Zemanta enhancement process |
| * @param enhancements the graph containing the current Stanbol Enhancer |
| * enhancements |
| * @param text the content of the content item as string |
| */ |
| protected void processRecognition(MGraph results, MGraph enhancements, String text, UriRef ciId) { |
| Iterator<Triple> recognitions = results.filter(null, RDF_TYPE, ZemantaOntologyEnum.Recognition.getUri()); |
| while (recognitions.hasNext()) { |
| NonLiteral recognition = recognitions.next().getSubject(); |
| log.debug("process recognition " + recognition); |
| //first get everything we need for the textAnnotations |
| Double confidence = parseConfidence(results, recognition); |
| log.debug(" > confidence :" + confidence); |
| String anchor = EnhancementEngineHelper.getString(results, recognition, ZemantaOntologyEnum.anchor.getUri()); |
| log.debug(" > anchor :" + anchor); |
| Collection<NonLiteral> textAnnotations = processTextAnnotation(enhancements, text, ciId, anchor, confidence); |
| log.debug(" > number of textAnnotations :" + textAnnotations.size()); |
| |
| //second we need to create the EntityAnnotation that represent the |
| //recognition |
| NonLiteral object = EnhancementEngineHelper.getReference(results, recognition, ZemantaOntologyEnum.object.getUri()); |
| log.debug(" > object :" + object); |
| //The targets represent the linked entities |
| // ... and yes there can be more of them! |
| //TODO: can we create an EntityAnnotation with several referred entities? |
| // Should we use the owl:sameAs to decide that! |
| Set<UriRef> sameAsSet = new HashSet<UriRef>(); |
| for (Iterator<UriRef> sameAs = getReferences(results, object, ZemantaOntologyEnum.owlSameAs.getUri()); sameAs.hasNext(); sameAsSet.add(sameAs.next())) |
| ; |
| log.debug(" > sameAs :" + sameAsSet); |
| //now parse the targets and look if there are others than the one |
| //merged by using sameAs |
| Iterator<UriRef> targets = EnhancementEngineHelper.getReferences(results, object, ZemantaOntologyEnum.target.getUri()); |
| String title = null; |
| while (targets.hasNext()) { |
| //the entityRef is the URL of the target |
| UriRef entity = targets.next(); |
| log.debug(" - target :" + entity); |
| UriRef targetType = EnhancementEngineHelper.getReference(results, entity, ZemantaOntologyEnum.targetType.getUri()); |
| log.debug(" o type :" + targetType); |
| if (ZemantaOntologyEnum.targetType_RDF.getUri().equals(targetType)) { |
| String targetTitle = EnhancementEngineHelper.getString(results, entity, ZemantaOntologyEnum.title.getUri()); |
| log.debug(" o title :" + targetTitle); |
| if (sameAsSet.contains(entity)) { |
| if (title == null) { |
| title = targetTitle; |
| } else if (!title.equals(targetTitle)) { |
| log.warn("Entities marked with owl:sameAs do use different labels '" + title + "' != '" + targetTitle + "'!"); |
| } //else the same label used by both -> thats expected |
| } else { |
| //maybe we should create an second entityEnhancement, but I think, that such a case should |
| //not happen. So write an warning for now |
| log.warn("Found Target with type RDF, that is not linked with owl:sameAs to the others (this: '" + entity + " | sameAs: " + sameAsSet + ")"); |
| log.warn(" - no Enhancement for " + entity + " will be created"); |
| } |
| } //else -> do not process -> RDF Entities only |
| //TODO: targetTypes are not parsed by Zemanta, therefore we can not set |
| // any entity types! |
| } |
| //create the entityEnhancement |
| UriRef entityEnhancement = EnhancementEngineHelper.createEntityEnhancement(enhancements, this, ciId); |
| if (confidence != null) { |
| enhancements.add( |
| new TripleImpl(entityEnhancement, ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(confidence))); |
| } |
| for (NonLiteral relatedTextAnnotation : textAnnotations) { |
| enhancements.add( |
| new TripleImpl(entityEnhancement, DC_RELATION, relatedTextAnnotation)); |
| } |
| for (UriRef entity : sameAsSet) { |
| enhancements.add( |
| new TripleImpl(entityEnhancement, ENHANCER_ENTITY_REFERENCE, entity)); |
| } |
| enhancements.add( |
| new TripleImpl(entityEnhancement, ENHANCER_ENTITY_LABEL, new PlainLiteralImpl(title))); |
| } |
| } |
| |
| /** |
| * Helper method to parse the confidence property for an resource. Zemanta |
| * does not the the xsd data type, because of that we need to parse the |
| * double value based on the string. |
| * |
| * @param tc the graph used to query for confidence value |
| * @param resource the resource holding the confidence property |
| * |
| * @return the confidence of <code>null</code> if no confidence property is |
| * present for the parsed resource of the value can not be converted to a |
| * double value. |
| * @see ZemantaOntologyEnum#confidence |
| */ |
| private static Double parseConfidence(TripleCollection tc, NonLiteral resource) { |
| String confidenceString = EnhancementEngineHelper.getString(tc, resource, ZemantaOntologyEnum.confidence.getUri()); |
| Double confidence; |
| if (confidenceString != null) { |
| try { |
| confidence = Double.valueOf(confidenceString); |
| } catch (NumberFormatException e) { |
| log.warn("Unable to parse Float confidence for Literal value '" + confidenceString + "'"); |
| confidence = null; |
| } |
| } else { |
| confidence = null; |
| } |
| return confidence; |
| } |
| |
| /** |
| * This Methods searches/creates text annotations for anchor points of Zemanta |
| * extractions. |
| * <p> |
| * First this method searches for text annotations that do use the anchor as |
| * selected text. Second it searches for occurrences of the anchor within the |
| * content of the content and checks if there is an text annotation for that |
| * occurrence. If not it creates an new one. |
| * |
| * @param enhancements the graph containing the meta data |
| * @param text the content as string |
| * @param ciId the ID of the content item |
| * @param anchor the anchor text |
| * @param confidence the confidence to be used for newly created text annotations |
| * |
| * @return a collection of all existing/created text annotations for the parsed anchor |
| */ |
| private Collection<NonLiteral> processTextAnnotation(MGraph enhancements, String text, UriRef ciId, String anchor, Double confidence) { |
| Collection<NonLiteral> textAnnotations = new ArrayList<NonLiteral>(); |
| int anchorLength = anchor.length(); |
| Literal anchorLiteral = new PlainLiteralImpl(anchor); |
| //first search for existing TextAnnotations for the anchor |
| Map<Integer, Collection<NonLiteral>> existingTextAnnotationsMap = searchExistingTextAnnotations(enhancements, anchorLiteral); |
| |
| for (int current = text.indexOf(anchor); current >= 0; current = text.indexOf(anchor, current + 1)) { |
| Collection<NonLiteral> existingTextAnnotations = existingTextAnnotationsMap.get(current); |
| if (existingTextAnnotations != null) { |
| //use the existing once |
| textAnnotations.addAll(existingTextAnnotations); |
| } else { |
| //we need to create an new one! |
| UriRef textAnnotation = EnhancementEngineHelper.createTextEnhancement(enhancements, this, ciId); |
| textAnnotations.add(textAnnotation); |
| //write the selection |
| enhancements.add( |
| new TripleImpl(textAnnotation, ENHANCER_START, literalFactory.createTypedLiteral(current))); |
| enhancements.add( |
| new TripleImpl(textAnnotation, ENHANCER_END, literalFactory.createTypedLiteral(current + anchorLength))); |
| enhancements.add( |
| new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, anchorLiteral)); |
| //extract the selection context |
| int beginPos; |
| if(current <= SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE){ |
| beginPos = 0; |
| } else { |
| int start = current-SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE; |
| beginPos = text.indexOf(' ',start); |
| if(beginPos < 0 || beginPos >= current){ //no words |
| beginPos = start; //begin within a word |
| } |
| } |
| int endPos; |
| if(current+anchorLength+SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE >= text.length()){ |
| endPos = text.length(); |
| } else { |
| int start = current+anchorLength+SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE; |
| endPos = text.lastIndexOf(' ', start); |
| if(endPos <= current+anchorLength){ |
| endPos = start; //end within a word; |
| } |
| } |
| enhancements.add(new TripleImpl(textAnnotation,ENHANCER_SELECTION_CONTEXT,new PlainLiteralImpl(text.substring(beginPos, endPos)))); |
| //TODO: Currently I use the confidence of the extraction, but I think this is more |
| // related to the annotated Entity rather to the selected text. |
| if (confidence != null) { |
| enhancements.add( |
| new TripleImpl(textAnnotation, ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(confidence))); |
| } |
| //TODO: No idea about the type of the Annotation, because we do not have an type of the entity! |
| // One would need to get the types from the referred Source |
| } |
| } |
| return textAnnotations; |
| } |
| |
| /** |
| * Search for existing TextAnnotations for an given selected text and |
| * returns an Map that uses the start position as an key and a list of |
| * text annotations as an value. |
| * |
| * @param enhancements the graph containing the enhancements to be searched |
| * @param anchorLiteral the Literal representing the selected text |
| * |
| * @return Map that uses the start position as an key and a list of |
| * text annotations as an value. |
| */ |
| private Map<Integer, Collection<NonLiteral>> searchExistingTextAnnotations(MGraph enhancements, Literal anchorLiteral) { |
| Iterator<Triple> textAnnotationsIterator = enhancements.filter(null, ENHANCER_SELECTED_TEXT, anchorLiteral); |
| Map<Integer, Collection<NonLiteral>> existingTextAnnotationsMap = new HashMap<Integer, Collection<NonLiteral>>(); |
| while (textAnnotationsIterator.hasNext()) { |
| NonLiteral subject = textAnnotationsIterator.next().getSubject(); |
| //test rdfType |
| if (enhancements.contains(new TripleImpl(subject, RDF_TYPE, ENHANCER_TEXTANNOTATION))) { |
| Integer start = EnhancementEngineHelper.get(enhancements, subject, ENHANCER_START, Integer.class, literalFactory); |
| if (start != null) { |
| Collection<NonLiteral> textAnnotationList = existingTextAnnotationsMap.get(start); |
| if (textAnnotationList == null) { |
| textAnnotationList = new ArrayList<NonLiteral>(); |
| existingTextAnnotationsMap.put(start, textAnnotationList); |
| } |
| textAnnotationList.add(subject); |
| } |
| } |
| } |
| return existingTextAnnotationsMap; |
| } |
| } |