blob: d847b15d54144ee7c16d4faff89b92553bae93e9 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.enhancer.engines.entitycomention;
import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.CASE_SENSITIVE;
import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEFAULT_CASE_SENSITIVE_MATCHING_STATE;
import static org.apache.stanbol.enhancer.engines.entitylinking.config.TextProcessingConfig.DEFAULT_PROCESS_ONLY_PROPER_NOUNS_STATE;
import static org.apache.stanbol.enhancer.engines.entitylinking.config.TextProcessingConfig.PROCESSED_LANGUAGES;
import static org.apache.stanbol.enhancer.engines.entitylinking.config.TextProcessingConfig.PROCESS_ONLY_PROPER_NOUNS_STATE;
import static org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getAnalysedText;
import static org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getLanguage;
import static org.apache.stanbol.enhancer.servicesapi.EnhancementEngine.PROPERTY_NAME;
import static org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.getReferences;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_CONTRIBUTOR;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION;
import static org.osgi.framework.Constants.SERVICE_RANKING;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Dictionary;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.clerezza.commons.rdf.Language;
import org.apache.clerezza.commons.rdf.Literal;
import org.apache.clerezza.rdf.core.LiteralFactory;
import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.BlankNodeOrIRI;
import org.apache.clerezza.commons.rdf.RDFTerm;
import org.apache.clerezza.commons.rdf.Triple;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.commons.lang.StringUtils;
import org.apache.felix.scr.annotations.Activate;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.ConfigurationPolicy;
import org.apache.felix.scr.annotations.Deactivate;
import org.apache.felix.scr.annotations.Property;
import org.apache.felix.scr.annotations.Reference;
import org.apache.felix.scr.annotations.Service;
import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixService;
import org.apache.stanbol.enhancer.engines.entitycomention.impl.ContentItemMentionBuilder;
import org.apache.stanbol.enhancer.engines.entitylinking.Entity;
import org.apache.stanbol.enhancer.engines.entitylinking.EntitySearcherException;
import org.apache.stanbol.enhancer.engines.entitylinking.LabelTokenizer;
import org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig;
import org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.RedirectProcessingMode;
import org.apache.stanbol.enhancer.engines.entitylinking.config.LanguageProcessingConfig;
import org.apache.stanbol.enhancer.engines.entitylinking.config.TextProcessingConfig;
import org.apache.stanbol.enhancer.engines.entitylinking.impl.EntityLinker;
import org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity;
import org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity.Occurrence;
import org.apache.stanbol.enhancer.engines.entitylinking.impl.Suggestion;
import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
import org.osgi.framework.BundleContext;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.component.ComponentContext;
import org.osgi.util.tracker.ServiceTracker;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * The Entity Co-Mention Engine builds a local knowledge base from already
 * extracted <code>fise:TextAnnotation</code>s and suggested
 * <code>fise:EntityAnnotation</code>s. This information is then used to perform
 * an entity linking process. By doing so this engine is able to detect
 * co-mentions of Entities within the processed document. <p>
 *
 *
 *
 * @author Rupert Westenthaler
 *
 */
@Component(
    configurationFactory = true,
    policy = ConfigurationPolicy.REQUIRE, // the baseUri is required!
    specVersion = "1.1",
    metatype = true,
    immediate = true,
    inherit = true)
@org.apache.felix.scr.annotations.Properties(value={
    @Property(name=PROPERTY_NAME),
    @Property(name=CASE_SENSITIVE,boolValue=DEFAULT_CASE_SENSITIVE_MATCHING_STATE),
    @Property(name=PROCESS_ONLY_PROPER_NOUNS_STATE, boolValue=DEFAULT_PROCESS_ONLY_PROPER_NOUNS_STATE),
    @Property(name=PROCESSED_LANGUAGES,
        cardinality=Integer.MAX_VALUE,
        value={"*;lmmtip;uc=LINK;prop=0.75;pprob=0.75", // link multiple matchable tokens in chunks; link upper case words
            "de;uc=MATCH", //in German all Nouns are upper case
            "es;lc=Noun", //the OpenNLP POS tagger for Spanish does not support ProperNouns
            "nl;lc=Noun"}), //same for Dutch
    @Property(name=EntityCoMentionEngine.ADJUST_EXISTING_SUGGESTION_CONFIDENCE,
        doubleValue=EntityCoMentionEngine.DEFAULT_CONFIDENCE_ADJUSTEMENT),
    @Property(name=SERVICE_RANKING,intValue=0)
})
@Service(value=EnhancementEngine.class)
public class EntityCoMentionEngine extends AbstractEnhancementEngine<RuntimeException,RuntimeException> implements ServiceProperties {

    /**
     * Property used to configure if/how confidence values of existing suggestions
     * are modified if a co-mention is detected for a fise:TextAnnotation.<p>
     * Values MUST be in the range [0..1) the
     * {@link #DEFAULT_CONFIDENCE_ADJUSTEMENT default} is <code>0.33</code> <p>
     * Added with <a href="https://issues.apache.org/jira/browse/STANBOL-1219">STANBOL-1219</a>
     */
    public static final String ADJUST_EXISTING_SUGGESTION_CONFIDENCE = "enhancer.engines.comention.adjustExistingConfidence";
    /**
     * Default value for {@link #ADJUST_EXISTING_SUGGESTION_CONFIDENCE}
     */
    public static final double DEFAULT_CONFIDENCE_ADJUSTEMENT = 0.33;
    /**
     * first of the post processing engines (note STANBOL-1218)
     */
    private static final Integer ENGINE_ORDERING = ServiceProperties.ORDERING_POST_PROCESSING + 80;
    private static final Map<String,Object> SERVICE_PROPERTIES =
            Collections.unmodifiableMap(Collections.singletonMap(
                ServiceProperties.ENHANCEMENT_ENGINE_ORDERING,
                (Object)ENGINE_ORDERING));

    private final Logger log = LoggerFactory.getLogger(EntityCoMentionEngine.class);
    private final LiteralFactory literalFactory = LiteralFactory.getInstance();

    @Reference
    protected NamespacePrefixService prefixService;
    /** Tracks the {@link LabelTokenizer} service used to tokenize entity labels. */
    private ServiceTracker labelTokenizerTracker;
    /**
     * Factor (<code>1 - adjustment</code>) applied to the confidence of existing
     * suggestions when a co-mention is found. <code>1</code> means no adjustment.
     */
    private double confidenceAdjustmentFactor;
//    private BundleContext bundleContext;
    /**
     * EntityLinking configuration used for Co-Mention extractions
     */
    private EntityLinkerConfig linkerConfig;
    /**
     * TextProcessingConfig used for Co-Mention extraction
     */
    private TextProcessingConfig textProcessingConfig;

    /**
     * Default constructor as used by OSGI. This expects that
     * {@link #activate(ComponentContext)} is called before usage
     */
    public EntityCoMentionEngine() {
    }

    /**
     * Activates this engine: parses the {@link TextProcessingConfig} and
     * {@link EntityLinkerConfig} from the component properties, applies the
     * co-mention specific overrides (label/type field, suggestion limits, no
     * redirect processing, no type mappings), validates the
     * {@link #ADJUST_EXISTING_SUGGESTION_CONFIDENCE} value and opens the
     * {@link LabelTokenizer} service tracker.
     * @param ctx the OSGi component context providing the configuration
     * @throws ConfigurationException if the confidence adjustment value is not a
     * double in the range [0..1) or any of the parsed configurations is invalid
     */
    @Activate
    @SuppressWarnings("unchecked")
    protected void activate(ComponentContext ctx) throws ConfigurationException {
        super.activate(ctx);
        log.info("activate {}[name:{}]",getClass().getSimpleName(),getName());
        Dictionary<String,Object> properties = ctx.getProperties();
//        bundleContext = ctx.getBundleContext();
        //extract TextProcessing and EntityLinking config from the provided properties
        textProcessingConfig = TextProcessingConfig.createInstance(properties);
        linkerConfig = EntityLinkerConfig.createInstance(properties,prefixService);
        //some of the configuration is predefined
        linkerConfig.setNameField(CoMentionConstants.CO_MENTION_LABEL_FIELD);
        linkerConfig.setTypeField(CoMentionConstants.CO_MENTION_TYPE_FIELD);
        linkerConfig.setMaxSuggestions(5); //there should not be more as 5 suggestions
        linkerConfig.setMinFoundTokens(1); //a single token is enough
        linkerConfig.setMinLabelScore(0.24); //1/4 of the tokens
        linkerConfig.setMinMatchScore( //labelScore * token match factor
            linkerConfig.getMinLabelScore()*linkerConfig.getMinTokenMatchFactor());
        linkerConfig.setRedirectProcessingMode(RedirectProcessingMode.IGNORE);
        //remove all type mappings (co-mentions use the dc:type of the initial mention)
        linkerConfig.setDefaultDcType(null);
        //copy the key set to avoid concurrent modification while clearing mappings
        Set<IRI> mappedUris = new HashSet<IRI>(linkerConfig.getTypeMappings().keySet());
        for(IRI mappedUri : mappedUris){
            linkerConfig.setTypeMapping(mappedUri.getUnicodeString(), null);
        }
        //parse confidence adjustment value (STANBOL-1219)
        Object value = properties.get(ADJUST_EXISTING_SUGGESTION_CONFIDENCE);
        final double confidenceAdjustment;
        if(value == null){
            confidenceAdjustment = DEFAULT_CONFIDENCE_ADJUSTEMENT;
        } else if(value instanceof Number){
            confidenceAdjustment = ((Number)value).doubleValue();
        } else {
            try {
                confidenceAdjustment = Double.parseDouble(value.toString());
            } catch (NumberFormatException e){
                throw new ConfigurationException(ADJUST_EXISTING_SUGGESTION_CONFIDENCE,
                    "The confidence adjustment value for existing suggestions "
                    + "MUST BE a double value in the range [0..1)", e);
            }
        }
        if(confidenceAdjustment < 0 || confidenceAdjustment >= 1){
            throw new ConfigurationException(ADJUST_EXISTING_SUGGESTION_CONFIDENCE,
                "The confidence adjustment value for existing suggestions "
                + "MUST BE a double value in the range [0..1) (parsed: "
                + confidenceAdjustment +")!");
        }
        confidenceAdjustmentFactor = 1 - confidenceAdjustment;
        //track the LabelTokenizer service (it may come and go at runtime)
        final BundleContext bc = ctx.getBundleContext();
        labelTokenizerTracker = new ServiceTracker(bc, LabelTokenizer.class.getName(), null);
        labelTokenizerTracker.open();
    }

    /**
     * Deactivates this component: clears the parsed configurations and closes
     * the {@link LabelTokenizer} service tracker.
     */
    @Deactivate
    protected void deactivate(ComponentContext ctx) {
        log.info("deactivate {}[name:{}]",getClass().getSimpleName(),getName());
        textProcessingConfig = null;
        linkerConfig = null;
        if(labelTokenizerTracker != null){
            labelTokenizerTracker.close();
            labelTokenizerTracker = null;
        }
        super.deactivate(ctx);
    }

    /**
     * This engine can enhance a ContentItem if (a) the detected language is
     * configured to be processed and (b) the {@link AnalysedText} content part
     * with tokens is present.
     * @return {@link #ENHANCE_ASYNC} if processable, {@link #CANNOT_ENHANCE} otherwise
     */
    @Override
    public int canEnhance(ContentItem ci) throws EngineException {
        String language = getLanguage(this, ci, false);
        if(language == null || textProcessingConfig.getConfiguration(language) == null){
            log.debug("Engine {} ignores ContentItem {} because language {} is not configured.",
                new Object[]{ getName(), ci.getUri(), language});
            return CANNOT_ENHANCE;
        }
        //we need a detected language, the AnalyzedText contentPart with Tokens.
        AnalysedText at = getAnalysedText(this, ci, false);
        return at != null && at.getTokens().hasNext() ?
                ENHANCE_ASYNC : CANNOT_ENHANCE;
    }

    /**
     * Builds an in-memory index of the fise:TextAnnotations already present in
     * the metadata of the parsed ContentItem, runs the {@link EntityLinker}
     * against that index to detect co-mentions and finally writes the detected
     * co-mentions back to the metadata (under a write lock).
     * @param ci the content item to process
     * @throws EngineException if no {@link LabelTokenizer} is available or the
     * entity linking process fails
     */
    @Override
    public void computeEnhancements(ContentItem ci) throws EngineException {
        AnalysedText at = getAnalysedText(this, ci, true);
        String language = getLanguage(this, ci, true);
        LanguageProcessingConfig languageConfig = textProcessingConfig.getConfiguration(language);
        if(languageConfig == null){
            throw new IllegalStateException("The language '"+language+"' is not configured "
                + "to be processed by this Engine. As this is already checked within the "
                + "canEnhance(..) method this may indicate a bug in the used "
                + "EnhancementJobManager implementation!");
        }
        if(log.isDebugEnabled()){
            log.debug("compute co-mentions for ContentItem {} language {} text={}",
                new Object []{ci.getUri().getUnicodeString(), language, StringUtils.abbreviate(at.getSpan(), 100)});
        }
        LabelTokenizer labelTokenizer = (LabelTokenizer)labelTokenizerTracker.getService();
        if(labelTokenizer == null){
            throw new EngineException(this, ci, "No LabelTokenizer available!",null);
        }
        //create the in-memory database for the mentioned Entities
        ContentItemMentionBuilder entityMentionIndex = new ContentItemMentionBuilder(
            labelTokenizer, language, linkerConfig.getDefaultLanguage());
        Graph metadata = ci.getMetadata();
        Set<IRI> textAnnotations = new HashSet<IRI>();
        ci.getLock().readLock().lock();
        try { //iterate over all TextAnnotations (mentions of Entities)
            for(Iterator<Triple> it = metadata.filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION); it.hasNext();){
                IRI ta = (IRI)it.next().getSubject();
                entityMentionIndex.registerTextAnnotation(ta, metadata);
                textAnnotations.add(ta); //store the registered text annotations
            }
        } finally {
            ci.getLock().readLock().unlock();
        }
        //the mention index acts as both EntitySearcher and linking target
        EntityLinker entityLinker = new EntityLinker(at,language,
            languageConfig, entityMentionIndex, linkerConfig, labelTokenizer, entityMentionIndex);
        //process
        try {
            entityLinker.process();
        } catch (EntitySearcherException e) {
            log.error("Unable to link Entities with "+entityLinker,e);
            throw new EngineException(this, ci, "Unable to link Entities with "+entityLinker, e);
        }
        //write results
        ci.getLock().writeLock().lock();
        try {
            writeComentions(ci,entityLinker.getLinkedEntities().values(), language, textAnnotations);
        } finally {
            ci.getLock().writeLock().unlock();
        }
    }

    /**
     * Writes the detected co-mentions to the metadata of the ContentItem. For
     * every {@link Occurrence} of a co-mention this (1) reuses an existing
     * fise:TextAnnotation with the same start or end (ignoring occurrences fully
     * covered by a bigger existing span) or creates a new one, (2) links the
     * suggestions of the initial mention(s) to it, (3) adjusts - exactly once -
     * the confidence of pre-existing suggestions by the configured
     * {@link #confidenceAdjustmentFactor} and (4) adapts the dc:type values and
     * confidence of the fise:TextAnnotation accordingly.<p>
     * Callers MUST hold the write lock of the ContentItem.
     * @param ci the content item (used to create new TextAnnotations)
     * @param comentions the co-mentions detected by the {@link EntityLinker}
     * @param language the language of the text (used for plain literals)
     * @param textAnnotations the URIs of all known fise:TextAnnotations; newly
     * created ones are added to this set
     */
    private void writeComentions(ContentItem ci,Collection<LinkedEntity> comentions, String language,
            Set<IRI> textAnnotations) {
        Language languageObject = null;
        if(language != null && !language.isEmpty()){
            languageObject = new Language(language);
        }
        Graph metadata = ci.getMetadata();
        //we MUST adjust the confidence level of existing annotations only once
        //so we need to keep track of those
        Set<BlankNodeOrIRI> adjustedSuggestions = new HashSet<BlankNodeOrIRI>();
        log.debug("Write Co-Mentions:");
        for(LinkedEntity comention : comentions){
            log.debug(" > {}",comention);
            //URIs of TextAnnotations for the initial mention of this co-mention
            Collection<IRI> initialMentions = new ArrayList<IRI>(comention.getSuggestions().size());
            for(Suggestion suggestion : comention.getSuggestions()){
                Entity entity = suggestion.getEntity();
                if(textAnnotations.contains(entity.getUri())){
                    //this is a textAnnotation
                    initialMentions.add(entity.getUri());
                } //else TODO support also Entities!!
            }
            //create the TextAnnotations for the co-mention
            for(Occurrence occurrence : comention.getOccurrences()){
                Literal startLiteral = literalFactory.createTypedLiteral(occurrence.getStart());
                Literal endLiteral = literalFactory.createTypedLiteral(occurrence.getEnd());
                //search for existing text annotation
                boolean ignore = false;
                //search for textAnnotations with the same start ...
                IRI textAnnotation = null;
                Iterator<Triple> it = metadata.filter(null, ENHANCER_START, startLiteral);
                while(it.hasNext()){
                    Triple t = it.next();
                    Integer end = EnhancementEngineHelper.get(metadata, t.getSubject(), ENHANCER_END, Integer.class, literalFactory);
                    if(end != null && textAnnotations.contains(t.getSubject())){
                        textAnnotation = (IRI)t.getSubject();
                        if(end > occurrence.getEnd()){
                            //there is an other TextAnnotation selecting a bigger span
                            //so we should ignore this Occurrence
                            ignore = true;
                        }
                    }
                }
                //... and with the same end
                it = metadata.filter(null, ENHANCER_END, endLiteral);
                while(it.hasNext()){
                    Triple t = it.next();
                    Integer start = EnhancementEngineHelper.get(metadata, t.getSubject(), ENHANCER_START, Integer.class, literalFactory);
                    if(start != null && textAnnotations.contains(t.getSubject())){
                        textAnnotation = (IRI)t.getSubject();
                        if(start < occurrence.getStart()){
                            //there is an other TextAnnotation selecting a bigger span
                            //so we should ignore this Occurrence
                            ignore = true;
                        }
                    }
                }
                if(!ignore){
                    //collect confidence values of co-mentions
                    Double maxConfidence = null; //maximum confidence of suggestions of the initial mention
                    Double maxExistingConfidence = null; //maximum confidence of existing suggestions
                    if(textAnnotation == null){ //not found ... create a new TextAnnotation for the co-mention
                        textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                        textAnnotations.add(textAnnotation); //add it to the set of TextAnnotations
                        metadata.add(new TripleImpl(textAnnotation,
                            Properties.ENHANCER_START,
                            startLiteral));
                        metadata.add(new TripleImpl(textAnnotation,
                            Properties.ENHANCER_END,
                            endLiteral));
                        metadata.add(new TripleImpl(textAnnotation,
                            Properties.ENHANCER_SELECTION_CONTEXT,
                            new PlainLiteralImpl(occurrence.getContext(),languageObject)));
                        metadata.add(new TripleImpl(textAnnotation,
                            Properties.ENHANCER_SELECTED_TEXT,
                            new PlainLiteralImpl(occurrence.getSelectedText(),languageObject)));
                    } else { //if existing add this engine as contributor
                        metadata.add(new TripleImpl(textAnnotation, DC_CONTRIBUTOR,
                            new PlainLiteralImpl(this.getClass().getName())));
                    }
                    //now process initial mention(s) for the co-mention
                    Set<IRI> dcTypes = new HashSet<IRI>();
                    for(IRI initialMention : initialMentions){
                        //get the dc:type(s) of the initial mentions
                        Iterator<IRI> dcTypesIt = getReferences(metadata, initialMention, DC_TYPE);
                        while(dcTypesIt.hasNext()){
                            dcTypes.add(dcTypesIt.next());
                        }
                        //check confidence of the initial mention (fise:TextAnnotation)
                        Double mentionConfidence = EnhancementEngineHelper.get(metadata, initialMention,
                            ENHANCER_CONFIDENCE, Double.class, literalFactory);
                        if(mentionConfidence != null && (maxConfidence == null ||
                                maxConfidence.compareTo(mentionConfidence) <= 0)){
                            maxConfidence = mentionConfidence;
                        } //else nothing to do
                        //now we need to compare the suggestions of the initial
                        //mention(s) with the existing one.
                        //Get information about the suggestions of the initial mention
                        Map<RDFTerm,Double> initialSuggestions = new HashMap<RDFTerm,Double>();
                        Map<RDFTerm,RDFTerm> initialSuggestedEntities = new HashMap<RDFTerm,RDFTerm>();
                        for(Iterator<Triple> suggestionIt = metadata.filter(null, DC_RELATION, initialMention); suggestionIt.hasNext();){
                            //NOTE: the check MUST be against the subject of the triple. The
                            //previous code tested contains(<the Iterator itself>) which is
                            //always false, so dc:relations to other fise:TextAnnotations
                            //were never skipped as intended.
                            BlankNodeOrIRI suggestion = suggestionIt.next().getSubject();
                            if(!textAnnotations.contains(suggestion)) {
                                RDFTerm suggestedEntity = EnhancementEngineHelper.getReference(metadata, suggestion, ENHANCER_ENTITY_REFERENCE);
                                if(suggestedEntity != null){ //it has a suggestion
                                    Double confidence = EnhancementEngineHelper.get(
                                        metadata, suggestion, ENHANCER_CONFIDENCE, Double.class, literalFactory);
                                    //NOTE: compare against the confidence of THIS suggestion
                                    //(the previous code accidentally reused the confidence of
                                    //the initial mention here)
                                    if(confidence != null && (maxConfidence == null ||
                                            maxConfidence.compareTo(confidence) <= 0)){
                                        maxConfidence = confidence;
                                    } //else nothing to do
                                    initialSuggestions.put(suggestion,confidence);
                                    initialSuggestedEntities.put(suggestedEntity, suggestion);
                                } //no suggestion (dc:relation to some other resource)
                            } // else ignore dc:relation to other fise:TextAnnotations
                        }
                        //now we collect existing Suggestions for this TextAnnotation where we need
                        //to adjust the confidence (quite some things to check ....)
                        Map<BlankNodeOrIRI,Double> existingSuggestions = new HashMap<BlankNodeOrIRI,Double>();
                        if(maxConfidence != null && confidenceAdjustmentFactor < 1){
                            //suggestions are defined by incoming dc:relation
                            for(Iterator<Triple> esIt = metadata.filter(null, DC_RELATION, textAnnotation);esIt.hasNext();){
                                BlankNodeOrIRI existingSuggestion = esIt.next().getSubject();
                                //but not all of them are suggestions
                                if(!textAnnotations.contains(existingSuggestion)) { //ignore fise:TextAnnotations
                                    Double existingConfidence = EnhancementEngineHelper.get(metadata, existingSuggestion,
                                        ENHANCER_CONFIDENCE, Double.class, literalFactory);
                                    //ignore suggestions also suggested for the initial mention
                                    if(!initialSuggestions.containsKey(existingSuggestion)){
                                        RDFTerm suggestedEntity = EnhancementEngineHelper.getReference(metadata, existingSuggestion, ENHANCER_ENTITY_REFERENCE);
                                        //we might also have different fise:EntityAnnotations that
                                        //fise:entity-reference to an Entity present in the
                                        //suggestions for the initial mention
                                        if(!initialSuggestedEntities.containsKey(suggestedEntity)){
                                            //finally make sure that we adjust confidences only once
                                            if(!adjustedSuggestions.contains(existingSuggestion)){
                                                existingSuggestions.put(existingSuggestion, existingConfidence);
                                            } //else confidence already adjusted
                                        } else { // different fise:EntityAnnotation, but same referenced Entity
                                            //we need to check confidences to decide what to do
                                            RDFTerm initialSuggestion = initialSuggestedEntities.get(suggestedEntity);
                                            Double initialConfidence = initialSuggestions.get(initialSuggestion);
                                            if(initialConfidence == null || (existingConfidence != null &&
                                                    existingConfidence.compareTo(initialConfidence) >= 0)){
                                                //existing confidence >= initial .. keep existing
                                                initialSuggestions.remove(initialSuggestion);
                                                //guard against existingConfidence == null (would NPE
                                                //in compareTo when a maximum is already known)
                                                if(existingConfidence != null && (maxExistingConfidence == null ||
                                                        maxExistingConfidence.compareTo(existingConfidence) <= 0)){
                                                    maxExistingConfidence = existingConfidence;
                                                }
                                            } else { //initial has higher confidence
                                                //adjust this one (if not yet adjusted)
                                                if(!adjustedSuggestions.contains(existingSuggestion)){
                                                    existingSuggestions.put(existingSuggestion, existingConfidence);
                                                }
                                            }
                                        }
                                    } else { //an initial mention already present
                                        //no need to process initial mention
                                        initialSuggestions.remove(existingSuggestion);
                                        if(existingConfidence != null && (maxExistingConfidence == null ||
                                                maxExistingConfidence.compareTo(existingConfidence) <= 0)){
                                            maxExistingConfidence = existingConfidence;
                                        } //else existingConfidence == null (undefined)
                                    }
                                } //else ignore dc:relations to other fise:TextAnnotations
                            }
                            for(Entry<BlankNodeOrIRI,Double> entry : existingSuggestions.entrySet()){
                                if(entry.getValue() != null){
                                    double adjustedConfidence = entry.getValue() * confidenceAdjustmentFactor;
                                    if(maxExistingConfidence == null || adjustedConfidence > maxExistingConfidence){
                                        maxExistingConfidence = adjustedConfidence;
                                    }
                                    EnhancementEngineHelper.set(metadata, entry.getKey(),
                                        ENHANCER_CONFIDENCE, adjustedConfidence, literalFactory);
                                    adjustedSuggestions.add(entry.getKey()); //mark as adjusted
                                }
                            }
                        }
                        //add the suggestions of the initial mention to this one
                        for(RDFTerm suggestion : initialSuggestions.keySet()){
                            metadata.add(new TripleImpl((BlankNodeOrIRI)suggestion, DC_RELATION, textAnnotation));
                        }
                        //finally link the co-mention with the initial one
                        metadata.add(new TripleImpl(textAnnotation, DC_RELATION, initialMention));
                    }
                    // Adapt the dc:type values of the fise:TextAnnotation
                    // - if Suggestions added by this engine do have the max confidence
                    //   use the dc:type values of the initial mention
                    // - if the original suggestions do have a higher confidence keep the
                    //   existing
                    // - in case both do have the same confidence we add all dc:types
                    boolean removeExistingDcTypes = maxConfidence != null && (maxExistingConfidence == null ||
                        maxConfidence.compareTo(maxExistingConfidence) >= 0);
                    //NOTE: compare with ">= 0" - the previous ">= 1" (a) relied on
                    //compareTo returning exactly 1 (only its sign is guaranteed) and
                    //(b) excluded the equal-confidence case, stripping all dc:types
                    //instead of merging them as documented above
                    boolean addCoMentionDcTypes = maxExistingConfidence == null ||
                        (maxConfidence != null && maxConfidence.compareTo(maxExistingConfidence) >= 0);
                    Iterator<IRI> existingDcTypesIt = getReferences(metadata, textAnnotation, DC_TYPE);
                    while(existingDcTypesIt.hasNext()){ //do not add existing
                        //remove dc:type triples if they are not re-added later and
                        //removeExistingDcTypes == true
                        if((!dcTypes.remove(existingDcTypesIt.next()) || !addCoMentionDcTypes )
                                && removeExistingDcTypes){
                            existingDcTypesIt.remove(); //remove the dcType
                        }
                    }
                    if(addCoMentionDcTypes){
                        for(IRI dcType : dcTypes){ //add missing
                            metadata.add(new TripleImpl(textAnnotation, DC_TYPE, dcType));
                        }
                    }
                    //TODO: support also Entities
                    if(maxConfidence != null){ //set the confidence value (if known)
                        EnhancementEngineHelper.set(metadata, textAnnotation, ENHANCER_CONFIDENCE, maxConfidence, literalFactory);
                    }
                } //else ignore this occurrence
            }
        }
    }

    @Override
    public Map<String,Object> getServiceProperties() {
        return SERVICE_PROPERTIES;
    }
}