// blob: e39e1c01ccc96c39f616d883aae4c86968931a4f [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.enhancer.engine.disambiguation.foaf;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
import java.util.SortedMap;
import java.util.SortedSet;
import org.apache.clerezza.rdf.core.LiteralFactory;
import org.apache.clerezza.rdf.core.TripleCollection;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
import org.apache.stanbol.entityhub.servicesapi.model.Entity;
import org.apache.stanbol.entityhub.servicesapi.model.Text;
import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
import org.apache.stanbol.entityhub.servicesapi.site.Site;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* An abstraction of an EntityAnnotation
*
* @author Dileepa Jayakody
*/
/**
 * An abstraction of a fise:EntityAnnotation used while disambiguating
 * suggested entities.
 * <p>
 * An instance holds the original confidence of a suggestion together with the
 * foaf:name score and the URI-correlation score, and combines them into a
 * single disambiguated confidence using fixed weights derived from
 * {@link #FOAFNAME_DISAMBIGUATION_RATIO},
 * {@link #URI_CORRELATION_DISAMBIGUATION_RATIO} and
 * {@link #ORIGINAL_CONFIDNECE_RATIO}.
 *
 * @author Dileepa Jayakody
 */
public class EntityAnnotation implements Comparable<EntityAnnotation> {

    private static final Logger log = LoggerFactory
            .getLogger(EntityAnnotation.class);

    /**
     * foaf:name disambiguation ratio (2.0)
     */
    public static final double FOAFNAME_DISAMBIGUATION_RATIO = 2.0;

    /**
     * URI Reference correlation disambiguation ratio (2.0)
     */
    public static final double URI_CORRELATION_DISAMBIGUATION_RATIO = 2.0;

    /**
     * Default ratio for the original fise:confidence of suggested entities
     * (1.0). The misspelled name is kept because the constant is public.
     */
    public static final double ORIGINAL_CONFIDNECE_RATIO = 1.0;

    /**
     * Sum of all three ratios; each weight is its ratio divided by this sum,
     * so the three weights add up to 1.
     */
    private static final double RATIO_SUM = FOAFNAME_DISAMBIGUATION_RATIO
            + ORIGINAL_CONFIDNECE_RATIO
            + URI_CORRELATION_DISAMBIGUATION_RATIO;

    /**
     * The weight for foaf:name disambiguation scores
     */
    private static final double FOAFNAME_DISAMBIGUATION_WEIGHT =
            FOAFNAME_DISAMBIGUATION_RATIO / RATIO_SUM;

    /**
     * The weight for uri-correlation disambiguation scores
     */
    private static final double URI_CORRELATION_DISAMBIGUATION_WEIGHT =
            URI_CORRELATION_DISAMBIGUATION_RATIO / RATIO_SUM;

    /**
     * The weight for the original confidence scores
     */
    private static final double CONFIDENCE_WEIGHT =
            ORIGINAL_CONFIDNECE_RATIO / RATIO_SUM;

    private static final LiteralFactory lf = LiteralFactory.getInstance();

    private static final UriRef ENTITYHUB_SITE = new UriRef(
            RdfResourceEnum.site.getUri());

    // URI of the fise:EntityAnnotation in the metadata (null when this
    // instance was created directly from an Entity)
    private UriRef uriLink;
    // URI of the suggested entity
    private UriRef entityUri;
    private Entity entity;
    private Double originalConfidence = 0.0;
    private Double foafNameDisambiguationScore = 0.0;
    private Double disambiguatedConfidence = 0.0;
    private Double entityReferenceDisambiguatedConfidence = 0.0;
    private Double foafNameDisambiguatedConfidence = 0.0;
    // the score assigned based on the number of uri correlations with other
    // entities
    private int correlationScore;
    // uri-references from this entity
    private int referencesFromEntity;
    private String site;
    private String entityType;
    private String entityLabel;

    /**
     * Used by {@link #createFromUri(TripleCollection, UriRef)}; only records
     * the URI of the fise:EntityAnnotation, all other state is filled in by
     * the factory method.
     */
    private EntityAnnotation(UriRef entityAnnotation) {
        this.uriLink = entityAnnotation;
    }

    /**
     * Creates an EntityAnnotation for the given {@link Entity}. The entity URI
     * and the site are taken from the entity; the URI of a
     * fise:EntityAnnotation is not set by this constructor.
     *
     * @param entity
     *            the entity (must not be <code>null</code>)
     */
    public EntityAnnotation(Entity entity) {
        this.entity = entity;
        this.entityUri = new UriRef(entity.getId());
        this.site = entity.getSite();
    }

    /**
     * Allows to create EntityAnnotations from an existing
     * fise:EntityAnnotation contained in the metadata of the processed
     * {@link ContentItem}.
     * <p>
     * If the annotation has no fise:confidence value, <code>0.0</code> is used
     * as fallback and a warning is logged.
     *
     * @param graph
     *            the graph holding the enhancement metadata
     * @param uri
     *            the URI of the fise:EntityAnnotation
     * @return the EntityAnnotation, or <code>null</code> if the resource has
     *         no {@link Properties#ENHANCER_ENTITY_REFERENCE} value
     */
    public static EntityAnnotation createFromUri(TripleCollection graph,
            UriRef uri) {
        EntityAnnotation entityAnnotation = new EntityAnnotation(uri);
        entityAnnotation.entityUri = EnhancementEngineHelper.getReference(
                graph, uri, Properties.ENHANCER_ENTITY_REFERENCE);
        if (entityAnnotation.entityUri == null) {
            // most likely not a fise:EntityAnnotation
            log.debug("Unable to create Suggestion for EntityAnnotation {} "
                    + "because property {} is not present", uri,
                    Properties.ENHANCER_ENTITY_REFERENCE);
            return null;
        }
        entityAnnotation.originalConfidence = EnhancementEngineHelper.get(
                graph, uri, Properties.ENHANCER_CONFIDENCE, Double.class, lf);
        if (entityAnnotation.originalConfidence == null) {
            log.warn("EntityAnnotation {} does not define a value for "
                    + "property {}. Will use '0' as fallback", uri,
                    Properties.ENHANCER_CONFIDENCE);
            entityAnnotation.originalConfidence = 0.0;
        }
        entityAnnotation.site = EnhancementEngineHelper.getString(graph, uri,
                ENTITYHUB_SITE);
        entityAnnotation.entityType = EnhancementEngineHelper.getString(graph,
                uri, Properties.ENHANCER_ENTITY_TYPE);
        entityAnnotation.entityLabel = EnhancementEngineHelper.getString(graph,
                uri, Properties.ENHANCER_ENTITY_LABEL);
        return entityAnnotation;
    }

    /**
     * Computes the disambiguated confidence as the weighted original
     * confidence plus the currently stored foaf:name and entity-reference
     * disambiguated confidences (which are already weighted).
     */
    public void calculateDisambiguatedConfidence() {
        this.disambiguatedConfidence = (originalConfidence * CONFIDENCE_WEIGHT)
                + this.foafNameDisambiguatedConfidence
                + this.entityReferenceDisambiguatedConfidence;
    }

    /**
     * Computes the foaf:name disambiguated confidence by applying the
     * foaf:name weight to the current foaf:name disambiguation score.
     */
    public void calculateFoafNameDisambiguatedConfidence() {
        this.foafNameDisambiguatedConfidence =
                foafNameDisambiguationScore * FOAFNAME_DISAMBIGUATION_WEIGHT;
    }

    /**
     * Calculates the disambiguation score obtained for entity's URIReference
     * correlations. The correlation score is normalized to [0..1] relative to
     * the given bounds and then multiplied by the uri-correlation weight.
     * <p>
     * If <code>max</code> is not greater than <code>min</code> the stored
     * value is left unchanged.
     *
     * @param max
     *            maximum correlation score of entities
     * @param min
     *            minimum correlation score of entities
     */
    public void calculateEntityReferenceDisambiguatedConfidence(int max, int min) {
        int range = max - min;
        if (range > 0) {
            // Promote to double BEFORE dividing: int / int would truncate the
            // normalized score to 0 for every value below max.
            double normalizedCorrelationScore =
                    (double) (correlationScore - min) / range;
            this.entityReferenceDisambiguatedConfidence =
                    normalizedCorrelationScore * URI_CORRELATION_DISAMBIGUATION_WEIGHT;
        }
    }

    /**
     * The URI of the fise:EntityAnnotation representing this suggestion in the
     * {@link ContentItem#getMetadata() metadata} of the processed
     * {@link ContentItem}. This will be <code>null</code> for instances
     * created via {@link #EntityAnnotation(Entity)} unless
     * {@link #setEntityAnnotation(UriRef)} was called.
     *
     * @return the URI of the fise:EntityAnnotation or <code>null</code> if not
     *         present.
     */
    public UriRef getUriLink() {
        return uriLink;
    }

    /**
     * Allows to set the URI of the fise:EntityAnnotation. This is required if
     * the original enhancement structure shared one fise:EntityAnnotation
     * instance for two fise:TextAnnotations (e.g. because both TextAnnotations
     * had the exact same value for fise:selected-text). After disambiguation it
     * is necessary to 'clone' fise:EntityAnnotations like that to give them
     * different fise:confidence values. Because of that it is supported to set
     * the new URI of the cloned fise:EntityAnnotation.
     *
     * @param uri
     *            the uri of the cloned fise:EntityAnnotation
     */
    public void setEntityAnnotation(UriRef uri) {
        this.uriLink = uri;
    }

    /**
     * The URI of the Entity (MUST NOT be <code>null</code>)
     *
     * @return the URI
     */
    public UriRef getEntityUri() {
        return entityUri;
    }

    /**
     * The original confidence of the fise:EntityAnnotation. Defaults to
     * <code>0.0</code> when no confidence value was available.
     *
     * @return the original confidence
     */
    public Double getOriginalConfidnece() {
        return originalConfidence;
    }

    /**
     * The {@link Entity} or <code>null</code> if not available. For Suggestions
     * that are created based on fise:EntityAnnotations the Entity is not
     * available. Entities might be loaded as part of the Disambiguation
     * process.
     *
     * @return the {@link Entity} or <code>null</code> if not available
     */
    public Entity getEntity() {
        return entity;
    }

    /**
     * The confidence after disambiguation. Starts at <code>0.0</code> and is
     * updated by {@link #calculateDisambiguatedConfidence()} or
     * {@link #setDisambiguatedConfidence(Double)}.
     *
     * @return the disambiguated confidence (<code>null</code> only if it was
     *         explicitly set to <code>null</code>)
     */
    public Double getDisambiguatedConfidence() {
        return disambiguatedConfidence;
    }

    /**
     * The name of the Entityhub {@link Site} the suggested Entity is managed.
     *
     * @return the name of the Entityhub {@link Site}
     */
    public String getSite() {
        return site;
    }

    /**
     * @param entityType
     *            the type of the entity
     */
    public void setEntityType(String entityType) {
        this.entityType = entityType;
    }

    /**
     * @return the type of the entity
     */
    public String getEntityType() {
        return entityType;
    }

    /**
     * @param entityLabel
     *            the label of the entity
     */
    public void setEntityLabel(String entityLabel) {
        this.entityLabel = entityLabel;
    }

    /**
     * @return the label of the entity
     */
    public String getEntityLabel() {
        return entityLabel;
    }

    /**
     * @param correlationScore
     *            the new uri-correlation score
     */
    public void setCorrelationScore(int correlationScore) {
        this.correlationScore = correlationScore;
    }

    /**
     * Setter for the confidence after disambiguation
     *
     * @param disambiguatedConfidence
     *            the new disambiguated confidence
     */
    public void setDisambiguatedConfidence(Double disambiguatedConfidence) {
        this.disambiguatedConfidence = disambiguatedConfidence;
    }

    /**
     * Adds the given amount to the current correlation score.
     *
     * @param corefEntities
     *            the amount to add
     */
    public void increaseCorrelationScore(int corefEntities) {
        this.correlationScore += corefEntities;
    }

    /**
     * @return the current uri-correlation score
     */
    public int getCorrelationScore() {
        return correlationScore;
    }

    /**
     * @param linksFromEntity
     *            the number of uri-references from this entity
     */
    public void setReferencesFromEntity(int linksFromEntity) {
        this.referencesFromEntity = linksFromEntity;
    }

    /**
     * @return the number of uri-references from this entity
     */
    public int getReferencesFromEntity() {
        return referencesFromEntity;
    }

    /**
     * @param foafNameDisambiguationScore
     *            the (unweighted) foaf:name disambiguation score
     */
    public void setFoafNameDisambiguationScore(
            Double foafNameDisambiguationScore) {
        this.foafNameDisambiguationScore = foafNameDisambiguationScore;
    }

    /**
     * @return the (unweighted) foaf:name disambiguation score
     */
    public Double getFoafNameDisambiguationScore() {
        return foafNameDisambiguationScore;
    }

    /**
     * @param entityReferenceDisambiguatedConfidence
     *            the weighted uri-correlation confidence
     */
    public void setEntityReferenceDisambiguatedConfidence(
            Double entityReferenceDisambiguatedConfidence) {
        this.entityReferenceDisambiguatedConfidence = entityReferenceDisambiguatedConfidence;
    }

    /**
     * @return the weighted uri-correlation confidence
     */
    public Double getEntityReferenceDisambiguatedConfidence() {
        return entityReferenceDisambiguatedConfidence;
    }

    /**
     * @param foafNameDisambiguatedConfidence
     *            the weighted foaf:name confidence
     */
    public void setFoafNameDisambiguatedConfidence(
            Double foafNameDisambiguatedConfidence) {
        this.foafNameDisambiguatedConfidence = foafNameDisambiguatedConfidence;
    }

    /**
     * @return the weighted foaf:name confidence
     */
    public Double getFoafNameDisambiguatedConfidence() {
        return foafNameDisambiguatedConfidence;
    }

    /**
     * Hash code based on the entity URI only, consistent with
     * {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        return entityUri.hashCode();
    }

    /**
     * Two EntityAnnotations are equal if they refer to the same entity URI.
     */
    @Override
    public boolean equals(Object obj) {
        return obj instanceof EntityAnnotation
                && ((EntityAnnotation) obj).entityUri.equals(entityUri);
    }

    /**
     * Compares based on the {@link #getDisambiguatedConfidence()} (if present)
     * and falls back to the {@link #getOriginalConfidnece()}. Higher
     * confidence values sort first. If no confidence value is present or both
     * Suggestions do have the same confidence the natural order of the
     * Entities URI is used. This also ensures
     * <code>(x.compareTo(y)==0) == (x.equals(y))</code> and allows to use this
     * class with {@link SortedMap} and {@link SortedSet} implementations.
     */
    @Override
    public int compareTo(EntityAnnotation other) {
        int result;
        if (disambiguatedConfidence != null
                && other.disambiguatedConfidence != null) {
            // 'other' is the receiver so that higher confidences come first
            result = other.disambiguatedConfidence
                    .compareTo(disambiguatedConfidence);
        } else if (other.originalConfidence != null
                && originalConfidence != null) {
            result = other.originalConfidence.compareTo(originalConfidence);
        } else {
            result = 0;
        }
        // ensure (x.compareTo(y)==0) == (x.equals(y))
        return result == 0 ? entityUri.getUnicodeString().compareTo(
                other.entityUri.getUnicodeString()) : result;
    }
}