blob: 88c83703838ddc491c0e49641c5d11b330e511b8 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*
*/
package org.apache.stanbol.enhancer.engines.entitylinking.impl;
import static org.apache.stanbol.enhancer.engines.entitylinking.impl.LabelMatch.DEFAULT_LABEL_TOKEN_COMPARATOR;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import org.apache.clerezza.commons.rdf.Language;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.Literal;
import org.apache.stanbol.enhancer.engines.entitylinking.Entity;
import org.apache.stanbol.enhancer.engines.entitylinking.EntitySearcher;
/**
* A suggestion of an entity in the {@link EntitySearcher} for a part of the
* text. This class does not include the actual position within the Text,
* because it is intended to be used in combination with {@link LinkedEntity}.<p>
* This class also manages redirected entities and a state if redirects where
* already processed for this suggestion.<p>
* In addition this class also defines a set of {@link Comparator}s that are
* used to sort suggestions base on how well the fit the text.
* @author Rupert Westenthaler
*
*/
public class Suggestion {
private List<LabelMatch> labelMatches = new ArrayList<LabelMatch>();
private boolean labelMatchesSorted = true;
private final Entity entity;
private Entity redirectsTo;
private boolean redirectProcessed;
private double score;
/**
* used to allow overriding the MATCH of this suggestion
*/
private MATCH match;
public static enum MATCH {
/**
* No match (to less tokens, wrong oder ...)
*/
NONE,
/**
* Not all tokens but sufficient to suggest (with lower score)
*/
PARTIAL,
/**
* All requested Tokens match, but it is no exact match e.g. because
* the label defines some additional tokens
*/
FULL,
/**
* The label of the suggested Entity is exactly the requested string
*/
EXACT,
}
protected Suggestion(Entity entity){
if(entity == null){
throw new IllegalArgumentException("The parsed Result MUST NOT be NULL!");
}
this.entity = entity;
//TODO Do no longer use the resultScore as the score. We need to provide an
//own algorithm to calculate scores!
// this.resultScore = result.getFirst(RdfResourceEnum.resultScore.getUri(), Float.class);
}
/**
* Adds an new LabelMatch to this suggestion
* @param labelMatch the labelMatch
*/
public void addLabelMatch(LabelMatch labelMatch){
if(labelMatch == null || labelMatch.getMatch() == MATCH.NONE){
return; //ignore null an MATCH.NONE entries
}
labelMatches.add(labelMatch);
if(labelMatches.size() > 1){
labelMatchesSorted = false;
}
}
/**
* Getter for the best label in the given language
* @param suggestion the suggestion
* @param nameField the field used to search for labels
* @param language the language
* @return the best match or {@link Suggestion#getMatchedLabel()} if non is found
*/
public Literal getBestLabel(IRI nameField, String language){
Entity rep = getEntity();
//start with the matched label -> so if we do not find a better one
//we will use the matched!
Literal matchedLabel = getMatchedLabel();
Literal label = matchedLabel;
// 1. check if the returned Entity does has a label -> if not return null
// add labels (set only a single label. Use "en" if available!
Iterator<Literal> labels = rep.getText(nameField);
boolean matchFound = false;
while (labels.hasNext() && !matchFound) {
Literal actLabel = labels.next();
if(label == null){
label = actLabel;
}
//now we have already a label check the language
Language actLang = actLabel.getLanguage();
//use startWith to match also en-GB and en-US ...
if (actLang != null && actLang.toString().startsWith(language)) {
//prefer labels with the correct language
label = actLabel;
if(matchedLabel != null && matchedLabel.getLexicalForm().equalsIgnoreCase(label.getLexicalForm())){
//found label in that language that exactly matches the
//label used to match the text
matchFound = true;
}
}
}
return label;
}
/**
* Shorthand for {@link #getLabelMatch()}.getMatchedLabel()
* @return the label or <code>null</code> if {@link MATCH#NONE}
*/
public Literal getMatchedLabel() {
return getLabelMatch().getMatchedLabel();
}
protected void setMatch(MATCH matchType) {
this.match = matchType;
}
/**
* Getter for the {@link MATCH}. If not manually set
* this forwards to {@link #getLabelMatch()}.getMatch()
* @return the {@link MATCH} of this suggestion
*/
public MATCH getMatch() {
return match != null ? match : getLabelMatch().getMatch();
}
public final Entity getResult(){
return entity;
}
/**
* Getter for the EntityRank of the suggested Entity. In case of a
* redirected Entity it will return the maximum value
*/
public Float getEntityRank() {
final Float ranking = entity.getEntityRanking();
final Float rdRanking = redirectsTo == null ? null : redirectsTo.getEntityRanking();
return rdRanking != null ?
ranking == null || rdRanking.compareTo(ranking) > 0 ?
rdRanking : ranking : ranking;
}
/**
* @param score the score to set
*/
public void setScore(double score) {
this.score = score;
}
/**
* @return the score
*/
public double getScore() {
return score;
}
/**
* Returns <code>true</code> if the result has a registered redirect
* @return <code>true</code> if a redirect is present. Otherwise <code>false</code>
*/
public boolean isRedirect(){
return redirectsTo != null;
}
/**
* Setter for Entity the {@link #getResult() result} of this match redirects
* to. Also sets {@link #setRedirectProcessed(boolean)} to <code>true</code>
* @param redirect the redirected entity or <code>null</code> if no redirect
* is present
*/
protected void setRedirect(Entity redirect){
this.redirectsTo = redirect;
setRedirectProcessed(true);
}
/**
* Setter for the state if the redirects for this resultMatch where already
* processed. Calling {@link #setRedirect(Representation)} will set this
* automatically to <code>true</code>
* @param state the state.
*/
protected void setRedirectProcessed(boolean state){
this.redirectProcessed = state;
}
/**
* Getter for the state if the redirect was processed for this ResultMatch
* @return the state
*/
protected boolean isRedirectedProcessed(){
return redirectProcessed;
}
/**
* Getter for the Entity the {@link #getResult()} of this Entity redirects
* to. Returns <code>null</code> if there is no redirect.
* @return the entity the {@link #getResult()} redirects to or <code>null</code>
* if there is no redirect
*/
public Entity getRedirect(){
return redirectsTo;
}
/**
* getter for the Representation of this result. In case of
* <code>{@link #isRedirect()} == true</code> it returns the the
* {@link #getRedirect()} otherwise it returns the {@link #getResult()}.<p>
* To check explicitly for the result of the redirect one needs to use
* {@link #getRedirect()} and {@link #getRedirect()} instead.
* @return The representation for this match. might be directly the
* {@link #getResult() result} or if present the
* {@link #getRedirect() redirected} resource.
*/
public final Entity getEntity(){
return redirectsTo == null ? entity : redirectsTo;
}
/**
* Getter for the top ranked LabelMatch.
* @return the top ranked {@link LabelMatch} or {@link LabelMatch#NONE}
* if no match is present.
*/
public final LabelMatch getLabelMatch(){
if(!labelMatchesSorted){
Collections.sort(labelMatches, LabelMatch.DEFAULT_LABEL_TOKEN_COMPARATOR);
}
return labelMatches.isEmpty() ? LabelMatch.NONE : labelMatches.get(0);
}
/**
* Getter for the sorted list with all {@link LabelMatch}s of this Suggestion
* @return the sorted LabelMatches. Guaranteed NOT <code>null</code> and
* NOT empty. In case no match is present a singleton list containing
* {@link LabelMatch#NONE} is returned.
*/
public final List<LabelMatch> getLabelMatches(){
if(!labelMatchesSorted){
Collections.sort(labelMatches, LabelMatch.DEFAULT_LABEL_TOKEN_COMPARATOR);
}
if(labelMatches.isEmpty()){
return Collections.singletonList(LabelMatch.NONE);
} else {
return labelMatches;
}
}
@Override
public String toString() {
return labelMatches.isEmpty() ? "no match" :labelMatches.get(0)
+ " for "+entity.getId()
+ (redirectsTo != null ? " (redirects: "+redirectsTo.getId()+") " : "")
+ " ranking: "+getEntityRank();
}
/**
* Compares {@link Suggestion}s based on the {@link Suggestion#getScore()}.
* In case the scores are equals the call is forwarded to the
* {@link Suggestion#DEFAULT_LABEL_TOKEN_COMPARATOR}.<p>
* This is NOT the default {@link Comparator} because score values are
* usually only calculated relative to the best matching suggestions and
* therefore only available later.
*/
public static final Comparator<Suggestion> SCORE_COMPARATOR = new Comparator<Suggestion>() {
@Override
public int compare(Suggestion arg0, Suggestion arg1) {
return arg0.getScore() > arg1.getScore() ? -1 : //bigger score first
arg0.getScore() < arg1.getScore() ? 1 :
DEFAULT_LABEL_TOKEN_COMPARATOR.compare(arg0.getLabelMatch(), arg1.getLabelMatch());
}
};
/**
* Compares {@link Suggestion}s based on the {@link Suggestion#getEntityRank()}.
* <code>null</code> values are assumed to be the smallest.
*/
public static final Comparator<Suggestion> ENTITY_RANK_COMPARATOR = new Comparator<Suggestion>(){
@Override
public int compare(Suggestion arg0, Suggestion arg1) {
Float r1 = arg0.getEntityRank();
Float r2 = arg1.getEntityRank();
return r2 == null ? r1 == null ? 0 : -1 : r1 == null ? 1 : r2.compareTo(r1);
}
};
/**
* Compares {@link Suggestion} first based on the {@link Suggestion#getMatch()} value
* and secondly based on the {@link RdfResourceEnum#entityRank}.
*/
public static final Comparator<Suggestion> MATCH_TYPE_SUGGESTION_COMPARATOR = new Comparator<Suggestion>() {
@Override
public int compare(Suggestion arg0, Suggestion arg1) {
int labelMatch = DEFAULT_LABEL_TOKEN_COMPARATOR.compare(arg0.getLabelMatch(), arg1.getLabelMatch());
if(labelMatch == 0){
Float arg0Rank = arg0.getEntityRank();
if(arg0Rank == null){
arg0Rank = Float.valueOf(0);
}
Float arg1Rank = arg1.getEntityRank();
if(arg1Rank == null){
arg1Rank = Float.valueOf(0);
}
return arg1Rank.compareTo(arg0Rank); //higher ranks first
} else {
return labelMatch;
}
}
};
}