// blob: afcab96c5eb79b20e4ccc655de755de213d7a2a8 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.enhancer.engines.lucenefstlinking;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Map;
import org.apache.clerezza.rdf.core.Literal;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.stanbol.enhancer.engines.entitylinking.impl.Suggestion;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Represents an Entity that matches somewhere in the tagged text.
 * <p>
 * Matches are generated for {@link #id Lucene document ids} and
 * {@link #getUri() Solr document ids} (the URI of the matching entity). On
 * the first access to the {@link #getUri() URI}, {@link #getLabels() labels},
 * {@link #getTypes() types}, {@link #getRedirects() redirects} or the
 * {@link #getRanking() ranking} all of this information is lazily retrieved
 * by accessing the data stored in the index. The {@link FieldLoader} instance
 * passed to the constructor is used to load this information.
 * Typically this is implemented by the {@link MatchPool} instance used to
 * instantiate Match instances.
 * <p>
 * If loading fails the problem is logged once; afterwards all single valued
 * getters return <code>null</code> and all multi valued getters return an
 * empty collection. Loading is not retried.
 * <p>
 * This class performs no synchronization.
 *
 * @author Rupert Westenthaler
 *
 */
public class Match {

    private static final Logger log = LoggerFactory.getLogger(Match.class);

    /**
     * Lucene document id
     */
    public final int id;
    /**
     * Used to load the field values for the {@link #id} on first access
     */
    private final FieldLoader fieldLoader;
    /**
     * The lazily loaded field values. <code>null</code> until the first
     * successful load.
     */
    private Map<FieldType,Object> values;
    /**
     * Set to <code>true</code> as soon as loading failed, so that the
     * {@link #fieldLoader} is not called again.
     */
    private boolean error = false;
    /**
     * The label that matched, as set by {@link #setMatch(double, Literal)}
     */
    private Literal matchLabel;
    /**
     * The score of the Match
     */
    private double score;

    /**
     * Creates a Match for the given Lucene document id. No data are loaded
     * by the constructor.
     * @param id the Lucene document id
     * @param fieldLoader used to load the field values on first access
     */
    Match(int id, FieldLoader fieldLoader){
        this.id = id;
        this.fieldLoader = fieldLoader;
    }

    /**
     * Getter for the value of the {@link FieldType#id id} field.
     * @return the URI or <code>null</code> if not available or if loading
     * the entity data failed
     */
    public String getUri() {
        return getValue(FieldType.id);
    }

    /**
     * Getter for the values of the {@link FieldType#label label} field.
     * @return the labels or an empty collection if none are available
     */
    public Collection<Literal> getLabels(){
        return getValues(FieldType.label);
    }

    /**
     * Getter for the values of the {@link FieldType#type type} field.
     * @return the types or an empty collection if none are available
     */
    public Collection<UriRef> getTypes(){
        return getValues(FieldType.type);
    }

    /**
     * Getter for the values of the {@link FieldType#redirect redirect} field.
     * @return the redirects or an empty collection if none are available
     */
    public Collection<UriRef> getRedirects(){
        return getValues(FieldType.redirect);
    }

    /**
     * Getter for the value of the {@link FieldType#ranking ranking} field.
     * @return the ranking or <code>null</code> if not available or if
     * loading the entity data failed
     */
    public Double getRanking(){
        return getValue(FieldType.ranking);
    }

    /**
     * Getter for the values of a multi valued field.
     * @param type the field type. MUST BE multi valued
     * @return the values or an empty collection if no value is present.
     * Never <code>null</code>
     * @throws IllegalArgumentException if the parsed type is not multi valued
     */
    private <T> Collection<T> getValues(FieldType type){
        if(!type.isMultivalued()){
            throw new IllegalArgumentException("The parsed field Type '" + type
                + "' is not multi valued!");
        }
        Object value = getValue(type);
        if(value == null){
            return Collections.<T>emptySet();
        }
        //the element type can not be checked at runtime; callers request
        //the type that matches the parsed FieldType
        @SuppressWarnings("unchecked")
        Collection<T> collection = (Collection<T>)value;
        return collection;
    }

    /**
     * Getter for the value stored for the parsed field type. The first call
     * loads the values of all fields by using the {@link #fieldLoader}.
     * @param type the field type
     * @return the value or <code>null</code> if no value is present or if
     * loading failed
     */
    private <T> T getValue(FieldType type){
        if(error){
            return null;
        }
        if(values == null){
            Map<FieldType,Object> loaded;
            try {
                loaded = fieldLoader.load(id);
            } catch (IOException e) {
                log.warn("Unable to load Entity for Lucene DocId '"+id+"'!",e);
                error = true;
                return null;
            } catch (RuntimeException e) {
                log.warn("Error while loading Entity for Lucene DocId '"+id+"'!",e);
                error = true;
                return null;
            }
            if(loaded == null){
                //a loader returning null would otherwise cause a
                //NullPointerException below and be called again on every access
                log.warn("No Entity data loaded for Lucene DocId '"+id+"'!");
                error = true;
                return null;
            }
            values = loaded;
        }
        //the value type can not be checked at runtime; callers request the
        //type that matches the parsed FieldType
        @SuppressWarnings("unchecked")
        T value = (T) values.get(type);
        return value;
    }

    /**
     * Sets both the {@link #getScore() score} and the
     * {@link #getMatchLabel() match label}.
     * @param score the score
     * @param matchLabel the label that matched
     */
    public void setMatch(double score, Literal matchLabel){
        this.score = score;
        this.matchLabel = matchLabel;
    }

    /**
     * Allows to update the {@link #getScore() score} without changing the
     * {@link #getMatchLabel() match}.
     * @param score the new score
     */
    public void updateScore(double score) {
        this.score = score;
    }

    /**
     * The score
     * @return the score
     */
    public double getScore() {
        return score;
    }

    /**
     * The label set by the last call to {@link #setMatch(double, Literal)}.
     * @return the match label or <code>null</code> if none was set
     */
    public Literal getMatchLabel() {
        return matchLabel;
    }

    /**
     * The hash code is the {@link #id Lucene document id}.
     */
    @Override
    public int hashCode() {
        return id;
    }

    /**
     * Two Matches are equal if they have the same {@link #id}.
     */
    @Override
    public boolean equals(Object o) {
        return o instanceof Match && id == ((Match)o).id;
    }

    /**
     * Returns the {@link #getUri() URI} if available. NOTE that this
     * triggers the lazy loading of the entity data if not yet done.
     */
    @Override
    public String toString() {
        String uri = getUri();
        return uri != null ? uri : "Match[id: "+id+"|(uri unknown)]";
    }

    /**
     * The fields loaded for a {@link Match} together with the type of
     * their values and whether they are multi valued.
     */
    static enum FieldType {
        id(String.class),
        label(Literal.class, true),
        type(UriRef.class,true),
        redirect(UriRef.class,true),
        ranking(Double.class);

        final Class<?> valueType;
        final boolean multivalued;

        /**
         * Creates a single valued field type
         * @param type the type of the value
         */
        FieldType(Class<?> type){
            this(type,false);
        }

        /**
         * @param type the type of the value(s)
         * @param multivalued if the field is multi valued
         */
        FieldType(Class<?> type, boolean multivalued){
            this.valueType = type;
            this.multivalued = multivalued;
        }

        /**
         * @return the type of the value(s) of this field
         */
        public Class<?> getValueType() {
            return valueType;
        }

        /**
         * @return <code>true</code> if this field is multi valued
         */
        public boolean isMultivalued() {
            return multivalued;
        }
    }

    /**
     * Loads the field values for a Lucene document id.
     */
    static interface FieldLoader {
        /**
         * Loads the field values for the parsed Lucene document id
         * @param id the Lucene document id
         * @return the loaded values by field type
         * @throws IOException on any error while loading the values
         */
        Map<FieldType,Object> load(int id) throws IOException;
    }

    /**
     * Compares {@link Match} instances based on the {@link Match#getScore()}.
     * Higher scores are sorted first.
     */
    public static final Comparator<Match> SCORE_COMPARATOR = new Comparator<Match>() {
        @Override
        public int compare(Match a, Match b) {
            return Double.compare(b.score,a.score); //higher first
        }
    };

    /**
     * Compares {@link Match} instances based on the {@link Match#getRanking()}.
     * Higher rankings are sorted first. <code>null</code> values are assumed
     * to be the smallest and are therefore sorted last.
     */
    public static final Comparator<Match> ENTITY_RANK_COMPARATOR = new Comparator<Match>(){
        @Override
        public int compare(Match arg0, Match arg1) {
            Double r1 = arg0.getRanking();
            Double r2 = arg1.getRanking();
            if(r1 == null){
                return r2 == null ? 0 : 1; //no ranking sorts after any ranking
            }
            if(r2 == null){
                return -1;
            }
            return r2.compareTo(r1); //higher first
        }
    };
}