| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.stanbol.enhancer.engines.lucenefstlinking; |
| |
| import java.io.IOException; |
| import java.util.Collection; |
| import java.util.Collections; |
| import java.util.Comparator; |
| import java.util.Map; |
| |
| import org.apache.clerezza.rdf.core.Literal; |
| import org.apache.clerezza.rdf.core.UriRef; |
| import org.apache.stanbol.enhancer.engines.entitylinking.impl.Suggestion; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| /** |
| * Represents a Entity that Matches somewhere in the tagged text. |
| * <p> |
| * Matches are generated for {@link #id Lucene Document IDs} and |
| * {@link #uri Solr Document ids} (the URI of the matching entity). On the |
| * first access to the {@link #getLabels() labels}, {@link #getTypes() types} |
| * or {@link #getRedirects()} all those information are lazily retrieved by |
| * accessing the data stored in the index. The {@link FieldLoader} instance |
| * parsed in the constructor is used to load those information. |
| * Typically this is implemented by the {@link MatchPool} instance used to |
| * instantiate Match instances. |
| * |
| * |
| * @author Rupert Westenthaler |
| * |
| */ |
| public class Match { |
| |
| private static final Logger log = LoggerFactory.getLogger(Match.class); |
| |
| /** |
| * Lucene document id |
| */ |
| public final int id; |
| |
| private FieldLoader fieldLoader; |
| |
| private Map<FieldType,Object> values; |
| private boolean error = false; |
| |
| private Literal matchLabel; |
| /** |
| * The score of the Match |
| */ |
| private double score; |
| |
| Match(int id, FieldLoader fieldLoader){ |
| this.id = id; |
| this.fieldLoader = fieldLoader; |
| } |
| |
| public String getUri() { |
| return getValue(FieldType.id); |
| } |
| |
| public Collection<Literal> getLabels(){ |
| return getValues(FieldType.label); |
| } |
| |
| public Collection<UriRef> getTypes(){ |
| return getValues(FieldType.type); |
| } |
| |
| public Collection<UriRef> getRedirects(){ |
| return getValues(FieldType.redirect); |
| } |
| public Double getRanking(){ |
| return getValue(FieldType.ranking); |
| } |
| private <T> Collection<T> getValues(FieldType type){ |
| if(!type.isMultivalued()){ |
| throw new IllegalArgumentException("The parsed field Type '" + type |
| + "' is not multi valued!"); |
| } |
| Object value = getValue(type); |
| return value == null ? Collections.EMPTY_SET : (Collection<T>)value; |
| } |
| private <T> T getValue(FieldType type){ |
| if(error){ |
| return null; |
| } else if(values == null){ |
| try { |
| values = fieldLoader.load(id); |
| } catch (IOException e) { |
| log.warn("Unable to load Entity for Lucene DocId '"+id+"'!",e); |
| error = true; |
| return null; |
| } catch (RuntimeException e) { |
| log.warn("Error while loading Entity for Lucene DocId '"+id+"'!",e); |
| error = true; |
| return null; |
| } |
| } |
| return (T) values.get(type); |
| } |
| |
| public void setMatch(double score, Literal matchLabel){ |
| this.score = score; |
| this.matchLabel = matchLabel; |
| } |
| /** |
| * Allows to update the {@link #getScore() score} without changing the |
| * {@link #getMatchLabel() match}. |
| * @param score the new score |
| */ |
| public void updateScore(double score) { |
| this.score = score; |
| } |
| public double getScore() { |
| return score; |
| } |
| |
| public Literal getMatchLabel() { |
| return matchLabel; |
| } |
| |
| @Override |
| public int hashCode() { |
| return id; |
| } |
| |
| @Override |
| public boolean equals(Object o) { |
| return o instanceof Match && id == ((Match)o).id; |
| } |
| |
| @Override |
| public String toString() { |
| String uri = getUri(); |
| return uri != null ? uri : "Match[id: "+id+"|(uri unknown)]"; |
| } |
| |
| static enum FieldType { |
| id(String.class), |
| label(Literal.class, true), |
| type(UriRef.class,true), |
| redirect(UriRef.class,true), |
| ranking(Double.class); |
| |
| Class<?> valueType; |
| boolean multivalued; |
| |
| FieldType(Class<?> type){ |
| this(type,false); |
| } |
| FieldType(Class<?> type, boolean multivalued){ |
| this.valueType = type; |
| this.multivalued = multivalued; |
| } |
| public Class<?> getValueType() { |
| return valueType; |
| } |
| public boolean isMultivalued() { |
| return multivalued; |
| } |
| } |
| |
| static interface FieldLoader { |
| Map<FieldType,Object> load(int id) throws IOException; |
| } |
| /** |
| * Compares {@link Match} instances based on the {@link Match#getScore()} |
| */ |
| public static final Comparator<Match> SCORE_COMPARATOR = new Comparator<Match>() { |
| |
| @Override |
| public int compare(Match a, Match b) { |
| return Double.compare(b.score,a.score); //higher first |
| } |
| |
| }; |
| /** |
| * Compares {@link Match} instances based on the {@link Match#getRanking()}. |
| * <code>null</code> values are assumed to be the smallest. |
| */ |
| public static final Comparator<Match> ENTITY_RANK_COMPARATOR = new Comparator<Match>(){ |
| @Override |
| public int compare(Match arg0, Match arg1) { |
| Double r1 = arg0.getRanking(); |
| Double r2 = arg1.getRanking(); |
| return r2 == null ? r1 == null ? 0 : -1 : r1 == null ? 1 : r2.compareTo(r1); |
| } |
| }; |
| |
| } |