| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.stanbol.enhancer.engines.entitylinking.impl; |
| |
| import java.util.ArrayList; |
| import java.util.Collection; |
| import java.util.Collections; |
| import java.util.List; |
| import java.util.Set; |
| |
| import org.apache.clerezza.commons.rdf.IRI; |
| import org.apache.stanbol.enhancer.nlp.model.Section; |
| import org.apache.stanbol.enhancer.nlp.model.Token; |
| |
| /** |
| * The occurrence of a detected Entity within the content. <p> |
| * Note that this class already stores the information in the structure needed |
| * to write Enhancements as defined by the upcoming 2nd version of the |
| * Apache Stanbol Enhancement Structure (EntityAnnotation, TextOccurrence and |
| * EntitySuggestion). However, it can also be used to write |
| * TextAnnotations and EntityAnnotations as defined by the 1st version. |
| * @author Rupert Westenthaler |
| * |
| */ |
| public class LinkedEntity { |
| /** |
| * An mention of an linked entity within the text |
| * @author Rupert Westenthaler |
| * |
| */ |
| public final class Occurrence { |
| /** |
| * The maximum number of chars until that the current sentence is used |
| * as context for TextOcccurrences. If the sentence is longer a area of |
| * {@link #CONTEXT_TOKEN_COUNT} before and after the current selected |
| * text is used as context.<p> |
| * This is especially important in case no sentence detector is available |
| * for the current content. Because in this case the whole text is |
| * parsed as a single Sentence. |
| * TODO: Maybe find a more clever way to determine the context |
| */ |
| public static final int MAX_CONTEXT_LENGTH = 200; |
| /** |
| * The number of tokens surrounding the current selected text used to |
| * calculate the context if the current sentence is longer than |
| * {@link #MAX_CONTEXT_LENGTH} chars.<p> |
| * This is especially important in case no sentence detector is available |
| * for the current content. Because in this case the whole text is |
| * parsed as a single Sentence. |
| * TODO: Maybe find a more clever way to determine the context |
| */ |
| public static final int CONTEXT_TOKEN_COUNT = 5; |
| private final int start; |
| private final int end; |
| private final String context; |
| |
| private Occurrence(Section sentence,Token token) { |
| this(sentence,token,token); |
| } |
| private Occurrence(Section sentence,Token start,Token end){ |
| this.start = start.getStart(); |
| this.end = end.getEnd(); |
| String context = sentence.getSpan(); |
| if(context.length() > MAX_CONTEXT_LENGTH){ |
| context = start.getContext().getSpan().substring( |
| Math.max(0, this.start-CONTEXT_TOKEN_COUNT), |
| Math.min(this.end+CONTEXT_TOKEN_COUNT, start.getContext().getEnd())-1); |
| } |
| this.context = context; |
| } |
| /** |
| * The context (surrounding text) of the occurrence. |
| * @return |
| */ |
| public String getContext() { |
| return context; |
| } |
| /** |
| * The start index of the occurrence |
| * @return the start index relative to the start of the text |
| */ |
| public int getStart() { |
| return start; |
| } |
| /** |
| * the end index of the occurrence |
| * @return the end index relative to the start of the text |
| */ |
| public int getEnd() { |
| return end; |
| } |
| /** |
| * The selected text of this occurrence. Actually returns the value |
| * of {@link LinkedEntity#getSelectedText()}, because th |
| * @return |
| */ |
| public String getSelectedText(){ |
| return LinkedEntity.this.getSelectedText(); |
| } |
| @Override |
| public String toString() { |
| return start+","+end; |
| } |
| @Override |
| public int hashCode() { |
| return context.hashCode()+start+end; |
| } |
| @Override |
| public boolean equals(Object arg0) { |
| return arg0 instanceof Occurrence && |
| ((Occurrence)arg0).start == start && |
| ((Occurrence)arg0).end == end && |
| ((Occurrence)arg0).context.equals(context); |
| } |
| } |
| private final String selectedText; |
| private final Set<IRI> types; |
| private final List<Suggestion> suggestions; |
| private final Collection<Occurrence> occurrences = new ArrayList<Occurrence>(); |
| private final Collection<Occurrence> unmodOccurrences = Collections.unmodifiableCollection(occurrences); |
| /** |
| * Creates a new LinkedEntity for the parsed parameters |
| * @param selectedText the selected text |
| * @param suggestions the entity suggestions |
| * @param types the types of the linked entity. |
| */ |
| protected LinkedEntity(String selectedText, List<Suggestion> suggestions, Set<IRI> types) { |
| this.suggestions = Collections.unmodifiableList(suggestions); |
| this.selectedText = selectedText; |
| this.types = Collections.unmodifiableSet(types); |
| } |
| /** |
| * Creates a new Linked Entity including the first {@link Occurrence} |
| * @param section the sentence (context) for the occurrence. |
| * @param startToken the index of the start token |
| * @param tokenSpan the number of token included in this span |
| * @param suggestions the entity suggestions |
| * @param types the types of the linked entity. |
| */ |
| protected LinkedEntity(Section section,Token startToken,Token endToken, |
| List<Suggestion> suggestions, Set<IRI> types) { |
| this(startToken.getSpan().substring(startToken.getStart(), endToken.getEnd()), |
| suggestions,types); |
| addOccurrence(section, startToken,endToken); |
| } |
| /** |
| * Getter for the selected text |
| * @return the selected text |
| */ |
| public String getSelectedText() { |
| return selectedText; |
| } |
| |
| /** |
| * Getter for read only list of types |
| * @return the types |
| */ |
| public Set<IRI> getTypes() { |
| return types; |
| } |
| /** |
| * Adds an new Occurrence |
| * @param sentence the analysed sentence |
| * @param startToken the start token |
| * @param tokenSpan the number of tokens included in this span |
| * @return the new Occurrence also added to {@link #getOccurrences()} |
| */ |
| protected Occurrence addOccurrence(Section section,Token startToken,Token tokenSpan){ |
| Occurrence o = new Occurrence(section, startToken, tokenSpan); |
| occurrences.add(o); |
| return o; |
| } |
| /** |
| * Getter for the read only list of Occurrences |
| * @return the occurrences |
| */ |
| public Collection<Occurrence> getOccurrences(){ |
| return unmodOccurrences; |
| } |
| /** |
| * Getter for the read only list of Suggestions |
| * @return the suggestions |
| */ |
| public List<Suggestion> getSuggestions(){ |
| return suggestions; |
| } |
| |
| /** |
| * Getter for the Score |
| * @return The score of the first element in {@link #getSuggestions()} or |
| * <code>0</code> if there are no suggestions |
| */ |
| public double getScore(){ |
| return suggestions.isEmpty() ? 0f : suggestions.get(0).getScore(); |
| } |
| |
| /** |
| * Only considers the {@link #getSelectedText()}, because it is assumed that |
| * for the same selected text there MUST BE always the same suggestions with |
| * the same types and occurrences. |
| */ |
| @Override |
| public int hashCode() { |
| return selectedText.hashCode(); |
| } |
| /** |
| * Only considers the {@link #getSelectedText()}, because it is assumed that |
| * for the same selected text there MUST BE always the same suggestions with |
| * the same types and occurrences. |
| */ |
| @Override |
| public boolean equals(Object arg0) { |
| return arg0 instanceof LinkedEntity && |
| ((LinkedEntity)arg0).selectedText.equals(selectedText); |
| } |
| @Override |
| public String toString() { |
| return selectedText+'@'+occurrences+"->"+suggestions; |
| } |
| } |