enhancement-engines/entitylinking/engine/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/LinkedEntity.java - stanbol - Git at Google

 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.stanbol.enhancer.engines.entitylinking.impl;

 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 import java.util.Set;

 import org.apache.clerezza.commons.rdf.IRI;
 import org.apache.stanbol.enhancer.nlp.model.Section;
 import org.apache.stanbol.enhancer.nlp.model.Token;

 /**
  * The occurrence of an detected Entity within the content. <p>
  * Note that this class already stores the information in a structure as needed
  * to write Enhancements as defined by the upcoming 2nd version of the
  * Apache Stanbol Enhancement Structure (EntityAnnotation, TextOccurrence and
  * EntitySuggestion). However it can also be used to write
  * TextAnnotations and EntityAnnotations as defined by the 1st version
  * @author Rupert Westenthaler
  *
  */
 public class LinkedEntity {
     /**
      * An mention of an linked entity within the text
      * @author Rupert Westenthaler
      *
      */
     public final class Occurrence {
         /**
          * The maximum number of chars until that the current sentence is used
          * as context for TextOcccurrences. If the sentence is longer a area of
          * {@link #CONTEXT_TOKEN_COUNT} before and after the current selected
          * text is used as context.<p>
          * This is especially important in case no sentence detector is available
          * for the current content. Because in this case the whole text is
          * parsed as a single Sentence.
          * TODO: Maybe find a more clever way to determine the context
          */
         public static final int MAX_CONTEXT_LENGTH = 200;
         /**
          * The number of tokens surrounding the current selected text used to
          * calculate the context if the current sentence is longer than
          * {@link #MAX_CONTEXT_LENGTH} chars.<p>
          * This is especially important in case no sentence detector is available
          * for the current content. Because in this case the whole text is
          * parsed as a single Sentence.
          * TODO: Maybe find a more clever way to determine the context
          */
         public static final int CONTEXT_TOKEN_COUNT = 5;
         private final int start;
         private final int end;
         private final String context;

         private Occurrence(Section sentence,Token token) {
             this(sentence,token,token);
         }
         private Occurrence(Section sentence,Token start,Token end){
             this.start = start.getStart();
             this.end = end.getEnd();
             String context = sentence.getSpan();
             if(context.length() > MAX_CONTEXT_LENGTH){
                 context = start.getContext().getSpan().substring(
                     Math.max(0, this.start-CONTEXT_TOKEN_COUNT),
                     Math.min(this.end+CONTEXT_TOKEN_COUNT, start.getContext().getEnd())-1);
             }
             this.context = context;
         }
         /**
          * The context (surrounding text) of the occurrence.
          * @return
          */
         public String getContext() {
             return context;
         }
         /**
          * The start index of the occurrence
          * @return the start index relative to the start of the text
          */
         public int getStart() {
             return start;
         }
         /**
          * the end index of the occurrence
          * @return the end index relative to the start of the text
          */
         public int getEnd() {
             return end;
         }
         /**
          * The selected text of this occurrence. Actually returns the value
          * of {@link LinkedEntity#getSelectedText()}, because th
          * @return
          */
         public String getSelectedText(){
             return LinkedEntity.this.getSelectedText();
         }
         @Override
         public String toString() {
             return start+","+end;
         }
         @Override
         public int hashCode() {
             return context.hashCode()+start+end;
         }
         @Override
         public boolean equals(Object arg0) {
             return arg0 instanceof Occurrence &&
                 ((Occurrence)arg0).start == start &&
                 ((Occurrence)arg0).end == end &&
                 ((Occurrence)arg0).context.equals(context);
         }
     }
     private final String selectedText;
     private final Set<IRI> types;
     private final List<Suggestion> suggestions;
     private final Collection<Occurrence> occurrences = new ArrayList<Occurrence>();
     private final Collection<Occurrence> unmodOccurrences = Collections.unmodifiableCollection(occurrences);
     /**
      * Creates a new LinkedEntity for the parsed parameters
      * @param selectedText the selected text
      * @param suggestions the entity suggestions
      * @param types the types of the linked entity.
      */
     protected LinkedEntity(String selectedText, List<Suggestion> suggestions, Set<IRI> types) {
         this.suggestions = Collections.unmodifiableList(suggestions);
         this.selectedText = selectedText;
         this.types = Collections.unmodifiableSet(types);
     }
    /**
      * Creates a new Linked Entity including the first {@link Occurrence}
      * @param section the sentence (context) for the occurrence.
      * @param startToken the index of the start token
      * @param tokenSpan the number of token included in this span
      * @param suggestions the entity suggestions
      * @param types the types of the linked entity.
      */
     protected LinkedEntity(Section section,Token startToken,Token endToken,
                            List<Suggestion> suggestions, Set<IRI> types) {
         this(startToken.getSpan().substring(startToken.getStart(), endToken.getEnd()),
             suggestions,types);
         addOccurrence(section, startToken,endToken);
     }
     /**
      * Getter for the selected text
      * @return the selected text
      */
     public String getSelectedText() {
         return selectedText;
     }

     /**
      * Getter for read only list of types
      * @return the types
      */
     public Set<IRI> getTypes() {
         return types;
     }
     /**
      * Adds an new Occurrence
      * @param sentence the analysed sentence
      * @param startToken the start token
      * @param tokenSpan the number of tokens included in this span
      * @return the new Occurrence also added to {@link #getOccurrences()}
      */
     protected Occurrence addOccurrence(Section section,Token startToken,Token tokenSpan){
         Occurrence o = new Occurrence(section, startToken, tokenSpan);
         occurrences.add(o);
         return o;
     }
     /**
      * Getter for the read only list of Occurrences
      * @return the occurrences
      */
     public Collection<Occurrence> getOccurrences(){
         return unmodOccurrences;
     }
     /**
      * Getter for the read only list of Suggestions
      * @return the suggestions
      */
     public List<Suggestion> getSuggestions(){
         return suggestions;
     }

     /**
      * Getter for the Score
      * @return The score of the first element in {@link #getSuggestions()} or
      * <code>0</code> if there are no suggestions
      */
     public double getScore(){
         return suggestions.isEmpty() ? 0f : suggestions.get(0).getScore();
     }

     /**
      * Only considers the {@link #getSelectedText()}, because it is assumed that
      * for the same selected text there MUST BE always the same suggestions with
      * the same types and occurrences.
      */
     @Override
     public int hashCode() {
         return selectedText.hashCode();
     }
     /**
      * Only considers the {@link #getSelectedText()}, because it is assumed that
      * for the same selected text there MUST BE always the same suggestions with
      * the same types and occurrences.
      */
     @Override
     public boolean equals(Object arg0) {
         return arg0 instanceof LinkedEntity &&
         ((LinkedEntity)arg0).selectedText.equals(selectedText);
     }
     @Override
     public String toString() {
         return selectedText+'@'+occurrences+"->"+suggestions;
     }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.stanbol.enhancer.engines.entitylinking.impl;

	import java.util.ArrayList;
	import java.util.Collection;
	import java.util.Collections;
	import java.util.List;
	import java.util.Set;

	import org.apache.clerezza.commons.rdf.IRI;
	import org.apache.stanbol.enhancer.nlp.model.Section;
	import org.apache.stanbol.enhancer.nlp.model.Token;

	/**
	* The occurrence of an detected Entity within the content. <p>
	* Note that this class already stores the information in a structure as needed
	* to write Enhancements as defined by the upcoming 2nd version of the
	* Apache Stanbol Enhancement Structure (EntityAnnotation, TextOccurrence and
	* EntitySuggestion). However it can also be used to write
	* TextAnnotations and EntityAnnotations as defined by the 1st version
	* @author Rupert Westenthaler
	*
	*/
	public class LinkedEntity {
	/**
	* An mention of an linked entity within the text
	* @author Rupert Westenthaler
	*
	*/
	public final class Occurrence {
	/**
	* The maximum number of chars until that the current sentence is used
	* as context for TextOcccurrences. If the sentence is longer a area of
	* {@link #CONTEXT_TOKEN_COUNT} before and after the current selected
	* text is used as context.<p>
	* This is especially important in case no sentence detector is available
	* for the current content. Because in this case the whole text is
	* parsed as a single Sentence.
	* TODO: Maybe find a more clever way to determine the context
	*/
	public static final int MAX_CONTEXT_LENGTH = 200;
	/**
	* The number of tokens surrounding the current selected text used to
	* calculate the context if the current sentence is longer than
	* {@link #MAX_CONTEXT_LENGTH} chars.<p>
	* This is especially important in case no sentence detector is available
	* for the current content. Because in this case the whole text is
	* parsed as a single Sentence.
	* TODO: Maybe find a more clever way to determine the context
	*/
	public static final int CONTEXT_TOKEN_COUNT = 5;
	private final int start;
	private final int end;
	private final String context;

	private Occurrence(Section sentence,Token token) {
	this(sentence,token,token);
	}
	private Occurrence(Section sentence,Token start,Token end){
	this.start = start.getStart();
	this.end = end.getEnd();
	String context = sentence.getSpan();
	if(context.length() > MAX_CONTEXT_LENGTH){
	context = start.getContext().getSpan().substring(
	Math.max(0, this.start-CONTEXT_TOKEN_COUNT),
	Math.min(this.end+CONTEXT_TOKEN_COUNT, start.getContext().getEnd())-1);
	}
	this.context = context;
	}
	/**
	* The context (surrounding text) of the occurrence.
	* @return
	*/
	public String getContext() {
	return context;
	}
	/**
	* The start index of the occurrence
	* @return the start index relative to the start of the text
	*/
	public int getStart() {
	return start;
	}
	/**
	* the end index of the occurrence
	* @return the end index relative to the start of the text
	*/
	public int getEnd() {
	return end;
	}
	/**
	* The selected text of this occurrence. Actually returns the value
	* of {@link LinkedEntity#getSelectedText()}, because th
	* @return
	*/
	public String getSelectedText(){
	return LinkedEntity.this.getSelectedText();
	}
	@Override
	public String toString() {
	return start+","+end;
	}
	@Override
	public int hashCode() {
	return context.hashCode()+start+end;
	}
	@Override
	public boolean equals(Object arg0) {
	return arg0 instanceof Occurrence &&
	((Occurrence)arg0).start == start &&
	((Occurrence)arg0).end == end &&
	((Occurrence)arg0).context.equals(context);
	}
	}
	private final String selectedText;
	private final Set<IRI> types;
	private final List<Suggestion> suggestions;
	private final Collection<Occurrence> occurrences = new ArrayList<Occurrence>();
	private final Collection<Occurrence> unmodOccurrences = Collections.unmodifiableCollection(occurrences);
	/**
	* Creates a new LinkedEntity for the parsed parameters
	* @param selectedText the selected text
	* @param suggestions the entity suggestions
	* @param types the types of the linked entity.
	*/
	protected LinkedEntity(String selectedText, List<Suggestion> suggestions, Set<IRI> types) {
	this.suggestions = Collections.unmodifiableList(suggestions);
	this.selectedText = selectedText;
	this.types = Collections.unmodifiableSet(types);
	}
	/**
	* Creates a new Linked Entity including the first {@link Occurrence}
	* @param section the sentence (context) for the occurrence.
	* @param startToken the index of the start token
	* @param tokenSpan the number of token included in this span
	* @param suggestions the entity suggestions
	* @param types the types of the linked entity.
	*/
	protected LinkedEntity(Section section,Token startToken,Token endToken,
	List<Suggestion> suggestions, Set<IRI> types) {
	this(startToken.getSpan().substring(startToken.getStart(), endToken.getEnd()),
	suggestions,types);
	addOccurrence(section, startToken,endToken);
	}
	/**
	* Getter for the selected text
	* @return the selected text
	*/
	public String getSelectedText() {
	return selectedText;
	}

	/**
	* Getter for read only list of types
	* @return the types
	*/
	public Set<IRI> getTypes() {
	return types;
	}
	/**
	* Adds an new Occurrence
	* @param sentence the analysed sentence
	* @param startToken the start token
	* @param tokenSpan the number of tokens included in this span
	* @return the new Occurrence also added to {@link #getOccurrences()}
	*/
	protected Occurrence addOccurrence(Section section,Token startToken,Token tokenSpan){
	Occurrence o = new Occurrence(section, startToken, tokenSpan);
	occurrences.add(o);
	return o;
	}
	/**
	* Getter for the read only list of Occurrences
	* @return the occurrences
	*/
	public Collection<Occurrence> getOccurrences(){
	return unmodOccurrences;
	}
	/**
	* Getter for the read only list of Suggestions
	* @return the suggestions
	*/
	public List<Suggestion> getSuggestions(){
	return suggestions;
	}

	/**
	* Getter for the Score
	* @return The score of the first element in {@link #getSuggestions()} or
	* <code>0</code> if there are no suggestions
	*/
	public double getScore(){
	return suggestions.isEmpty() ? 0f : suggestions.get(0).getScore();
	}

	/**
	* Only considers the {@link #getSelectedText()}, because it is assumed that
	* for the same selected text there MUST BE always the same suggestions with
	* the same types and occurrences.
	*/
	@Override
	public int hashCode() {
	return selectedText.hashCode();
	}
	/**
	* Only considers the {@link #getSelectedText()}, because it is assumed that
	* for the same selected text there MUST BE always the same suggestions with
	* the same types and occurrences.
	*/
	@Override
	public boolean equals(Object arg0) {
	return arg0 instanceof LinkedEntity &&
	((LinkedEntity)arg0).selectedText.equals(selectedText);
	}
	@Override
	public String toString() {
	return selectedText+'@'+occurrences+"->"+suggestions;
	}
	}