blob: df213235d04970fdb9a442f860da28fe2d07f16d [file] [log] [blame]
package org.apache.stanbol.enhancer.engine.disambiguation.mlt;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Set;
import java.util.TreeMap;
import org.apache.clerezza.rdf.core.MGraph;
import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
/**
* Collects all data needed for Disambiguation
*
* @author Rupert Westenthaler
* @author Kritarth
*
*/
public class DisambiguationData {

    /**
     * Maps the URI of a fise:EntityAnnotation (key) to the URIs of the
     * fise:TextAnnotations it is a suggestion for (value).
     * <p>
     * When the disambiguation results are written back to the enhancement
     * structure this tells whether a fise:EntityAnnotation is shared by
     * several fise:TextAnnotations and therefore needs to be cloned.
     */
    public Map<UriRef,Set<UriRef>> suggestionMap = new HashMap<UriRef,Set<UriRef>>();

    /**
     * Index of the usable fise:TextAnnotations by the center position of
     * their selected text, computed as <code>(start + end) / 2</code> using
     * integer division. The value is the {@link SavedEntity} extracted for
     * that fise:TextAnnotation.
     * <p>
     * Intended for fast position based lookups of neighbouring
     * TextAnnotations when building disambiguation contexts. As this is a
     * plain map, an entry added for an already present center position
     * replaces the earlier one.
     */
    public NavigableMap<Integer,SavedEntity> directoryTextAnotation = new TreeMap<Integer,SavedEntity>();

    /**
     * The 'fise:selected-text' values of all fise:TextAnnotations. This also
     * covers annotations that are NOT part of {@link #textAnnotations}
     * (those for which no {@link SavedEntity} could be created), as long as
     * they define a selected text.
     */
    public Collection<String> allSelectedTexts = new HashSet<String>();

    /**
     * All fise:TextAnnotations usable for disambiguation: the key is the URI
     * of the annotation, the value the {@link SavedEntity} holding the
     * information extracted for it (including its suggestions).
     */
    public Map<UriRef, SavedEntity> textAnnotations = new HashMap<UriRef,SavedEntity>();

    /**
     * Not publicly instantiable; instances are created by
     * {@link #createFromContentItem(ContentItem)}.
     */
    private DisambiguationData() {}

    /**
     * Builds the disambiguation data from the metadata of the parsed
     * content item.
     * <p>
     * Every resource typed as fise:TextAnnotation is visited. If a
     * {@link SavedEntity} can be created for it, the annotation is added to
     * all lookup structures of the returned instance. Otherwise only its
     * selected text (if present) is remembered in
     * {@link #allSelectedTexts}.
     *
     * @param ci the content item providing the enhancement metadata
     * @return the collected data, never <code>null</code>
     */
    public static DisambiguationData createFromContentItem(ContentItem ci) {
        MGraph metadata = ci.getMetadata();
        DisambiguationData result = new DisambiguationData();
        for (Iterator<Triple> annotations = metadata.filter(null, RDF_TYPE,
                TechnicalClasses.ENHANCER_TEXTANNOTATION); annotations.hasNext();) {
            UriRef annotationUri = (UriRef) annotations.next().getSubject();
            // TODO: rwesten: do we really want to ignore fise:TextAnnotations
            // that link to an other one via dc:relation (typically two
            // TextAnnotations selecting the exact same text)? Currently such
            // annotations are NOT skipped.
            SavedEntity entity = SavedEntity.createFromTextAnnotation(metadata, annotationUri);
            if (entity == null) {
                // Not usable for disambiguation, but its selected text is
                // still needed (regardless of whether it has suggestions).
                String selectedText = EnhancementEngineHelper.getString(
                        metadata, annotationUri, ENHANCER_SELECTED_TEXT);
                if (selectedText != null) {
                    result.allSelectedTexts.add(selectedText);
                }
                continue;
            }
            result.register(annotationUri, entity);
        }
        return result;
    }

    /**
     * Adds a usable fise:TextAnnotation to the position index, the
     * suggestion map, the annotation map and the set of selected texts.
     *
     * @param annotationUri the URI of the fise:TextAnnotation as found in the metadata
     * @param entity the data extracted for that annotation
     */
    private void register(UriRef annotationUri, SavedEntity entity) {
        Integer center = Integer.valueOf((entity.getStart() + entity.getEnd()) / 2);
        directoryTextAnotation.put(center, entity);

        // remember for every suggested fise:EntityAnnotation which
        // fise:TextAnnotations refer to it
        for (Suggestion suggestion : entity.getSuggestions()) {
            UriRef entityAnnotation = suggestion.getEntityAnnotation();
            Set<UriRef> linkedTextAnnotations = suggestionMap.get(entityAnnotation);
            if (linkedTextAnnotations == null) {
                linkedTextAnnotations = new HashSet<UriRef>();
                suggestionMap.put(entityAnnotation, linkedTextAnnotations);
            }
            linkedTextAnnotations.add(entity.getUri());
        }

        // the SavedEntity itself carries the list of suggestions
        textAnnotations.put(annotationUri, entity);
        allSelectedTexts.add(entity.getName());
    }
}