| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.stanbol.enhancer.engines.lucenefstlinking; |
| |
| import static org.apache.stanbol.enhancer.engines.entitylinking.impl.Suggestion.ENTITY_RANK_COMPARATOR; |
| import static org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getAnalysedText; |
| import static org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getLanguage; |
| import static org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.getSelectionContext; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_CONTRIBUTOR; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION; |
| |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.Collection; |
| import java.util.Collections; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| import java.util.List; |
| import java.util.Locale; |
| import java.util.Map; |
| import java.util.NavigableMap; |
| import java.util.Set; |
| import java.util.TreeMap; |
| import java.util.Map.Entry; |
| |
| import org.apache.clerezza.rdf.core.Language; |
| import org.apache.clerezza.rdf.core.Literal; |
| import org.apache.clerezza.rdf.core.LiteralFactory; |
| import org.apache.clerezza.rdf.core.MGraph; |
| import org.apache.clerezza.rdf.core.PlainLiteral; |
| import org.apache.clerezza.rdf.core.Resource; |
| import org.apache.clerezza.rdf.core.Triple; |
| import org.apache.clerezza.rdf.core.UriRef; |
| import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl; |
| import org.apache.clerezza.rdf.core.impl.TripleImpl; |
| import org.apache.commons.io.input.CharSequenceReader; |
| import org.apache.commons.lang.StringUtils; |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.util.IntsRef; |
| import org.apache.lucene.util.OpenBitSet; |
| import org.apache.solr.core.SolrCore; |
| import org.apache.stanbol.enhancer.engines.entitylinking.Entity; |
| import org.apache.stanbol.enhancer.engines.entitylinking.EntitySearcher; |
| import org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig; |
| import org.apache.stanbol.enhancer.engines.entitylinking.config.TextProcessingConfig; |
| import org.apache.stanbol.enhancer.engines.entitylinking.engine.EntityLinkingEngine; |
| import org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity; |
| import org.apache.stanbol.enhancer.engines.entitylinking.impl.Suggestion; |
| import org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity.Occurrence; |
| import org.apache.stanbol.enhancer.engines.lucenefstlinking.TaggingSession.Corpus; |
| import org.apache.stanbol.enhancer.nlp.model.AnalysedText; |
| import org.apache.stanbol.enhancer.servicesapi.ContentItem; |
| import org.apache.stanbol.enhancer.servicesapi.EngineException; |
| import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine; |
| import org.apache.stanbol.enhancer.servicesapi.ServiceProperties; |
| import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper; |
| import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum; |
| import org.apache.stanbol.enhancer.servicesapi.rdf.Properties; |
| import org.opensextant.solrtexttagger.TagClusterReducer; |
| import org.opensextant.solrtexttagger.Tagger; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
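| /** |
| * {@link EnhancementEngine} that uses Lucene FST corpora (via the SolrTextTagger |
| * {@link Tagger}) to tag mentions of Entities in the {@link AnalysedText} of a |
| * {@link ContentItem} and to link them against the Entities stored in the |
| * configured {@link SolrCore}. Matched mentions are written as |
| * fise:TextAnnotation and fise:EntityAnnotation enhancements. |
| * <p> |
| * Construction sketch (illustrative only; the parsed configuration objects are |
| * assumed to be created by the component registering this engine): |
| * <pre>{@code |
| * FstLinkingEngine engine = new FstLinkingEngine("fst-linking", |
| *     indexConfig, textProcessingConfig, entityLinkerConfig); |
| * }</pre> |
| */ |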
| public class FstLinkingEngine implements EnhancementEngine, ServiceProperties { |
| |
| private final Logger log = LoggerFactory.getLogger(FstLinkingEngine.class); |
| |
| /** |
| * Uses the same {@link ServiceProperties#ENHANCEMENT_ENGINE_ORDERING} as the |
| * {@link EntityLinkingEngine#DEFAULT_ORDER}. |
| */ |
| public static final Integer ENGINE_ORDERING = EntityLinkingEngine.DEFAULT_ORDER; |
| private static final Map<String,Object> SERVICE_PROPERTIES = Collections.unmodifiableMap(Collections |
| .singletonMap(ServiceProperties.ENHANCEMENT_ENGINE_ORDERING, (Object) ENGINE_ORDERING)); |
| |
| private final LiteralFactory literalFactory = LiteralFactory.getInstance(); |
| |
| protected final String name; |
| |
| protected final TextProcessingConfig tpConfig; |
| protected final EntityLinkerConfig elConfig; |
| |
| private IndexConfiguration indexConfig; |
| |
| |
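| /** |
| * Creates a new FST linking engine with the parsed configuration. |
| * @param name the name of the engine (MUST NOT be NULL nor blank) |
| * @param indexConfig the index configuration providing the FST corpora |
| * @param tpConfig the text processing configuration |
| * @param elConfig the entity linking configuration |
| * @throws IllegalArgumentException if any of the parsed parameters is NULL |
| * (or blank in the case of the name) |
| */ |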
| public FstLinkingEngine(String name, IndexConfiguration indexConfig, |
| TextProcessingConfig tpConfig, EntityLinkerConfig elConfig) { |
| if (StringUtils.isBlank(name)) { |
| throw new IllegalArgumentException("The parsed name MUST NOT be NULL nor blank!"); |
| } |
| this.name = name; |
| if (indexConfig == null) { |
| throw new IllegalArgumentException("The parsed IndexConfiguration MUST NOT be NULL!"); |
| } |
| this.indexConfig = indexConfig; |
| if (tpConfig == null) { |
| throw new IllegalArgumentException("The parsed Text Processing configuration MUST NOT be NULL"); |
| } |
| this.tpConfig = tpConfig; |
| if (elConfig == null) { |
| throw new IllegalArgumentException("The parsed Entity Linking configuration MUST NOT be NULL"); |
| } |
| this.elConfig = elConfig; |
| } |
| |
| @Override |
| public String getName() { |
| return name; |
| } |
| |
| @Override |
| public Map<String,Object> getServiceProperties() { |
| return SERVICE_PROPERTIES; |
| } |
| |
| @Override |
| public int canEnhance(ContentItem ci) throws EngineException { |
| log.trace("canEnhance {}", ci.getUri()); |
| String language = getLanguage(this, ci, false); |
| if (language == null || !indexConfig.getFstConfig().isLanguage(language)) { |
| log.debug("Engine {} ignores ContentItem {} because language {} is not configured.", |
| new Object[] {getName(), ci.getUri(), language}); |
| return CANNOT_ENHANCE; |
| } |
| // we need a detected language and the AnalysedText content part |
| // with Tokens |
| AnalysedText at = getAnalysedText(this, ci, false); |
| return at != null && at.getTokens().hasNext() ? ENHANCE_ASYNC : CANNOT_ENHANCE; |
| } |
| |
| @Override |
| public void computeEnhancements(ContentItem ci) throws EngineException { |
| AnalysedText at = getAnalysedText(this, ci, true); |
| log.debug(" > AnalysedText {}", at); |
| String language = getLanguage(this, ci, true); |
| log.debug(" > Language {}", language); |
| if (log.isDebugEnabled()) { |
| log.debug("computeEnhancements for ContentItem {} language {} text={}", new Object[] { |
| ci.getUri().getUnicodeString(), language, StringUtils.abbreviate(at.getSpan(), 100)}); |
| } |
| // TODO: we need to do the same for the default matching language |
| TaggingSession session; |
| try { |
| session = TaggingSession.createSession(indexConfig, language); |
| } catch (CorpusException e) { |
| throw new EngineException(this, ci, e); |
| } |
| long taggingStart = System.currentTimeMillis(); |
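| //tags are keyed by their [start,end] character offsets; Tag.SPAN_COMPARATOR |
| //keeps them ordered by their position within the text |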
| final NavigableMap<int[],Tag> tags = new TreeMap<int[],Tag>(Tag.SPAN_COMPARATOR); |
| try { |
| //process the language of the document |
| Corpus corpus = null; |
| if(session.getLanguageCorpus() != null){ |
| corpus = session.getLanguageCorpus(); |
| long t = System.currentTimeMillis(); |
| int d = tag(at, session,corpus,tags); |
| log.info(" - {}: fst: {}ms (callback: {}ms)", new Object[]{ |
| corpus.getIndexedField(), System.currentTimeMillis()-t, d |
| }); |
| } |
| if(session.getDefaultCorpus() != null){ |
| if(corpus == null){ |
| corpus = session.getDefaultCorpus(); |
| } |
| long t = System.currentTimeMillis(); |
| int d = tag(at, session, session.getDefaultCorpus(),tags); |
| log.info(" - {}: fst: {}ms (callback: {}ms)",new Object[]{ |
| session.getDefaultCorpus().getIndexedField(), |
| System.currentTimeMillis()-t, d}); |
| } |
| long taggingEnd = System.currentTimeMillis(); |
| if(corpus == null){ |
| throw new EngineException(this,ci,"No FST corpus found to process contentItem " |
| + "language '"+session.getLanguage()+"'!",null); |
| } else { |
| if(session.getLanguageCorpus() != null && session.getDefaultCorpus() != null){ |
| log.info(" - sum fst: {} ms", taggingEnd - taggingStart); |
| } |
| } |
| int matches = match(at,tags.values()); |
| log.debug(" - loaded {} ({} loaded, {} cached, {} appended) Matches in {} ms", |
| new Object[]{matches, session.getSessionDocLoaded(), |
| session.getSessionDocCached(), session.getSessionDocAppended(), |
| System.currentTimeMillis()-taggingEnd}); |
| if(log.isDebugEnabled() && session.getDocumentCache() != null){ |
| log.debug("EntityCache Statistics: {}", |
| session.getDocumentCache().printStatistics()); |
| } |
| } catch (IOException e) { |
| throw new EngineException(this,ci,e); |
| } finally { |
| session.close(); |
| } |
| if(log.isTraceEnabled()){ |
| log.trace("Tagged Entities:"); |
| for(Tag tag : tags.values()){ |
| log.trace("[{},{}]: {}", new Object[]{tag.getStart(),tag.getEnd(),tag.getMatches()}); |
| } |
| } |
| ci.getLock().writeLock().lock(); |
| try { |
| writeEnhancements(ci,at.getSpan(),tags.values(),language); |
| } finally { |
| ci.getLock().writeLock().unlock(); |
| } |
| tags.clear(); //help the GC |
| } |
| |
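| /** |
| * Converts the {@link Tag}s extracted by the FST tagger to {@link Match} |
| * suggestions: filters Entities based on their types, scores the remaining ones |
| * by comparing their labels with the anchor text (Levenshtein distance) and keeps |
| * at most {@link EntityLinkerConfig#getMaxSuggestions()} suggestions per Tag. |
| * Tags without any remaining suggestion are removed. |
| * @param at the AnalysedText of the processed content |
| * @param tags the Tags to process |
| * @return the number of processed matches |
| */ |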
| private int match(AnalysedText at, Collection<Tag> tags) { |
| log.trace(" ... process matches for {} extracted Tags:",tags.size()); |
| int matchCount = 0; |
| String text = at.getSpan(); |
| Iterator<Tag> tagIt = tags.iterator(); |
| while(tagIt.hasNext()){ |
| Tag tag = tagIt.next(); |
| String anchor = text.substring(tag.getStart(), tag.getEnd()); |
| log.trace(" {}: '{}'", tag, anchor); |
| tag.setAnchor(anchor); |
| if(!elConfig.isCaseSensitiveMatching()){ |
| anchor = anchor.toLowerCase(Locale.ROOT); |
| } |
| |
| int alength = anchor.length(); |
| List<Match> suggestions = new ArrayList<Match>(tag.getMatches().size()); |
| int i=1; //only for trace level debugging |
| for(Match match : tag.getMatches()){ |
| if(log.isTraceEnabled()){ |
| log.trace(" {}. {}", i++, match.getUri()); |
| } |
| matchCount++; |
| if(!filterEntityByType(match.getTypes().iterator())){ |
| int distance = Integer.MAX_VALUE; |
| Literal matchLabel = null; |
| for(Iterator<Literal> it = match.getLabels().iterator(); it.hasNext() && distance > 0;){ |
| Literal literal = it.next(); |
| String label = literal.getLexicalForm(); |
| int d; |
| if(!elConfig.isCaseSensitiveMatching()){ |
| label = label.toLowerCase(Locale.ROOT); |
| } |
| d = StringUtils.getLevenshteinDistance(anchor, label); |
| if(d < distance){ |
| distance = d; |
| matchLabel = literal; |
| } |
| } |
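| //score = 1 - (edit distance / length of the longer string); e.g. an anchor |
| //'pari' matched against the label 'paris' gives distance 1, length 5 and |
| //therefore a score of 0.8 (illustrative values) |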
| if(distance == 0){ |
| match.setMatch(1.0, matchLabel); |
| } else { |
| double length = Math.max(alength, matchLabel.getLexicalForm().length()); |
| match.setMatch(1d - ((double)distance/length),matchLabel); |
| } |
| log.trace(" ... add suggestion: label: '{}'; conf: {}", |
| matchLabel, match.getScore()); |
| suggestions.add(match); |
| } else { //the type of the current Entity is blacklisted |
| log.trace(" ... filtered because of entity types"); |
| } |
| } |
| if(suggestions.isEmpty()){ |
| tagIt.remove(); // remove this tag as no match is left |
| } else if(suggestions.size() > 1){ //if we have multiple suggestions |
| //sort based on score |
| Collections.sort(suggestions, Match.SCORE_COMPARATOR); |
| //adapt score based on entity ranking |
| adaptScoresForEntityRankings(suggestions); |
| //cut the list at the maximum number of suggestions |
| if(suggestions.size() > elConfig.getMaxSuggestions()){ |
| suggestions = suggestions.subList(0, elConfig.getMaxSuggestions()); |
| } |
| } |
| if(log.isTraceEnabled()){ //log the suggestion information |
| log.trace("Suggestions:"); |
| int si=1; |
| for(Match m : suggestions){ |
| log.trace(" {}. {} - {} ({})", new Object[]{ |
| si,m.getScore(),m.getMatchLabel(),m.getUri()}); |
| si++; |
| } |
| } |
| tag.setSuggestions(suggestions); |
| } |
| return matchCount; |
| } |
| /** |
| * Applies the configured entity type based filters. |
| * @param entityTypes the rdf:type values of the Entity to check |
| * @return <code>true</code> if the Entity should be filtered (excluded) based on |
| * its types, otherwise <code>false</code> |
| */ |
| private boolean filterEntityByType(Iterator<UriRef> entityTypes){ |
| Map<UriRef, Integer> whiteList = elConfig.getWhitelistedTypes(); |
| Map<UriRef, Integer> blackList = elConfig.getBlacklistedTypes(); |
| Integer w = null; |
| Integer b = null; |
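| //w and b hold the lowest order value found in the white/black list for any |
| //of the entity types; 0 is the highest possible precedence and therefore |
| //stops the search |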
| while(entityTypes.hasNext()){ |
| UriRef type = entityTypes.next(); |
| Integer act = whiteList.get(type); |
| if(act != null){ |
| if(w == null || act.compareTo(w) < 0){ |
| w = act; |
| } |
| if(act.intValue() == 0){ |
| break; |
| } |
| } |
| act = blackList.get(type); |
| if(act != null){ |
| if(b == null || act.compareTo(b) < 0){ |
| b = act; |
| } |
| if(act.intValue() == 0){ |
| break; |
| } |
| } |
| } |
| if(w == null && b == null){ |
| return !elConfig.isDefaultWhitelistTypes(); |
| } else if(w != null){ |
| return b != null && w.compareTo(b) >= 0; //filter only if a blacklisted type has an equal or higher precedence |
| } else { //w == null && b != null |
| return true; //filter |
| } |
| } |
| /** |
| * Uses the {@link Corpus} to tag the {@link AnalysedText} and adds the |
| * tagging results to the parsed tag map. |
| * @param at the AnalysedText |
| * @param session the tagging session of the text |
| * @param corpus the corpus of the session used to tag the content |
| * @param tags the Tags map used to store the tagging results |
| * @return the time in milliseconds spent in the tag callback. |
| * @throws IOException on any error while accessing the {@link SolrCore} |
| */ |
| private int tag(final AnalysedText at, final TaggingSession session, |
| final Corpus corpus, final Map<int[],Tag> tags) throws IOException { |
| final OpenBitSet matchDocIdsBS = new OpenBitSet(session.getSearcher().maxDoc()); |
| TokenStream baseTokenStream = corpus.getTaggingAnalyzer().tokenStream("", |
| new CharSequenceReader(at.getText())); |
| LinkableTokenFilter linkableTokenFilter = new LinkableTokenFilter(baseTokenStream, |
| at, session.getLanguage(), tpConfig.getConfiguration(session.getLanguage())); |
| //we use two TagClusterReducer implementations. |
| // (1) the linkableTokenFilter filters all tags that do not overlap any |
| // linkable Token |
| // (2) the LONGEST_DOMINANT_RIGHT reducer (TODO: make configurable) |
| TagClusterReducer reducer = new ChainedTagClusterReducer( |
| linkableTokenFilter,TagClusterReducer.LONGEST_DOMINANT_RIGHT); |
| final long[] time = new long[]{0}; |
| new Tagger(corpus.getFst(), linkableTokenFilter, reducer,session.isSkipAltTokens()) { |
| |
| @Override |
| protected void tagCallback(int startOffset, int endOffset, long docIdsKey) { |
| long start = System.nanoTime(); |
| if(log.isTraceEnabled()){ |
| log.trace(" > tagCallback for {}", at.getText().subSequence(startOffset, endOffset)); |
| } |
| int[] span = new int[]{startOffset,endOffset}; |
| Tag tag = tags.get(span); |
| if(tag == null){ |
| tag = new Tag(span); |
| tags.put(span, tag); |
| } |
| // below caches, and also flags matchDocIdsBS |
| Set<Match> matches = createMatches(docIdsKey); |
| if(log.isTraceEnabled()){ |
| log.trace(" - {} matches", matches.size()); |
| } |
| tag.addIds(matches); |
| long dif = System.nanoTime()-start; |
| time[0] = time[0]+dif; |
| } |
| |
| //NOTE: We can not use a cache, because we need to create different |
| // Match instances even for the same 'docIdsKey'. This is because |
| // the same result list might get generated for different |
| // surface forms in the text (e.g. if the SolrIndex is case |
| // insensitive, but the linking does consider the case when |
| // calculating the score). If we would use this cache Match |
| // instances would be used for several occurrences in the text |
| // and Match#getScore() values would get overridden when |
| // processing those multiple occurrences. |
| //Map<Long,Set<Match>> docIdsListCache = new HashMap<Long,Set<Match>>(1024); |
| |
| private Set<Match> createMatches(long docIdsKey) { |
| IntsRef docIds = lookupDocIds(docIdsKey); |
| Set<Match> matches = new HashSet<Match>(docIds.length); |
| for (int i = docIds.offset; i < docIds.offset + docIds.length; i++) { |
| int docId = docIds.ints[i]; |
| matchDocIdsBS.set(docId);// also, flip docid in bitset |
| matches.add(session.createMatch(docId));// translates here |
| } |
| return matches; |
| } |
| |
| }.process(); |
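| //time[0] is summed up in nanoseconds - return milliseconds |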
| return (int)(time[0]/1000000); |
| } |
| /** |
| * Adapts the scores of Matches with the same {@link Match#getScore() score} |
| * but different {@link Match#getRanking() entity rankings} so that |
| * suggestions with a higher ranking get a slightly better score. The |
| * score difference is never higher than <code>0.1</code>. |
| * @param matches the matches |
| */ |
| private void adaptScoresForEntityRankings(List<Match> matches) { |
| List<Match> equalScoreList = new ArrayList<Match>(4); |
| double score = 2f; |
| for(Match match : matches){ |
| double actScore = match.getScore(); |
| if(score == actScore){ |
| equalScoreList.add(match); |
| } else { |
| if(equalScoreList.size() > 1){ |
| adaptScoreForEntityRankings(equalScoreList, actScore); |
| } |
| score = actScore; |
| equalScoreList.clear(); |
| equalScoreList.add(match); |
| } |
| } |
| if(equalScoreList.size() > 1){ |
| adaptScoreForEntityRankings(equalScoreList,0); |
| } |
| //resort by score |
| Collections.sort(matches, Match.SCORE_COMPARATOR); |
| } |
| /** |
| * This method slightly adapts scores of Suggestions based on the Entity ranking. |
| * It is used for Suggestions that would otherwise have the exact same score (e.g. 1.0) |
| * to ensure an ordering of the suggestions based on the rankings of the Entities |
| * within the linked knowledge base. |
| * @param equalScoreList Entities with the same {@link Suggestion#getScore()} |
| * value. If this is not the case this method will change scores in unintended |
| * ways. |
| * @param nextScore the score of the {@link Suggestion} with the next lower score |
| * than the list of suggestions parsed in the first parameter |
| */ |
| private void adaptScoreForEntityRankings(List<Match> equalScoreList, double nextScore) { |
| double score = equalScoreList.get(0).getScore(); |
| log.trace(" > Adapt Score of multiple Suggestions " |
| + "with '{}' based on EntityRanking",score); |
| //Adapt the score to reflect the entity ranking |
| //but do not change order with entities of different |
| //score. Also do not change the score by more than 0.1 |
| //TODO: make the max change (0.1) configurable |
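| //e.g. (illustrative): for three matches with score 1.0 and a nextScore of 0.8 |
| //the code below computes dif = min(0.1, 0.2)/3 ~ 0.033 and assigns scores of |
| //1.0, ~0.967 and ~0.933 (assuming distinct entity rankings) |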
| double dif = (Math.min(0.1, score-nextScore))/equalScoreList.size(); |
| Collections.sort(equalScoreList,Match.ENTITY_RANK_COMPARATOR); |
| log.trace(" - keep score of {} at {}", equalScoreList.get(0).getUri(), score); |
| for(int i=1;i<equalScoreList.size();i++){ |
| score = score-dif; |
| if(Match.ENTITY_RANK_COMPARATOR.compare(equalScoreList.get(i-1), |
| equalScoreList.get(i)) != 0){ |
| equalScoreList.get(i).updateScore(score); |
| log.trace(" - set score of {} to {}", equalScoreList.get(i).getUri(), score); |
| } else { |
| double lastScore = equalScoreList.get(i-1).getScore(); |
| equalScoreList.get(i).updateScore(lastScore); |
| log.trace(" - set score of {} to {}", equalScoreList.get(i).getUri(), lastScore); |
| } |
| } |
| } |
| |
| /** |
| * Writes the Enhancements for the {@link LinkedEntity LinkedEntities} |
| * extracted from the parsed ContentItem. |
| * @param ci the ContentItem to write the enhancements to |
| * @param text the text of the analysed content |
| * @param tags the Tags (including their suggestions) to write |
| * @param language the language of the text |
| */ |
| private void writeEnhancements(ContentItem ci, String text, Collection<Tag> tags, String language) { |
| Language languageObject = null; |
| if(language != null && !language.isEmpty()){ |
| languageObject = new Language(language); |
| } |
| |
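| //for each Tag this writes a fise:TextAnnotation (start/end, selected text, |
| //selection context, confidence and dc:type values) and - for each suggestion - |
| //a fise:EntityAnnotation (entity reference, label, types, confidence) linked |
| //to the TextAnnotation via dc:relation |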
| MGraph metadata = ci.getMetadata(); |
| for(Tag tag : tags){ |
| Collection<UriRef> textAnnotations = new ArrayList<UriRef>(tags.size()); |
| //first create the TextAnnotations for the Occurrences |
| Literal startLiteral = literalFactory.createTypedLiteral(tag.getStart()); |
| Literal endLiteral = literalFactory.createTypedLiteral(tag.getEnd()); |
| //search for existing text annotation |
| Iterator<Triple> it = metadata.filter(null, ENHANCER_START, startLiteral); |
| UriRef textAnnotation = null; |
| while(it.hasNext()){ |
| Triple t = it.next(); |
| if(metadata.filter(t.getSubject(), ENHANCER_END, endLiteral).hasNext() && |
| metadata.filter(t.getSubject(), RDF_TYPE, ENHANCER_TEXTANNOTATION).hasNext()){ |
| textAnnotation = (UriRef)t.getSubject(); |
| break; |
| } |
| } |
| if(textAnnotation == null){ //not found ... create a new one |
| textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this); |
| metadata.add(new TripleImpl(textAnnotation, |
| Properties.ENHANCER_START, |
| startLiteral)); |
| metadata.add(new TripleImpl(textAnnotation, |
| Properties.ENHANCER_END, |
| endLiteral)); |
| metadata.add(new TripleImpl(textAnnotation, |
| Properties.ENHANCER_SELECTION_CONTEXT, |
| new PlainLiteralImpl(getSelectionContext(text, tag.getAnchor(), |
| tag.getStart()),languageObject))); |
| metadata.add(new TripleImpl(textAnnotation, |
| Properties.ENHANCER_SELECTED_TEXT, |
| new PlainLiteralImpl(tag.getAnchor(),languageObject))); |
| metadata.add(new TripleImpl(textAnnotation, |
| Properties.ENHANCER_CONFIDENCE, |
| literalFactory.createTypedLiteral(tag.getScore()))); |
| } else { //if existing add this engine as contributor |
| metadata.add(new TripleImpl(textAnnotation, DC_CONTRIBUTOR, |
| new PlainLiteralImpl(this.getClass().getName()))); |
| } |
| //add dc:types (even to existing) |
| for(UriRef dcType : getDcTypes(tag.getSuggestions())){ |
| metadata.add(new TripleImpl( |
| textAnnotation, Properties.DC_TYPE, dcType)); |
| } |
| textAnnotations.add(textAnnotation); |
| //now the EntityAnnotations for the Suggestions |
| for(Match match : tag.getSuggestions()){ |
| UriRef entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, this); |
| //should we use the label used for the match, or search the |
| //representation for the best label ... currently it's the matched one |
| metadata.add(new TripleImpl(entityAnnotation, Properties.ENHANCER_ENTITY_LABEL, match.getMatchLabel())); |
| metadata.add(new TripleImpl(entityAnnotation,ENHANCER_ENTITY_REFERENCE, |
| new UriRef(match.getUri()))); |
| for(UriRef type : match.getTypes()){ |
| metadata.add(new TripleImpl(entityAnnotation, |
| Properties.ENHANCER_ENTITY_TYPE, type)); |
| } |
| metadata.add(new TripleImpl(entityAnnotation, |
| Properties.ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(match.getScore()))); |
| //add the relation to the fise:TextAnnotation (the tag) |
| metadata.add(new TripleImpl(entityAnnotation, Properties.DC_RELATION, textAnnotation)); |
| //TODO: add origin information of the EntitySearcher |
| // for(Entry<UriRef,Collection<Resource>> originInfo : entitySearcher.getOriginInformation().entrySet()){ |
| // for(Resource value : originInfo.getValue()){ |
| // metadata.add(new TripleImpl(entityAnnotation, |
| // originInfo.getKey(),value)); |
| // } |
| // } |
| //TODO: dereferencing |
| // if(linkerConfig.isDereferenceEntitiesEnabled() && |
| // dereferencedEntities.add(entity.getUri())){ //not yet dereferenced |
| // //add all outgoing triples for this entity |
| // //NOTE: do not add all triples as there might be other data in the graph |
| // for(Iterator<Triple> triples = entity.getData().filter(entity.getUri(), null, null); |
| // triples.hasNext();metadata.add(triples.next())); |
| // } |
| } |
| } |
| } |
| |
| /** |
| * Retrieves all {@link EntitySearcher#getEncodedTypeField()} values of the parsed |
| * {@link Suggestion}s and then looks up the {@link NamespaceEnum#dcTerms dc}:type |
| * values for the {@link LinkedEntity#getTypes()} by using the configured |
| * {@link EntityLinkerConfig#getTypeMappings() type mappings} (and if |
| * no mapping is found the {@link EntityLinkerConfig#getDefaultDcType() |
| * default} type). |
| * @param matches the list of suggestions |
| * @return the dc:type values for the {@link LinkedEntity} |
| */ |
| private Set<UriRef> getDcTypes(List<Match> matches){ |
| if(matches == null || matches.isEmpty()){ |
| return Collections.emptySet(); |
| } |
| Collection<UriRef> conceptTypes = new HashSet<UriRef>(); |
| double score = -1; //only consider types of the best ranked Entities |
| for(Match match : matches){ |
| double actScore = match.getScore(); |
| if(actScore < score){ |
| break; |
| } |
| score = actScore; |
| for(Iterator<UriRef> types = match.getTypes().iterator(); |
| types.hasNext(); conceptTypes.add(types.next())); |
| } |
| Map<UriRef,UriRef> typeMappings = elConfig.getTypeMappings(); |
| Set<UriRef> dcTypes = new HashSet<UriRef>(); |
| for(UriRef conceptType : conceptTypes){ |
| UriRef dcType = typeMappings.get(conceptType); |
| if(dcType != null){ |
| dcTypes.add(dcType); |
| } |
| } |
| if(dcTypes.isEmpty() && elConfig.getDefaultDcType() != null){ |
| dcTypes.add(elConfig.getDefaultDcType()); |
| } |
| return dcTypes; |
| } |
| } |