// enhancement-engines/entitylinking/engine/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/config/EntityLinkerConfig.java - stanbol - Git at Google

 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.stanbol.enhancer.engines.entitylinking.config;

 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Dictionary;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;

 import org.apache.clerezza.commons.rdf.IRI;
 import org.apache.stanbol.commons.namespaceprefix.NamespaceMappingUtils;
 import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixService;
 import org.apache.stanbol.enhancer.engines.entitylinking.EntitySearcher;
 import org.apache.stanbol.enhancer.engines.entitylinking.impl.EntityLinker;
 import org.apache.stanbol.enhancer.engines.entitylinking.impl.Suggestion;
 import org.apache.stanbol.enhancer.engines.entitylinking.impl.Suggestion.MATCH;
 import org.apache.stanbol.enhancer.nlp.NlpAnnotations;
 import org.apache.stanbol.enhancer.nlp.model.Chunk;
 import org.apache.stanbol.enhancer.nlp.model.Token;
 import org.apache.stanbol.enhancer.nlp.morpho.MorphoFeatures;
 import org.apache.stanbol.enhancer.nlp.pos.Pos;
 import org.apache.stanbol.enhancer.nlp.pos.PosTag;
 import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
 import org.osgi.service.cm.ConfigurationException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 /**
  * The configuration for the {@link EntityLinker}. Typically this
  * configuration does not change often. Therefore it will be used for
  * several {@link EntityLinker} instances processing different
  * contents.
  * @author Rupert Westenthaler
  *
  */
 public class EntityLinkerConfig {

     private static final  Logger log = LoggerFactory.getLogger(EntityLinkerConfig.class);

     /**
      * The field used to search for labels in the vocabulary linked against
      */
     public static final String NAME_FIELD = "enhancer.engines.linking.labelField";
     /**
      * The field used as types for entities. While the type does not influence the
      * suggestions it is used for the <code>fise:entity-type</code> value of
      * <code>fise:EntityAnnotation</code>s and also to determine the
      * <code>dc:type</code> value of <code>fise:TextAnnotation</code>s via the
      * configured {@link #TYPE_MAPPINGS}.
      */
     public static final String TYPE_FIELD = "enhancer.engines.linking.typeField";
     /**
      * Allows to configure a list of entity types that are white/black listed.
      */
     public static final String ENTITY_TYPES = "enhancer.engines.linking.entityTypes";
     /**
      * Allows to enable/disable case sensitive matching
      */
     public static final String CASE_SENSITIVE = "enhancer.engines.linking.caseSensitive";
     /**
      * The field used to lookup redirects
      */
     public static final String REDIRECT_FIELD = "enhancer.engines.linking.redirectField";
     /**
      * If/how redirects (provided by the {@link #REDIRECT_FIELD}) are processed.
      */
     public static final String REDIRECT_MODE = "enhancer.engines.linking.redirectMode";
     /**
      * The maximum number of fise:EntityAnnotations created as suggestion for a fise:TextAnnotation
      */
     public static final String SUGGESTIONS = "enhancer.engines.linking.suggestions";
     /**
      * If enabled, Suggestions with similar scores are included. This also means that
      * more than {@link #SUGGESTIONS} results might be returned by the engine.
      */
     public static final String INCLUDE_SIMILAR_SCORE = "enhancer.engines.linking.includeSimilarScore";
     /**
      * If enabled {@link MorphoFeatures#getLemma()} values are used instead of the {@link Token#getSpan()} to
      * search/match Entities within the Vocabulary linked against.
      * @see EntityLinkerConfig#isLemmaMatching()
      * @see EntityLinkerConfig#DEFAULT_LEMMA_MATCHING_STATE
      */
     public static final String LEMMA_MATCHING_STATE = "enhancer.engines.linking.lemmaMatching";
     /**
      * Can be used to change the "default language" from <code>null</code>
      * (labels without language tag) to another value (e.g. "en").<p>
      * The "default language" is used in addition to the language of the
      * processed text to search for labels.
      */
     public static final String DEFAULT_MATCHING_LANGUAGE = "enhancer.engines.linking.defaultMatchingLanguage";
     /**
      * Allows to configure entity type -> dc:type mappings as used for created
      * fise:TextAnnotations
      */
     public static final String TYPE_MAPPINGS = "enhancer.engines.linking.typeMappings";
     /**
      * How well single tokens of the Label needs to match a token of the Text so that they
      * are considered to match. Matching does only allow differences at the end of the
      * token (e.g. "London" -> "Londons major")
      */
     public static final String MIN_TOKEN_SCORE = "enhancer.engines.linking.minTokenScore";
     /**
      * The minimum number of matching tokens. Only "matchable" tokens are counted.
      * For full matches (where all tokens of the Label do match tokens in the text)
      * this parameter is ignored.<p>
      * This parameter is strongly related with the {@link #MIN_LABEL_SCORE}.
      * Typical settings are<ul>
      * <li> <code>{@link #MIN_FOUND_TOKENS}=1</code> and <code>{@link #MIN_LABEL_SCORE} &gt; 0.5</code> (e.g. 0.75)
      * <li> <code>{@link #MIN_FOUND_TOKENS}=2</code> and <code>{@link #MIN_LABEL_SCORE} &lt;= 0.5</code> (e.g. 0.5)
      * </ul>
      * as both settings ensure that Labels with two tokens where only a single one
      * does match with the text are not suggested.<p>
      * If used in combination with a disambiguation Engine one might also want to consider
      * Entities whose labels match only a single token. In such cases
      * <code>{@link #MIN_FOUND_TOKENS}=1</code> and <code>{@link #MIN_LABEL_SCORE} &lt;= 0.5</code>
      * might also be a meaningful configuration. In such cases users will also want to set
      * <code>{@link #SUGGESTIONS} &gt; 10</code>.
      */
     public static final String MIN_FOUND_TOKENS = "enhancer.engines.linking.minFoundTokens";
     /**
      * The "Label Score" [0..1] represents how much of the
      * Label of an Entity matches with the Text. It compares the number
      * of Tokens of the Label with the number of Tokens matched to the
      * Text. Not exact matches for Tokens, or if the Tokens within the
      * label do appear in an other order than in the text do also
      * reduce this score. <p>
      * The default is {@link EntityLinkerConfig#DEFAULT_MIN_LABEL_SCORE}
      * (value: {@value EntityLinkerConfig#DEFAULT_MIN_LABEL_SCORE})
      */
     public static final String MIN_LABEL_SCORE = "enhancer.engines.linking.minLabelScore";
     /**
      * The "Text Score" [0..1] represents how well the
      * Label of an Entity matches to the selected Span in the Text.
      * It compares the number of matched {@link Token} from
      * the label with the number of Tokens enclosed by the Span
      * in the Text an Entity is suggested for. Not exact matches
      * for Tokens, or if the Tokens within the label do appear in
      * an other order than in the text do also reduce this score.<p>
      * The default is {@link EntityLinkerConfig#DEFAULT_MIN_TEXT_SCORE}
      * (value: {@value EntityLinkerConfig#DEFAULT_MIN_TEXT_SCORE})
      */
     public static final String MIN_TEXT_SCORE = "enhancer.engines.linking.minTextScore";
     /**
      * Defined as the product of the "Text Score" with the
      * "Label Score" - meaning that this value represents
      * both how well the label matches the text and how much of the
      * label is matched with the text.<p>
      * The default is {@link EntityLinkerConfig#DEFAULT_MIN_MATCH_SCORE}
      * (value: {@value EntityLinkerConfig#DEFAULT_MIN_MATCH_SCORE})
      * @see #MIN_TEXT_SCORE
      * @see #MIN_LABEL_SCORE
      */
     public static final String MIN_MATCH_FACTOR = "enhancer.engines.linking.minMatchScore";
     /**
      * The minimum score an Entity must match matchable {@link Token}s within a processable
      * {@link Chunk}. By {@link #DEFAULT_MIN_CHUNK_MATCH_SCORE default} this is
      * set to <code>51%</code> to filter Entities that do only match a single token
      * within a NounPhrase of two words. This feature was introduced with
      * <a href="https://issues.apache.org/jira/browse/STANBOL-1211">STANBOL-1211</a>
      */
     public static final String MIN_CHUNK_MATCH_SCORE = "enhancer.engines.linking.minChunkMatchScore";
     /**
      * The maximum number of {@link Token} used as search terms with the
      * {@link EntitySearcher#lookup(String, Set, java.util.List, String[], Integer)}
      * method
      */
     public static final String MAX_SEARCH_TOKENS = "enhancer.engines.linking.maxSearchTokens";
     /**
      * The maximum number of {@link Token} searched around a linkable Token for
      * additional search tokens.<p>
      * As an Example in the text section "at the University of Munich a new procedure to"
      * only "Munich" would be classified as {@link Pos#ProperNoun} and considered as
      * linkable. However for searching it makes sense to use additional Tokens to
      * reduce (or correctly rank) the expected high number of results for "Munich".
      * Because of that "matchable" words surrounding the linkable are considered as
      * included for searches.<p>
      * This parameter allows to configure the maximum distance around the current
      * linkable Token within which other linkable tokens can be included in searches.
      */
     public static final String MAX_SEARCH_TOKEN_DISTANCE = "enhancer.engines.linking.maxSearchTokenDistance";
     /**
      * Adds the dereference feature (STANBOL-333) also to this engine.
      * @deprecated Use a Dereference Engine instead (STANBOL-336)
      */
     @Deprecated
     public static final String DEREFERENCE_ENTITIES = "enhancer.engines.linking.dereference";
     /**
      * Allows to add a list of fields that are included when dereferencing Entities
      * @deprecated Use a Dereference Engine instead (STANBOL-336)
      */
     @Deprecated
     public static final String DEREFERENCE_ENTITIES_FIELDS = "enhancer.engines.linking.dereferenceFields";
     /**
      * Allows to enable/disable sorting of suggestion that have the same score
      * based on the entity ranking (popularity of the entity within the knowledge base)
      */
     public static final String RANK_EQUAL_SCORES_BASED_ON_ENTITY_RANKINGS = "enhancer.engines.linking.useEntityRankings";
     /**
      * Allows to enable/disable the inclusion of the <code>fise:entity-ranking</code>
      * property to <code>fise:EntityAnnotation</code> created by the linking engine.
      */
     public static final String WRITE_ENTITY_RANKINGS = "enhancer.engines.linking.writeEntityRankings";


     /**
      * The default number for the maximum number of terms suggested for a word
      */
     public static final int DEFAULT_SUGGESTIONS = 3;
     /**
      * By default {@link #INCLUDE_SIMILAR_SCORE} is deactivated
      */
     public static final boolean DEFAULT_INCLUDE_SIMILAR_SCORE = false;
     /**
      * Default value for the number of tokens that must be contained in
      * suggested terms. The default is <code>1</code>
      */
     public static final int DEFAULT_MIN_FOUND_TOKENS = 1;
     /**
      * Multiple Tokens can be sent to the {@link EntitySearcher} service. The
      * service uses this as optional parameters for the search. Therefore
      * returned Concepts MUST contain at least a single of the parsed
      * tokens. <p>
      * The default value of <code>2</code> should be enough for nearly all
      * Taxonomies to sufficiently reduce the number of results.<p>
      * NOTE that the labels (nameField) of the results are compared as a
      * whole. So even if only 2 Tokens are used for the search there may be
      * more mapped to the actual label of an result.
      */
     public static final int DEFAULT_MAX_SEARCH_TOKENS = 2;
     /**
      * Default value for the maximum distance within which tokens are
      * considered to be used (in addition to the currently processed one)
      * for searches of Entities.<p>
      * The default is set to <code>3</code>
      */
     public static final int DEFAULT_MAX_SEARCH_DISTANCE = 3;

     /**
      * Default value for {@link #getNameField()} (rdfs:label)
      */
     public static final IRI DEFAULT_NAME_FIELD = new IRI(
         "http://www.w3.org/2000/01/rdf-schema#label");
     /**
      * Default value for {@link #getTypeField()} (rdf:type)
      */
     public static final IRI DEFAULT_TYPE_FIELD = new IRI(
         "http://www.w3.org/1999/02/22-rdf-syntax-ns#type");
     /**
      * Default value for {@link #getRedirectField()} (rdfs:seeAlso)
      */
     public static final IRI DEFAULT_REDIRECT_FIELD = new IRI(
         "http://www.w3.org/2000/01/rdf-schema#seeAlso");
     /**
      * The default language used to search for labels regardless of the language
      * of the text. The default value is <code>null</code> causing to include
      * labels that do not have a language assigned.
      */
     public static final String DEFAULT_LANGUAGE = null;
     /**
      * The default for case sensitive matching is set to <code>false</code>
      */
     public static final boolean DEFAULT_CASE_SENSITIVE_MATCHING_STATE = false;
     /**
      * By default Lemma based matching is deactivated.
      */
     public static final boolean DEFAULT_LEMMA_MATCHING_STATE = false;
     /**
      * Default value for the minimum "Label Score" (see {@link #MIN_LABEL_SCORE})
      */
     public static final double DEFAULT_MIN_LABEL_SCORE = 0.75;
     /**
      * Default value for the minimum "Text Score" (see {@link #MIN_TEXT_SCORE})
      */
     public static final double DEFAULT_MIN_TEXT_SCORE = 0.4;
     /**
      * Default value for the minimum match score (see {@link #MIN_MATCH_FACTOR})
      */
     public static final double DEFAULT_MIN_MATCH_SCORE = 0.3;
     /**
      * By default more than 50% of the matchable tokens of a processable chunk
      * need to match so that an Entity is considered to be mentioned in the text
      * (STANBOL-1211)
      */
     public static final double DEFAULT_MIN_CHUNK_MATCH_SCORE = 0.51;

     /**
      * Default mappings of Concept types to the <code>dc:type</code> values
      * added to TextAnnotations. The map is unmodifiable.
      */
     public static final Map<IRI,IRI> DEFAULT_ENTITY_TYPE_MAPPINGS;

     static {
         //Registers the default mappings for the types used by the Stanbol
         //Enhancement Structure. Every source type of a group is mapped to the
         //dc:type of that group. Groups and their members are registered in a
         //fixed order.
         final Map<IRI,IRI> defaults = new HashMap<IRI,IRI>();

         final IRI[] organisationTypes = {
             OntologicalClasses.DBPEDIA_ORGANISATION,
             new IRI("http://dbpedia.org/ontology/Newspaper"),
             new IRI("http://schema.org/Organization")
         };
         for(IRI organisationType : organisationTypes){
             defaults.put(organisationType, OntologicalClasses.DBPEDIA_ORGANISATION);
         }

         final IRI[] personTypes = {
             OntologicalClasses.DBPEDIA_PERSON,
             new IRI("http://xmlns.com/foaf/0.1/Person"),
             new IRI("http://schema.org/Person")
         };
         for(IRI personType : personTypes){
             defaults.put(personType, OntologicalClasses.DBPEDIA_PERSON);
         }

         final IRI[] placeTypes = {
             OntologicalClasses.DBPEDIA_PLACE,
             new IRI("http://schema.org/Place"),
             new IRI("http://www.opengis.net/gml/_Feature")
         };
         for(IRI placeType : placeTypes){
             defaults.put(placeType, OntologicalClasses.DBPEDIA_PLACE);
         }

         defaults.put(OntologicalClasses.SKOS_CONCEPT, OntologicalClasses.SKOS_CONCEPT);

         //NOTE: mappings for dailymed organizations as well as for drug, disease,
         //side effect and ingredient types (drugbank, dbpedia-ont Drug, dailymed,
         //sider, tcm, diseasome, linkedct) are NOT registered by default.

         DEFAULT_ENTITY_TYPE_MAPPINGS = Collections.unmodifiableMap(defaults);
     }
     /**
      * Enumeration over the different possibilities on how to deal with
      * redirects (similar to Browsers following HTTP status 303 and RDF defining
      * the "rdfs:seeAlso" relation).
      * @author Rupert Westenthaler
      */
     public static enum RedirectProcessingMode {
         /**
          * Ignore redirects
          */
         IGNORE,
         /**
          * Follow redirects, but only add the values (e.g. labels, types) of such
          * entities to the original one.
          */
         ADD_VALUES,
         /**
          * Follow the redirect.
          */
         FOLLOW
     }
     /**
      * The default value for how to process redirects is set to
      * {@link RedirectProcessingMode#IGNORE}. Declared <code>final</code>, like
      * the other <code>DEFAULT_*</code> constants, so that the shared default
      * can not be reassigned at runtime.
      */
     public static final RedirectProcessingMode DEFAULT_REDIRECT_PROCESSING_MODE =
         RedirectProcessingMode.IGNORE;

     /**
      * The dereferenceEntitiesState as set in {@link #activateEntityDereference(Dictionary)}
      */
     private boolean dereferenceEntitiesState;
     /**
      * The maximum number of terms suggested for a word
      */
     private int maxSuggestions = DEFAULT_SUGGESTIONS;

     private boolean includeSuggestionsWithSimilarScore = DEFAULT_INCLUDE_SIMILAR_SCORE;
     /**
      * The minimum number of Tokens in the text that must match with
      * a label of the Entity so that also non-exact matches are
      * used for suggestions
      */
     private int minFoundTokens = DEFAULT_MIN_FOUND_TOKENS;
     /**
      * The maximum numbers of Tokens sent to the {@link EntitySearcher} to search
      * for concepts. <p>
      * NOTE that the labels (nameField) of the results are compared as a
      * whole. So even if only e.g. 2 tokens are used for the search there may be
      * more mapped to the actual label of an result.
      */
     private int maxSearchTokens = DEFAULT_MAX_SEARCH_TOKENS;
     /**
      * Defines the maximum distance within which tokens are
      * considered to be used (in addition to the currently processed one)
      * for searches of Entities.<p>
      */
     private int maxSearchDistance = DEFAULT_MAX_SEARCH_DISTANCE;

     private boolean caseSensitiveMatchingState = DEFAULT_CASE_SENSITIVE_MATCHING_STATE;
     /**
      * Holds the mappings of rdf:type used by concepts to dc:type values used
      * by TextAnnotations.
      */
     private Map<IRI,IRI> typeMappings;
     /**
      * Read-only view over {@link #typeMappings}; created by the default
      * constructor via {@link Collections#unmodifiableMap(Map)}.
      */
     private Map<IRI, IRI> unmodTypeMappings;
     /**
      * The mode on how to process redirect for Entities.
      */
     private RedirectProcessingMode redirectProcessingMode;
     /**
      * The default DC Type. The default constructor passes the value mapped
      * to the <code>null</code> key of the type mappings (if any) to its setter.
      */
     private IRI defaultDcType;
     /**
      * The name (label) field. The default constructor passes
      * {@link #DEFAULT_NAME_FIELD} to its setter.
      */
     private IRI nameField;
     /**
      * The redirect field. The default constructor passes
      * {@link #DEFAULT_REDIRECT_FIELD} to its setter.
      */
     private IRI redirectField;
     /**
      * The type field. The default constructor passes
      * {@link #DEFAULT_TYPE_FIELD} to its setter.
      */
     private IRI typeField;
     // NOTE(review): presumably the entity types black listed via ENTITY_TYPES;
     // the meaning of the Integer values is not visible in this section - confirm
     private Map<IRI,Integer> blacklistedTypes = new HashMap<IRI,Integer>();
     // NOTE(review): presumably the entity types white listed via ENTITY_TYPES;
     // the meaning of the Integer values is not visible in this section - confirm
     private Map<IRI,Integer> whitelistedTypes = new HashMap<IRI,Integer>();
     // nullable flag, initially not set (null); its semantics are defined by
     // code outside of this section
     private Boolean defaultWhitelistTypes = null;
     // NOTE(review): presumably the fields configured via
     // DEREFERENCE_ENTITIES_FIELDS - confirm
     private Set<IRI> dereferencedFields = new HashSet<IRI>();

     // not initialised at declaration; usage is not visible in this section
     private Set<IRI> __selectedFields;
     /**
      * The language always included in searches (regardless of the language
      * detected for the text).
      */
     private String defaultLanguage = DEFAULT_LANGUAGE;

     /**
      * Default for the maximum number of non-processable tokens that are
      * allowed to not match before no further tokens are matched against a label
      * of an Entity. <p>
      * This allows e.g. to match "Dr. Richard Dogles" with "Dr Richard Dogles"
      * as '.' is a non-processable token in the text that is missing in the
      * label.<p>
      * The default is set to <code>1</code>
      */
     public final static int DEFAULT_MAX_NOT_FOUND = 1;
     /**
      * Value of the maximum number of non-processable tokens that are
      * allowed to not match before no further tokens are matched against a label
      * of an Entity. <p>
      * This allows e.g. to match "Dr. Richard Dogles" with "Dr Richard Dogles"
      * as '.' is a non-processable token in the text that is missing in the
      * label.
     */
     private int maxNotFound;
     /**
      * Default value for the minimum token match factor.
      * Whether Tokens match is determined by comparing them using some algorithm.
      * Results need to be in the range [0..1]. This factor defines the minimum
      * similarity value so that a match is assumed. Note that this factor is only
      * used for filtering out non-matching tokens. The similarity value will
      * still be used for calculating the confidence.<p>
      * The default is set to <code>0.7</code>.
      */
     public final static float DEFAULT_MIN_TOKEN_SCORE = 0.7f;

     /**
      * The default state for dereferencing Entities. The default was changed to
      * <code>false</code> as this feature is now deprecated.
      * @deprecated Use a Dereference Engine instead (STANBOL-336)
      */
     @Deprecated
     public static final boolean DEFAULT_DEREFERENCE_ENTITIES_STATE = false;

     /**
      * The default value for the state if entities that would have the same score
      * should get their score slightly changed to ensure that entities with an
      * higher ranking (popularity) do have an higher score.
      */
     public static final boolean DEFAULT_RANK_EQUAL_SCORES_BASED_ON_ENTITY_RANKINGS = true;

     /**
      * By default the <code>fise:entity-ranking</code> property is not added to
      * <code>fise:EntityAnnotation</code>.
      */
     public static final boolean DEFAULT_WRITE_ENTITY_RANKINGS = false;

     /**
      * Whether Tokens match is determined by comparing them using some algorithm.
      * Results need to be in the range [0..1]. This factor defines the minimum
      * similarity value so that a match is assumed. Note that this factor is only
      * used for filtering out non-matching tokens. The similarity value will
      * still be used for calculating the confidence
      */
     private float minTokenMatchFactor;
     /**
      * If lemmas are used instead of the Tokens as present in the text to search
      * and match Entities within the linked vocabulary
      */
     private boolean lemmaMatchingState = DEFAULT_LEMMA_MATCHING_STATE;
     private double minLabelScore = DEFAULT_MIN_LABEL_SCORE;
     private double minTextScore = DEFAULT_MIN_TEXT_SCORE;
     private double minMatchScore = DEFAULT_MIN_MATCH_SCORE;
     /**
      * The minimum score an entity needs to match matchable tokens within a
      * chunk so that it is considered as a mention (STANBOL-1211)
      */
     private double minChunkMatchScore = DEFAULT_MIN_CHUNK_MATCH_SCORE;

     private boolean rankEqualScoresBasedOnEntityRankings = DEFAULT_RANK_EQUAL_SCORES_BASED_ON_ENTITY_RANKINGS;

     private boolean writeEntityRankings = DEFAULT_WRITE_ENTITY_RANKINGS;

     /**
      * Default constructor the initializes the configuration with the
      * default values
      */
     public EntityLinkerConfig(){
         setMaxSuggestions(DEFAULT_SUGGESTIONS);
         setMaxSearchTokens(DEFAULT_MAX_SEARCH_TOKENS);
         setRedirectProcessingMode(DEFAULT_REDIRECT_PROCESSING_MODE);
         typeMappings = new HashMap<IRI,IRI>(DEFAULT_ENTITY_TYPE_MAPPINGS);
         unmodTypeMappings = Collections.unmodifiableMap(typeMappings);
         setDefaultDcType(typeMappings.remove(null));
         setNameField(DEFAULT_NAME_FIELD);
         setRedirectField(DEFAULT_REDIRECT_FIELD);
         setTypeField(DEFAULT_TYPE_FIELD);
         setMaxNotFound(DEFAULT_MAX_NOT_FOUND);
         setMinTokenMatchFactor(DEFAULT_MIN_TOKEN_SCORE);
         setDereferenceEntitiesState(DEFAULT_DEREFERENCE_ENTITIES_STATE);
     }

     /**
      * Factory method building an {@link EntityLinkerConfig} from the
      * properties of the passed {@link Dictionary}. A new instance holding the
      * default values is created and then handed to
      * {@link #setConfiguration(EntityLinkerConfig, Dictionary, NamespacePrefixService)}.
      * @param configuration the configuration
      * @param prefixService Optionally a namespace prefix service used to
      * convert '{prefix}:{localname}' parameters in the configuration to URIs.
      * If <code>null</code> is passed this feature is not supported and parameters
      * are not changed.
      * @return the configured {@link EntityLinkerConfig}
      * @throws ConfigurationException if the passed configuration is not valid
      */
     public static EntityLinkerConfig createInstance(Dictionary<String,Object> configuration,
                                                     NamespacePrefixService prefixService) throws ConfigurationException {
         final EntityLinkerConfig linkerConfig = new EntityLinkerConfig();
         EntityLinkerConfig.setConfiguration(linkerConfig, configuration, prefixService);
         return linkerConfig;
     }
     /**
      * Sets the configuration as parsed by the {@link Dictionary} to the
      * parsed {@link EntityLinkerConfig}.
      * @param linkerConfig the instance to apply the configuration to
      * @param configuration the configuration
      * @param prefixService Optionally a namespace prefix service used to
      * convert '{prefix}:{localname}' parameters in the configuration to URIs.
      * If <code>null</code> is parsed this feature is not supported and parameters
      * are not changed.
      * @throws ConfigurationException in case the configuration is invalid
      */
     public static void setConfiguration(EntityLinkerConfig linkerConfig,Dictionary<String,Object> configuration,NamespacePrefixService prefixService) throws ConfigurationException {
         Object value;
         value = configuration.get(NAME_FIELD);
         if(value != null){
             if(value.toString().isEmpty()){
                 throw new ConfigurationException(NAME_FIELD,"The configured name field MUST NOT be empty");
             }
             linkerConfig.setNameField(new IRI(
                 getFullName(prefixService,NAME_FIELD,value.toString())));
         }

         //init case sensitivity
         value = configuration.get(CASE_SENSITIVE);
         if(value instanceof Boolean){
             linkerConfig.setCaseSensitiveMatchingState((Boolean)value);
         } else if(value != null && !value.toString().isEmpty()){
             linkerConfig.setCaseSensitiveMatchingState(Boolean.valueOf(value.toString()));
         } //if NULL or empty use default

         //init TYPE_FIELD
         value = configuration.get(TYPE_FIELD);
         if(value != null){
             if(value.toString().isEmpty()){
                 throw new ConfigurationException(TYPE_FIELD,"The configured name field MUST NOT be empty");
             }
             linkerConfig.setTypeField(new IRI(
                 getFullName(prefixService, TYPE_FIELD, value.toString())));
         }

         //init REDIRECT_FIELD
         value = configuration.get(REDIRECT_FIELD);
         if(value != null){
             if(value.toString().isEmpty()){
                 throw new ConfigurationException(NAME_FIELD,"The configured name field MUST NOT be empty");
             }
             linkerConfig.setRedirectField(new IRI(
                 getFullName(prefixService,REDIRECT_FIELD,value.toString())));
         }

         //init MAX_SUGGESTIONS
         value = configuration.get(SUGGESTIONS);
         Integer maxSuggestions;
         if(value instanceof Integer){
             maxSuggestions = (Integer)value;
         } else if (value != null){
             try {
                 maxSuggestions = Integer.valueOf(value.toString());
             } catch(NumberFormatException e){
                 throw new ConfigurationException(SUGGESTIONS, "Values MUST be valid Integer values > 0",e);
             }
         } else {
             maxSuggestions = null;
         }
         if(maxSuggestions != null){
             if(maxSuggestions < 1){
                 throw new ConfigurationException(SUGGESTIONS, "Values MUST be valid Integer values > 0");
             }
             linkerConfig.setMaxSuggestions(maxSuggestions);
         }
         //init INCLUDE_SIMILAR_SCORE
         value = configuration.get(INCLUDE_SIMILAR_SCORE);
         if(value instanceof Boolean){
             linkerConfig.setIncludeSuggestionsWithSimilarScore((Boolean)value);
         } else if(value != null){
             linkerConfig.setIncludeSuggestionsWithSimilarScore(Boolean.parseBoolean(value.toString()));
         }

         //init MIN_FOUND_TOKENS
         value = configuration.get(MIN_FOUND_TOKENS);
         Integer minFoundTokens;
         if(value instanceof Integer){
             minFoundTokens = (Integer)value;
         } else if(value != null){
             try {
                 minFoundTokens = Integer.valueOf(value.toString());
             } catch(NumberFormatException e){
                 throw new ConfigurationException(MIN_FOUND_TOKENS, "Values MUST be valid Integer values > 0",e);
             }
         } else {
             minFoundTokens = null;
         }
         if(minFoundTokens != null){
             if(minFoundTokens < 1){
                 throw new ConfigurationException(MIN_FOUND_TOKENS, "Values MUST be valid Integer values > 0");
             }
             linkerConfig.setMinFoundTokens(minFoundTokens);
         }

         //init Label Score parameters
         value = configuration.get(MIN_LABEL_SCORE);
         Double minLabelMatchFactor = null;
         if(value instanceof Number){
             minLabelMatchFactor = Double.valueOf(((Number)value).doubleValue());
         } else if(value != null){
             try {
                 minLabelMatchFactor = Double.valueOf(value.toString());
             } catch (NumberFormatException e) {
                 throw new ConfigurationException(MIN_LABEL_SCORE, "Parsed value '"
                         +value+"' is not an valid double!");
             }
         }
         try {
             linkerConfig.setMinLabelScore(minLabelMatchFactor);
         } catch (IllegalArgumentException e){
             throw new ConfigurationException(MIN_LABEL_SCORE, e.getMessage());
         }
         value = configuration.get(MIN_TEXT_SCORE);
         Double minTextMatchFactor = null;
         if(value instanceof Number){
             minTextMatchFactor = Double.valueOf(((Number)value).doubleValue());
         } else if(value != null){
             try {
                 minTextMatchFactor = Double.valueOf(value.toString());
             } catch (NumberFormatException e) {
                 throw new ConfigurationException(MIN_TEXT_SCORE, "Parsed value '"
                         +value+"' is not an valid double!");
             }
         }
         try {
             linkerConfig.setMinTextScore(minTextMatchFactor);
         } catch (IllegalArgumentException e){
             throw new ConfigurationException(MIN_TEXT_SCORE, e.getMessage());
         }
         value = configuration.get(MIN_MATCH_FACTOR);
         Double minMatchFactor = null;
         if(value instanceof Number){
             minMatchFactor = Double.valueOf(((Number)value).doubleValue());
         } else if(value != null){
             try {
                 minMatchFactor = Double.valueOf(value.toString());
             } catch (NumberFormatException e) {
                 throw new ConfigurationException(MIN_MATCH_FACTOR, "Parsed value '"
                         +value+"' is not an valid double!");
             }
         }
         try {
             linkerConfig.setMinMatchScore(minMatchFactor);
         } catch (IllegalArgumentException e){
             throw new ConfigurationException(MIN_MATCH_FACTOR, e.getMessage());
         }

         value = configuration.get(MIN_CHUNK_MATCH_SCORE);
         Double minChunkMatchScore = null;
         if(value instanceof Number){
             minChunkMatchScore = Double.valueOf(((Number)value).doubleValue());
         } else if(value != null){
             try {
                 minChunkMatchScore = Double.valueOf(value.toString());
             } catch (NumberFormatException e) {
                 throw new ConfigurationException(MIN_CHUNK_MATCH_SCORE, "Parsed value '"
                         +value+"' is not an valid double!");
             }
         }
         try {
             linkerConfig.setMinChunkMatchScore(minChunkMatchScore);
         } catch (IllegalArgumentException e){
             throw new ConfigurationException(MIN_CHUNK_MATCH_SCORE, e.getMessage());
         }

         //init LEMMA_MATCHING_STATE
         value = configuration.get(LEMMA_MATCHING_STATE);
         if(value instanceof Boolean){
             linkerConfig.setLemmaMatchingState((Boolean)value);
         } else if (value != null){
             linkerConfig.setLemmaMatchingState(Boolean.parseBoolean(value.toString()));
         }

         //init MAX_SEARCH_TOKENS
         value = configuration.get(MAX_SEARCH_TOKENS);
         Integer maxSearchTokens;
         if(value instanceof Integer){
             maxSearchTokens = (Integer)value;
         } else if (value != null){
             try {
                 maxSearchTokens = Integer.valueOf(value.toString());
             } catch(NumberFormatException e){
                 throw new ConfigurationException(MAX_SEARCH_TOKENS, "Values MUST be valid Integer values > 0",e);
             }
         } else {
             maxSearchTokens = null;
         }
         if(maxSearchTokens != null){
             if(maxSearchTokens < 1){
                 throw new ConfigurationException(MAX_SEARCH_TOKENS, "Values MUST be valid Integer values > 0");
             }
             linkerConfig.setMaxSearchTokens(maxSearchTokens);
         }

         //init the MAX_SEARCH_TOKEN_DISTANCE
         value = configuration.get(MAX_SEARCH_TOKEN_DISTANCE);
         Integer maxSearchDistance;
         if(value instanceof Integer){
             maxSearchDistance = (Integer)value;
         } else if (value != null){
             try {
                 maxSearchDistance = Integer.valueOf(value.toString());
             } catch(NumberFormatException e){
                 throw new ConfigurationException(MAX_SEARCH_TOKEN_DISTANCE, "Values MUST be valid Integer values > 0",e);
             }
         } else {
             maxSearchDistance = null;
         }
         if(maxSearchDistance != null){
             if(maxSearchDistance < 1){
                 throw new ConfigurationException(MAX_SEARCH_TOKEN_DISTANCE, "Values MUST be valid Integer values > 0");
             }
             linkerConfig.setMaxSearchDistance(maxSearchDistance);
         }

         //init the REDIRECT_PROCESSING_MODE
         value = configuration.get(REDIRECT_MODE);
         if(value != null){
             try {
                 linkerConfig.setRedirectProcessingMode(RedirectProcessingMode.valueOf(value.toString()));
             } catch (IllegalArgumentException e) {
                 throw new ConfigurationException(REDIRECT_MODE, "Values MUST be one of "+
                     Arrays.toString(RedirectProcessingMode.values()));
             }
         }

         //init the DEFAULT_LANGUAGE
         value = configuration.get(DEFAULT_MATCHING_LANGUAGE);
         if(value != null){
             String defaultLang = value.toString().trim();
             if(defaultLang.isEmpty()){
                 linkerConfig.setDefaultLanguage(null);
             } else if(defaultLang.length() == 1){
                 throw new ConfigurationException(DEFAULT_MATCHING_LANGUAGE, "Illegal language code '"+
                     defaultLang+"'! Language Codes MUST BE at least 2 chars long.");
             } else {
                 linkerConfig.setDefaultLanguage(defaultLang);
             }
         }

         // init MIN_TOKEN_MATCH_FACTOR
         value=configuration.get(MIN_TOKEN_SCORE);
         float minTokenMatchFactor;
         if(value instanceof Number){
             minTokenMatchFactor = ((Number)value).floatValue();
         } else if(value != null){
             try {
                 minTokenMatchFactor = Float.valueOf(value.toString());
             } catch (NumberFormatException e) {
                 throw new ConfigurationException(MIN_TOKEN_SCORE,
                     "Unable to parse the minimum token match factor from the parsed value "+value,e);
             }
             if(minTokenMatchFactor < 0){
                 minTokenMatchFactor = EntityLinkerConfig.DEFAULT_MIN_TOKEN_SCORE;
             }
         } else {
             minTokenMatchFactor = EntityLinkerConfig.DEFAULT_MIN_TOKEN_SCORE;
         }
         if(minTokenMatchFactor == 0 || minTokenMatchFactor > 1){
             throw new ConfigurationException(MIN_TOKEN_SCORE,
                 "The minimum token match factor MUST be > 0 and <= 1 (negative values for the default)");
         }
         linkerConfig.setMinTokenMatchFactor(minTokenMatchFactor);

         //init type mappings
         value = configuration.get(TYPE_MAPPINGS);
         if(value instanceof String[]){ //support array
             value = Arrays.asList((String[])value);
         } else if(value instanceof String) { //single value
             value = Collections.singleton(value);
         }
         if(value instanceof Collection<?>){ //and collection
             log.info("Init Type Mappings");
             configs :
             for(Object o : (Iterable<?>)value){
                 if(o != null){
                     StringBuilder usage = new StringBuilder("useages: ");
                     usage.append("a: '{uri}' short for {uri} > {uri} | ");
                     usage.append("b: '{source1};{source2};..;{sourceN} > {target}'");
                     String[] config = o.toString().split(">");
                     if(config[0].isEmpty()){
                         log.warn("Invalid Type Mapping Config '{}': Missing Source Type ({}) -> ignore this config",
                             o,usage);
                         continue configs;
                     }
                     String[] sourceTypes = config[0].split(";");
                     if(sourceTypes.length > 1 && (config.length < 2 || config[1].isEmpty())){
                         log.warn("Invalid Type Mapping Config '{}': Missing Target Type '{}' ({}) -> ignore this config",
                             o,usage);
                         continue configs;
                     }
                     String targetType = config.length < 2 ? sourceTypes[0] : config[1];
                     targetType = getFullName(prefixService,TYPE_MAPPINGS,targetType.trim()); //support for ns:localName
                     try { //validate
                         new URI(targetType);
                     } catch (URISyntaxException e) {
                         log.warn("Invalid URI '{}' in Type Mapping Config '{}' -> ignore this config",
                             sourceTypes[0],o);
                         continue configs;
                     }
                     IRI targetUri = new IRI(targetType);
                     for(String sourceType : sourceTypes){
                         if(!sourceType.isEmpty()){
                             sourceType = getFullName(prefixService,TYPE_MAPPINGS,sourceType.trim()); //support for ns:localName
                             try { //validate
                                 new URI(sourceType);
                                 IRI old = linkerConfig.setTypeMapping(sourceType, targetUri);
                                 if(old == null){
                                     log.info(" > add type mapping {} > {}", sourceType,targetType);
                                 } else {
                                     log.info(" > set type mapping {} > {} (old: {})",
                                         new Object[]{sourceType,targetType,old.getUnicodeString()});
                                 }
                             } catch (URISyntaxException e) {
                                 log.warn("Invalid URI '{}' in Type Mapping Config '{}' -> ignore this source type",
                                     sourceTypes[0],o);
                             }
                         }
                     }
                 }
             }
         } else {
             log.debug("No Type mappings configured");
         }
         //dereference entities
         value = configuration.get(DEREFERENCE_ENTITIES);
         if(value instanceof Boolean){
             linkerConfig.setDereferenceEntitiesState(((Boolean)value).booleanValue());
         } else if(value != null && !value.toString().isEmpty()){
             linkerConfig.setDereferenceEntitiesState(Boolean.parseBoolean(value.toString()));
         }
         if(linkerConfig.isDereferenceEntitiesEnabled()){
             log.warn("DereferenceEntities is deprecated for the Engine. Please use the "
                 + "EntityhubDereferenceEngine instead (see STANBOL-1223 for details)");
         }
         if(linkerConfig.isDereferenceEntitiesEnabled()){
             value = configuration.get(DEREFERENCE_ENTITIES_FIELDS);
             if(value instanceof String[]){
                 for(String field : (String[])value){
                     if(field != null && !field.isEmpty()){
                         linkerConfig.getDereferencedFields().add(
                             new IRI(getFullName(prefixService,DEREFERENCE_ENTITIES_FIELDS,field)));
                     }
                 }
             } else if(value instanceof Collection<?>){
                 for(Object field : (Collection<?>)value){
                     if(field != null && !field.toString().isEmpty()){
                         linkerConfig.getDereferencedFields().add(
                             new IRI(getFullName(prefixService,DEREFERENCE_ENTITIES_FIELDS,field.toString())));
                     }
                 }
             } else if(value instanceof String){
                 if(!value.toString().isEmpty()){
                     linkerConfig.getDereferencedFields().add(
                         new IRI(getFullName(prefixService,DEREFERENCE_ENTITIES_FIELDS,value.toString())));
                 }
             } else if(value != null){
                 throw new ConfigurationException(DEREFERENCE_ENTITIES_FIELDS,
                     "Dereference Entities_Fields MUST BE parsed as String[], Collection<String> or "
                     + "String (single value). The actual value '"+value+"'(type: '"+value.getClass()
                     + "') is NOT supported");
             }else { //value == null
             	log.debug("No deference fields for entity configured");
             }
         }

         //init USE ENTITY RANKINGS (STANBOL-1030)
         value = configuration.get(RANK_EQUAL_SCORES_BASED_ON_ENTITY_RANKINGS);
         if(value instanceof Boolean){
             linkerConfig.setRankEqualScoresBasedOnEntityRankings(((Boolean)value).booleanValue());
         } else if (value != null){
             linkerConfig.setRankEqualScoresBasedOnEntityRankings(
                 Boolean.parseBoolean(value.toString()));
         } else {
             linkerConfig.setRankEqualScoresBasedOnEntityRankings(
                 DEFAULT_RANK_EQUAL_SCORES_BASED_ON_ENTITY_RANKINGS);
         }
         //init WRITE ENTITY RANKINGS (STANBOL-1292)
         value = configuration.get(WRITE_ENTITY_RANKINGS);
         if(value instanceof Boolean){
             linkerConfig.setWriteEntityRankings(((Boolean)value).booleanValue());
         } else if (value != null){
             linkerConfig.setWriteEntityRankings(Boolean.parseBoolean(value.toString()));
         } else {
             linkerConfig.setWriteEntityRankings(DEFAULT_WRITE_ENTITY_RANKINGS);
         }

         //init the list of whitelisted/blacklisted types
         value = configuration.get(ENTITY_TYPES);
         List<String> entityTypesConfig; //first collect and cleanup the config
         if(value == null){
             entityTypesConfig = Collections.emptyList();
         } else if(value instanceof String[]){
             entityTypesConfig = new ArrayList<String>();
             for(String type : (String[])value){
                 if(type != null){
                     type = type.trim();
                     if(!type.isEmpty()){
                         entityTypesConfig.add(type);
                     }
                 }
             }
         } else if(value instanceof Collection<?>){
             entityTypesConfig = new ArrayList<String>();
             for(Object o : (Collection<Object>)value){
                 if(o != null){
                     String type = o.toString().trim();
                     if(!type.isEmpty()){
                         entityTypesConfig.add(type);
                     }
                 }
             }
         } else if(value instanceof String){ //support parsing single values as string
             String type = value.toString().trim();
             if(type.isEmpty()){
                 entityTypesConfig = Collections.emptyList();
             } else {
                 entityTypesConfig = Collections.singletonList(type);
             }
         } else {
             throw new ConfigurationException(ENTITY_TYPES, "The list of ignored types (if present) "
                 + "MUST BE a collection or a string array (present: "+value.getClass().getName()+")!");
         }
         //apply the config
         for(int i = 0; i < entityTypesConfig.size(); i++){
             String type = entityTypesConfig.get(i);
             if("*".equals(type)){
                 linkerConfig.setDefaultWhitelistTypes(Boolean.TRUE);
             } else {
                 boolean blacklisted = type.charAt(0) == '!';
                 if(blacklisted && type.length() < 2){
                     throw new ConfigurationException(ENTITY_TYPES, "The list of whitelisted/blacklisted "
                         + "MUST NOT contain '!' (configured: "+entityTypesConfig+")!");
                 }
                 IRI uri = new IRI(getFullName(prefixService, ENTITY_TYPES,
                     blacklisted ? type.substring(1) : type));
                 if(blacklisted){
                     linkerConfig.addBlacklistType(uri, Integer.valueOf(i));
                 } else {
                     linkerConfig.addWhitelistType(uri, Integer.valueOf(i));
                 }
             }
         }
     }
     /**
      * Gets the full URI for the parsed value by using the parsed {@link NamespacePrefixService}
      * @param prefixService the {@link NamespacePrefixService} used to lookup the full URI
      * @param property the config property (just used to create a {@link ConfigurationException}
      * in case the used namespace prefix is unknown by the namespace prefix service)
      * @param value the configured value (might be both a short or a full URI)
      * @return the full URI
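      * @throws ConfigurationException if the configured value uses a namespace prefix
      * while no prefix service is present, or if the prefix service can not resolve the value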
      */
     private static String getFullName(NamespacePrefixService prefixService, String property,String value) throws ConfigurationException {
         String prefix = NamespaceMappingUtils.getPrefix(value);
         if(prefixService == null){
             // without a prefix service only values that carry no namespace prefix can be used as-is
             if(prefix == null){
                 return value;
             }
             throw new ConfigurationException(property, "'{prefix}:{localname}' type configurations "
                 + "are not supported if no "+NamespacePrefixService.class.getSimpleName()
                 + " is present (configured value='"+value+"')!");
         }
         String uri = prefixService.getFullName(value);
         if(uri == null){
             // the prefix service was not able to map the configured value to a full URI
             throw new ConfigurationException(property, "The prefix '"+prefix
                     + "' as used by the configured value '"+value+"' is unknown to the "
                     + NamespacePrefixService.class.getSimpleName());
         }
         log.debug("mapped '{}' -> '{}'",value,uri);
         return uri;
     }

     /**
      * Getter for the uri of the field used for the names in the taxonomy
      * (e.g. rdfs:label, skos:prefLabel). Needs to return the full URI
      * @return the field used for the names in the Taxonomy.
      */
     public final IRI getNameField() {
         return this.nameField;
     }
     /**
      * Setter for the uri of the field used for the names in the taxonomy
      * (e.g. rdfs:label, skos:prefLabel).
      * @param nameField the nameField to set
      */
     public final void setNameField(IRI nameField) {
         this.nameField = nameField;
         // reset __selectedFields so it does not keep reflecting the previous
         // name field (presumably rebuilt on demand elsewhere - confirm)
         this.__selectedFields = null;
     }
     /**
      * Getter for the dereferencedFields. This is a read- and write-able
      * set that allows to configure the fields that should be dereferenced
      * @return the read- and write-able set of dereferenced fields
      */
     public final Set<IRI> getDereferencedFields(){
         // the internal set itself is handed out (no copy), so callers can modify it
         return this.dereferencedFields;
     }
     /**
      * The field used to follow redirects (typically rdf:seeAlso)
      * @return the redirect field
      */
     public final IRI getRedirectField() {
         return this.redirectField;
     }
     /**
      * The field used to follow redirects (typically rdf:seeAlso)
      * @param redirectField the redirectField to set
      */
     public final void setRedirectField(IRI redirectField) {
         this.redirectField = redirectField;
         // reset __selectedFields so it does not keep reflecting the previous
         // redirect field (presumably rebuilt on demand elsewhere - confirm)
         this.__selectedFields = null;
     }
     /**
      * The field used to lookup the types (typically rdf:type)
      * @return the field name used to lookup types
      */
     public final IRI getTypeField() {
         return this.typeField;
     }
     /**
      * The field used to lookup the types (typically rdf:type)
      * @param typeField the typeField to set
      */
     public final void setTypeField(IRI typeField) {
         this.typeField = typeField;
         // reset __selectedFields so it does not keep reflecting the previous
         // type field (presumably rebuilt on demand elsewhere - confirm)
         this.__selectedFields = null;
     }
     /**
      * Setter for the maximum number of suggestion returned.
      * @param maxSuggestions the maxSuggestions to set
      */
     public void setMaxSuggestions(int maxSuggestions) {
         // stored as-is: this setter applies no range check to the parsed value
         this.maxSuggestions = maxSuggestions;
     }
     /**
      * Getter for the maximum number of suggestion returned.
      * @return the maxSuggestions
      */
     public int getMaxSuggestions() {
         return this.maxSuggestions;
     }

     /**
      * Getter for the <code>includeSuggestionsWithSimilarScore</code> state.
      * @return the current state
      */
     public boolean isIncludeSuggestionsWithSimilarScore(){
         return this.includeSuggestionsWithSimilarScore;
     }
     /**
      * Setter for the <code>includeSuggestionsWithSimilarScore</code> state.
      * @param state the state or <code>null</code> to reset to
      * {@link #DEFAULT_INCLUDE_SIMILAR_SCORE}
      */
     public void setIncludeSuggestionsWithSimilarScore(Boolean state){
         if(state != null){
             includeSuggestionsWithSimilarScore = state;
             return;
         }
         // no explicit state parsed: fall back to the default
         includeSuggestionsWithSimilarScore = DEFAULT_INCLUDE_SIMILAR_SCORE;
     }

     /**
      * Setter for the minimum number of Tokens (of the content) that MUST match
      * with a {@link EntitySearcher#getNameField() label} of a
      * {@link EntitySearcher#lookup(java.util.List, String...) concept of the taxonomy}
      * so that it is {@link Suggestion suggested} even if the match is only
      * {@link MATCH#PARTIAL}. Entities that match less than that are only included
      * if a label is an {@link MATCH#EXACT EXACT} match with the current position
      * in the text.
      * @param minFoundTokens the minFoundTokens to set
      */
     public void setMinFoundTokens(int minFoundTokens) {
         // stored as-is: this setter applies no range check to the parsed value
         this.minFoundTokens = minFoundTokens;
     }
     /**
      * Getter for the minimum number of Tokens (of the content) that MUST match
      * with a {@link EntitySearcher#getNameField() label} of a
      * {@link EntitySearcher#lookup(java.util.List, String...) concept of the taxonomy}
      * so that it is {@link Suggestion suggested} even if the match is only
      * {@link MATCH#PARTIAL}. Entities that match less than that are only included
      * if a label is an {@link MATCH#EXACT EXACT} match with the current position
      * in the text.
      * @return the minFoundTokens
      */
     public int getMinFoundTokens() {
         return this.minFoundTokens;
     }
     /**
      * Getter for the  maximum number of tokens parsed to
      * {@link EntitySearcher#lookup(java.util.List, String...)}
      * @return the maxSearchTokens
      */
     public final int getMaxSearchTokens() {
         return this.maxSearchTokens;
     }
     /**
      * The maximum number of tokens parsed to
      * {@link EntitySearcher#lookup(java.util.List, String...)}. This is NOT the
      * maximum number of Tokens mapped for Entities returned by such queries.<p>
      * In case {@link Chunk}s are available in the parsed {@link AnalysedText}
      * searches can be scoped by such chunks. However if no chunks are available,
      * then this value is used to collect this number of words in the text.<p>
      * The {@link #DEFAULT_MAX_SEARCH_TOKENS default value} of <code>2</code>
      * should be ok in most cases.
      * @param maxSearchTokens the maxSearchTokens to set
      */
     public final void setMaxSearchTokens(int maxSearchTokens) {
         // negative values are rejected up-front
         if(maxSearchTokens < 0){
             throw new IllegalArgumentException("The maxSearchToken value MUST BE >= 0 (0 for setting the default)");
         }
         // zero is the marker for "use the default"
         this.maxSearchTokens = maxSearchTokens == 0 ? DEFAULT_MAX_SEARCH_TOKENS : maxSearchTokens;
     }
     /**
      * Getter for the case sensitive matching state
      * @return the state
      */
     public boolean isCaseSensitiveMatching() {
         return this.caseSensitiveMatchingState;
     }
     /**
      * Setter for the case sensitive matching state
      * @param state the case sensitive matching state to set
      */
     public void setCaseSensitiveMatchingState(boolean state) {
         // the parsed state is copied directly to the caseSensitiveMatchingState field
         this.caseSensitiveMatchingState = state;
     }
     /* REMOVED because getTypemappings.remove(conceptType) can be used anyway
      * Removes the mapping for the parsed concept type
      * @param conceptType the concept type to remove the mapping
      * @return the previously mapped dc:type value or <code>null</code> if
      * no mapping for the parsed concept type was present
     public IRI removeTypeMapping(IRI conceptType){
         return typeMappings.remove(conceptType);
     }
      */
     /**
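      * Adds, updates or removes the dc:type mapping for a concept type. Parsing
      * <code>null</code> as dcType removes the mapping of the parsed concept type.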
      * @param conceptType the type of the concept or <code>null</code> to
      * add the default dc:type mapping. See also {@link #setDefaultDcType(IRI)}
      * @param dcType the dc:type for the parsed concept type
      * @return the previously mapped dc:type value if an existing mapping
      * was updated or <code>null</code> if a new mapping was added.
      */
     public IRI setTypeMapping(String conceptType, IRI dcType){
         // the key is only created when a concept type was parsed
         final IRI conceptIri = conceptType == null ? null : new IRI(conceptType);
         if(dcType == null){
             // no target type: remove the mapping and report the removed value
             // NOTE(review): for a null concept type this removes the null key from
             // typeMappings and leaves the default dc:type untouched - confirm intended
             return typeMappings.remove(conceptIri);
         }
         if(conceptIri == null){
             // no concept type: the parsed dc:type becomes the new default
             final IRI previousDefault = getDefaultDcType();
             setDefaultDcType(dcType);
             return previousDefault;
         }
         return typeMappings.put(conceptIri, dcType);
     }

     /**
      * Setter for the default dc:type of linked entities if for none of the
      * types of the suggestions a {@link #getTypeMappings()} exists. Set this
      * to <code>null</code> to specify that no dc:type should be set in such
      * cases.
      * @param defaultDcType the defaultDcType to set
      */
     public void setDefaultDcType(IRI defaultDcType) {
         // null is stored like any other value (no check is applied here)
         this.defaultDcType = defaultDcType;
     }
     /**
      * The default type for Entities if no {@link #getTypeMappings() type mapping}
      * is present. <code>null</code> means that no type should be set if no
      * explicit mapping exists
      * @return the defaultDcType
      */
     public IRI getDefaultDcType() {
         return this.defaultDcType;
     }
     /**
      * Setter for the mode on how to deal with redirects
      * @param redirectProcessingMode the redirectProcessingMode to set
      */
     public void setRedirectProcessingMode(RedirectProcessingMode redirectProcessingMode) {
         this.redirectProcessingMode = redirectProcessingMode;
         // reset __selectedFields so it does not keep reflecting the previous
         // redirect mode (presumably rebuilt on demand elsewhere - confirm)
         this.__selectedFields = null;
     }
     /**
      * Getter for the mode how to deal with redirects
      * @return the redirectProcessingMode
      */
     public RedirectProcessingMode getRedirectProcessingMode() {
         return this.redirectProcessingMode;
     }
     /**
      * Getter for the read only mappings of type mappings
      * @return the type mappings (read only)
      */
     public Map<IRI,IRI> getTypeMappings() {
         // hands out the unmodTypeMappings field rather than typeMappings itself
         return this.unmodTypeMappings;
     }
     /**
      * Setter for the language of labels searched in addition to the current
      * language of the text. Setting this to <code>null</code> (also the default)
      * will cause to search labels without any defined language.<p>
      * Changing this makes only sense if a dataset (such as dbpedia.org) adds
      * language tags to labels even if they are typically used in any language.
      * @param defaultLanguage the default language
      */
     public void setDefaultLanguage(String defaultLanguage) {
         // stored as-is: the parsed language code is not validated by this setter
         this.defaultLanguage = defaultLanguage;
     }
     /**
      * Getter for the language of labels searched in addition to the current
      * language of the text.
      * @return the default language
      */
     public String getDefaultLanguage() {
         return this.defaultLanguage;
     }
     /**
      * Getter for the maximum number of non-processable tokens that are
      * allowed to not match before no further tokens are matched against a label
      * of an Entity. <p>
      * This allows e.g. to match "Dr. Richard Dogles" with "Dr Richard Dogles"
      * as '.' is a non-processable token in the text that is missing in the
      * label.
      * @return the maxNotFound
      */
     public int getMaxNotFound() {
         return this.maxNotFound;
     }
     /**
      * Setter for the maximum number of non-processable tokens that are
      * allowed to not match before no further tokens are matched against a label
      * of an Entity. <p>
      * This allows e.g. to match "Dr. Richard Dogles" with "Dr Richard Dogles"
      * as '.' is a non-processable token in the text that is missing in the
      * label.
      * @param maxNotFound the maxNotFound to set
      */
     public void setMaxNotFound(int maxNotFound) {
         // negative values select the default, everything else is taken over as parsed
         this.maxNotFound = maxNotFound < 0 ? DEFAULT_MAX_NOT_FOUND : maxNotFound;
     }
     /**
      * Getter for the minimum token match Factor.
      * Whether two Tokens match is determined by comparing them using some algorithm.
      * Results need to be in the range [0..1]. This factor defines the minimum
      * similarity value so that a match is assumed. Note that this factor is only
      * used for filtering out non-matching tokens. The similarity value will
      * still be used for calculating the confidence
      * @return the minTokenMatchFactor
      */
     public float getMinTokenMatchFactor() {
         return this.minTokenMatchFactor;
     }
     /**
      * Setter for the minimum token match Factor.
      * Whether two Tokens match is determined by comparing them using some algorithm.
      * Results need to be in the range [0..1]. This factor defines the minimum
      * similarity value so that a match is assumed. Note that this factor is only
      * used for filtering out non-matching tokens. The similarity value will
      * still be used for calculating the confidence
      * @param minTokenMatchFactor the minTokenMatchFactor to set
      */
     public void setMinTokenMatchFactor(float minTokenMatchFactor) {
         if(minTokenMatchFactor < 0 ){
             // negative values are the marker for "use the default"
             this.minTokenMatchFactor = DEFAULT_MIN_TOKEN_SCORE;
         } else if(Float.isNaN(minTokenMatchFactor) || minTokenMatchFactor == 0 || minTokenMatchFactor > 1){
             // NaN compares false against every bound, so it has to be rejected explicitly
             throw new IllegalArgumentException("minimum Token Match Facter MUST be > 0 <= 1 (parsed: "+minTokenMatchFactor+")!");
         } else {
             this.minTokenMatchFactor = minTokenMatchFactor;
         }
     }
     /**
      * Getter for the maximum distance tokens are
      * considered to be used (in addition to the currently processed one)
      * for searches of Entities.
      * @return the maximum search token distance
      */
     public int getMaxSearchDistance() {
         return this.maxSearchDistance;
     }
     /**
      * Setter for the maximum distance tokens are
      * considered to be used (in addition to the currently processed one)
      * for searches of Entities.
      * @param maxSearchDistance the maximum search token distance. If
      * values &lt;= 0 are parsed the value is set to
      *  {@link #DEFAULT_MAX_SEARCH_DISTANCE}
      */
     public void setMaxSearchDistance(int maxSearchDistance) {
         if(maxSearchDistance <= 0){
             // assign the field - the unqualified name refers to the parameter, so
             // without 'this.' the default would never reach the configuration
             this.maxSearchDistance = DEFAULT_MAX_SEARCH_DISTANCE;
         } else {
             this.maxSearchDistance = maxSearchDistance;
         }
     }
     /**
      * Getter for the lemma matching state.
      * @return the current lemma matching state
      */
     public boolean isLemmaMatching() {
         return this.lemmaMatchingState;
     }

     /**
      * Setter for the lemma matching state.
      * @param lemmaMatchingState the state or <code>null</code> to reset to
      * {@link #DEFAULT_LEMMA_MATCHING_STATE}
      */
     public void setLemmaMatchingState(Boolean lemmaMatchingState) {
         if(lemmaMatchingState != null){
             this.lemmaMatchingState = lemmaMatchingState;
         } else {
             // no explicit state parsed: fall back to the default
             this.lemmaMatchingState = DEFAULT_LEMMA_MATCHING_STATE;
         }
     }
     /**
      * The minimum LabelScore required to suggest an Entity.<p>
      * The "Label Score" [0..1] represents how much of the
      * Label of an Entity matches with the Text. It compares the number
      * of Tokens of the Label with the number of Tokens matched to the
      * Text. Not exact matches for Tokens, or if the Tokens within the
      * label do appear in an other order than in the text do also
      * reduce this score.
      * @return the minimum required LabelScore
      */
     public double getMinLabelScore() {
         return this.minLabelScore;
     }
     /**
      * Setter for the minimum label score for suggested entities
      * @param score the score [0..1] or <code>null</code> to reset
      * to the default.
      */
     public void setMinLabelScore(Double score){
         if(score == null){
             // null is the marker for "reset to the default"
             minLabelScore = DEFAULT_MIN_LABEL_SCORE;
         } else if(score.isNaN() || score > 1 || score < 0) {
             // NaN compares false against both bounds, so it has to be rejected explicitly
             throw new IllegalArgumentException("The parsed MinLabelScore '"
                 + score + "' MUST BE in the range [0..1]!");
         } else {
             minLabelScore = score;
         }
     }
     /**
      * Getter for the minimum Text Score an Entity needs to reach to be
      * suggested.<p>
      * The "Text Score" is a value in the range [0..1] describing how well
      * the Label of an Entity matches the selected Span in the Text. It
      * compares the number of {@link Token}s matched from the Label with
      * the number of Tokens enclosed by the Span in the Text the Entity is
      * suggested for. Non-exact Token matches as well as Label Tokens
      * appearing in a different order than in the Text also lower this score.
      * @return the minimum required Text Score for labels of suggested
      * Entities
      */
     public double getMinTextScore() {
         return this.minTextScore;
     }
     /**
      * Setter for the minimum text score for suggested entities.
      * @param score the score [0..1] or <code>null</code> to reset
      * to the default ({@link #DEFAULT_MIN_TEXT_SCORE}).
      * @throws IllegalArgumentException if the parsed score is
      * <code>NaN</code> or not in the range [0..1]
      */
     public void setMinTextScore(Double score){
         if(score == null){
             minTextScore = DEFAULT_MIN_TEXT_SCORE;
         } else if(score.isNaN() || score > 1 || score < 0) {
             // NaN compares false against both bounds, so it needs an explicit
             // check - otherwise it would be accepted as minimum score
             throw new IllegalArgumentException("The parsed MinTextScore '"
                 + score + "' MUST BE in the range [0..1]!");
         } else {
             minTextScore = score;
         }
     }
     /**
      * Getter for the minimum chunk match score: the minimum amount of
      * matchable {@link Token}s of a {@link Chunk} an Entity must match to
      * be considered (see STANBOL-1211).<p>
      * The default is <code>&gt;0.5</code>, so that a match of only a single
      * token within a chunk - typically a noun phrase - of two words is
      * omitted.
      * @return the minimum chunk match score.
      */
     public double getMinChunkMatchScore() {
         return this.minChunkMatchScore;
     }
     /**
      * Setter for the minimum amount of matchable {@link Token}s an Entity must match
      * within an {@link Chunk} to be considered (see STANBOL-1211).<p>
      * The default is <code>&gt;0.5</code> to omit matches for a single token
      * in a chunk - typically a noun phrase - including two words.
      * @param minChunkMatchScore the minimum chunk match score [0..1] or
      * <code>null</code> to reset to the default value
      * ({@link #DEFAULT_MIN_CHUNK_MATCH_SCORE})
      * @throws IllegalArgumentException if the parsed score is
      * <code>NaN</code> or not in the range [0..1]
      */
     public void setMinChunkMatchScore(Double minChunkMatchScore) {
         if(minChunkMatchScore == null){
             this.minChunkMatchScore = DEFAULT_MIN_CHUNK_MATCH_SCORE;
         } else if(minChunkMatchScore.isNaN()
                 || minChunkMatchScore < 0.0 || minChunkMatchScore > 1.0){
             // NaN compares false against both bounds, so it needs an explicit
             // check - otherwise it would be accepted as minimum score
             throw new IllegalArgumentException("The minChunkMatchScore MUST BE "
                 + "in the range [0..1] (parsed: "+minChunkMatchScore+")!");
         } else {
             this.minChunkMatchScore = minChunkMatchScore;
         }
     }
     /**
      * Getter for the minimum match score of Entity labels against the
      * Text.<p>
      * The match score is described as the product of the label score
      * (see {@link #getMinLabelScore()}) with the text score
      * (see {@link #getMinTextScore()}). This means that this value
      * represents both how well the label matches the text and how much
      * of the label is matched with the text.
      * @return the minimum required match score
      */
     public double getMinMatchScore() {
         return this.minMatchScore;
     }
     /**
      * Setter for the minimum match score for suggested entities.
      * @param score the score [0..1] or <code>null</code> to reset
      * to the default ({@link #DEFAULT_MIN_MATCH_SCORE}).
      * @throws IllegalArgumentException if the parsed score is
      * <code>NaN</code> or not in the range [0..1]
      */
     public void setMinMatchScore(Double score){
         if(score == null){
             minMatchScore = DEFAULT_MIN_MATCH_SCORE;
         } else if(score.isNaN() || score > 1 || score < 0) {
             // NaN compares false against both bounds, so it needs an explicit
             // check - otherwise it would be accepted as minimum score
             throw new IllegalArgumentException("The parsed MinMatchScore '"
                 + score + "' MUST BE in the range [0..1]!");
         } else {
             minMatchScore = score;
         }
     }
     /**
      * Setter for the dereference entities state. Calling this method also
      * discards the cached set returned by {@link #getSelectedFields()}, so
      * that it is rebuilt on the next request.
      * @param state the state or <code>null</code> to set the
      * default ({@link #DEFAULT_DEREFERENCE_ENTITIES_STATE}).
      * @deprecated Use a Dereference Engine instead (STANBOL-336)
      */
     public void setDereferenceEntitiesState(Boolean state) {
         if(state != null){
             this.dereferenceEntitiesState = state;
         } else {
             this.dereferenceEntitiesState = DEFAULT_DEREFERENCE_ENTITIES_STATE;
         }
         //the selected fields depend on this state ... force a rebuild
         __selectedFields = null;
     }
     /**
      * Getter for the dereference entities state.
      * @return <code>true</code> if dereferencing of entities is enabled,
      * otherwise <code>false</code>
      * @deprecated Use a Dereference Engine instead (STANBOL-336)
      */
     public boolean isDereferenceEntitiesEnabled(){
         return this.dereferenceEntitiesState;
     }

     /**
      * Getter for the set of fields that need to be selected based on the
      * current EntityLinker configuration. The set is built on the first
      * call and cached afterwards. It contains<ul>
      * <li> {@link #getNameField()}
      * <li> {@link #getTypeField()}
      * <li> {@link #getRedirectField()} if {@link #getRedirectProcessingMode()}
      * != {@link RedirectProcessingMode#IGNORE}
      * <li> {@link #getDereferencedFields()} if {@link #isDereferenceEntitiesEnabled()}
      * </ul>
      * @return the selected fields for queries against the linked vocabulary
      * as unmodifiable set.
      * @deprecated Use a Dereference Engine instead (STANBOL-336)
      */
     public Set<IRI> getSelectedFields() {
         if(__selectedFields != null){
             return __selectedFields; //reuse the previously built set
         }
         Set<IRI> selected = new HashSet<IRI>();
         selected.add(nameField);
         selected.add(typeField);
         if(redirectProcessingMode != RedirectProcessingMode.IGNORE){
             selected.add(redirectField);
         }
         if(dereferenceEntitiesState){
             selected.addAll(dereferencedFields);
         }
         __selectedFields = Collections.unmodifiableSet(selected);
         return __selectedFields;
     }
     /**
      * Getter for the state if suggested entities that would have the same
      * score (e.g. 1.0 - for a perfect match) should have their score
      * slightly adapted, so that they are sorted based on their entity
      * ranking.<p>
      * The entity ranking is defined as the importance (popularity,
      * connectivity, ...) of an entity within the knowledge base.
      * @return the state
      */
     public boolean isRankEqualScoresBasedOnEntityRankings() {
         return this.rankEqualScoresBasedOnEntityRankings;
     }
     /**
      * Setter for the state if suggested entities that would have the same
      * score (e.g. 1.0 - for a perfect match) should have their score
      * slightly adapted, so that they are sorted based on their entity
      * ranking.<p>
      * The entity ranking is defined as the importance (popularity,
      * connectivity, ...) of an entity within the knowledge base.
      * @param state the state
      */
     public void setRankEqualScoresBasedOnEntityRankings(boolean state) {
         rankEqualScoresBasedOnEntityRankings = state;
     }

     /**
      * Getter for the state if <code>fise:entity-ranking</code> values should
      * be added to <code>fise:EntityAnnotation</code>s (if rankings are
      * available for the linked datasets).
      * @return the write entity ranking state
      */
     public boolean isWriteEntityRankings() {
         return this.writeEntityRankings;
     }
     /**
      * Setter for the {@link #WRITE_ENTITY_RANKINGS} state.
      * @param writeEntityRankings the state. Parse <code>null</code> to reset
      * to the default ({@link #DEFAULT_WRITE_ENTITY_RANKINGS})
      */
     public void setWriteEntityRankings(Boolean writeEntityRankings) {
         if(writeEntityRankings != null){
             this.writeEntityRankings = writeEntityRankings;
             return;
         }
         //no value parsed ... fall back to the default
         this.writeEntityRankings = DEFAULT_WRITE_ENTITY_RANKINGS;
     }


     /**
      * Adds a type to the blacklist. The call is ignored if the parsed
      * type or order is <code>null</code>.
      * @param type the type to add to the blacklist
      * @param order the value stored for the type in the blacklist
      */
     public final void addBlacklistType(IRI type, Integer order) {
         if(type == null || order == null){
             return; //incomplete entry ... nothing to add
         }
         blacklistedTypes.put(type, order);
     }
     /**
      * Adds a type to the whitelist. The call is ignored if the parsed
      * type or order is <code>null</code>.
      * @param type the type to add to the whitelist
      * @param order the value stored for the type in the whitelist
      */
     public final void addWhitelistType(IRI type, Integer order) {
         if(type == null || order == null){
             return; //incomplete entry ... nothing to add
         }
         whitelistedTypes.put(type, order);
     }

     /**
      * Setter for the default whitelist types state.
      * @param state the state. <code>null</code> is stored as is; in that
      * case {@link #isDefaultWhitelistTypes()} decides based on whether the
      * whitelist is empty.
      */
     public final void setDefaultWhitelistTypes(Boolean state){
         defaultWhitelistTypes = state;
     }


     /**
      * Getter for the default whitelist types state.<p>
      * If the state was explicitly set to <code>true</code> this returns
      * <code>true</code>. Otherwise - if the state is not set or set to
      * <code>false</code> - this returns <code>true</code> only if no
      * types are whitelisted. As a consequence the state <code>false</code>
      * combined with an empty whitelist is treated as illegal configuration
      * and ignored.
      * @return the effective default whitelist types state
      */
     public final boolean isDefaultWhitelistTypes(){
         if(Boolean.TRUE.equals(defaultWhitelistTypes)){
             return true; //explicitly enabled
         }
         //not set: if whitelist is empty ... true
         //FALSE: only valid with a non-empty whitelist (otherwise ignored)
         return whitelistedTypes.isEmpty();
     }

     /**
      * Getter for the blacklisted types.
      * @return the map holding the blacklisted types with the values added
      * by {@link #addBlacklistType(IRI, Integer)}. The map used by this
      * configuration is returned directly (no copy is created).
      */
     public final Map<IRI, Integer> getBlacklistedTypes() {
         return this.blacklistedTypes;
     }


     /**
      * Getter for the whitelisted types.
      * @return the map holding the whitelisted types with the values added
      * by {@link #addWhitelistType(IRI, Integer)}. The map used by this
      * configuration is returned directly (no copy is created).
      */
     public final Map<IRI, Integer> getWhitelistedTypes() {
         return this.whitelistedTypes;
     }
     /**
      * Checks if EntityType filtering is active or not.
      * @return <code>true</code> if at least one type is whitelisted or
      * blacklisted; <code>false</code> if both the whitelist and the
      * blacklist are empty
      */
     public final boolean isEntityTypeFilteringActive(){
         return !(whitelistedTypes.isEmpty() && blacklistedTypes.isEmpty());
     }

 }