| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| stanbol.enhancer.engine.name.name=Name |
| stanbol.enhancer.engine.name.description=The name of the enhancement engine as \ |
| used in the RESTful interface '/engine/<name>' |
| |
| service.ranking.name=Ranking |
| service.ranking.description=If two enhancement engines with the same name are active the \ |
| one with the higher ranking will be used to process parsed content items. |
| |
| |
| #=============================================================================== |
| #Properties and Options used to configure the Keyword Linking Engine |
| #=============================================================================== |
| org.apache.stanbol.enhancer.engines.keywordextraction.engine.KeywordLinkingEngine.name=Apache \ |
| Stanbol Enhancer Engine: Keyword Linking |
| org.apache.stanbol.enhancer.engines.keywordextraction.engine.KeywordLinkingEngine.description=An engine \ |
| that extracts keywords of a Controlled Vocabulary that are mentioned within the parsed ContentItem |
| |
| org.apache.stanbol.enhancer.engines.keywordextraction.referencedSiteId.name=Referenced Site |
| org.apache.stanbol.enhancer.engines.keywordextraction.referencedSiteId.description=The ID of the \ |
| Entityhub Referenced Site holding the Controlled Vocabulary (e.g. a taxonomy or just a set of \ |
| named entities) |
| |
| org.apache.stanbol.enhancer.engines.keywordextraction.simpleTokenizer.name=Use Simple Tokenizer |
| org.apache.stanbol.enhancer.engines.keywordextraction.simpleTokenizer.description=This allows to \ |
| deactivate the use of Language specific Tokenizers. For most European languages the Simple Tokenizer \ |
| is sufficient. |
| |
| org.apache.stanbol.enhancer.engines.keywordextraction.minSearchTokenLength.name=Min Token Length |
| org.apache.stanbol.enhancer.engines.keywordextraction.minSearchTokenLength.description=The minimum \ |
| length of Tokens used to lookup Entities within the Controlled Vocabulary. This parameter is ignored \ |
| in case a POS (Part of Speech) tagger is available for the language of the parsed content. |
| |
| #org.apache.stanbol.enhancer.engines.keywordextraction.enableChunker.name=Use Chunker |
| #org.apache.stanbol.enhancer.engines.keywordextraction.enableChunker.description=This allows to enable/disable the use of a Chunker. Even if enabled it will only be used if one is present for the language of the content. |
| |
| org.apache.stanbol.enhancer.engines.keywordextraction.nameField.name=Label Field |
| org.apache.stanbol.enhancer.engines.keywordextraction.nameField.description=The field used to match \ |
| Entities with mentions within the parsed text. |
| |
| org.apache.stanbol.enhancer.engines.keywordextraction.typeField.name=Type Field |
| org.apache.stanbol.enhancer.engines.keywordextraction.typeField.description=The field used to \ |
| retrieve the types of matched Entities. Values of that field are expected to be URIs |
| |
| org.apache.stanbol.enhancer.engines.keywordextraction.caseSensitive.name=Case Sensitivity |
| org.apache.stanbol.enhancer.engines.keywordextraction.caseSensitive.description=Allows to enable/disable \ |
| case sensitive matching |
| |
| org.apache.stanbol.enhancer.engines.keywordextraction.redirectField.name=Redirect Field |
| org.apache.stanbol.enhancer.engines.keywordextraction.redirectField.description=Entities may \ |
| define redirects to other Entities (e.g. "USA"(http://dbpedia.org/resource/USA) -> \ |
| "United States"(http://dbpedia.org/resource/United_States)). Values of this field are \ |
| expected to link to other entities part of the controlled vocabulary |
| |
| org.apache.stanbol.enhancer.engines.keywordextraction.maxSuggestions.name=Suggestions |
| org.apache.stanbol.enhancer.engines.keywordextraction.maxSuggestions.description=The maximal \ |
| number of suggestions returned for a single mention. |
| |
| org.apache.stanbol.enhancer.engines.keywordextraction.minFoundTokens.name=Number of Required Tokens |
| org.apache.stanbol.enhancer.engines.keywordextraction.minFoundTokens.description=For lookups with \ |
| several words (e.g. Dr Patrick Marshall) this is the minimum number of Tokens the label of an \ |
| entity must match to be suggested. This is only used if the label does not exactly match a part \ |
| of the text. |
| |
| org.apache.stanbol.enhancer.engines.keywordextraction.redirectMode.name=Redirect Mode |
| org.apache.stanbol.enhancer.engines.keywordextraction.redirectMode.description=Defines how to \ |
| process redirects of Entities mentioned in the parsed content. Three modes to deal with such \ |
| links are supported: Ignore redirects; Add values from redirected Entities to extracted; Follow \ |
| Redirects and suggest the redirected Entity instead of the extracted. |
| org.apache.stanbol.enhancer.engines.keywordextraction.redirectMode.option.follow=Follow Redirects |
| org.apache.stanbol.enhancer.engines.keywordextraction.redirectMode.option.addValues=Keep extracted \ |
| Entity, but add information of the redirected |
| org.apache.stanbol.enhancer.engines.keywordextraction.redirectMode.option.ignore=Ignore Redirects |
| |
| org.apache.stanbol.enhancer.engines.keywordextraction.processedLanguages.name=Languages |
| org.apache.stanbol.enhancer.engines.keywordextraction.processedLanguages.description=Languages to \ |
| process. An empty text indicates that all languages are processed. Use ',' as separator for \ |
| languages (e.g. 'en,de' to enhance only English and German texts). |
| |
| org.apache.stanbol.enhancer.engines.keywordextraction.defaultMatchingLanguage.name=Default Matching Language |
| org.apache.stanbol.enhancer.engines.keywordextraction.defaultMatchingLanguage.description=The language \ |
| used in addition to the language detected for the analysed text to search for Entities. Typically this \ |
| configuration is an empty string to search for labels without any language defined, but for some data \ |
| sets (such as DBpedia.org) that add languages to any labels it might improve results to change this \ |
| configuration (e.g. to 'en' in the case of DBpedia.org). |
| |
| org.apache.stanbol.enhancer.engines.keywordextraction.dereference.name=Dereference Entities |
| org.apache.stanbol.enhancer.engines.keywordextraction.dereference.description=If enabled additional \ |
| data for suggested Entities are included. |
| |
| org.apache.stanbol.enhancer.engines.keywordextraction.typeMappings.name=Type Mappings |
| org.apache.stanbol.enhancer.engines.keywordextraction.typeMappings.description=This allows to add \ |
| additional entity-type > text-annotation-type mappings. Such mappings are used to determine the \ |
| 'dc:type' value of the 'fise:TextAnnotation' created for extracted entities. Usage: \ |
| variant (a) '{uri}' short for {uri} > {uri} or (b) '{source1};{source2};..;{sourceN} > {target}'. \ |
| Note that a {source} may be only mapped to a single {target}. Multiple {source} types \ |
| can be mapped to the same {target}. |
| |
| org.apache.stanbol.enhancer.engines.keywordextraction.keywordTokenizer.name=Keyword Tokenizer |
| org.apache.stanbol.enhancer.engines.keywordextraction.keywordTokenizer.description=This allows \ |
| to use a special Tokenizer for matching keywords and alpha numeric IDs. Typical language \ |
| specific Tokenizers tend to split such IDs into several tokens and therefore might prevent \ |
| a correct matching. |
| |
| org.apache.stanbol.enhancer.engines.keywordextraction.minTokenMatchFactor.name=Minimum Token Match Factor |
| org.apache.stanbol.enhancer.engines.keywordextraction.minTokenMatchFactor.description=If a Token \ |
| of the text is compared with a Token in the Label of an Entity the similarity of those is \ |
| expressed in the range [0..1]. This factor specifies the minimum similarity of two Tokens \ |
| so that they are considered to match. Lower values will allow more Tokens to match (e.g. \ |
| inflected forms of words) but may also result in false positives. Regardless of the \ |
| configured value the similarity will influence the confidence of suggestions. |