blob: f1674019615983e395fe24bfe972fdf253237ce8 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
stanbol.enhancer.engine.name.name=Name
stanbol.enhancer.engine.name.description=The name of the enhancement engine as \
used in the RESTful interface '/engine/<name>'
service.ranking.name=Ranking
service.ranking.description=If two enhancement engines with the same name are active the \
one with the higher ranking will be used to process parsed content items.
#===============================================================================
#Properties and Options used to configure the Keyword Linking Engine
#===============================================================================
org.apache.stanbol.enhancer.engines.keywordextraction.engine.KeywordLinkingEngine.name=Apache \
Stanbol Enhancer Engine: Keyword Linking
org.apache.stanbol.enhancer.engines.keywordextraction.engine.KeywordLinkingEngine.description=An engine \
that extracts keywords of a Controlled Vocabulary that are mentioned within the parsed ContentItem
org.apache.stanbol.enhancer.engines.keywordextraction.referencedSiteId.name=Referenced Site
org.apache.stanbol.enhancer.engines.keywordextraction.referencedSiteId.description=The ID of the \
Entityhub Referenced Site holding the Controlled Vocabulary (e.g. a taxonomy or just a set of \
named entities)
org.apache.stanbol.enhancer.engines.keywordextraction.simpleTokenizer.name=Use Simple Tokenizer
org.apache.stanbol.enhancer.engines.keywordextraction.simpleTokenizer.description=This allows to \
deactivate the use of Language specific Tokenizers. For most European languages the Simple Tokenizer \
is sufficient.
org.apache.stanbol.enhancer.engines.keywordextraction.minSearchTokenLength.name=Min Token Length
org.apache.stanbol.enhancer.engines.keywordextraction.minSearchTokenLength.description=The minimum \
length of Tokens used to lookup Entities within the Controlled Vocabulary. This parameter is ignored \
in case a POS (Part of Speech) tagger is available for the language of the parsed content.
#org.apache.stanbol.enhancer.engines.keywordextraction.enableChunker.name=Use Chunker
#org.apache.stanbol.enhancer.engines.keywordextraction.enableChunker.description=This allows to enable/disable the use of a Chunker. Even if enabled it will only be used if one is present for the language of the content.
org.apache.stanbol.enhancer.engines.keywordextraction.nameField.name=Label Field
org.apache.stanbol.enhancer.engines.keywordextraction.nameField.description=The field used to match \
Entities with mentions within the parsed text.
org.apache.stanbol.enhancer.engines.keywordextraction.typeField.name=Type Field
org.apache.stanbol.enhancer.engines.keywordextraction.typeField.description=The field used to \
retrieve the types of matched Entities. Values of that field are expected to be URIs
org.apache.stanbol.enhancer.engines.keywordextraction.caseSensitive.name=Case Sensitivity
org.apache.stanbol.enhancer.engines.keywordextraction.caseSensitive.description=Allows to enable/disable \
case sensitive matching
org.apache.stanbol.enhancer.engines.keywordextraction.redirectField.name=Redirect Field
org.apache.stanbol.enhancer.engines.keywordextraction.redirectField.description=Entities may \
define redirects to other Entities (e.g. "USA"(http://dbpedia.org/resource/USA) -> \
"United States"(http://dbpedia.org/resource/United_States)). Values of this field are \
expected to link to other entities part of the controlled vocabulary
org.apache.stanbol.enhancer.engines.keywordextraction.maxSuggestions.name=Suggestions
org.apache.stanbol.enhancer.engines.keywordextraction.maxSuggestions.description=The maximal \
number of suggestions returned for a single mention.
org.apache.stanbol.enhancer.engines.keywordextraction.minFoundTokens.name=Number of Required Tokens
org.apache.stanbol.enhancer.engines.keywordextraction.minFoundTokens.description=For lookups with \
several words (e.g. Dr Patrick Marshall) this is the minimum number of Tokens the label of an \
entity must match to be suggested. This is only used if the label does not exactly match a part \
of the text.
org.apache.stanbol.enhancer.engines.keywordextraction.redirectMode.name=Redirect Mode
org.apache.stanbol.enhancer.engines.keywordextraction.redirectMode.description=Defines how to \
process redirects of Entities mentioned in the parsed content. Three modes to deal with such \
links are supported: Ignore redirects; Add values from redirected Entities to extracted; Follow \
Redirects and suggest the redirected Entity instead of the extracted.
org.apache.stanbol.enhancer.engines.keywordextraction.redirectMode.option.follow=Follow Redirects
org.apache.stanbol.enhancer.engines.keywordextraction.redirectMode.option.addValues=Keep extracted \
Entity, but add information of the redirected
org.apache.stanbol.enhancer.engines.keywordextraction.redirectMode.option.ignore=Ignore Redirects
org.apache.stanbol.enhancer.engines.keywordextraction.processedLanguages.name=Languages
org.apache.stanbol.enhancer.engines.keywordextraction.processedLanguages.description=Languages to \
process. An empty text indicates that all languages are processed. Use ',' as separator for \
languages (e.g. 'en,de' to enhance only English and German texts).
org.apache.stanbol.enhancer.engines.keywordextraction.defaultMatchingLanguage.name=Default Matching Language
org.apache.stanbol.enhancer.engines.keywordextraction.defaultMatchingLanguage.description=The language \
used in addition to the language detected for the analysed text to search for Entities. Typically this \
configuration is an empty string to search for labels without any language defined, but for some data \
sets (such as DBpedia.org) that add languages to any labels it might improve results to change this \
configuration (e.g. to 'en' in the case of DBpedia.org).
org.apache.stanbol.enhancer.engines.keywordextraction.dereference.name=Dereference Entities
org.apache.stanbol.enhancer.engines.keywordextraction.dereference.description=If enabled, additional \
data for suggested Entities are included.
org.apache.stanbol.enhancer.engines.keywordextraction.typeMappings.name=Type Mappings
org.apache.stanbol.enhancer.engines.keywordextraction.typeMappings.description=This allows to add \
additional entity-type > text-annotation-type mappings. Such mappings are used to determine the \
'dc:type' value of the 'fise:TextAnnotation' created for extracted entities. Usage: \
variant (a) '{uri}' short for {uri} > {uri} or (b) '{source1};{source2};..;{sourceN} > {target}'. \
Note that a {source} may be only mapped to a single {target}. Multiple {source} types \
can be mapped to the same {target}.
org.apache.stanbol.enhancer.engines.keywordextraction.keywordTokenizer.name=Keyword Tokenizer
org.apache.stanbol.enhancer.engines.keywordextraction.keywordTokenizer.description=This allows \
to use a special Tokenizer for matching keywords and alpha numeric IDs. Typical language \
specific Tokenizers tend to split such IDs in several tokens and therefore might prevent \
a correct matching.
org.apache.stanbol.enhancer.engines.keywordextraction.minTokenMatchFactor.name=Minimum Token Match Factor
org.apache.stanbol.enhancer.engines.keywordextraction.minTokenMatchFactor.description=If a Token \
of the text is compared with a Token in the Label of an Entity the similarity of those is \
expressed in the range [0..1]. This factor specifies the minimum similarity of two Tokens \
so that they are considered to match. Lower values will allow more Tokens to match (e.g. \
inflected forms of words) but may also result in false positives. Regardless of the \
configured value the similarity will influence the confidence of suggestions.