blob: 6101743d42e1f9504b9d5a21c6a4e423c9adda61 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
stanbol.enhancer.engine.name.name=Name
stanbol.enhancer.engine.name.description=The name of the enhancement engine as \
used in the RESTful interface '/engine/<name>'
service.ranking.name=Ranking
service.ranking.description=If two enhancement engines with the same name are active the \
one with the higher ranking will be used to process parsed content items.
#===============================================================================
#Properties specific to the FST linking engine
#===============================================================================
org.apache.stanbol.enhancer.engines.lucenefstlinking.FstLinkingEngineComponent.name=Apache \
Stanbol Enhancer Engine: FST Linking
org.apache.stanbol.enhancer.engines.lucenefstlinking.FstLinkingEngineComponent.description=Lucene \
FST based Entity Linking Engine implementation.
enhancer.engines.linking.lucenefst.solrcore.name=Solr Core
enhancer.engines.linking.lucenefst.solrcore.description=The reference to the SolrCore. \
Supports the '{server-name}:{core-name}' syntax to reference a specific Managed- / \
Referenced SolrServer. If {server-name} is not present the configured {core-name} is \
assumed to be available on the default SolrServer. Remote SolrServers are NOT supported!
enhancer.engines.linking.lucenefst.fieldEncoding.name=Field Name Encoding
enhancer.engines.linking.lucenefst.fieldEncoding.description=Specifies how FieldNames \
of the SolrCore are encoded. This is mainly used to specify the pattern used to \
name fields holding entity labels of different languages. The 'SolrYard' supports \
the encoding used by the Stanbol Entityhub SolrYard implementation. If 'None' is \
selected the exact field names used by the SolrCore need to be configured.
enhancer.engines.linking.lucenefst.fieldEncoding.option.none=None
enhancer.engines.linking.lucenefst.fieldEncoding.option.solrYard=SolrYard
enhancer.engines.linking.lucenefst.fieldEncoding.option.minusPrefix='-' Prefix: '{lang}-{name}'
enhancer.engines.linking.lucenefst.fieldEncoding.option.underscorePrefix='_' Prefix: '{lang}_{name}'
enhancer.engines.linking.lucenefst.fieldEncoding.option.minusSuffix='-' Suffix: '{name}-{lang}'
enhancer.engines.linking.lucenefst.fieldEncoding.option.underscoreSuffix='_' Suffix: '{name}_{lang}'
enhancer.engines.linking.lucenefst.fieldEncoding.option.atPrefix='@' Prefix: '{lang}@{name}'
enhancer.engines.linking.lucenefst.fieldEncoding.option.atSuffix='@' Suffix: '{name}@{lang}'
enhancer.engines.linking.lucenefst.fstconfig.name=FST Corpora configuration
enhancer.engines.linking.lucenefst.fstconfig.description=Configuration for the FST \
Corpora. Syntax: '{lang};{param-name}={param-value};{param-name}={param-value};...' \
Supported {param-name}s: 'field' ... the field name of the SolrIndex used for the \
FST corpus (default: rdfs:label). The configured field name is encoded using the \
Field Name Encoding. \
'fst' ... the {base-name} of the file with the serialized FST model (default: {field} with \
non-alphanumeric chars replaced by '_'). The actual file name is '{base-name}.{lang}.fst'. \
Files are located in the 'fst' folder relative to the instance directory of the \
configured SolrCore. \
'generate' ... Boolean switch that allows to enable runtime generation of FST \
corpora (default: false)
enhancer.engines.linking.lucenefst.fstfolder.name=FST Folder
enhancer.engines.linking.lucenefst.fstfolder.description=The Folder used to store \
FST files. This supports property substitution (${property-name}) with all \
OSGI and System properties. In addition the following properties are supported: \
${solr-data-dir} ... the data directory of the configured SolrCore; \
${solr-index-dir} ... the index directory of the configured SolrCore; \
${solr-server-name} ... the name of the Referenced/Managed SolrServer of the SolrCore; \
${solr-core-name} ... the name of the SolrCore
enhancer.engines.linking.lucenefst.typeField.name=Entity Type Field
enhancer.engines.linking.lucenefst.typeField.description=The Solr Field holding the \
type information of Entities. Values are expected to be URIs
enhancer.engines.linking.lucenefst.rankingField.name=Entity Ranking Field
enhancer.engines.linking.lucenefst.rankingField.description=The Solr Field holding the \
Entity Ranking (importance of the Entity within the knowledge base). Values \
are expected to be floating point numbers.
enhancer.engines.linking.lucenefst.fstThreadPoolSize.name=FST Thread Pool Size
enhancer.engines.linking.lucenefst.fstThreadPoolSize.description=The size of the \
thread pool used for the runtime creation of FST models. NOTE that memory allocation \
during creation is considerably higher than for holding the built model (up to two times) \
so creating multiple models in parallel may require a lot of heap space. If memory \
allocation is not an issue this value should be set based on the available CPU cores \
and the resources one would like to assign to the creation of FST models.
enhancer.engines.linking.lucenefst.entityCacheSize.name=Entity Cache Size
enhancer.engines.linking.lucenefst.entityCacheSize.description=Used to configure \
the size of the Cache used for Entity information. While the FST linking is \
fully performed in memory this engine still needs to load tagging relevant fields \
(labels, types, redirects and entity ranking) for matched entities from the disk. \
The EntityCache is a LRU cache for such information (default is 65k entities)
#===============================================================================
#Properties and Options used to configure the Entity Linking
#===============================================================================
enhancer.engines.linking.suggestions.name=Max Suggestions
enhancer.engines.linking.suggestions.description=The maximum number of suggestions
enhancer.engines.linking.minSearchTokenLength.name=Min Token Length
enhancer.engines.linking.minSearchTokenLength.description=The minimum \
length of Tokens used to lookup Entities within the Controlled Vocabulary. This parameter is ignored \
in case a POS (Part of Speech) tagger is available for the language of the parsed content.
enhancer.engines.linking.caseSensitive.name=Case Sensitivity
enhancer.engines.linking.caseSensitive.description=Allows to enable/disable \
case sensitive ranking. NOTE that the linking is based on the Solr FieldType of the \
FST field. This only affects the ranking (fise:confidence value) of suggestions.
enhancer.engines.linking.properNounsState.name=Link ProperNouns only
enhancer.engines.linking.properNounsState.description=If activated \
only ProperNouns will be matched against the Vocabulary. If deactivated any Noun will be matched. \
NOTE that this parameter requires a tag of the POS TagSet to be mapped against 'olia:ProperNoun'. \
Otherwise mapping will not work as expected.
enhancer.engines.linking.processedLanguages.name=Processed Languages
enhancer.engines.linking.processedLanguages.description=Languages to \
process and optionally language specific configurations. Syntax "{lang};{param-name}={param-value};\
{param-name}={param-value};...". Supported {param-name}s: "lc" - processed Lexical Categories (see \
LexicalCategory enumeration for possible values); "pos" - processed Pos types (see Pos enumeration \
for possible values); "tag" - processed string pos tags; "prob" - minimum probability of pos annotations.
enhancer.engines.linking.defaultMatchingLanguage.name=Default Matching Language
enhancer.engines.linking.defaultMatchingLanguage.description=The language \
used in addition to the language detected for the analysed text to search for Entities. Typically this \
configuration is an empty string to search for labels without any language defined, but for some data \
sets (such as DBpedia.org) that add languages to any labels it might improve results to change this \
configuration (e.g. to 'en' in the case of DBpedia.org).
enhancer.engines.linking.typeMappings.name=Type Mappings
enhancer.engines.linking.typeMappings.description=This allows to add \
additional entity-type > text-annotation-type mappings. Such mappings are used to determine the \
'dc:type' value of the 'fise:TextAnnotation' created for extracted entities. Usage: \
variant (a) '{uri}' short for {uri} > {uri} or (b) '{source1};{source2};..;{sourceN} > {target}'. \
Note that a {source} may be only mapped to a single {target}. Multiple {source} types \
can be mapped to the same {target}.
enhancer.engines.linking.typeField.name=Type Field
enhancer.engines.linking.typeField.description=The field used to \
retrieve the types of matched Entities. Values of that field are expected to be URIs
enhancer.engines.linking.entityTypes.name=Entity Type Filter
enhancer.engines.linking.entityTypes.description=Allows to define a white/black list \
based on the types of Entities. Use '!{uri}' for black listing and '{uri}' for white \
listing. Include '*' to force white listing (e.g. to allow Entities without any type). \
Rules are processed based on their order.