| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| stanbol.enhancer.engine.name.name=Name |
| stanbol.enhancer.engine.name.description=The name of the enhancement engine as \ |
| used in the RESTful interface '/engine/<name>' |
| |
| service.ranking.name=Ranking |
| service.ranking.description=If two enhancement engines with the same name are active the \ |
| one with the higher ranking will be used to process parsed content items. |
| |
| #=============================================================================== |
| #Properties specific to the FST linking engine |
| #=============================================================================== |
| org.apache.stanbol.enhancer.engines.lucenefstlinking.FstLinkingEngineComponent.name=Apache \ |
| Stanbol Enhancer Engine: FST Linking |
| org.apache.stanbol.enhancer.engines.lucenefstlinking.FstLinkingEngineComponent.description=Lucene \ |
| FST based Entity Linking Engine implementation. |
| |
| enhancer.engines.linking.lucenefst.solrcore.name=Solr Core |
| enhancer.engines.linking.lucenefst.solrcore.description=The reference to the SolrCore. \ |
| Supports the '{server-name}:{core-name}' syntax to reference a specific Managed- / \ |
| Referenced SolrServer. If {server-name} is not present the configured {core-name} is \ |
| assumed to be available on the default SolrServer. Remote SolrServers are NOT supported! |
| |
| enhancer.engines.linking.lucenefst.fieldEncoding.name=Field Name Encoding |
| enhancer.engines.linking.lucenefst.fieldEncoding.description=Specifies how FieldNames \ |
| of the SolrCore are encoded. This is mainly used to specify the pattern used to \ |
| name fields holding entity labels of different languages. The 'SolrYard' supports \ |
| the encoding used by the Stanbol Entityhub SolrYard implementation. If 'None' is \ |
| selected the exact field names used by the SolrCore need to be configured. |
| enhancer.engines.linking.lucenefst.fieldEncoding.option.none=None |
| enhancer.engines.linking.lucenefst.fieldEncoding.option.solrYard=SolrYard |
| enhancer.engines.linking.lucenefst.fieldEncoding.option.minusPrefix='-' Prefix: '{lang}-{name}' |
| enhancer.engines.linking.lucenefst.fieldEncoding.option.underscorePrefix='_' Prefix: '{lang}_{name}' |
| enhancer.engines.linking.lucenefst.fieldEncoding.option.minusSuffix='-' Suffix: '{name}-{lang}' |
| enhancer.engines.linking.lucenefst.fieldEncoding.option.underscoreSuffix='_' Suffix: '{name}_{lang}' |
| enhancer.engines.linking.lucenefst.fieldEncoding.option.atPrefix='@' Prefix: '{lang}@{name}' |
| enhancer.engines.linking.lucenefst.fieldEncoding.option.atSuffix='@' Suffix: '{name}@{lang}' |
| |
| enhancer.engines.linking.lucenefst.fstconfig.name=FST Corpora configuration |
| enhancer.engines.linking.lucenefst.fstconfig.description=Configuration for the FST \ |
| Corpora. Syntax: '{lang};{param-name}={param-value};{param-name}={param-value};...' \ |
| Supported {param-name}s: 'field' ... the field name of the SolrIndex used for the \ |
| FST corpus (default: rdfs:label). The configured field name is encoded using the \ |
| Field Name Encoding. \ |
| 'fst' ... the {base-name} of the file with the serialized FST model (default: {field} with \ |
| non-alphanumeric chars replaced by '_'). The actual file name is '{base-name}.{lang}.fst'. \ |
| Files are located in the 'fst' folder relative to the instance directory of the \ |
| configured SolrCore. \ |
| 'generate' ... Boolean switch that allows to enable runtime generation of FST \ |
| corpora (default: false) |
| enhancer.engines.linking.lucenefst.fstfolder.name=FST Folder |
| enhancer.engines.linking.lucenefst.fstfolder.description=The Folder used to store \ |
| FST files. This supports property substitution (${property-name}) with all \ |
| OSGI and System properties. In addition the following properties are supported: \ |
| ${solr-data-dir} ... the data directory of the configured SolrCore; \ |
| ${solr-index-dir} ... the index directory of the configured SolrCore; \ |
| ${solr-server-name} ... the name of the Referenced/Managed SolrServer of the SolrCore; \ |
| ${solr-core-name} ... the name of the SolrCore |
| |
| enhancer.engines.linking.lucenefst.typeField.name=Entity Type Field |
| enhancer.engines.linking.lucenefst.typeField.description=The Solr Field holding the \ |
| type information of Entities. Values are expected to be URIs |
| |
| enhancer.engines.linking.lucenefst.rankingField.name=Entity Ranking Field |
| enhancer.engines.linking.lucenefst.rankingField.description=The Solr Field holding the \ |
| Entity Ranking (importance of the Entity within the knowledge base). Values \ |
| are expected to be floating point numbers. |
| |
| enhancer.engines.linking.lucenefst.fstThreadPoolSize.name=FST Thread Pool Size |
| enhancer.engines.linking.lucenefst.fstThreadPoolSize.description=The size of the \ |
| thread pool used for the runtime creation of FST models. NOTE that memory allocation \ |
| during creation is considerably higher than for holding the built model (up to two times) \ |
| so creating multiple models in parallel may require a lot of heap space. If memory \ |
| allocation is not an issue this value should be set based on the available CPU cores \ |
| and the resources one would like to assign to the creation of FST models. |
| |
| enhancer.engines.linking.lucenefst.entityCacheSize.name=Entity Cache Size |
| enhancer.engines.linking.lucenefst.entityCacheSize.description=Used to configure \ |
| the size of the Cache used for Entity information. While the FST linking is \ |
| fully performed in memory this engine still needs to load tagging relevant fields \ |
| (labels, types, redirects and entity ranking) for matched entities from the disk. \ |
| The EntityCache is a LRU cache for such information (default is 65k entities) |
| |
| #=============================================================================== |
| #Properties and Options used to configure Entity Linking |
| #=============================================================================== |
| |
| enhancer.engines.linking.suggestions.name=Max Suggestions |
| enhancer.engines.linking.suggestions.description=The maximum number of suggestions |
| |
| enhancer.engines.linking.minSearchTokenLength.name=Min Token Length |
| enhancer.engines.linking.minSearchTokenLength.description=The minimum \ |
| length of Tokens used to lookup Entities within the Controlled Vocabulary. This parameter is ignored \ |
| in case a POS (Part of Speech) tagger is available for the language of the parsed content. |
| |
| enhancer.engines.linking.caseSensitive.name=Case Sensitivity |
| enhancer.engines.linking.caseSensitive.description=Allows to enable/disable \ |
| case sensitive ranking. NOTE that the linking is based on the Solr FieldType of the \ |
| FST field. This only affects the ranking (fise:confidence value) of suggestions. |
| |
| enhancer.engines.linking.properNounsState.name=Link ProperNouns only |
| enhancer.engines.linking.properNounsState.description=If activated \ |
| only ProperNouns will be matched against the Vocabulary. If deactivated any Noun will be matched. \ |
| NOTE that this parameter requires a tag of the POS TagSet to be mapped against 'olia:ProperNoun'. \ |
| Otherwise mapping will not work as expected. |
| |
| enhancer.engines.linking.processedLanguages.name=Processed Languages |
| enhancer.engines.linking.processedLanguages.description=Languages to \ |
| process and optionally language specific configurations. Syntax "{lang};{param-name}={param-value};\ |
| {param-name}={param-value};...". Supported {param-name}s: "lc" - processed Lexical Categories (see \ |
| LexicalCategory enumeration for possible values); "pos" - processed Pos types (see Pos enumeration \ |
| for possible values); "tag" - processed string pos tags; "prob" - minimum probability of pos annotations. |
| |
| enhancer.engines.linking.defaultMatchingLanguage.name=Default Matching Language |
| enhancer.engines.linking.defaultMatchingLanguage.description=The language \ |
| used in addition to the language detected for the analysed text to search for Entities. Typically this \ |
| configuration is an empty string to search for labels without any language defined, but for some data \ |
| sets (such as DBpedia.org) that add languages to any labels it might improve results to change this \ |
| configuration (e.g. to 'en' in the case of DBpedia.org). |
| |
| enhancer.engines.linking.typeMappings.name=Type Mappings |
| enhancer.engines.linking.typeMappings.description=This allows to add \ |
| additional entity-type > text-annotation-type mappings. Such mappings are used to determine the \ |
| 'dc:type' value of the 'fise:TextAnnotation' created for extracted entities. Usage: \ |
| variant (a) '{uri}' short for {uri} > {uri} or (b) '{source1};{source2};..;{sourceN} > {target}'. \ |
| Note that a {source} may be only mapped to a single {target}. Multiple {source} types \ |
| can be mapped to the same {target}. |
| |
| enhancer.engines.linking.typeField.name=Type Field |
| enhancer.engines.linking.typeField.description=The field used to \ |
| retrieve the types of matched Entities. Values of that field are expected to be URIs |
| |
| enhancer.engines.linking.entityTypes.name=Entity Type Filter |
| enhancer.engines.linking.entityTypes.description=Allows to define a white/black list \ |
| based on the types of Entities. Use '!{uri}' for black listing and '{uri}' for white \ |
| listing. Include '*' to force white listing (e.g. to allow Entities without any type). \ |
| Rules are processed based on their order. |
| |