enhancement-engines/lucenefstlinking/src/main/resources/OSGI-INF/metatype/metatype.properties - stanbol - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
 # The ASF licenses this file to You under the Apache License, Version 2.0
 # (the "License"); you may not use this file except in compliance with
 # the License.  You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 stanbol.enhancer.engine.name.name=Name
 stanbol.enhancer.engine.name.description=The name of the enhancement engine as \
 used in the RESTful interface '/engine/<name>'

 service.ranking.name=Ranking
 service.ranking.description=If two enhancement engines with the same name are active the \
 one with the higher ranking will be used to process parsed content items.

 #===============================================================================
 #Properties specific to the FST linking engine
 #===============================================================================
 org.apache.stanbol.enhancer.engines.lucenefstlinking.FstLinkingEngineComponent.name=Apache \
 Stanbol Enhancer Engine: FST Linking
 org.apache.stanbol.enhancer.engines.lucenefstlinking.FstLinkingEngineComponent.description=Lucene \
 FST based Entity Linking Engine implementation.

 enhancer.engines.linking.lucenefst.solrcore.name=Solr Core
 enhancer.engines.linking.lucenefst.solrcore.description=The reference to the SolrCore. \
 Supports the '{server-name}:{core-name}' syntax to reference a specific Managed- / \
 Referenced SolrServer. If {server-name} is not present the configured {core-name} is \
 assumed to be available on the default SolrServer. Remote SolrServer are NOT supported!

 enhancer.engines.linking.lucenefst.fieldEncoding.name=Field Name Encoding
 enhancer.engines.linking.lucenefst.fieldEncoding.description=Specifies how FieldNames \
 of the SolrCore are encoded. This is mainly used to specify the pattern used to \
 name fields holding entity labels of different languages. The 'SolrYard' supports \
 the encoding used by the Stanbol Entityhub SolrYard implementation. If 'None' is \
 selected the exact field names used by the SolrCore need to be configured.
 enhancer.engines.linking.lucenefst.fieldEncoding.option.none=None
 enhancer.engines.linking.lucenefst.fieldEncoding.option.solrYard=SolrYard
 enhancer.engines.linking.lucenefst.fieldEncoding.option.minusPrefix='-' Prefix: '{lang}-{name}'
 enhancer.engines.linking.lucenefst.fieldEncoding.option.underscorePrefix='_' Prefix: '{lang}_{name}'
 enhancer.engines.linking.lucenefst.fieldEncoding.option.minusSuffix='-' Suffix: '{name}-{lang}'
 enhancer.engines.linking.lucenefst.fieldEncoding.option.underscoreSuffix='_' Suffix: '{name}_{lang}'
 enhancer.engines.linking.lucenefst.fieldEncoding.option.atPrefix='@' Prefix: '{lang}@{name}'
 enhancer.engines.linking.lucenefst.fieldEncoding.option.atSuffix='@' Suffix: '{name}@{lang}'

 enhancer.engines.linking.lucenefst.fstconfig.name=FST Corpora configuration
 enhancer.engines.linking.lucenefst.fstconfig.description=Configuration for the FST \
 Corpora. Syntax: '{lang};{param-name}={param-value};{param-name}={param-value};...' \
 Supported {param-name}s: 'field' ... the field name of the SolrIndex used for the \
 FST corpus (default: rdfs:label). The configured field name is encoded using the \
 Field Name Encoding. \
 'fst' ... the {base-name} of the file with the serialized FST model (default: {field} with \
 non-alphanumeric chars replaced by '_'). The actual file name is '{base-name}.{lang}.fst'. \
 Files are located in the 'fst' folder relative to the instance directory of the \
 configured SolrCore. \
 'generate' ... Boolean switch that allows to enable runtime generation of FST \
 corpora (default: false)
 enhancer.engines.linking.lucenefst.fstfolder.name=FST Folder
 enhancer.engines.linking.lucenefst.fstfolder.description=The Folder used to store \
 FST files. This supports property substitution (${property-name}) with all \
 OSGI and System properties. In addition the following properties are supported: \
 ${solr-data-dir} ... the data directory of the configured SolrCore; \
 ${solr-index-dir} ... the index directory of the configured SolrCore; \
 ${solr-server-name} ... the name of the Referenced/Managed SolrServer of the SolrCore; \
 ${solr-core-name} ... the name of the SolrCore

 enhancer.engines.linking.lucenefst.typeField.name=Entity Type Field
 enhancer.engines.linking.lucenefst.typeField.description=The Solr Field holding the \
 type information of Entities. Values are expected to be URIs

 enhancer.engines.linking.lucenefst.rankingField.name=Entity Ranking Field
 enhancer.engines.linking.lucenefst.rankingField.description=The Solr Field holding the \
 Entity Ranking (importance of the Entity within the knowledge base). Values \
 are expected to be floating point numbers.

 enhancer.engines.linking.lucenefst.fstThreadPoolSize.name=FST Thread Pool Size
 enhancer.engines.linking.lucenefst.fstThreadPoolSize.description=The size of the \
 thread pool used for the runtime creation of FST models. NOTE that memory allocation \
 during creation is considerably higher than for holding the built model (up to two times) \
 so creating multiple models in parallel may require a lot of heap space. If memory \
 allocation is not an issue this value should be set based on the available CPU cores \
 and the resources one would like to assign to the creation of FST models.

 enhancer.engines.linking.lucenefst.entityCacheSize.name=Entity Cache Size
 enhancer.engines.linking.lucenefst.entityCacheSize.description=Used to configure \
 the size of the Cache used for Entity information. While the FST linking is \
 fully performed in memory this engine still needs to load tagging relevant fields \
 (labels, types, redirects and entity ranking) for matched entities from disk. \
 The EntityCache is a LRU cache for such information (default is 65k entities)

 #===============================================================================
 #Properties and Options used to configure
 #===============================================================================

 enhancer.engines.linking.suggestions.name=Max Suggestions
 enhancer.engines.linking.suggestions.description=The maximum number of suggestions

 enhancer.engines.linking.minSearchTokenLength.name=Min Token Length
 enhancer.engines.linking.minSearchTokenLength.description=The minimum \
 length of Tokens used to lookup Entities within the Controlled Vocabulary. This parameter is ignored \
 in case a POS (Part of Speech) tagger is available for the language of the parsed content.

 enhancer.engines.linking.caseSensitive.name=Case Sensitivity
 enhancer.engines.linking.caseSensitive.description=Allows to enable/disable \
 case sensitive ranking. NOTE that the linking is based on the Solr FieldType of the \
 FST field. This only affects the ranking (fise:confidence value) of suggestions.

 enhancer.engines.linking.properNounsState.name=Link ProperNouns only
 enhancer.engines.linking.properNounsState.description=If activated \
 only ProperNouns will be matched against the Vocabulary. If deactivated any Noun will be matched. \
 NOTE that this parameter requires a tag of the POS TagSet to be mapped against 'olia:ProperNoun'. \
 Otherwise mapping will not work as expected.

 enhancer.engines.linking.processedLanguages.name=Processed Languages
 enhancer.engines.linking.processedLanguages.description=Languages to \
 process and optionally language specific configurations. Syntax "{lang};{param-name}={param-value};\
 {param-name}={param-value};...". Supported {param-name}s: "lc" - processed Lexical Categories (see \
 LexicalCategory enumeration for possible values); "pos" - processed Pos types (see Pos enumeration \
 for possible values); "tag" - processed string pos tags; "prob" - minimum probability of pos annotations.

 enhancer.engines.linking.defaultMatchingLanguage.name=Default Matching Language
 enhancer.engines.linking.defaultMatchingLanguage.description=The language \
 used in addition to the language detected for the analysed text to search for Entities. Typically this \
 configuration is an empty string to search for labels without any language defined, but for some data \
 sets (such as DBpedia.org) that add languages to any labels it might improve results to change this \
 configuration (e.g. to 'en' in the case of DBpedia.org).

 enhancer.engines.linking.typeMappings.name=Type Mappings
 enhancer.engines.linking.typeMappings.description=This allows to add \
 additional entity-type > text-annotation-type mappings. Such mappings are used to determine the \
 'dc:type' value of the 'fise:TextAnnotation' created for extracted entities. Usage: \
 variant (a) '{uri}' short for {uri} > {uri} or (b) '{source1};{source2};..;{sourceN} > {target}'. \
 Note that a {source} may be only mapped to a single {target}. Multiple {source} types \
 can be mapped to the same {target}.

 enhancer.engines.linking.typeField.name=Type Field
 enhancer.engines.linking.typeField.description=The field used to \
 retrieve the types of matched Entities. Values of that field are expected to be URIs

 enhancer.engines.linking.entityTypes.name=Entity Type Filter
 enhancer.engines.linking.entityTypes.description=Allows to define a white/black list \
 based on the types of Entities. Use '!{uri}' for black listing and '{uri}' for white \
 listing. Include '*' to force white listing (e.g. to allow Entities without any type). \
 Rules are processed based on their order.
	# Licensed to the Apache Software Foundation (ASF) under one or more
	# contributor license agreements. See the NOTICE file distributed with
	# this work for additional information regarding copyright ownership.
	# The ASF licenses this file to You under the Apache License, Version 2.0
	# (the "License"); you may not use this file except in compliance with
	# the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	stanbol.enhancer.engine.name.name=Name
	stanbol.enhancer.engine.name.description=The name of the enhancement engine as \
	used in the RESTful interface '/engine/<name>'

	service.ranking.name=Ranking
	service.ranking.description=If two enhancement engines with the same name are active the \
	one with the higher ranking will be used to process parsed content items.

	#===============================================================================
	#Properties specific to the FST linking engine
	#===============================================================================
	org.apache.stanbol.enhancer.engines.lucenefstlinking.FstLinkingEngineComponent.name=Apache \
	Stanbol Enhancer Engine: FST Linking
	org.apache.stanbol.enhancer.engines.lucenefstlinking.FstLinkingEngineComponent.description=Lucene \
	FST based Entity Linking Engine implementation.

	enhancer.engines.linking.lucenefst.solrcore.name=Solr Core
	enhancer.engines.linking.lucenefst.solrcore.description=The reference to the SolrCore. \
	Supports the '{server-name}:{core-name}' syntax to reference a specific Managed- / \
	Referenced SolrServer. If {server-name} is not present the configured {core-name} is \
	assumed to be available on the default SolrServer. Remote SolrServer are NOT supported!

	enhancer.engines.linking.lucenefst.fieldEncoding.name=Field Name Encoding
	enhancer.engines.linking.lucenefst.fieldEncoding.description=Specifies how FieldNames \
	of the SolrCore are encoded. This is mainly used to specify the pattern used to \
	name fields holding entity labels of different languages. The 'SolrYard' supports \
	the encoding used by the Stanbol Entityhub SolrYard implementation. If 'None' is \
	selected the exact field names used by the SolrCore need to be configured.
	enhancer.engines.linking.lucenefst.fieldEncoding.option.none=None
	enhancer.engines.linking.lucenefst.fieldEncoding.option.solrYard=SolrYard
	enhancer.engines.linking.lucenefst.fieldEncoding.option.minusPrefix='-' Prefix: '{lang}-{name}'
	enhancer.engines.linking.lucenefst.fieldEncoding.option.underscorePrefix='_' Prefix: '{lang}_{name}'
	enhancer.engines.linking.lucenefst.fieldEncoding.option.minusSuffix='-' Suffix: '{name}-{lang}'
	enhancer.engines.linking.lucenefst.fieldEncoding.option.underscoreSuffix='_' Suffix: '{name}_{lang}'
	enhancer.engines.linking.lucenefst.fieldEncoding.option.atPrefix='@' Prefix: '{lang}@{name}'
	enhancer.engines.linking.lucenefst.fieldEncoding.option.atSuffix='@' Suffix: '{name}@{lang}'

	enhancer.engines.linking.lucenefst.fstconfig.name=FST Corpora configuration
	enhancer.engines.linking.lucenefst.fstconfig.description=Configuration for the FST \
	Corpora. Syntax: '{lang};{param-name}={param-value};{param-name}={param-value};...' \
	Supported {param-name}s: 'field' ... the field name of the SolrIndex used for the \
	FST corpus (default: rdfs:label). The configured field name is encoded using the \
	Field Name Encoding. \
	'fst' ... the {base-name} of the file with the serialized FST model (default: {field} with \
	non-alphanumeric chars replaced by '_'). The actual file name is '{base-name}.{lang}.fst'. \
	Files are located in the 'fst' folder relative to the instance directory of the \
	configured SolrCore. \
	'generate' ... Boolean switch that allows to enable runtime generation of FST \
	corpora (default: false)
	enhancer.engines.linking.lucenefst.fstfolder.name=FST Folder
	enhancer.engines.linking.lucenefst.fstfolder.description=The Folder used to store \
	FST files. This supports property substitution (${property-name}) with all \
	OSGI and System properties. In addition the following properties are supported: \
	${solr-data-dir} ... the data directory of the configured SolrCore; \
	${solr-index-dir} ... the index directory of the configured SolrCore; \
	${solr-server-name} ... the name of the Referenced/Managed SolrServer of the SolrCore; \
	${solr-core-name} ... the name of the SolrCore

	enhancer.engines.linking.lucenefst.typeField.name=Entity Type Field
	enhancer.engines.linking.lucenefst.typeField.description=The Solr Field holding the \
	type information of Entities. Values are expected to be URIs

	enhancer.engines.linking.lucenefst.rankingField.name=Entity Ranking Field
	enhancer.engines.linking.lucenefst.rankingField.description=The Solr Field holding the \
	Entity Ranking (importance of the Entity within the knowledge base). Values \
	are expected to be floating point numbers.

	enhancer.engines.linking.lucenefst.fstThreadPoolSize.name=FST Thread Pool Size
	enhancer.engines.linking.lucenefst.fstThreadPoolSize.description=The size of the \
	thread pool used for the runtime creation of FST models. NOTE that memory allocation \
	during creation is considerably higher than for holding the built model (up to two times) \
	so creating multiple models in parallel may require a lot of heap space. If memory \
	allocation is not an issue this value should be set based on the available CPU cores \
	and the resources one would like to assign to the creation of FST models.

	enhancer.engines.linking.lucenefst.entityCacheSize.name=Entity Cache Size
	enhancer.engines.linking.lucenefst.entityCacheSize.description=Used to configure \
	the size of the Cache used for Entity information. While the FST linking is \
	fully performed in memory this engine still needs to load tagging relevant fields \
	(labels, types, redirects and entity ranking) for matched entities from disk. \
	The EntityCache is a LRU cache for such information (default is 65k entities)

	#===============================================================================
	#Properties and Options used to configure
	#===============================================================================

	enhancer.engines.linking.suggestions.name=Max Suggestions
	enhancer.engines.linking.suggestions.description=The maximum number of suggestions

	enhancer.engines.linking.minSearchTokenLength.name=Min Token Length
	enhancer.engines.linking.minSearchTokenLength.description=The minimum \
	length of Tokens used to lookup Entities within the Controlled Vocabulary. This parameter is ignored \
	in case a POS (Part of Speech) tagger is available for the language of the parsed content.

	enhancer.engines.linking.caseSensitive.name=Case Sensitivity
	enhancer.engines.linking.caseSensitive.description=Allows to enable/disable \
	case sensitive ranking. NOTE that the linking is based on the Solr FieldType of the \
	FST field. This only affects the ranking (fise:confidence value) of suggestions.

	enhancer.engines.linking.properNounsState.name=Link ProperNouns only
	enhancer.engines.linking.properNounsState.description=If activated \
	only ProperNouns will be matched against the Vocabulary. If deactivated any Noun will be matched. \
	NOTE that this parameter requires a tag of the POS TagSet to be mapped against 'olia:ProperNoun'. \
	Otherwise mapping will not work as expected.

	enhancer.engines.linking.processedLanguages.name=Processed Languages
	enhancer.engines.linking.processedLanguages.description=Languages to \
	process and optionally language specific configurations. Syntax "{lang};{param-name}={param-value};\
	{param-name}={param-value};...". Supported {param-name}s: "lc" - processed Lexical Categories (see \
	LexicalCategory enumeration for possible values); "pos" - processed Pos types (see Pos enumeration \
	for possible values); "tag" - processed string pos tags; "prob" - minimum probability of pos annotations.

	enhancer.engines.linking.defaultMatchingLanguage.name=Default Matching Language
	enhancer.engines.linking.defaultMatchingLanguage.description=The language \
	used in addition to the language detected for the analysed text to search for Entities. Typically this \
	configuration is an empty string to search for labels without any language defined, but for some data \
	sets (such as DBpedia.org) that add languages to any labels it might improve results to change this \
	configuration (e.g. to 'en' in the case of DBpedia.org).

	enhancer.engines.linking.typeMappings.name=Type Mappings
	enhancer.engines.linking.typeMappings.description=This allows to add \
	additional entity-type > text-annotation-type mappings. Such mappings are used to determine the \
	'dc:type' value of the 'fise:TextAnnotation' created for extracted entities. Usage: \
	variant (a) '{uri}' short for {uri} > {uri} or (b) '{source1};{source2};..;{sourceN} > {target}'. \
	Note that a {source} may be only mapped to a single {target}. Multiple {source} types \
	can be mapped to the same {target}.

	enhancer.engines.linking.typeField.name=Type Field
	enhancer.engines.linking.typeField.description=The field used to \
	retrieve the types of matched Entities. Values of that field are expected to be URIs

	enhancer.engines.linking.entityTypes.name=Entity Type Filter
	enhancer.engines.linking.entityTypes.description=Allows to define a white/black list \
	based on the types of Entities. Use '!{uri}' for black listing and '{uri}' for white \
	listing. Include '*' to force white listing (e.g. to allow Entities without any type). \
	Rules are processed based on their order.