enhancement-engines/keywordextraction/src/main/resources/OSGI-INF/metatype/metatype.properties - stanbol - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
 # The ASF licenses this file to You under the Apache License, Version 2.0
 # (the "License"); you may not use this file except in compliance with
 # the License.  You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 stanbol.enhancer.engine.name.name=Name
 stanbol.enhancer.engine.name.description=The name of the enhancement engine as \
 used in the RESTful interface '/engine/<name>'

 service.ranking.name=Ranking
 service.ranking.description=If two enhancement engines with the same name are active the \
 one with the higher ranking will be used to process parsed content items.


 #===============================================================================
 #Properties and Options used to configure
 #===============================================================================
 org.apache.stanbol.enhancer.engines.keywordextraction.engine.KeywordLinkingEngine.name=Apache \
 Stanbol Enhancer Engine: Keyword Linking
 org.apache.stanbol.enhancer.engines.keywordextraction.engine.KeywordLinkingEngine.description=An engine \
 that extracts keywords present within a Controlled Vocabulary mentioned within parsed ContentItem

 org.apache.stanbol.enhancer.engines.keywordextraction.referencedSiteId.name=Referenced Site
 org.apache.stanbol.enhancer.engines.keywordextraction.referencedSiteId.description=The ID of the \
 Entityhub Referenced Site holding the Controlled Vocabulary (e.g. a taxonomy or just a set of \
 named entities)

 org.apache.stanbol.enhancer.engines.keywordextraction.simpleTokenizer.name=Use Simple Tokenizer
 org.apache.stanbol.enhancer.engines.keywordextraction.simpleTokenizer.description=This allows to \
 deactivate the use of Language specific Tokenizers. For most European languages the Simple Tokenizer \
 is sufficient.

 org.apache.stanbol.enhancer.engines.keywordextraction.minSearchTokenLength.name=Min Token Length
 org.apache.stanbol.enhancer.engines.keywordextraction.minSearchTokenLength.description=The minimum \
 length of Tokens used to lookup Entities within the Controlled Vocabulary. This parameter is ignored \
 in case a POS (Part of Speech) tagger is available for the language of the parsed content.

 #org.apache.stanbol.enhancer.engines.keywordextraction.enableChunker.name=Use Chunker
 #org.apache.stanbol.enhancer.engines.keywordextraction.enableChunker.description=This allows to enable/disable the use of a Chunker. Even if enabled it will only be used if one is present for the language of the content.

 org.apache.stanbol.enhancer.engines.keywordextraction.nameField.name=Label Field
 org.apache.stanbol.enhancer.engines.keywordextraction.nameField.description=The field used to match \
 Entities with mentions within the parsed text.

 org.apache.stanbol.enhancer.engines.keywordextraction.typeField.name=Type Field
 org.apache.stanbol.enhancer.engines.keywordextraction.typeField.description=The field used to \
 retrieve the types of matched Entities. Values of that field are expected to be URIs

 org.apache.stanbol.enhancer.engines.keywordextraction.caseSensitive.name=Case Sensitivity
 org.apache.stanbol.enhancer.engines.keywordextraction.caseSensitive.description=Allows to enable/disable \
 case sensitive matching

 org.apache.stanbol.enhancer.engines.keywordextraction.redirectField.name=Redirect Field
 org.apache.stanbol.enhancer.engines.keywordextraction.redirectField.description=Entities may \
 define redirects to other Entities (e.g. "USA"(http://dbpedia.org/resource/USA) -> \
 "United States"(http://dbpedia.org/resource/United_States). Values of this field are \
 expected to link to other entities part of the controlled vocabulary

 org.apache.stanbol.enhancer.engines.keywordextraction.maxSuggestions.name=Suggestions
 org.apache.stanbol.enhancer.engines.keywordextraction.maxSuggestions.description=The maximal \
 number of suggestions returned for a single mention.

 org.apache.stanbol.enhancer.engines.keywordextraction.minFoundTokens.name=Number of Required Tokens
 org.apache.stanbol.enhancer.engines.keywordextraction.minFoundTokens.description=For lookups with \
 several words (e.g. Dr Patrick Marshall) this is the minimum number of Tokens the label of an \
 entity must match to be suggested. This is only used if the label does not exactly match a part \
 of the text.

 org.apache.stanbol.enhancer.engines.keywordextraction.redirectMode.name=Redirect Mode
 org.apache.stanbol.enhancer.engines.keywordextraction.redirectMode.description=Defines how to \
 process redirects of Entities mentioned in the parsed content. Three modes to deal with such \
 links are supported: Ignore redirects; Add values from redirected Entities to extracted; Follow \
 Redirects and suggest the redirected Entity instead of the extracted.
 org.apache.stanbol.enhancer.engines.keywordextraction.redirectMode.option.follow=Follow Redirects
 org.apache.stanbol.enhancer.engines.keywordextraction.redirectMode.option.addValues=Keep extracted \
 Entity, but add information of the redirected
 org.apache.stanbol.enhancer.engines.keywordextraction.redirectMode.option.ignore=Ignore Redirects

 org.apache.stanbol.enhancer.engines.keywordextraction.processedLanguages.name=Languages
 org.apache.stanbol.enhancer.engines.keywordextraction.processedLanguages.description=Languages to \
 process. An empty text indicates that all languages are processed. Use ',' as separator for \
 languages (e.g. 'en,de' to enhance only English and German texts).

 org.apache.stanbol.enhancer.engines.keywordextraction.defaultMatchingLanguage.name=Default Matching Language
 org.apache.stanbol.enhancer.engines.keywordextraction.defaultMatchingLanguage.description=The language \
 used in addition to the language detected for the analysed text to search for Entities. Typically this \
 configuration is an empty string to search for labels without any language defined, but for some data \
 sets (such as DBpedia.org) that add languages to any labels it might improve results to change this \
 configuration (e.g. to 'en' in the case of DBpedia.org).

 org.apache.stanbol.enhancer.engines.keywordextraction.dereference.name=Dereference Entities
 org.apache.stanbol.enhancer.engines.keywordextraction.dereference.description=If enabled additional \
 data for suggested Entities are included.

 org.apache.stanbol.enhancer.engines.keywordextraction.typeMappings.name=Type Mappings
 org.apache.stanbol.enhancer.engines.keywordextraction.typeMappings.description=This allows to add \
 additional entity-type > text-annotation-type mappings. Such mappings are used to determine the \
 'dc:type' value of the 'fise:TextAnnotation' created for extracted entities. Usage: \
 variant (a) '{uri}' short for {uri} > {uri} or (b) '{source1};{source2};..;{sourceN} > {target}'. \
 Note that a {source} may be only mapped to a single {target}. Multiple {source} types \
 can be mapped to the same {target}.

 org.apache.stanbol.enhancer.engines.keywordextraction.keywordTokenizer.name=Keyword Tokenizer
 org.apache.stanbol.enhancer.engines.keywordextraction.keywordTokenizer.description=This allows \
 to use a special Tokenizer for matching keywords and alpha numeric IDs. Typical language \
 specific Tokenizers tend to split such IDs into several tokens and therefore might prevent \
 a correct matching.

 org.apache.stanbol.enhancer.engines.keywordextraction.minTokenMatchFactor.name=Minimum Token Match Factor
 org.apache.stanbol.enhancer.engines.keywordextraction.minTokenMatchFactor.description=If a Token \
 of the text is compared with a Token in the Label of an Entity the similarity of those is \
 expressed in the range [0..1]. This factor specifies the minimum similarity of two Tokens \
 so that they are considered to match. Lower values will allow more Tokens to match (e.g. \
 inflected forms of words) but may also result in false positives. Regardless of the \
 configured value the similarity will influence the confidence of suggestions.
	# Licensed to the Apache Software Foundation (ASF) under one or more
	# contributor license agreements. See the NOTICE file distributed with
	# this work for additional information regarding copyright ownership.
	# The ASF licenses this file to You under the Apache License, Version 2.0
	# (the "License"); you may not use this file except in compliance with
	# the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	stanbol.enhancer.engine.name.name=Name
	stanbol.enhancer.engine.name.description=The name of the enhancement engine as \
	used in the RESTful interface '/engine/<name>'

	service.ranking.name=Ranking
	service.ranking.description=If two enhancement engines with the same name are active the \
	one with the higher ranking will be used to process parsed content items.


	#===============================================================================
	#Properties and Options used to configure
	#===============================================================================
	org.apache.stanbol.enhancer.engines.keywordextraction.engine.KeywordLinkingEngine.name=Apache \
	Stanbol Enhancer Engine: Keyword Linking
	org.apache.stanbol.enhancer.engines.keywordextraction.engine.KeywordLinkingEngine.description=An engine \
	that extracts keywords present within a Controlled Vocabulary mentioned within parsed ContentItem

	org.apache.stanbol.enhancer.engines.keywordextraction.referencedSiteId.name=Referenced Site
	org.apache.stanbol.enhancer.engines.keywordextraction.referencedSiteId.description=The ID of the \
	Entityhub Referenced Site holding the Controlled Vocabulary (e.g. a taxonomy or just a set of \
	named entities)

	org.apache.stanbol.enhancer.engines.keywordextraction.simpleTokenizer.name=Use Simple Tokenizer
	org.apache.stanbol.enhancer.engines.keywordextraction.simpleTokenizer.description=This allows to \
	deactivate the use of Language specific Tokenizers. For most European languages the Simple Tokenizer \
	is sufficient.

	org.apache.stanbol.enhancer.engines.keywordextraction.minSearchTokenLength.name=Min Token Length
	org.apache.stanbol.enhancer.engines.keywordextraction.minSearchTokenLength.description=The minimum \
	length of Tokens used to lookup Entities within the Controlled Vocabulary. This parameter is ignored \
	in case a POS (Part of Speech) tagger is available for the language of the parsed content.

	#org.apache.stanbol.enhancer.engines.keywordextraction.enableChunker.name=Use Chunker
	#org.apache.stanbol.enhancer.engines.keywordextraction.enableChunker.description=This allows to enable/disable the use of a Chunker. Even if enabled it will only be used if one is present for the language of the content.

	org.apache.stanbol.enhancer.engines.keywordextraction.nameField.name=Label Field
	org.apache.stanbol.enhancer.engines.keywordextraction.nameField.description=The field used to match \
	Entities with mentions within the parsed text.

	org.apache.stanbol.enhancer.engines.keywordextraction.typeField.name=Type Field
	org.apache.stanbol.enhancer.engines.keywordextraction.typeField.description=The field used to \
	retrieve the types of matched Entities. Values of that field are expected to be URIs

	org.apache.stanbol.enhancer.engines.keywordextraction.caseSensitive.name=Case Sensitivity
	org.apache.stanbol.enhancer.engines.keywordextraction.caseSensitive.description=Allows to enable/disable \
	case sensitive matching

	org.apache.stanbol.enhancer.engines.keywordextraction.redirectField.name=Redirect Field
	org.apache.stanbol.enhancer.engines.keywordextraction.redirectField.description=Entities may \
	define redirects to other Entities (e.g. "USA"(http://dbpedia.org/resource/USA) -> \
	"United States"(http://dbpedia.org/resource/United_States). Values of this field are \
	expected to link to other entities part of the controlled vocabulary

	org.apache.stanbol.enhancer.engines.keywordextraction.maxSuggestions.name=Suggestions
	org.apache.stanbol.enhancer.engines.keywordextraction.maxSuggestions.description=The maximal \
	number of suggestions returned for a single mention.

	org.apache.stanbol.enhancer.engines.keywordextraction.minFoundTokens.name=Number of Required Tokens
	org.apache.stanbol.enhancer.engines.keywordextraction.minFoundTokens.description=For lookups with \
	several words (e.g. Dr Patrick Marshall) this is the minimum number of Tokens the label of an \
	entity must match to be suggested. This is only used if the label does not exactly match a part \
	of the text.

	org.apache.stanbol.enhancer.engines.keywordextraction.redirectMode.name=Redirect Mode
	org.apache.stanbol.enhancer.engines.keywordextraction.redirectMode.description=Defines how to \
	process redirects of Entities mentioned in the parsed content. Three modes to deal with such \
	links are supported: Ignore redirects; Add values from redirected Entities to extracted; Follow \
	Redirects and suggest the redirected Entity instead of the extracted.
	org.apache.stanbol.enhancer.engines.keywordextraction.redirectMode.option.follow=Follow Redirects
	org.apache.stanbol.enhancer.engines.keywordextraction.redirectMode.option.addValues=Keep extracted \
	Entity, but add information of the redirected
	org.apache.stanbol.enhancer.engines.keywordextraction.redirectMode.option.ignore=Ignore Redirects

	org.apache.stanbol.enhancer.engines.keywordextraction.processedLanguages.name=Languages
	org.apache.stanbol.enhancer.engines.keywordextraction.processedLanguages.description=Languages to \
	process. An empty text indicates that all languages are processed. Use ',' as separator for \
	languages (e.g. 'en,de' to enhance only English and German texts).

	org.apache.stanbol.enhancer.engines.keywordextraction.defaultMatchingLanguage.name=Default Matching Language
	org.apache.stanbol.enhancer.engines.keywordextraction.defaultMatchingLanguage.description=The language \
	used in addition to the language detected for the analysed text to search for Entities. Typically this \
	configuration is an empty string to search for labels without any language defined, but for some data \
	sets (such as DBpedia.org) that add languages to any labels it might improve results to change this \
	configuration (e.g. to 'en' in the case of DBpedia.org).

	org.apache.stanbol.enhancer.engines.keywordextraction.dereference.name=Dereference Entities
	org.apache.stanbol.enhancer.engines.keywordextraction.dereference.description=If enabled additional \
	data for suggested Entities are included.

	org.apache.stanbol.enhancer.engines.keywordextraction.typeMappings.name=Type Mappings
	org.apache.stanbol.enhancer.engines.keywordextraction.typeMappings.description=This allows to add \
	additional entity-type > text-annotation-type mappings. Such mappings are used to determine the \
	'dc:type' value of the 'fise:TextAnnotation' created for extracted entities. Usage: \
	variant (a) '{uri}' short for {uri} > {uri} or (b) '{source1};{source2};..;{sourceN} > {target}'. \
	Note that a {source} may be only mapped to a single {target}. Multiple {source} types \
	can be mapped to the same {target}.

	org.apache.stanbol.enhancer.engines.keywordextraction.keywordTokenizer.name=Keyword Tokenizer
	org.apache.stanbol.enhancer.engines.keywordextraction.keywordTokenizer.description=This allows \
	to use a special Tokenizer for matching keywords and alpha numeric IDs. Typical language \
	specific Tokenizers tend to split such IDs into several tokens and therefore might prevent \
	a correct matching.

	org.apache.stanbol.enhancer.engines.keywordextraction.minTokenMatchFactor.name=Minimum Token Match Factor
	org.apache.stanbol.enhancer.engines.keywordextraction.minTokenMatchFactor.description=If a Token \
	of the text is compared with a Token in the Label of an Entity the similarity of those is \
	expressed in the range [0..1]. This factor specifies the minimum similarity of two Tokens \
	so that they are considered to match. Lower values will allow more Tokens to match (e.g. \
	inflected forms of words) but may also result in false positives. Regardless of the \
	configured value the similarity will influence the confidence of suggestions.