# sandbox-2.3.0-01/Tagger/tagger.properties - uima-sandbox - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 #  or more contributor license agreements.  See the NOTICE file
 #  distributed with this work for additional information
 #  regarding copyright ownership.  The ASF licenses this file
 #  to you under the Apache License, Version 2.0 (the
 #  "License"); you may not use this file except in compliance
 #  with the License.  You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
 #  Unless required by applicable law or agreed to in writing,
 #  software distributed under the License is distributed on an
 #  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 #  KIND, either express or implied.  See the License for the
 #  specific language governing permissions and limitations
 #  under the License.

 ######## This is the default tagger.properties file
 ######## This file is used for training and testing only,
 ######## The configuration for tagging is directly tuned in the descriptor "HmmTagger.xml"

 ###########  ONLY FOR TRAINING #####################################################


 ####### FILE OR DIRECTORY CONTAINING TRAINING CORPUS:
 ####### can be specified either as an absolute or as a relative path
 ####### e.g. FILE = ../../tueba_tigerFormat.txt or FILE = C:/Data/tueba.txt
 FILE =


 ##########################  BOTH FOR TRAINING AND EVALUATION  ################################

 ######## THESE ARE THE DEFAULT MODEL FILES FOR GERMAN AND ENGLISH
 ######## You can either uncomment one of them, if you want to replace one of the given models with your own,

 #MODEL_FILE = resources/german/TuebaModel.dat
 MODEL_FILE = resources/english/BrownModel.dat

 ######## or specify a completely different name
 # MODEL_FILE =

 ######## If mapping of tags is desired, uncomment the following
 DO_MAPPING = true


 #######	EXAMPLES OF MAPPING CLASSES

 ## Basic mapping for the Brown corpus (nltk distribution) tagset: to get 93 tags out of 473
 MAPPING = org.apache.uima.examples.tagger.trainAndTest.TagMappingBrown

 ## Basic mapping for STTS tagset: from 54 tags onto the basic ca. 15 classes plus punctuation
 #MAPPING = org.apache.uima.examples.tagger.trainAndTest.GrobMappingTueba

 ## If you implement your own mapping, specify its fully qualified Java class name here, in the same manner as above
 #MAPPING =

 ######## If the corpus is in a different format and cannot be read with the provided READERS,
 ######## specify here the fully qualified Java class name of your own reader (see examples below)

 #CORPUS_READER = org.apache.uima.examples.tagger.trainAndTest.TT_FormatReader
 CORPUS_READER = org.apache.uima.examples.tagger.trainAndTest.BrownReader

 #CORPUS_READER =

 #################      ONLY FOR EVALUATION   ###############################

 ######### GOLD STANDARD CORPUS FILE:
 ######### can be specified as an absolute or as a relative path
 ##e.g. GOLD_STANDARD = ../../tueba_tigerFormat.txt or GOLD_STANDARD = C:/Data/tueba.txt

 GOLD_STANDARD =

 ######### Here we specify whether one intends to test a bi- or a trigram model (default is a trigram model)
 N=3
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	######## This is the default tagger.properties file
	######## This file is used for training and testing only,
	######## The configuration for tagging is directly tuned in the descriptor "HmmTagger.xml"

	########### ONLY FOR TRAINING #####################################################


	####### FILE OR DIRECTORY CONTAINING TRAINING CORPUS:
	####### can be specified either as an absolute or as a relative path
	####### e.g. FILE = ../../tueba_tigerFormat.txt or FILE = C:/Data/tueba.txt
	FILE =


	########################## BOTH FOR TRAINING AND EVALUATION ################################

	######## THESE ARE THE DEFAULT MODEL FILES FOR GERMAN AND ENGLISH
	######## You can either uncomment one of them, if you want to replace one of the given models with your own,

	#MODEL_FILE = resources/german/TuebaModel.dat
	MODEL_FILE = resources/english/BrownModel.dat

	######## or specify a completely different name
	# MODEL_FILE =

	######## If mapping of tags is desired, uncomment the following
	DO_MAPPING = true


	####### EXAMPLES OF MAPPING CLASSES

	## Basic mapping for the Brown corpus (nltk distribution) tagset: to get 93 tags out of 473
	MAPPING = org.apache.uima.examples.tagger.trainAndTest.TagMappingBrown

	## Basic mapping for STTS tagset: from 54 tags onto the basic ca. 15 classes plus punctuation
	#MAPPING = org.apache.uima.examples.tagger.trainAndTest.GrobMappingTueba

	## If you implement your own mapping, specify its fully qualified Java class name here, in the same manner as above
	#MAPPING =

	######## If the corpus is in a different format and cannot be read with the provided READERS,
	######## specify here the fully qualified Java class name of your own reader (see examples below)

	#CORPUS_READER = org.apache.uima.examples.tagger.trainAndTest.TT_FormatReader
	CORPUS_READER = org.apache.uima.examples.tagger.trainAndTest.BrownReader

	#CORPUS_READER =

	################# ONLY FOR EVALUATION ###############################

	######### GOLD STANDARD CORPUS FILE:
	######### can be specified as an absolute or as a relative path
	##e.g. GOLD_STANDARD = ../../tueba_tigerFormat.txt or GOLD_STANDARD = C:/Data/tueba.txt

	GOLD_STANDARD =

	######### Here we specify whether one intends to test a bi- or a trigram model (default is a trigram model)
	N=3