# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
######## This is the default tagger.properties file | |
######## This file is used for training and testing only;
######## the configuration for tagging is tuned directly in the descriptor "HmmTagger.xml"
########### ONLY FOR TRAINING ##################################################### | |
####### FILE OR DIRECTORY CONTAINING TRAINING CORPUS: | |
####### can be specified either as an absolute or as a relative path
####### e.g. FILE = ../../tueba_tigerFormat.txt or FILE = C:/Data/tueba.txt | |
FILE = | |
########################## BOTH FOR TRAINING AND EVALUATION ################################ | |
######## THESE ARE THE DEFAULT MODEL FILES FOR GERMAN AND ENGLISH | |
######## You can either uncomment one of them, if you want to replace a given model with your own,
#MODEL_FILE = resources/german/TuebaModel.dat | |
MODEL_FILE = resources/english/BrownModel.dat | |
######## or specify a completely different name | |
# MODEL_FILE = | |
######## If mapping of tags is desired, uncomment the following | |
DO_MAPPING = true | |
####### EXAMPLES OF MAPPING CLASSES | |
## Basic mapping for the Brown corpus (nltk distribution) tagset: to get 93 tags out of 473 | |
MAPPING = org.apache.uima.examples.tagger.trainAndTest.TagMappingBrown | |
## Basic mapping for STTS tagset: from 54 tags onto the basic ca. 15 classes plus punctuation | |
#MAPPING = org.apache.uima.examples.tagger.trainAndTest.GrobMappingTueba | |
## If you implement your own mapping, specify the fully qualified Java class name here, in the same manner as above
#MAPPING = | |
######## If the corpus is in a different format and cannot be read with the provided READERS,
######## specify the fully qualified Java class name of your own reader here (see examples below)
#CORPUS_READER = org.apache.uima.examples.tagger.trainAndTest.TT_FormatReader | |
CORPUS_READER = org.apache.uima.examples.tagger.trainAndTest.BrownReader | |
#CORPUS_READER = | |
################# ONLY FOR EVALUATION ############################### | |
######### GOLD STANDARD CORPUS FILE: | |
######### can be specified as an absolute or as a relative path | |
##e.g. GOLD_STANDARD = ../../tueba_tigerFormat.txt or GOLD_STANDARD = C:/Data/tueba.txt | |
GOLD_STANDARD = | |
######### Specifies whether a bigram or a trigram model is to be tested (the default, N=3, is a trigram model)
N=3 | |