blob: 092a5101126d0241c69f5fd45fd733b9101bc72d [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
######## This is the default tagger.properties file
######## This file is used for training and testing only;
######## the configuration for tagging is set directly in the descriptor "HmmTagger.xml"
########### ONLY FOR TRAINING #####################################################
####### FILE OR DIRECTORY CONTAINING TRAINING CORPUS:
####### can be specified either as an absolute or as a relative path
####### e.g. FILE = ../../tueba_tigerFormat.txt or FILE = C:/Data/tueba.txt
FILE =
########################## BOTH FOR TRAINING AND EVALUATION ################################
######## THESE ARE THE DEFAULT MODEL FILES FOR GERMAN AND ENGLISH
######## You can either uncomment one of them, if you want to replace a given model with your own,
#MODEL_FILE = resources/german/TuebaModel.dat
MODEL_FILE = resources/english/BrownModel.dat
######## or specify a completely different name
# MODEL_FILE =
######## If mapping of tags is desired, uncomment the following
DO_MAPPING = true
####### EXAMPLES OF MAPPING CLASSES
## Basic mapping for the Brown corpus (nltk distribution) tagset: to get 93 tags out of 473
MAPPING = org.apache.uima.examples.tagger.trainAndTest.TagMappingBrown
## Basic mapping for STTS tagset: from 54 tags onto the basic ca. 15 classes plus punctuation
#MAPPING = org.apache.uima.examples.tagger.trainAndTest.GrobMappingTueba
## If you implement your own mapping, specify its fully qualified Java class name here, in the same manner as above
#MAPPING =
######## If the corpus is in a different format and cannot be read with the provided readers,
######## specify here the fully qualified Java class name of your reader (see the examples below)
#CORPUS_READER = org.apache.uima.examples.tagger.trainAndTest.TT_FormatReader
CORPUS_READER = org.apache.uima.examples.tagger.trainAndTest.BrownReader
#CORPUS_READER =
################# ONLY FOR EVALUATION ###############################
######### GOLD STANDARD CORPUS FILE:
######### can be specified as an absolute or as a relative path
##e.g. GOLD_STANDARD = ../../tueba_tigerFormat.txt or GOLD_STANDARD = C:/Data/tueba.txt
GOLD_STANDARD =
######### Here we specify whether one intends to test a bi- or a trigram model (default is a trigram model)
N=3