| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| ######## This is the default tagger.properties file |
| ######## This file is used for training and testing only, |
| ######## The configuration for tagging is directly tuned in the descriptor "HmmTagger.xml" |
| |
| ########### ONLY FOR TRAINING ##################################################### |
| |
| |
| ####### FILE OR DIRECTORY CONTAINING TRAINING CORPUS: |
| ####### can be specified either as an absolute or as a relative path |
| ####### e.g. FILE = ../../tueba_tigerFormat.txt or FILE = C:/Data/tueba.txt |
| FILE = |
| |
| |
| ########################## BOTH FOR TRAINING AND EVALUATION ################################ |
| |
| ######## THESE ARE THE DEFAULT MODEL FILES FOR GERMAN AND ENGLISH |
| ######## You can either uncomment one of them, if you want to replace the given models with your own, |
| |
| #MODEL_FILE = resources/german/TuebaModel.dat |
| MODEL_FILE = resources/english/BrownModel.dat |
| |
| ######## or specify a completely different name |
| # MODEL_FILE = |
| |
| ######## If mapping of tags is desired, uncomment the following |
| DO_MAPPING = true |
| |
| |
| ####### EXAMPLES OF MAPPING CLASSES |
| |
| ## Basic mapping for the Brown corpus (nltk distribution) tagset: to get 93 tags out of 473 |
| MAPPING = org.apache.uima.examples.tagger.trainAndTest.TagMappingBrown |
| |
| ## Basic mapping for STTS tagset: from 54 tags onto the basic ca. 15 classes plus punctuation |
| #MAPPING = org.apache.uima.examples.tagger.trainAndTest.GrobMappingTueba |
| |
| ## If you implement your own mapping, specify the fully qualified Java class name here, in the same manner as above |
| #MAPPING = |
| |
| ######## If the corpus is in a different format and cannot be read with the provided READERS, |
| ######## specify here the fully qualified Java class name of the reader to use (see examples below) |
| |
| #CORPUS_READER = org.apache.uima.examples.tagger.trainAndTest.TT_FormatReader |
| CORPUS_READER = org.apache.uima.examples.tagger.trainAndTest.BrownReader |
| |
| #CORPUS_READER = |
| |
| ################# ONLY FOR EVALUATION ############################### |
| |
| ######### GOLD STANDARD CORPUS FILE: |
| ######### can be specified as an absolute or as a relative path |
| ##e.g. GOLD_STANDARD = ../../tueba_tigerFormat.txt or GOLD_STANDARD = C:/Data/tueba.txt |
| |
| GOLD_STANDARD = |
| |
| ######### Here we specify whether one intends to test a bi- or a trigram model (default is a trigram model) |
| N=3 |
| |