| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.stanbol.enhancer.engines.opennlp.chunker.model; |
| |
| import java.util.HashMap; |
| import java.util.Map; |
| |
| import opennlp.tools.chunker.Chunker; |
| |
| import org.apache.stanbol.enhancer.nlp.model.tag.TagSet; |
| import org.apache.stanbol.enhancer.nlp.phrase.PhraseTag; |
| import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory; |
| |
| /** |
| * Registry for {@link PhraseTag} {@link TagSet}s used by OpenNLP |
| * {@link Chunker}.<p> |
| * TODO: consider to add a {@link TagSet}Registry feature to the |
| * org.apache.stanbol.enhancer.nlp module. Maybe even register TagSets to |
| * the OSGI Environment. |
| * @author Rupert Westenthaler |
| * |
| */ |
| public class PhraseTagSetRegistry { |
| private static PhraseTagSetRegistry instance = new PhraseTagSetRegistry(); |
| |
| private PhraseTagSetRegistry(){} |
| |
| private final Map<String, TagSet<PhraseTag>> models = new HashMap<String,TagSet<PhraseTag>>(); |
| |
| public static PhraseTagSetRegistry getInstance(){ |
| return instance; |
| } |
| |
| private void add(TagSet<PhraseTag> model) { |
| for(String lang : model.getLanguages()){ |
| if(models.put(lang, model) != null){ |
| throw new IllegalStateException("Multiple TagSets for Language '" |
| + lang+"'! This is an error in the static confituration of " |
| + "this class. Please report this to the stanbol-dev mailing" |
| + "list!"); |
| } |
| } |
| } |
| /** |
| * Getter for the TagSet used by an {@link Chunker} of the parsed Language. |
| * If no {@link TagSet} is available for an Language this will return |
| * <code>null</code> |
| * @param language the language |
| * @return the AnnotationModel or <code>null</code> if non is defined |
| */ |
| public TagSet<PhraseTag> getTagSet(String language){ |
| return models.get(language); |
| } |
| |
| public static final TagSet<PhraseTag> DEFAULT = new TagSet<PhraseTag>( |
| "OpenNLP Default Chunker TagSet", "en","de"); |
| |
| static { |
| DEFAULT.addTag(new PhraseTag("NP", LexicalCategory.Noun)); |
| DEFAULT.addTag(new PhraseTag("VP",LexicalCategory.Verb)); |
| DEFAULT.addTag(new PhraseTag("PP", LexicalCategory.PronounOrDeterminer)); |
| getInstance().add(DEFAULT); |
| } |
| } |