// blob: 981036cafc46b35909e4caea766c089c2d4d838a [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.enhancer.nlp.pos.olia;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.stanbol.enhancer.nlp.model.tag.TagSet;
import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
import org.apache.stanbol.enhancer.nlp.pos.Pos;
import org.apache.stanbol.enhancer.nlp.pos.PosTag;
/**
 * Defines {@link TagSet}s for the English language.<p>
 * TODO: the tag sets are currently maintained by hand; they should instead be
 * generated from the <a href="http://nlp2rdf.lod2.eu/olia/">OLIA</a> ontologies.
 *
 * @author Rupert Westenthaler
 */
public final class English {

    /** Not instantiable: this class only exposes static {@link TagSet} constants. */
    private English(){}

    /**
     * The {@link TagSet} named "Penn Treebank" for the language "en". Its
     * properties and tags are registered by the static initializer below.
     */
    public static final TagSet<PosTag> PENN_TREEBANK = new TagSet<PosTag>(
        "Penn Treebank", "en");

    static {
        //TODO: define constants for annotation model and linking model
        PENN_TREEBANK.getProperties().put("olia.annotationModel",
            new IRI("http://purl.org/olia/penn.owl"));
        PENN_TREEBANK.getProperties().put("olia.linkingModel",
            new IRI("http://purl.org/olia/penn-link.rdf"));

        // --- word class tags ---
        PENN_TREEBANK.addTag(new PosTag("CC", Pos.CoordinatingConjunction));
        PENN_TREEBANK.addTag(new PosTag("CD", Pos.CardinalNumber));
        PENN_TREEBANK.addTag(new PosTag("DT", Pos.Determiner));
        PENN_TREEBANK.addTag(new PosTag("EX", Pos.ExistentialParticle)); //TODO: unsure mapping
        PENN_TREEBANK.addTag(new PosTag("FW", Pos.Foreign));
        PENN_TREEBANK.addTag(new PosTag("IN", Pos.Preposition, Pos.SubordinatingConjunction));
        PENN_TREEBANK.addTag(new PosTag("JJ", LexicalCategory.Adjective));
        PENN_TREEBANK.addTag(new PosTag("JJR", LexicalCategory.Adjective, Pos.ComparativeParticle));
        PENN_TREEBANK.addTag(new PosTag("JJS", LexicalCategory.Adjective, Pos.SuperlativeParticle));
        PENN_TREEBANK.addTag(new PosTag("LS", Pos.ListMarker));
        PENN_TREEBANK.addTag(new PosTag("MD", Pos.ModalVerb));
        PENN_TREEBANK.addTag(new PosTag("NN", Pos.CommonNoun, Pos.SingularQuantifier));
        PENN_TREEBANK.addTag(new PosTag("NNP", Pos.ProperNoun, Pos.SingularQuantifier));
        PENN_TREEBANK.addTag(new PosTag("NNPS", Pos.ProperNoun, Pos.PluralQuantifier));
        PENN_TREEBANK.addTag(new PosTag("NNS", Pos.CommonNoun, Pos.PluralQuantifier));
        PENN_TREEBANK.addTag(new PosTag("PDT", Pos.Determiner)); //TODO should be Pre-Determiner
        PENN_TREEBANK.addTag(new PosTag("POS")); //TODO: map Possessive Ending (e.g., Nouns ending in 's)
        // "PP" / "PP$" are registered with the same mappings as "PRP" / "PRP$"
        // (presumably alternative spellings emitted by some taggers - TODO confirm).
        PENN_TREEBANK.addTag(new PosTag("PP", Pos.PersonalPronoun));
        PENN_TREEBANK.addTag(new PosTag("PP$", Pos.PossessivePronoun));
        PENN_TREEBANK.addTag(new PosTag("PRP", Pos.PersonalPronoun));
        PENN_TREEBANK.addTag(new PosTag("PRP$", Pos.PossessivePronoun));
        PENN_TREEBANK.addTag(new PosTag("RB", LexicalCategory.Adverb));
        PENN_TREEBANK.addTag(new PosTag("RBR", LexicalCategory.Adverb, Pos.ComparativeParticle));
        PENN_TREEBANK.addTag(new PosTag("RBS", LexicalCategory.Adverb, Pos.SuperlativeParticle));
        // NOTE(review): "RP" is the Penn Treebank particle tag but is mapped to
        // Pos.Participle here - confirm whether this mapping is intended.
        PENN_TREEBANK.addTag(new PosTag("RP", Pos.Participle));
        PENN_TREEBANK.addTag(new PosTag("SYM", Pos.Symbol));
        PENN_TREEBANK.addTag(new PosTag("TO", LexicalCategory.Adposition));
        PENN_TREEBANK.addTag(new PosTag("UH", LexicalCategory.Interjection));

        // --- verb tags ---
        PENN_TREEBANK.addTag(new PosTag("VB", Pos.Infinitive)); //TODO check a Verb in the base form should be Pos.Infinitive
        PENN_TREEBANK.addTag(new PosTag("VBD", Pos.PastParticiple)); //TODO check
        PENN_TREEBANK.addTag(new PosTag("VBG", Pos.PresentParticiple, Pos.Gerund));
        PENN_TREEBANK.addTag(new PosTag("VBN", Pos.PastParticiple));
        // NOTE(review): VBP and VBZ share the PresentParticiple mapping with VBG;
        // kept as-is, but the mapping looks like it needs the same check as VBD.
        PENN_TREEBANK.addTag(new PosTag("VBP", Pos.PresentParticiple));
        PENN_TREEBANK.addTag(new PosTag("VBZ", Pos.PresentParticiple));

        // --- wh-word tags ---
        PENN_TREEBANK.addTag(new PosTag("WDT", Pos.WHDeterminer));
        PENN_TREEBANK.addTag(new PosTag("WP", Pos.WHPronoun));
        PENN_TREEBANK.addTag(new PosTag("WP$", Pos.PossessivePronoun, Pos.WHPronoun));
        PENN_TREEBANK.addTag(new PosTag("WRB", Pos.WHTypeAdverbs));

        // --- punctuation and symbol tags ---
        // Closing quote written as two ACUTE ACCENT characters (U+00B4). The
        // literal uses Unicode escapes so that it does not depend on the
        // encoding the source file is read with.
        PENN_TREEBANK.addTag(new PosTag("\u00B4\u00B4", Pos.CloseQuote));
        // Closing quote written as two ASCII apostrophes, which is the
        // spelling of this tag in the Penn Treebank tagging guidelines.
        PENN_TREEBANK.addTag(new PosTag("''", Pos.CloseQuote));
        PENN_TREEBANK.addTag(new PosTag(":", Pos.Colon));
        PENN_TREEBANK.addTag(new PosTag(",", Pos.Comma));
        PENN_TREEBANK.addTag(new PosTag("$", LexicalCategory.Residual));
        PENN_TREEBANK.addTag(new PosTag("\"", Pos.Quote));
        PENN_TREEBANK.addTag(new PosTag("``", Pos.OpenQuote));
        PENN_TREEBANK.addTag(new PosTag(".", Pos.Point));
        PENN_TREEBANK.addTag(new PosTag("{", Pos.OpenCurlyBracket));
        PENN_TREEBANK.addTag(new PosTag("}", Pos.CloseCurlyBracket));
        PENN_TREEBANK.addTag(new PosTag("[", Pos.OpenSquareBracket));
        PENN_TREEBANK.addTag(new PosTag("]", Pos.CloseSquareBracket));
        PENN_TREEBANK.addTag(new PosTag("(", Pos.OpenParenthesis));
        PENN_TREEBANK.addTag(new PosTag(")", Pos.CloseParenthesis));
    }
}