blob: a61c63a688652ebf9b38480285166f03527c48d7 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nlpcraft.common.nlp.pos
import scala.collection.immutable.HashMap
/**
 * Helper for Penn Treebank part-of-speech (POS) tags.
 *
 * Provides a tag-to-description lookup table, a reserved synthetic tag and
 * a few ready-made groups of related tags.
 */
object NCPennTreebank {
    // Tag -> human-readable description.
    // See http://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
    // Entries are grouped by word class for readability only; the map is looked up by key.
    private final val PENN_TREEBANK: HashMap[String, String] = HashMap[String, String](
        // Conjunctions, determiners and other function words.
        "CC" -> "Coordinating conjunction",
        "DT" -> "Determiner",
        "PDT" -> "Predeterminer",
        "EX" -> "Existential there",
        "IN" -> "Preposition or sub. conjunction",
        "TO" -> "To",
        "MD" -> "Modal",
        "RP" -> "Particle",
        "POS" -> "Possessive ending",

        // Numbers, symbols and miscellaneous.
        "CD" -> "Cardinal number",
        "FW" -> "Foreign word",
        "LS" -> "List item marker",
        "SYM" -> "Symbol",
        "UH" -> "Interjection",

        // Adjectives.
        "JJ" -> "Adjective",
        "JJR" -> "Adjective, comparative",
        "JJS" -> "Adjective, superlative",

        // Nouns.
        "NN" -> "Noun, singular or mass",
        "NNS" -> "Noun, plural",
        "NNP" -> "Proper noun, singular",
        "NNPS" -> "Proper noun, plural",

        // Pronouns.
        "PRP" -> "Personal pronoun",
        "PRP$" -> "Possessive pronoun",

        // Adverbs.
        "RB" -> "Adverb",
        "RBR" -> "Adverb, comparative",
        "RBS" -> "Adverb, superlative",

        // Verbs.
        "VB" -> "Verb, base form",
        "VBD" -> "Verb, past tense",
        "VBG" -> "Verb, gerund or present part",
        "VBN" -> "Verb, past participle",
        "VBP" -> "Verb, non-3rd person sing. present",
        "VBZ" -> "Verb, 3rd person sing. present",

        // Wh-words.
        "WDT" -> "Wh-determiner",
        "WP" -> "Wh-pronoun",
        "WP$" -> "Possessive wh-pronoun",
        "WRB" -> "Wh-adverb"
    )

    /** Tag reserved for synthetic tokens; it is not a key of the lookup table. */
    final val SYNTH_POS = "---"

    /** Description reported for [[SYNTH_POS]]. */
    final val SYNTH_POS_DESC = "Synthetic tag"

    /** All noun tags. */
    final val NOUNS_POS: Seq[String] = Seq("NN", "NNS", "NNP", "NNPS")

    /** All verb tags. */
    final val VERBS_POS: Seq[String] = Seq("VB", "VBD", "VBG", "VBN", "VBP", "VBZ")

    /** All wh-word tags. */
    final val WHS_POS: Seq[String] = Seq("WDT", "WP", "WP$", "WRB")

    /** All adjective tags. */
    final val JJS_POS: Seq[String] = Seq("JJ", "JJR", "JJS")

    /**
     * Gets the description of the given POS tag.
     *
     * @param tag POS tag to describe.
     * @return `Some(SYNTH_POS_DESC)` for the synthetic tag, the table entry for a
     *      known tag, or `None` when the tag is not in the table.
     */
    def description(tag: String): Option[String] =
        if (!isSynthetic(tag))
            PENN_TREEBANK.get(tag)
        else
            Some(SYNTH_POS_DESC)

    /**
     * Tests whether the given tag is a key of the lookup table.
     * The synthetic tag is not stored in the table, so it yields `false` here.
     *
     * @param tag POS tag to check.
     */
    def contains(tag: String): Boolean =
        PENN_TREEBANK.contains(tag)

    /**
     * Tests whether the given tag equals the synthetic tag [[SYNTH_POS]].
     *
     * @param tag POS tag to check.
     */
    def isSynthetic(tag: String): Boolean =
        SYNTH_POS == tag
}