blob: 263c9c8591dc9d129760f8b24917b566a84c71d1 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.ko;
import java.util.Locale;
/**
 * Part of speech classification for Korean based on Sejong corpus classification.
 * The list of tags and their meanings is available here:
 * https://docs.google.com/spreadsheets/d/1-9blXKjtjeKZqsf4NzHeYJCrr49-nXeRF6D80udfcwY
 *
 * <p>Besides the two enums, this class offers static helpers that resolve a {@link Tag} or a
 * {@link Type} either from its textual name or from its ordinal encoded as a {@code byte}.
 */
public class POS {

  // Enum#values() clones its backing array on every call, so the arrays are
  // cached once here for the ordinal-based lookups. They are never exposed.
  private static final Tag[] TAGS = Tag.values();
  private static final Type[] TYPES = Type.values();

  /**
   * The type of the token.
   */
  public enum Type {
    /**
     * A simple morpheme.
     */
    MORPHEME,

    /**
     * Compound noun.
     */
    COMPOUND,

    /**
     * Inflected token.
     */
    INFLECT,

    /**
     * Pre-analysis token.
     */
    PREANALYSIS,
  }

  /**
   * Part of speech tag for Korean based on Sejong corpus classification.
   */
  public enum Tag {
    /**
     * Verbal endings
     */
    E(100, "Verbal endings"),

    /**
     * Interjection
     */
    IC(110, "Interjection"),

    /**
     * Ending Particle
     */
    J(120, "Ending Particle"),

    /**
     * General Adverb
     */
    MAG(130, "General Adverb"),

    /**
     * Conjunctive adverb
     */
    MAJ(131, "Conjunctive adverb"),

    /**
     * Determiner
     */
    MM(140, "Modifier"),

    /**
     * General Noun
     */
    NNG(150, "General Noun"),

    /**
     * Proper Noun
     */
    NNP(151, "Proper Noun"),

    /**
     * Dependent noun (following nouns)
     */
    NNB(152, "Dependent noun"),

    /**
     * Dependent noun
     */
    NNBC(153, "Dependent noun"),

    /**
     * Pronoun
     */
    NP(154, "Pronoun"),

    /**
     * Numeral
     */
    NR(155, "Numeral"),

    /**
     * Terminal punctuation (? ! .)
     */
    SF(160, "Terminal punctuation"),

    /**
     * Chinese character
     */
    SH(161, "Chinese Character"),

    /**
     * Foreign language
     */
    SL(162, "Foreign language"),

    /**
     * Number
     */
    SN(163, "Number"),

    /**
     * Space
     */
    SP(164, "Space"),

    /**
     * Closing brackets
     */
    SSC(165, "Closing brackets"),

    /**
     * Opening brackets
     */
    SSO(166, "Opening brackets"),

    /**
     * Separator (&middot; / :)
     */
    SC(167, "Separator"),

    /**
     * Other symbol
     */
    SY(168, "Other symbol"),

    /**
     * Ellipsis
     */
    SE(169, "Ellipsis"),

    /**
     * Adjective
     */
    VA(170, "Adjective"),

    /**
     * Negative designator
     */
    VCN(171, "Negative designator"),

    /**
     * Positive designator
     */
    VCP(172, "Positive designator"),

    /**
     * Verb
     */
    VV(173, "Verb"),

    /**
     * Auxiliary Verb or Adjective
     */
    VX(174, "Auxiliary Verb or Adjective"),

    /**
     * Prefix
     */
    XPN(181, "Prefix"),

    /**
     * Root
     */
    XR(182, "Root"),

    /**
     * Adjective Suffix
     */
    XSA(183, "Adjective Suffix"),

    /**
     * Noun Suffix
     */
    XSN(184, "Noun Suffix"),

    /**
     * Verb Suffix
     */
    XSV(185, "Verb Suffix"),

    /**
     * Unknown
     */
    UNKNOWN(999, "Unknown"),

    /**
     * Unknown
     */
    UNA(-1, "Unknown"),

    /**
     * Unknown
     */
    NA(-1, "Unknown"),

    /**
     * Unknown
     */
    VSV(-1, "Unknown");

    private final int code;
    private final String desc;

    /**
     * Returns the code associated with the tag (as defined in pos-id.def).
     */
    public int code() {
      return code;
    }

    /**
     * Returns the description associated with the tag.
     */
    public String description() {
      return desc;
    }

    /**
     * Creates a new part of speech tag.
     * @param code The code for the tag.
     * @param desc The description of the tag.
     */
    Tag(int code, String desc) {
      this.code = code;
      this.desc = desc;
    }
  }

  /**
   * Returns the {@link Tag} of the provided <code>name</code>.
   *
   * <p>The lookup is case-insensitive. Any name starting with "J" resolves to {@link Tag#J} and
   * any name starting with "E" resolves to {@link Tag#E}; every other name must match the name
   * of a {@link Tag} constant exactly (after upper-casing).
   *
   * @param name The name of the tag, must not be <code>null</code>.
   * @return The matching tag.
   * @throws IllegalArgumentException if the name matches no rule and no constant.
   * @throws NullPointerException if <code>name</code> is <code>null</code>.
   */
  public static Tag resolveTag(String name) {
    String tagUpper = name.toUpperCase(Locale.ENGLISH);
    if (tagUpper.startsWith("J")) {
      return Tag.J;
    } else if (tagUpper.startsWith("E")) {
      return Tag.E;
    } else {
      return Tag.valueOf(tagUpper);
    }
  }

  /**
   * Returns the {@link Tag} of the provided <code>tag</code>.
   *
   * @param tag The ordinal of the tag (its position in declaration order), not its
   *     {@link Tag#code()}.
   * @return The tag declared at that position.
   */
  public static Tag resolveTag(byte tag) {
    assert tag >= 0 && tag < TAGS.length;
    return TAGS[tag];
  }

  /**
   * Returns the {@link Type} of the provided <code>name</code>.
   *
   * <p>The special name "*" resolves to {@link Type#MORPHEME}; any other name is matched
   * case-insensitively against the names of the {@link Type} constants.
   *
   * @param name The name of the type, must not be <code>null</code>.
   * @return The matching type.
   * @throws IllegalArgumentException if the name matches no constant.
   * @throws NullPointerException if <code>name</code> is <code>null</code>.
   */
  public static Type resolveType(String name) {
    if ("*".equals(name)) {
      return Type.MORPHEME;
    }
    return Type.valueOf(name.toUpperCase(Locale.ENGLISH));
  }

  /**
   * Returns the {@link Type} of the provided <code>type</code>.
   *
   * @param type The ordinal of the type (its position in declaration order).
   * @return The type declared at that position.
   */
  public static Type resolveType(byte type) {
    assert type >= 0 && type < TYPES.length;
    return TYPES[type];
  }
}