// blob: 2f6a80972587f36dc1d5570432c0242484c2d1a2 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.ko;
import java.util.Locale;
/**
 * Part of speech classification for Korean based on Sejong corpus classification. The list of tags
 * and their meanings is available here:
 * https://docs.google.com/spreadsheets/d/1-9blXKjtjeKZqsf4NzHeYJCrr49-nXeRF6D80udfcwY
 *
 * <p>Besides the two enums, this class offers lookup helpers that resolve a {@link Tag} or a
 * {@link Type} either from its textual name or from its ordinal stored in a single byte.
 */
public class POS {

  /**
   * Cached copy of {@link Tag#values()}. {@code values()} clones its backing array on every call,
   * so the byte-based lookup reuses this private copy instead. The array is never exposed.
   */
  private static final Tag[] TAGS = Tag.values();

  /** Cached copy of {@link Type#values()}, kept for the same reason as {@link #TAGS}. */
  private static final Type[] TYPES = Type.values();

  /** The type of the token. */
  public enum Type {
    /** A simple morpheme. */
    MORPHEME,

    /** Compound noun. */
    COMPOUND,

    /** Inflected token. */
    INFLECT,

    /** Pre-analysis token. */
    PREANALYSIS,
  }

  /** Part of speech tag for Korean based on Sejong corpus classification. */
  public enum Tag {
    /** Verbal endings */
    E(100, "Verbal endings"),

    /** Interjection */
    IC(110, "Interjection"),

    /** Ending Particle */
    J(120, "Ending Particle"),

    /** General Adverb */
    MAG(130, "General Adverb"),

    /** Conjunctive adverb */
    MAJ(131, "Conjunctive adverb"),

    /** Determiner */
    MM(140, "Modifier"),

    /** General Noun */
    NNG(150, "General Noun"),

    /** Proper Noun */
    NNP(151, "Proper Noun"),

    /** Dependent noun (following nouns) */
    NNB(152, "Dependent noun"),

    /** Dependent noun */
    NNBC(153, "Dependent noun"),

    /** Pronoun */
    NP(154, "Pronoun"),

    /** Numeral */
    NR(155, "Numeral"),

    /** Terminal punctuation (? ! .) */
    SF(160, "Terminal punctuation"),

    /** Chinese character */
    SH(161, "Chinese Character"),

    /** Foreign language */
    SL(162, "Foreign language"),

    /** Number */
    SN(163, "Number"),

    /** Space */
    SP(164, "Space"),

    /** Closing brackets */
    SSC(165, "Closing brackets"),

    /** Opening brackets */
    SSO(166, "Opening brackets"),

    /** Separator (&middot; / :) */
    SC(167, "Separator"),

    /** Other symbol */
    SY(168, "Other symbol"),

    /** Ellipsis */
    SE(169, "Ellipsis"),

    /** Adjective */
    VA(170, "Adjective"),

    /** Negative designator */
    VCN(171, "Negative designator"),

    /** Positive designator */
    VCP(172, "Positive designator"),

    /** Verb */
    VV(173, "Verb"),

    /** Auxiliary Verb or Adjective */
    VX(174, "Auxiliary Verb or Adjective"),

    /** Prefix */
    XPN(181, "Prefix"),

    /** Root */
    XR(182, "Root"),

    /** Adjective Suffix */
    XSA(183, "Adjective Suffix"),

    /** Noun Suffix */
    XSN(184, "Noun Suffix"),

    /** Verb Suffix */
    XSV(185, "Verb Suffix"),

    /** Unknown */
    UNKNOWN(999, "Unknown"),

    /** Unknown (carries the code {@code -1}) */
    UNA(-1, "Unknown"),

    /** Unknown (carries the code {@code -1}) */
    NA(-1, "Unknown"),

    /** Unknown (carries the code {@code -1}) */
    VSV(-1, "Unknown");

    private final int code;
    private final String desc;

    /** Returns the code associated with the tag (as defined in pos-id.def). */
    public int code() {
      return code;
    }

    /** Returns the description associated with the tag. */
    public String description() {
      return desc;
    }

    /**
     * Returns a new part of speech tag.
     *
     * @param code The code for the tag.
     * @param desc The description of the tag.
     */
    Tag(int code, String desc) {
      this.code = code;
      this.desc = desc;
    }
  }

  /**
   * Returns the {@link Tag} of the provided <code>name</code>.
   *
   * <p>The name is upper-cased before matching. Any name starting with {@code J} resolves to
   * {@link Tag#J} and any name starting with {@code E} resolves to {@link Tag#E}; every other name
   * must match a {@link Tag} constant exactly.
   *
   * @param name the tag name, matched case-insensitively
   * @return the matching tag
   * @throws IllegalArgumentException if the name matches no {@link Tag} constant
   * @throws NullPointerException if <code>name</code> is null
   */
  public static Tag resolveTag(String name) {
    String tagUpper = name.toUpperCase(Locale.ENGLISH);
    if (tagUpper.startsWith("J")) {
      return Tag.J;
    } else if (tagUpper.startsWith("E")) {
      return Tag.E;
    } else {
      return Tag.valueOf(tagUpper);
    }
  }

  /**
   * Returns the {@link Tag} of the provided <code>tag</code>.
   *
   * @param tag the ordinal (declaration position) of the tag; must lie in {@code [0,
   *     Tag.values().length)}
   * @return the tag declared at that position
   */
  public static Tag resolveTag(byte tag) {
    // A byte is signed: check both bounds so a negative value trips the assertion
    // instead of surfacing later as a bare ArrayIndexOutOfBoundsException.
    assert tag >= 0 && tag < TAGS.length : "invalid tag ordinal: " + tag;
    return TAGS[tag];
  }

  /**
   * Returns the {@link Type} of the provided <code>name</code>.
   *
   * <p>The special name {@code "*"} resolves to {@link Type#MORPHEME}; any other name is
   * upper-cased and must match a {@link Type} constant exactly.
   *
   * @param name the type name, matched case-insensitively, or {@code "*"}
   * @return the matching type
   * @throws IllegalArgumentException if the name matches no {@link Type} constant
   * @throws NullPointerException if <code>name</code> is null
   */
  public static Type resolveType(String name) {
    if ("*".equals(name)) {
      return Type.MORPHEME;
    }
    return Type.valueOf(name.toUpperCase(Locale.ENGLISH));
  }

  /**
   * Returns the {@link Type} of the provided <code>type</code>.
   *
   * @param type the ordinal (declaration position) of the type; must lie in {@code [0,
   *     Type.values().length)}
   * @return the type declared at that position
   */
  public static Type resolveType(byte type) {
    // Same two-sided bound check as resolveTag(byte): bytes can be negative.
    assert type >= 0 && type < TYPES.length : "invalid type ordinal: " + type;
    return TYPES[type];
  }
}