/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| package org.apache.lucene.analysis.ja; |
| |
| import org.apache.lucene.analysis.ja.JapaneseTokenizer.Type; |
| import org.apache.lucene.analysis.ja.dict.Dictionary; |
| |
| /** Analyzed token with morphological data from its dictionary. */ |
| public class Token { |
| private final Dictionary dictionary; |
| |
| private final int wordId; |
| |
| private final char[] surfaceForm; |
| private final int offset; |
| private final int length; |
| |
| private final int position; |
| private int positionLength; |
| |
| private final Type type; |
| |
| public Token( |
| int wordId, |
| char[] surfaceForm, |
| int offset, |
| int length, |
| Type type, |
| int position, |
| Dictionary dictionary) { |
| this.wordId = wordId; |
| this.surfaceForm = surfaceForm; |
| this.offset = offset; |
| this.length = length; |
| this.type = type; |
| this.position = position; |
| this.dictionary = dictionary; |
| } |
| |
| @Override |
| public String toString() { |
| return "Token(\"" |
| + new String(surfaceForm, offset, length) |
| + "\" pos=" |
| + position |
| + " length=" |
| + length |
| + " posLen=" |
| + positionLength |
| + " type=" |
| + type |
| + " wordId=" |
| + wordId |
| + " leftID=" |
| + dictionary.getLeftId(wordId) |
| + ")"; |
| } |
| |
| /** @return surfaceForm */ |
| public char[] getSurfaceForm() { |
| return surfaceForm; |
| } |
| |
| /** @return offset into surfaceForm */ |
| public int getOffset() { |
| return offset; |
| } |
| |
| /** @return length of surfaceForm */ |
| public int getLength() { |
| return length; |
| } |
| |
| /** @return surfaceForm as a String */ |
| public String getSurfaceFormString() { |
| return new String(surfaceForm, offset, length); |
| } |
| |
| /** @return reading. null if token doesn't have reading. */ |
| public String getReading() { |
| return dictionary.getReading(wordId, surfaceForm, offset, length); |
| } |
| |
| /** @return pronunciation. null if token doesn't have pronunciation. */ |
| public String getPronunciation() { |
| return dictionary.getPronunciation(wordId, surfaceForm, offset, length); |
| } |
| |
| /** @return part of speech. */ |
| public String getPartOfSpeech() { |
| return dictionary.getPartOfSpeech(wordId); |
| } |
| |
| /** @return inflection type or null */ |
| public String getInflectionType() { |
| return dictionary.getInflectionType(wordId); |
| } |
| |
| /** @return inflection form or null */ |
| public String getInflectionForm() { |
| return dictionary.getInflectionForm(wordId); |
| } |
| |
| /** @return base form or null if token is not inflected */ |
| public String getBaseForm() { |
| return dictionary.getBaseForm(wordId, surfaceForm, offset, length); |
| } |
| |
| /** |
| * Returns the type of this token |
| * |
| * @return token type, not null |
| */ |
| public Type getType() { |
| return type; |
| } |
| |
| /** |
| * Returns true if this token is known word |
| * |
| * @return true if this token is in standard dictionary. false if not. |
| */ |
| public boolean isKnown() { |
| return type == Type.KNOWN; |
| } |
| |
| /** |
| * Returns true if this token is unknown word |
| * |
| * @return true if this token is unknown word. false if not. |
| */ |
| public boolean isUnknown() { |
| return type == Type.UNKNOWN; |
| } |
| |
| /** |
| * Returns true if this token is defined in user dictionary |
| * |
| * @return true if this token is in user dictionary. false if not. |
| */ |
| public boolean isUser() { |
| return type == Type.USER; |
| } |
| |
| /** |
| * Get index of this token in input text |
| * |
| * @return position of token |
| */ |
| public int getPosition() { |
| return position; |
| } |
| |
| /** |
| * Set the position length (in tokens) of this token. For normal tokens this is 1; for compound |
| * tokens it's > 1. |
| */ |
| public void setPositionLength(int positionLength) { |
| this.positionLength = positionLength; |
| } |
| |
| /** |
| * Get the length (in tokens) of this token. For normal tokens this is 1; for compound tokens it's |
| * > 1. |
| * |
| * @return position length of token |
| */ |
| public int getPositionLength() { |
| return positionLength; |
| } |
| } |