blob: 5a10b50817b46a5a73f3674ac6871c4825286bb0 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.ja;
import org.apache.lucene.analysis.ja.JapaneseTokenizer.Type;
import org.apache.lucene.analysis.ja.dict.Dictionary;
/**
 * Analyzed token with morphological data from its dictionary.
 *
 * <p>The token text is the slice of {@code surfaceForm} that starts at {@code offset} and spans
 * {@code length} characters. The array is stored by reference; it is not copied on construction.
 * The morphological accessors delegate to the {@link Dictionary} given at construction, using
 * this token's {@code wordId}.
 */
public class Token {
  private final Dictionary dictionary;

  private final int wordId;

  private final char[] surfaceForm;
  private final int offset;
  private final int length;

  private final int position;

  // Starts at 1 so that a token whose position length is never set explicitly still reports the
  // documented value for a normal (non-compound) token instead of the invalid value 0.
  private int positionLength = 1;

  private final Type type;

  /**
   * Creates a token. The position length starts at 1 and can be changed with {@link
   * #setPositionLength(int)}.
   *
   * @param wordId id passed to the dictionary when looking up morphological data
   * @param surfaceForm character array holding the token text; stored by reference
   * @param offset index in {@code surfaceForm} where the token text starts
   * @param length number of characters of {@code surfaceForm} that make up the token text
   * @param type type of the token
   * @param position index of the token in the input text
   * @param dictionary dictionary that all morphological lookups are delegated to
   */
  public Token(
      int wordId,
      char[] surfaceForm,
      int offset,
      int length,
      Type type,
      int position,
      Dictionary dictionary) {
    this.wordId = wordId;
    this.surfaceForm = surfaceForm;
    this.offset = offset;
    this.length = length;
    this.type = type;
    this.position = position;
    this.dictionary = dictionary;
  }

  /**
   * Returns a debugging representation containing the token text, position, length, position
   * length, type, word id and the left id looked up in the dictionary.
   */
  @Override
  public String toString() {
    return "Token(\""
        + new String(surfaceForm, offset, length)
        + "\" pos="
        + position
        + " length="
        + length
        + " posLen="
        + positionLength
        + " type="
        + type
        + " wordId="
        + wordId
        + " leftID="
        + dictionary.getLeftId(wordId)
        + ")";
  }

  /**
   * Returns the character array backing this token. This is the array passed to the constructor,
   * not a copy; the token text is the slice described by {@link #getOffset()} and {@link
   * #getLength()}.
   *
   * @return surfaceForm
   */
  public char[] getSurfaceForm() {
    return surfaceForm;
  }

  /**
   * @return offset into surfaceForm
   */
  public int getOffset() {
    return offset;
  }

  /**
   * @return length of surfaceForm
   */
  public int getLength() {
    return length;
  }

  /**
   * Builds a new String from the {@code length} characters of surfaceForm starting at {@code
   * offset}.
   *
   * @return surfaceForm as a String
   */
  public String getSurfaceFormString() {
    return new String(surfaceForm, offset, length);
  }

  /**
   * Looks up the reading in the dictionary.
   *
   * @return reading. null if token doesn't have reading.
   */
  public String getReading() {
    return dictionary.getReading(wordId, surfaceForm, offset, length);
  }

  /**
   * Looks up the pronunciation in the dictionary.
   *
   * @return pronunciation. null if token doesn't have pronunciation.
   */
  public String getPronunciation() {
    return dictionary.getPronunciation(wordId, surfaceForm, offset, length);
  }

  /**
   * Looks up the part of speech in the dictionary.
   *
   * @return part of speech.
   */
  public String getPartOfSpeech() {
    return dictionary.getPartOfSpeech(wordId);
  }

  /**
   * Looks up the inflection type in the dictionary.
   *
   * @return inflection type or null
   */
  public String getInflectionType() {
    return dictionary.getInflectionType(wordId);
  }

  /**
   * Looks up the inflection form in the dictionary.
   *
   * @return inflection form or null
   */
  public String getInflectionForm() {
    return dictionary.getInflectionForm(wordId);
  }

  /**
   * Looks up the base form in the dictionary.
   *
   * @return base form or null if token is not inflected
   */
  public String getBaseForm() {
    return dictionary.getBaseForm(wordId, surfaceForm, offset, length);
  }

  /**
   * Returns the type of this token
   *
   * @return token type, not null
   */
  public Type getType() {
    return type;
  }

  /**
   * Returns true if this token is known word
   *
   * @return true if this token is in standard dictionary. false if not.
   */
  public boolean isKnown() {
    return type == Type.KNOWN;
  }

  /**
   * Returns true if this token is unknown word
   *
   * @return true if this token is unknown word. false if not.
   */
  public boolean isUnknown() {
    return type == Type.UNKNOWN;
  }

  /**
   * Returns true if this token is defined in user dictionary
   *
   * @return true if this token is in user dictionary. false if not.
   */
  public boolean isUser() {
    return type == Type.USER;
  }

  /**
   * Get index of this token in input text
   *
   * @return position of token
   */
  public int getPosition() {
    return position;
  }

  /**
   * Set the position length (in tokens) of this token. For normal tokens this is 1; for compound
   * tokens it's {@code > 1}.
   *
   * @param positionLength new position length; stored as given, without validation
   */
  public void setPositionLength(int positionLength) {
    this.positionLength = positionLength;
  }

  /**
   * Get the length (in tokens) of this token. For normal tokens this is 1; for compound tokens
   * it's {@code > 1}. Returns 1 if {@link #setPositionLength(int)} has never been called.
   *
   * @return position length of token
   */
  public int getPositionLength() {
    return positionLength;
  }
}