lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/Token.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.analysis.ja;


 import org.apache.lucene.analysis.ja.JapaneseTokenizer.Type;
 import org.apache.lucene.analysis.ja.dict.Dictionary;

 /**
  * Analyzed token with morphological data from its dictionary.
  */
 public class Token {
   private final Dictionary dictionary;

   private final int wordId;

   private final char[] surfaceForm;
   private final int offset;
   private final int length;

   private final int position;
   private int positionLength;

   private final Type type;

   public Token(int wordId, char[] surfaceForm, int offset, int length, Type type, int position, Dictionary dictionary) {
     this.wordId = wordId;
     this.surfaceForm = surfaceForm;
     this.offset = offset;
     this.length = length;
     this.type = type;
     this.position = position;
     this.dictionary = dictionary;
   }

   @Override
   public String toString() {
     return "Token(\"" + new String(surfaceForm, offset, length) + "\" pos=" + position + " length=" + length +
       " posLen=" + positionLength + " type=" + type + " wordId=" + wordId +
       " leftID=" + dictionary.getLeftId(wordId) + ")";
   }

   /**
    * @return surfaceForm
    */
   public char[] getSurfaceForm() {
     return surfaceForm;
   }

   /**
    * @return offset into surfaceForm
    */
   public int getOffset() {
     return offset;
   }

   /**
    * @return length of surfaceForm
    */
   public int getLength() {
     return length;
   }

   /**
    * @return surfaceForm as a String
    */
   public String getSurfaceFormString() {
     return new String(surfaceForm, offset, length);
   }

   /**
    * @return reading. null if token doesn't have reading.
    */
   public String getReading() {
     return dictionary.getReading(wordId, surfaceForm, offset, length);
   }

   /**
    * @return pronunciation. null if token doesn't have pronunciation.
    */
   public String getPronunciation() {
     return dictionary.getPronunciation(wordId, surfaceForm, offset, length);
   }

   /**
    * @return part of speech.
    */
   public String getPartOfSpeech() {
     return dictionary.getPartOfSpeech(wordId);
   }

   /**
    * @return inflection type or null
    */
   public String getInflectionType() {
     return dictionary.getInflectionType(wordId);
   }

   /**
    * @return inflection form or null
    */
   public String getInflectionForm() {
     return dictionary.getInflectionForm(wordId);
   }

   /**
    * @return base form or null if token is not inflected
    */
   public String getBaseForm() {
     return dictionary.getBaseForm(wordId, surfaceForm, offset, length);
   }

   /**
    * Returns the type of this token
    * @return token type, not null
    */
   public Type getType() {
     return type;
   }

   /**
    * Returns true if this token is known word
    * @return true if this token is in standard dictionary. false if not.
    */
   public boolean isKnown() {
     return type == Type.KNOWN;
   }

   /**
    * Returns true if this token is unknown word
    * @return true if this token is unknown word. false if not.
    */
   public boolean isUnknown() {
     return type == Type.UNKNOWN;
   }

   /**
    * Returns true if this token is defined in user dictionary
    * @return true if this token is in user dictionary. false if not.
    */
   public boolean isUser() {
     return type == Type.USER;
   }

   /**
    * Get index of this token in input text
    * @return position of token
    */
   public int getPosition() {
     return position;
   }

   /**
    * Set the position length (in tokens) of this token.  For normal
    * tokens this is 1; for compound tokens it's &gt; 1.
    */
   public void setPositionLength(int positionLength) {
     this.positionLength = positionLength;
   }

   /**
    * Get the length (in tokens) of this token.  For normal
    * tokens this is 1; for compound tokens it's &gt; 1.
    * @return position length of token
    */
   public int getPositionLength() {
     return positionLength;
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.analysis.ja;


	import org.apache.lucene.analysis.ja.JapaneseTokenizer.Type;
	import org.apache.lucene.analysis.ja.dict.Dictionary;

	/**
	* Analyzed token with morphological data from its dictionary.
	*/
	public class Token {
	private final Dictionary dictionary;

	private final int wordId;

	private final char[] surfaceForm;
	private final int offset;
	private final int length;

	private final int position;
	private int positionLength;

	private final Type type;

	public Token(int wordId, char[] surfaceForm, int offset, int length, Type type, int position, Dictionary dictionary) {
	this.wordId = wordId;
	this.surfaceForm = surfaceForm;
	this.offset = offset;
	this.length = length;
	this.type = type;
	this.position = position;
	this.dictionary = dictionary;
	}

	@Override
	public String toString() {
	return "Token(\"" + new String(surfaceForm, offset, length) + "\" pos=" + position + " length=" + length +
	" posLen=" + positionLength + " type=" + type + " wordId=" + wordId +
	" leftID=" + dictionary.getLeftId(wordId) + ")";
	}

	/**
	* @return surfaceForm
	*/
	public char[] getSurfaceForm() {
	return surfaceForm;
	}

	/**
	* @return offset into surfaceForm
	*/
	public int getOffset() {
	return offset;
	}

	/**
	* @return length of surfaceForm
	*/
	public int getLength() {
	return length;
	}

	/**
	* @return surfaceForm as a String
	*/
	public String getSurfaceFormString() {
	return new String(surfaceForm, offset, length);
	}

	/**
	* @return reading. null if token doesn't have reading.
	*/
	public String getReading() {
	return dictionary.getReading(wordId, surfaceForm, offset, length);
	}

	/**
	* @return pronunciation. null if token doesn't have pronunciation.
	*/
	public String getPronunciation() {
	return dictionary.getPronunciation(wordId, surfaceForm, offset, length);
	}

	/**
	* @return part of speech.
	*/
	public String getPartOfSpeech() {
	return dictionary.getPartOfSpeech(wordId);
	}

	/**
	* @return inflection type or null
	*/
	public String getInflectionType() {
	return dictionary.getInflectionType(wordId);
	}

	/**
	* @return inflection form or null
	*/
	public String getInflectionForm() {
	return dictionary.getInflectionForm(wordId);
	}

	/**
	* @return base form or null if token is not inflected
	*/
	public String getBaseForm() {
	return dictionary.getBaseForm(wordId, surfaceForm, offset, length);
	}

	/**
	* Returns the type of this token
	* @return token type, not null
	*/
	public Type getType() {
	return type;
	}

	/**
	* Returns true if this token is known word
	* @return true if this token is in standard dictionary. false if not.
	*/
	public boolean isKnown() {
	return type == Type.KNOWN;
	}

	/**
	* Returns true if this token is unknown word
	* @return true if this token is unknown word. false if not.
	*/
	public boolean isUnknown() {
	return type == Type.UNKNOWN;
	}

	/**
	* Returns true if this token is defined in user dictionary
	* @return true if this token is in user dictionary. false if not.
	*/
	public boolean isUser() {
	return type == Type.USER;
	}

	/**
	* Get index of this token in input text
	* @return position of token
	*/
	public int getPosition() {
	return position;
	}

	/**
	* Set the position length (in tokens) of this token. For normal
	* tokens this is 1; for compound tokens it's > 1.
	*/
	public void setPositionLength(int positionLength) {
	this.positionLength = positionLength;
	}

	/**
	* Get the length (in tokens) of this token. For normal
	* tokens this is 1; for compound tokens it's > 1.
	* @return position length of token
	*/
	public int getPositionLength() {
	return positionLength;
	}
	}