blob: ca736b97134465f18d560056a7bf2405e8602c12 [file] [log] [blame]
using Lucene.Net.Analysis.Ja.Dict;
using Lucene.Net.Support;
using System.Diagnostics.CodeAnalysis;
namespace Lucene.Net.Analysis.Ja
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// Analyzed token with morphological data from its dictionary.
/// </summary>
public class Token
{
private readonly IDictionary dictionary;
private readonly int wordId;
private readonly char[] surfaceForm;
private readonly int offset;
private readonly int length;
private readonly int position;
private int positionLength;
private readonly JapaneseTokenizerType type;
public Token(int wordId, char[] surfaceForm, int offset, int length, JapaneseTokenizerType type, int position, IDictionary dictionary)
{
this.wordId = wordId;
this.surfaceForm = surfaceForm;
this.offset = offset;
this.length = length;
this.type = type;
this.position = position;
this.dictionary = dictionary;
}
public override string ToString()
{
return "Token(\"" + new string(surfaceForm, offset, length) + "\" pos=" + position + " length=" + length +
" posLen=" + positionLength + " type=" + type + " wordId=" + wordId +
" leftID=" + dictionary.GetLeftId(wordId) + ")";
}
/// <summary>
/// surfaceForm
/// </summary>
[WritableArray]
[SuppressMessage("Microsoft.Performance", "CA1819", Justification = "Lucene's design requires some writable array properties")]
public virtual char[] SurfaceForm => surfaceForm;
/// <summary>
/// offset into surfaceForm
/// </summary>
public virtual int Offset => offset;
/// <summary>
/// length of surfaceForm
/// </summary>
public virtual int Length => length;
/// <summary>
/// surfaceForm as a String
/// </summary>
/// <returns>surfaceForm as a String</returns>
public virtual string GetSurfaceFormString()
{
return new string(surfaceForm, offset, length);
}
/// <summary>
/// reading. <c>null</c> if token doesn't have reading.
/// </summary>
/// <returns>reading. <c>null</c> if token doesn't have reading.</returns>
public virtual string GetReading()
{
return dictionary.GetReading(wordId, surfaceForm, offset, length);
}
/// <summary>
/// pronunciation. <c>null</c> if token doesn't have pronunciation.
/// </summary>
/// <returns>pronunciation. <c>null</c> if token doesn't have pronunciation.</returns>
public virtual string GetPronunciation()
{
return dictionary.GetPronunciation(wordId, surfaceForm, offset, length);
}
/// <summary>
/// part of speech.
/// </summary>
/// <returns>part of speech.</returns>
public virtual string GetPartOfSpeech()
{
return dictionary.GetPartOfSpeech(wordId);
}
/// <summary>
/// inflection type or <c>null</c>
/// </summary>
/// <returns>inflection type or <c>null</c></returns>
public virtual string GetInflectionType()
{
return dictionary.GetInflectionType(wordId);
}
/// <summary>
/// inflection form or <c>null</c>
/// </summary>
/// <returns>inflection form or <c>null</c></returns>
public virtual string GetInflectionForm()
{
return dictionary.GetInflectionForm(wordId);
}
/// <summary>
/// base form or <c>null</c> if token is not inflected
/// </summary>
/// <returns>base form or <c>null</c> if token is not inflected</returns>
public virtual string GetBaseForm()
{
return dictionary.GetBaseForm(wordId, surfaceForm, offset, length);
}
/// <summary>
/// Returns <c>true</c> if this token is known word.
/// </summary>
/// <returns><c>true</c> if this token is in standard dictionary. <c>false</c> if not.</returns>
public virtual bool IsKnown()
{
return type == JapaneseTokenizerType.KNOWN;
}
/// <summary>
/// Returns <c>true</c> if this token is unknown word.
/// </summary>
/// <returns><c>true</c> if this token is unknown word. <c>false</c> if not.</returns>
public virtual bool IsUnknown()
{
return type == JapaneseTokenizerType.UNKNOWN;
}
/// <summary>
/// Returns <c>true</c> if this token is defined in user dictionary.
/// </summary>
/// <returns><c>true</c> if this token is in user dictionary. <c>false</c> if not.</returns>
public virtual bool IsUser()
{
return type == JapaneseTokenizerType.USER;
}
/// <summary>
/// Get index of this token in input text. Returns position of token.
/// </summary>
public virtual int Position => position;
/// <summary>
/// Gets or Sets the length (in tokens) of this token. For normal
/// tokens this is 1; for compound tokens it's > 1.
/// </summary>
public virtual int PositionLength
{
get => positionLength;
set => this.positionLength = value;
}
}
}