blob: 4c24a4eb691829ba7fbe7bee6c79d8c14e6442c7 [file] [log] [blame]
namespace Lucene.Net.Analysis.Ja.Dict
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// Contract for looking up morphological data about a word by its integer
/// word id: left/right ids, word cost, part of speech, reading, base form,
/// pronunciation, and inflection type/form.
/// </summary>
public interface IDictionary
{
/// <summary>
/// Gets the left id of the specified word.
/// </summary>
/// <param name="wordId">Word ID of token.</param>
/// <returns>Left id.</returns>
int GetLeftId(int wordId);
/// <summary>
/// Gets the right id of the specified word.
/// </summary>
/// <param name="wordId">Word ID of token.</param>
/// <returns>Right id.</returns>
int GetRightId(int wordId);
/// <summary>
/// Gets the word cost of the specified word.
/// </summary>
/// <param name="wordId">Word ID of token.</param>
/// <returns>Word's cost.</returns>
int GetWordCost(int wordId);
/// <summary>
/// Gets the part of speech of the token.
/// </summary>
/// <param name="wordId">Word ID of token.</param>
/// <returns>Part-Of-Speech of the token.</returns>
string GetPartOfSpeech(int wordId);
/// <summary>
/// Gets the reading of the token.
/// </summary>
/// <param name="wordId">Word ID of token.</param>
/// <param name="surface">Character buffer supplied with the word id; presumably holds the token's surface form (TODO confirm against implementations).</param>
/// <param name="off">Presumably the start offset of the token within <paramref name="surface"/> (TODO confirm).</param>
/// <param name="len">Presumably the number of characters of the token within <paramref name="surface"/> (TODO confirm).</param>
/// <returns>Reading of the token.</returns>
string GetReading(int wordId, char[] surface, int off, int len);
/// <summary>
/// Gets the base form of the word.
/// </summary>
/// <param name="wordId">Word ID of token.</param>
/// <param name="surface">Character buffer supplied with the word id; presumably holds the token's surface form (TODO confirm against implementations).</param>
/// <param name="off">Presumably the start offset of the token within <paramref name="surface"/> (TODO confirm).</param>
/// <param name="len">Presumably the number of characters of the token within <paramref name="surface"/> (TODO confirm).</param>
/// <returns>Base form (only different for inflected words, otherwise null).</returns>
string GetBaseForm(int wordId, char[] surface, int off, int len);
/// <summary>
/// Gets the pronunciation of the token.
/// </summary>
/// <param name="wordId">Word ID of token.</param>
/// <param name="surface">Character buffer supplied with the word id; presumably holds the token's surface form (TODO confirm against implementations).</param>
/// <param name="off">Presumably the start offset of the token within <paramref name="surface"/> (TODO confirm).</param>
/// <param name="len">Presumably the number of characters of the token within <paramref name="surface"/> (TODO confirm).</param>
/// <returns>Pronunciation of the token.</returns>
string GetPronunciation(int wordId, char[] surface, int off, int len);
/// <summary>
/// Gets the inflection type of the token.
/// </summary>
/// <param name="wordId">Word ID of token.</param>
/// <returns>Inflection type, or null.</returns>
string GetInflectionType(int wordId);
/// <summary>
/// Gets the inflection form of the token.
/// </summary>
/// <param name="wordId">Word ID of token.</param>
/// <returns>Inflection form, or null.</returns>
string GetInflectionForm(int wordId);
// TODO: maybe we should have an optional method, a non-typesafe
// 'getAdditionalData', if other dictionaries like unidic have additional data
}
// LUCENENET TODO: Make this whole thing into an abstract class??
/// <summary>
/// Holds the <see cref="INTERNAL_SEPARATOR"/> constant; no other members are
/// declared here.
/// </summary>
public class Dictionary
{
/// <summary>
/// Separator string consisting of the single character U+0000 (NUL).
/// NOTE(review): presumably used to delimit fields inside internally stored
/// dictionary entries; confirm against the call sites.
/// </summary>
public static readonly string INTERNAL_SEPARATOR = "\u0000";
}
}