src/Lucene.Net.Analysis.SmartCn/Hhmm/WordDictionary.cs - lucenenet - Git at Google

 // lucene version compatibility level: 4.8.1
 using J2N;
 using J2N.IO;
 using System;
 using System.IO;
 using System.Text;

 namespace Lucene.Net.Analysis.Cn.Smart.Hhmm
 {
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
      * this work for additional information regarding copyright ownership.
      * The ASF licenses this file to You under the Apache License, Version 2.0
      * (the "License"); you may not use this file except in compliance with
      * the License.  You may obtain a copy of the License at
      *
      *     http://www.apache.org/licenses/LICENSE-2.0
      *
      * Unless required by applicable law or agreed to in writing, software
      * distributed under the License is distributed on an "AS IS" BASIS,
      * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */

     /// <summary>
     /// SmartChineseAnalyzer Word Dictionary
     /// <para/>
     /// @lucene.experimental
     /// </summary>
     internal class WordDictionary : AbstractDictionary
     {
         // Private: instances are only created by GetInstance().
         private WordDictionary()
         {
         }

         // The lazily created singleton returned by GetInstance().
         private static WordDictionary singleInstance;

         /// <summary>
         /// Large prime number for hash function; also the size of
         /// <see cref="wordIndexTable"/> and <see cref="charIndexTable"/> when they
         /// are built from coredict.dct.
         /// </summary>
         public const int PRIME_INDEX_LENGTH = 12071;

         /// <summary>
         /// wordIndexTable guarantees to hash all Chinese characters in Unicode into
         /// PRIME_INDEX_LENGTH array. There will be conflict, but in reality this
         /// program only handles the 6768 characters found in GB2312 plus some
         /// ASCII characters. Therefore in order to guarantee better precision, it is
         /// necessary to retain the original symbol in the charIndexTable.
         /// <para/>
         /// Each occupied slot holds the row index into <see cref="wordItem_charArrayTable"/>
         /// for the character stored at the same slot of <see cref="charIndexTable"/>;
         /// slots are initialized to -1 when the tables are built from coredict.dct.
         /// </summary>
         private short[] wordIndexTable;

         // Character owning each hash slot; a value of 0 marks an unused slot.
         private char[] charIndexTable;

         /// <summary>
         /// To avoid taking too much space, the data structure needed to store the
         /// lexicon requires two multidimensional arrays to store word and frequency.
         /// Each word is placed in a char[]. Each char represents a Chinese char or
         /// other symbol.  Each frequency is put into an int. These two arrays
         /// correspond to each other one-to-one. Therefore, one can use
         /// wordItem_charArrayTable[i][j] to look up word from lexicon, and
         /// wordItem_frequencyTable[i][j] to look up the corresponding frequency.
         /// </summary>
         private char[][][] wordItem_charArrayTable;

         // Frequencies parallel to wordItem_charArrayTable: [row][word] -> frequency.
         private int[][] wordItem_frequencyTable;

         // static Logger log = Logger.getLogger(WordDictionary.class);

         // Guards the lazy creation of singleInstance in GetInstance().
         private static readonly object syncLock = new object();

         /// <summary>
         /// Get the singleton dictionary instance, creating and loading it on first use.
         /// </summary>
         /// <remarks>
         /// The dictionary is loaded from the directory named by
         /// <c>AnalyzerProfile.ANALYSIS_DATA_DIR</c> when that value is non-empty,
         /// otherwise through the parameterless <see cref="Load()"/>.
         /// The instance is only stored in the static field after loading has completed,
         /// so a failed load is retried on the next call instead of handing out a
         /// dictionary whose tables were never populated.
         /// </remarks>
         /// <returns>singleton</returns>
         public static WordDictionary GetInstance()
         {
             lock (syncLock)
             {
                 if (singleInstance == null)
                 {
                     // Build into a local first: if Load throws, singleInstance must stay null.
                     WordDictionary dictionary = new WordDictionary();

                     // LUCENENET specific
                     // LUCENE-1817: https://issues.apache.org/jira/browse/LUCENE-1817
                     // This issue still existed as of 4.8.0. Here is the fix - we only
                     // load from a directory if the actual directory exists (AnalyzerProfile
                     // ensures it is an empty string if it is not available).
                     string dictRoot = AnalyzerProfile.ANALYSIS_DATA_DIR;
                     if (string.IsNullOrEmpty(dictRoot))
                     {
                         dictionary.Load();
                     }
                     else
                     {
                         dictionary.Load(dictRoot);
                     }

                     singleInstance = dictionary;
                 }
                 return singleInstance;
             }
         }

         /// <summary>
         /// Load the dictionary from the given directory. If <c>coredict.mem</c> exists there it is
         /// read directly; otherwise the tables are rebuilt from <c>coredict.dct</c> and then
         /// written back to <c>coredict.mem</c> for later runs.
         /// </summary>
         /// <param name="dctFileRoot">path to dictionary directory</param>
         public virtual void Load(string dctFileRoot)
         {
             string dctFilePath = System.IO.Path.Combine(dctFileRoot, "coredict.dct");
             FileInfo serialObj = new FileInfo(System.IO.Path.Combine(dctFileRoot, "coredict.mem"));

             // Fast path: the pre-built binary form is available.
             if (serialObj.Exists && LoadFromObj(serialObj))
             {
                 return;
             }

             try
             {
                 // A fresh char[] is already all zeros (the "unused slot" marker),
                 // so only the index table needs an explicit fill.
                 charIndexTable = new char[PRIME_INDEX_LENGTH];
                 wordIndexTable = new short[PRIME_INDEX_LENGTH];
                 for (int slot = 0; slot < PRIME_INDEX_LENGTH; slot++)
                 {
                     wordIndexTable[slot] = -1;
                 }

                 wordItem_charArrayTable = new char[GB2312_CHAR_NUM][][];
                 wordItem_frequencyTable = new int[GB2312_CHAR_NUM][];

                 // The word count returned by LoadMainDataFromFile is not used here.
                 LoadMainDataFromFile(dctFilePath);
                 ExpandDelimiterData();
                 MergeSameWords();
                 SortEachItems();
                 // log.info("load dictionary: " + dctFilePath + " total:" + total);
             }
             catch (IOException e)
             {
                 throw new Exception(e.ToString(), e);
             }

             // Cache the freshly built tables next to the source file.
             SaveToObj(serialObj);
         }

         /// <summary>
         /// Load the dictionary from the <c>coredict.mem</c> manifest resource
         /// located through this instance's type.
         /// </summary>
         /// <exception cref="IOException">If there is a low-level I/O error.</exception>
         public virtual void Load()
         {
             using (Stream resource = this.GetType().FindAndGetManifestResourceStream("coredict.mem"))
             {
                 LoadFromObjectInputStream(resource);
             }
         }

         /// <summary>
         /// Read the dictionary tables from the given binary (.mem) file.
         /// </summary>
         /// <param name="serialObj">file to read</param>
         /// <returns><c>true</c> when the file was read; any failure is rethrown wrapped in an <see cref="Exception"/></returns>
         private bool LoadFromObj(FileInfo serialObj)
         {
             try
             {
                 using Stream memFile = new FileStream(serialObj.FullName, FileMode.Open, FileAccess.Read);
                 LoadFromObjectInputStream(memFile);
                 return true;
             }
             catch (Exception e)
             {
                 throw new Exception(e.ToString(), e);
             }
         }

         // LUCENENET conversion note:
         // The data in Lucene is stored in a proprietary binary format (similar to
         // .NET's BinaryFormatter) that cannot be read back in .NET. Therefore, the
         // data was extracted with Java's DataOutputStream using the following Java code.
         // It can then be read in using the LoadFromInputStream method below
         // (using a DataInputStream instead of a BinaryReader), and saved
         // in the correct (BinaryWriter) format by calling the SaveToObj method.
         // Alternatively, the data can be loaded from disk using the files in the
         // analysis.data.zip attachment of https://issues.apache.org/jira/browse/LUCENE-1629,
         // which will automatically produce the .mem files.

         //public void saveToOutputStream(java.io.DataOutputStream stream) throws IOException
         //{
         //    // save bigramHashTable
         //    int bhLen = bigramHashTable.length;
         //    stream.writeInt(bhLen);
         //    for (int i = 0; i<bhLen; i++)
         //    {
         //        stream.writeLong(bigramHashTable[i]);
         //    }

         //    // save frequencyTable
         //    int fLen = frequencyTable.length;
         //    stream.writeInt(fLen);
         //    for (int i = 0; i<fLen; i++)
         //    {
         //        stream.writeInt(frequencyTable[i]);
         //    }
         //}

         /// <summary>
         /// Populate the four dictionary tables from a stream in the format written by
         /// <see cref="SaveToObj(FileInfo)"/>: each array is preceded by its length as a
         /// 32-bit integer, and a negative length stands for a <c>null</c> array.
         /// The stream is closed when reading finishes.
         /// </summary>
         /// <param name="serialObjectInputStream">stream positioned at the start of the data</param>
         private void LoadFromObjectInputStream(Stream serialObjectInputStream)
         {
             using (var input = new BinaryReader(serialObjectInputStream))
             {
                 // wordIndexTable: count, then that many 16-bit values
                 int indexCount = input.ReadInt32();
                 wordIndexTable = new short[indexCount];
                 for (int n = 0; n < indexCount; n++)
                 {
                     wordIndexTable[n] = input.ReadInt16();
                 }

                 // charIndexTable: count, then that many chars
                 int charCount = input.ReadInt32();
                 charIndexTable = new char[charCount];
                 for (int n = 0; n < charCount; n++)
                 {
                     charIndexTable[n] = input.ReadChar();
                 }

                 // wordItem_charArrayTable: rows -> words -> chars
                 int wordRows = input.ReadInt32();
                 if (wordRows >= 0)
                 {
                     wordItem_charArrayTable = new char[wordRows][][];
                     for (int row = 0; row < wordRows; row++)
                     {
                         int wordCount = input.ReadInt32();
                         if (wordCount < 0)
                         {
                             continue; // row stays null
                         }

                         char[][] words = new char[wordCount][];
                         wordItem_charArrayTable[row] = words;
                         for (int w = 0; w < wordCount; w++)
                         {
                             int wordLength = input.ReadInt32();
                             if (wordLength < 0)
                             {
                                 continue; // word stays null
                             }

                             char[] word = new char[wordLength];
                             words[w] = word;
                             for (int pos = 0; pos < wordLength; pos++)
                             {
                                 word[pos] = input.ReadChar();
                             }
                         }
                     }
                 }

                 // wordItem_frequencyTable: rows -> frequencies
                 int freqRows = input.ReadInt32();
                 if (freqRows >= 0)
                 {
                     wordItem_frequencyTable = new int[freqRows][];
                     for (int row = 0; row < freqRows; row++)
                     {
                         int freqCount = input.ReadInt32();
                         if (freqCount < 0)
                         {
                             continue; // row stays null
                         }

                         int[] freqs = new int[freqCount];
                         wordItem_frequencyTable[row] = freqs;
                         for (int f = 0; f < freqCount; f++)
                         {
                             freqs[f] = input.ReadInt32();
                         }
                     }
                 }

                 // log.info("load core dict from serialization.");
             }
         }

         /// <summary>
         /// Write the four dictionary tables to the given file so that
         /// <see cref="LoadFromObjectInputStream(Stream)"/> can read them back.
         /// Every array is written as its length followed by its elements; a <c>null</c>
         /// array is written as length -1. Any failure is ignored.
         /// </summary>
         /// <param name="serialObj">file to create or overwrite</param>
         private void SaveToObj(FileInfo serialObj)
         {
             try
             {
                 using (Stream output = new FileStream(serialObj.FullName, FileMode.Create, FileAccess.Write))
                 using (var writer = new BinaryWriter(output))
                 {
                     // wordIndexTable
                     writer.Write(wordIndexTable.Length);
                     foreach (short rowIndex in wordIndexTable)
                     {
                         writer.Write(rowIndex);
                     }

                     // charIndexTable
                     writer.Write(charIndexTable.Length);
                     foreach (char slotChar in charIndexTable)
                     {
                         writer.Write(slotChar);
                     }

                     // wordItem_charArrayTable
                     if (wordItem_charArrayTable == null)
                     {
                         writer.Write(-1);
                     }
                     else
                     {
                         writer.Write(wordItem_charArrayTable.Length);
                         foreach (char[][] words in wordItem_charArrayTable)
                         {
                             if (words == null)
                             {
                                 writer.Write(-1);
                                 continue;
                             }

                             writer.Write(words.Length);
                             foreach (char[] word in words)
                             {
                                 if (word == null)
                                 {
                                     writer.Write(-1);
                                     continue;
                                 }

                                 writer.Write(word.Length);
                                 foreach (char ch in word)
                                 {
                                     writer.Write(ch);
                                 }
                             }
                         }
                     }

                     // wordItem_frequencyTable
                     if (wordItem_frequencyTable == null)
                     {
                         writer.Write(-1);
                     }
                     else
                     {
                         writer.Write(wordItem_frequencyTable.Length);
                         foreach (int[] freqs in wordItem_frequencyTable)
                         {
                             if (freqs == null)
                             {
                                 writer.Write(-1);
                                 continue;
                             }

                             writer.Write(freqs.Length);
                             foreach (int freq in freqs)
                             {
                                 writer.Write(freq);
                             }
                         }
                     }

                     // log.info("serialize core dict.");
                 }
             }
             catch (Exception)
             {
                 // Deliberately ignored: writing the .mem file is best-effort.
                 // log.warn(e.getMessage());
             }
         }

         /// <summary>
         /// Load the datafile into this <see cref="WordDictionary"/>
         /// </summary>
         /// <remarks>
         /// For each of the <c>CHAR_NUM_IN_FILE</c> rows the file holds a little-endian 32-bit
         /// word count followed by that many records of (frequency, byte length, handle) and
         /// the GB2312-encoded word bytes. The handle value is read but not used.
         /// </remarks>
         /// <param name="dctFilePath">path to word dictionary (coredict.dct)</param>
         /// <returns>number of words read</returns>
         /// <exception cref="IOException">
         /// If there is a low-level I/O error, including an <see cref="EndOfStreamException"/>
         /// when the file ends before all expected data has been read.
         /// </exception>
         private int LoadMainDataFromFile(string dctFilePath)
         {
             int total = 0;
             byte[] intBuffer = new byte[4];
             // Resolved on the first non-empty word and reused for every later word.
             Encoding gb2312 = null;

             using (var dctFile = new FileStream(dctFilePath, FileMode.Open, FileAccess.Read))
             {
                 // Stream.Read may return fewer bytes than requested, so keep reading until the
                 // buffer is full; a premature end of file is reported instead of decoding
                 // whatever bytes were left in the buffer from the previous read.
                 void ReadFully(byte[] target)
                 {
                     int filled = 0;
                     while (filled < target.Length)
                     {
                         int read = dctFile.Read(target, filled, target.Length - filled);
                         if (read <= 0)
                         {
                             throw new EndOfStreamException("Unexpected end of dictionary file: " + dctFilePath);
                         }
                         filled += read;
                     }
                 }

                 // the dictionary was developed for C, so integers are stored little-endian
                 int ReadInt32()
                 {
                     ReadFully(intBuffer);
                     return ByteBuffer.Wrap(intBuffer).SetOrder(ByteOrder.LittleEndian).GetInt32();
                 }

                 // The file only counted 6763 Chinese characters plus 5 reserved slots 3756~3760.
                 // The 3756th is used (as a header) to store information.
                 // GB2312 characters 0 - 6768
                 for (int i = GB2312_FIRST_CHAR; i < GB2312_FIRST_CHAR + CHAR_NUM_IN_FILE; i++)
                 {
                     int cnt = ReadInt32();
                     if (cnt <= 0)
                     {
                         wordItem_charArrayTable[i] = null;
                         wordItem_frequencyTable[i] = null;
                         continue;
                     }

                     wordItem_charArrayTable[i] = new char[cnt][];
                     wordItem_frequencyTable[i] = new int[cnt];
                     total += cnt;

                     for (int j = 0; j < cnt; j++)
                     {
                         int frequency = ReadInt32();
                         int length = ReadInt32();
                         ReadInt32(); // handle: must be consumed to stay aligned, value unused

                         wordItem_frequencyTable[i][j] = frequency;

                         if (length > 0)
                         {
                             byte[] lchBuffer = new byte[length];
                             ReadFully(lchBuffer);
                             if (gb2312 == null)
                             {
                                 gb2312 = Encoding.GetEncoding("GB2312");
                             }
                             wordItem_charArrayTable[i][j] = gb2312.GetString(lchBuffer).ToCharArray();
                         }
                         else
                         {
                             wordItem_charArrayTable[i][j] = null;
                         }
                     }

                     // Register the row's own character in the hash tables.
                     string str = GetCCByGB2312Id(i);
                     SetTableIndex(str[0], i);
                 }
             }
             return total;
         }

         /// <summary>
         /// The original lexicon stores every entry that starts with a punctuation symbol in
         /// one shared row (position 3755, offset by <c>GB2312_FIRST_CHAR</c>). This method
         /// moves each run of entries sharing the same leading symbol into the row that
         /// belongs to that symbol, dropping the leading symbol from the stored word, and
         /// then clears the shared row.
         /// </summary>
         /// <remarks>
         /// If the row for a symbol is already populated, the entry is skipped. Previously the
         /// loop index was not advanced in that case, so the loop could never terminate.
         /// </remarks>
         private void ExpandDelimiterData()
         {
             // Row that holds all punctuation-prefixed entries as loaded from the file.
             int delimiterIndex = 3755 + GB2312_FIRST_CHAR;
             int i = 0;
             while (i < wordItem_charArrayTable[delimiterIndex].Length)
             {
                 char c = wordItem_charArrayTable[delimiterIndex][i][0];
                 int j = GetGB2312Id(c);// the id value of the punctuation
                 if (wordItem_charArrayTable[j] != null)
                 {
                     // Target row already filled: skip this entry so the scan always advances.
                     i++;
                     continue;
                 }

                 // Starting from i, count the consecutive entries that begin with c.
                 int k = i;
                 while (k < wordItem_charArrayTable[delimiterIndex].Length
                     && wordItem_charArrayTable[delimiterIndex][k][0] == c)
                 {
                     k++;
                 }
                 // c is the punctuation character, j is the id value of c
                 // k-1 represents the index of the last punctuation character
                 int cnt = k - i;
                 if (cnt != 0)
                 {
                     wordItem_charArrayTable[j] = new char[cnt][];
                     wordItem_frequencyTable[j] = new int[cnt];
                 }

                 // Copy each entry into row j without its leading symbol; this also advances i.
                 for (k = 0; k < cnt; k++, i++)
                 {
                     wordItem_frequencyTable[j][k] = wordItem_frequencyTable[delimiterIndex][i];
                     wordItem_charArrayTable[j][k] = new char[wordItem_charArrayTable[delimiterIndex][i].Length - 1];
                     System.Array.Copy(wordItem_charArrayTable[delimiterIndex][i], 1,
                         wordItem_charArrayTable[j][k], 0,
                         wordItem_charArrayTable[j][k].Length);
                 }
                 SetTableIndex(c, j);
             }
             // Delete the original corresponding symbol array.
             wordItem_charArrayTable[delimiterIndex] = null;
             wordItem_frequencyTable[delimiterIndex] = null;
         }

         /// <summary>
         /// Collapse adjacent entries that hold the same word into a single entry whose
         /// frequency is the sum of the merged ones. POS-tagging is not performed, so the
         /// per-POS entries of one word do not need to be kept apart.
         /// </summary>
         private void MergeSameWords()
         {
             int rowLimit = GB2312_FIRST_CHAR + CHAR_NUM_IN_FILE;
             for (int row = 0; row < rowLimit; row++)
             {
                 char[][] words = wordItem_charArrayTable[row];
                 if (words == null)
                 {
                     continue;
                 }

                 // First pass: count how many entries remain after merging neighbours.
                 int distinct = 1;
                 for (int n = 1; n < words.Length; n++)
                 {
                     if (Utility.CompareArray(words[n], 0, words[n - 1], 0) != 0)
                     {
                         distinct++;
                     }
                 }

                 if (distinct >= words.Length)
                 {
                     continue; // nothing to merge in this row
                 }

                 // Second pass: build the merged arrays.
                 char[][] mergedWords = new char[distinct][];
                 int[] mergedFreqs = new int[distinct];
                 int[] freqs = wordItem_frequencyTable[row];
                 int last = 0;
                 mergedWords[0] = words[0];
                 mergedFreqs[0] = freqs[0];
                 for (int n = 1; n < words.Length; n++)
                 {
                     if (Utility.CompareArray(words[n], 0, mergedWords[last], 0) == 0)
                     {
                         mergedFreqs[last] += freqs[n];
                     }
                     else
                     {
                         last++;
                         mergedWords[last] = words[n];
                         mergedFreqs[last] = freqs[n];
                     }
                 }

                 wordItem_charArrayTable[row] = mergedWords;
                 wordItem_frequencyTable[row] = mergedFreqs;
             }
         }

         /// <summary>
         /// Sort the words of every row into ascending <c>Utility.CompareArray</c> order,
         /// keeping each word's frequency at the same position as the word.
         /// </summary>
         private void SortEachItems()
         {
             for (int row = 0; row < wordItem_charArrayTable.Length; row++)
             {
                 char[][] words = wordItem_charArrayTable[row];
                 if (words == null || words.Length <= 1)
                 {
                     continue; // empty and single-entry rows are already ordered
                 }

                 int[] freqs = wordItem_frequencyTable[row];
                 int lastIndex = words.Length - 1;
                 for (int left = 0; left < lastIndex; left++)
                 {
                     for (int right = left + 1; right <= lastIndex; right++)
                     {
                         if (Utility.CompareArray(words[left], 0, words[right], 0) <= 0)
                         {
                             continue;
                         }

                         // Out of order: exchange both the word and its frequency.
                         char[] heldWord = words[left];
                         int heldFreq = freqs[left];
                         words[left] = words[right];
                         freqs[left] = freqs[right];
                         words[right] = heldWord;
                         freqs[right] = heldFreq;
                     }
                 }
             }
         }

         /// <summary>
         /// Find the hash slot for character <paramref name="c"/> and record in it both the
         /// character and the row index <paramref name="j"/>.
         /// </summary>
         /// <param name="c">character to register</param>
         /// <param name="j">row index to associate with the character</param>
         /// <returns><c>true</c> if a slot was found and written; otherwise <c>false</c></returns>
         private bool SetTableIndex(char c, int j)
         {
             int slot = GetAvaliableTableIndex(c);
             if (slot == -1)
             {
                 return false;
             }

             charIndexTable[slot] = c;
             wordIndexTable[slot] = (short)j;
             return true;
         }

         /// <summary>
         /// Locate the slot of <see cref="charIndexTable"/> in which <paramref name="c"/> can be
         /// stored: the first slot along the probe sequence
         /// <c>(hash1 + i * hash2) % PRIME_INDEX_LENGTH</c> that is either unused (0) or
         /// already holds <paramref name="c"/>.
         /// </summary>
         /// <param name="c">character to place</param>
         /// <returns>slot index, or -1 if no such slot was found within the probe limit</returns>
         private short GetAvaliableTableIndex(char c)
         {
             int start = (int)(Hash1(c) % PRIME_INDEX_LENGTH);
             int step = Hash2(c) % PRIME_INDEX_LENGTH;
             // The remainder of a negative hash is negative; shift it into range.
             if (start < 0)
             {
                 start += PRIME_INDEX_LENGTH;
             }
             if (step < 0)
             {
                 step += PRIME_INDEX_LENGTH;
             }

             int probe = start;
             int attempt = 1;
             char occupant;
             while (true)
             {
                 occupant = charIndexTable[probe];
                 if (occupant == 0 || occupant == c || attempt >= PRIME_INDEX_LENGTH)
                 {
                     break;
                 }
                 probe = (start + attempt * step) % PRIME_INDEX_LENGTH;
                 attempt++;
             }
             // System.out.println(i - 1);

             bool usable = attempt < PRIME_INDEX_LENGTH && (occupant == 0 || occupant == c);
             return usable ? (short)probe : (short)-1;
         }

         /// <summary>
         /// Look up the slot of <see cref="charIndexTable"/> that holds <paramref name="c"/>,
         /// following the probe sequence <c>(hash1 + i * hash2) % PRIME_INDEX_LENGTH</c> until
         /// the character or an unused (0) slot is reached.
         /// </summary>
         /// <param name="c">character to look up</param>
         /// <returns>slot index of the character, or -1 if it is not in the table</returns>
         private short GetWordItemTableIndex(char c)
         {
             int start = (int)(Hash1(c) % PRIME_INDEX_LENGTH);
             int step = Hash2(c) % PRIME_INDEX_LENGTH;
             // The remainder of a negative hash is negative; shift it into range.
             if (start < 0)
             {
                 start += PRIME_INDEX_LENGTH;
             }
             if (step < 0)
             {
                 step += PRIME_INDEX_LENGTH;
             }

             int probe = start;
             int attempt = 1;
             char occupant;
             while (true)
             {
                 occupant = charIndexTable[probe];
                 if (occupant == 0 || occupant == c || attempt >= PRIME_INDEX_LENGTH)
                 {
                     break;
                 }
                 probe = (start + attempt * step) % PRIME_INDEX_LENGTH;
                 attempt++;
             }

             bool found = attempt < PRIME_INDEX_LENGTH && occupant == c;
             return found ? (short)probe : (short)-1;
         }

         /// <summary>
         /// Binary-search the row belonging to the word's first character for the rest of
         /// the word (<paramref name="charArray"/> from index 1 onwards).
         /// </summary>
         /// <param name="knownHashIndex">
         /// hash-table slot of the first character <c>charArray[0]</c>, as already
         /// determined by the caller.
         /// </param>
         /// <param name="charArray">the word to look up.</param>
         /// <returns>position of the word within its row.  If not found, then return -1.</returns>
         private int FindInTable(short knownHashIndex, char[] charArray)
         {
             if (charArray == null || charArray.Length == 0)
             {
                 return -1;
             }

             char[][] row = wordItem_charArrayTable[wordIndexTable[knownHashIndex]];
             int low = 0;
             int high = row.Length - 1;

             while (low <= high)
             {
                 int middle = (low + high) / 2;
                 int order = Utility.CompareArray(row[middle], 0, charArray, 1);
                 if (order == 0)
                 {
                     return middle; // find it
                 }

                 if (order < 0)
                 {
                     low = middle + 1;
                 }
                 else
                 {
                     high = middle - 1;
                 }
             }
             return -1;
         }

         /// <summary>
         /// Find the first word in the dictionary that starts with the supplied prefix,
         /// searching the whole row of the prefix's first character.
         /// </summary>
         /// <param name="charArray">input prefix</param>
         /// <returns>index of word, or -1 if not found</returns>
         /// <seealso cref="GetPrefixMatch(char[], int)"/>
         public virtual int GetPrefixMatch(char[] charArray) => GetPrefixMatch(charArray, 0);

         /// <summary>
         /// Find a word in the dictionary that starts with the supplied prefix, using a binary
         /// search whose lower bound is <paramref name="knownStart"/>. When a match is hit, the
         /// search walks backwards over all directly preceding matches and returns the first.
         /// </summary>
         /// <param name="charArray">input prefix</param>
         /// <param name="knownStart">position in the row at which the binary search starts</param>
         /// <returns>index of word, or -1 if not found</returns>
         /// <seealso cref="GetPrefixMatch(char[])"/>
         public virtual int GetPrefixMatch(char[] charArray, int knownStart)
         {
             short slot = GetWordItemTableIndex(charArray[0]);
             if (slot == -1)
             {
                 return -1;
             }

             char[][] row = wordItem_charArrayTable[wordIndexTable[slot]];
             int low = knownStart;
             int high = row.Length - 1;

             while (low <= high)
             {
                 int middle = (low + high) / 2;
                 int order = Utility.CompareArrayByPrefix(charArray, 1, row[middle], 0);
                 if (order < 0)
                 {
                     high = middle - 1;
                 }
                 else if (order > 0)
                 {
                     low = middle + 1;
                 }
                 else
                 {
                     // Step back to just before the run of matching entries, then forward one.
                     int first = middle;
                     while (first >= 0
                         && Utility.CompareArrayByPrefix(charArray, 1, row[first], 0) == 0)
                     {
                         first--;
                     }
                     return first + 1;// Find the first word that uses charArray as prefix.
                 }
             }
             return -1;
         }

         /// <summary>
         /// Get the frequency of a word from the dictionary
         /// </summary>
         /// <param name="charArray">input word</param>
         /// <returns>
         /// word frequency, or zero if the word is not found (including when
         /// <paramref name="charArray"/> is <c>null</c> or empty)
         /// </returns>
         public virtual int GetFrequency(char[] charArray)
         {
             // Must be checked before charArray[0] is read below; the equivalent guard in
             // FindInTable runs too late to protect that access.
             if (charArray == null || charArray.Length == 0)
             {
                 return 0;
             }

             short hashIndex = GetWordItemTableIndex(charArray[0]);
             if (hashIndex == -1)
             {
                 return 0;
             }

             int itemIndex = FindInTable(hashIndex, charArray);
             if (itemIndex == -1)
             {
                 return 0;
             }

             return wordItem_frequencyTable[wordIndexTable[hashIndex]][itemIndex];
         }

         /// <summary>
         /// Return <c>true</c> if the dictionary entry at itemIndex for table charArray[0] is charArray
         /// </summary>
         /// <param name="charArray">input word</param>
         /// <param name="itemIndex">item index for table charArray[0]</param>
         /// <returns>
         /// <c>true</c> if the entry exists; <c>false</c> if it differs, if the first character
         /// has no table, or if <paramref name="charArray"/> is <c>null</c> or empty
         /// </returns>
         public virtual bool IsEqual(char[] charArray, int itemIndex)
         {
             if (charArray == null || charArray.Length == 0)
             {
                 return false;
             }

             short hashIndex = GetWordItemTableIndex(charArray[0]);
             if (hashIndex == -1)
             {
                 // Unknown first character: -1 must not be used as an array index.
                 return false;
             }

             // Stored entries omit the first character, hence the offset of 1 on the input.
             return Utility.CompareArray(charArray, 1,
                 wordItem_charArrayTable[wordIndexTable[hashIndex]][itemIndex], 0) == 0;
         }
     }
 }
	// lucene version compatibility level: 4.8.1
	using J2N;
	using J2N.IO;
	using System;
	using System.IO;
	using System.Text;

	namespace Lucene.Net.Analysis.Cn.Smart.Hhmm
	{
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	/// <summary>
	/// SmartChineseAnalyzer Word Dictionary
	/// <para/>
	/// @lucene.experimental
	/// </summary>
	internal class WordDictionary : AbstractDictionary
	{
	// Private constructor: within this class the only construction site is GetInstance().
	private WordDictionary()
	{
	}

	// Lazily created singleton; assigned inside GetInstance() while syncLock is held.
	private static WordDictionary singleInstance;

	/// <summary>
	/// Large prime number for hash function. <c>Load(string)</c> allocates
	/// <c>wordIndexTable</c> and <c>charIndexTable</c> with this length, and the
	/// probing code uses it as the modulus.
	/// </summary>
	public const int PRIME_INDEX_LENGTH = 12071;

	/// <summary>
	/// wordIndexTable guarantees to hash all Chinese characters in Unicode into
	/// PRIME_INDEX_LENGTH array. There will be conflict, but in reality this
	/// program only handles the 6768 characters found in GB2312 plus some
	/// ASCII characters. Therefore in order to guarantee better precision, it is
	/// necessary to retain the original symbol in the charIndexTable.
	/// <para/>
	/// Each slot holds the first-dimension index into
	/// <c>wordItem_charArrayTable</c> / <c>wordItem_frequencyTable</c> for the
	/// character stored in the same slot of <c>charIndexTable</c>;
	/// <c>Load(string)</c> initializes every slot to -1.
	/// </summary>
	private short[] wordIndexTable;

	// The character occupying each hash slot; the probing code treats the value 0
	// as an empty slot.
	private char[] charIndexTable;

	/// <summary>
	/// To avoid taking too much space, the data structure needed to store the
	/// lexicon requires two multidimensional arrays to store word and frequency.
	/// Each word is placed in a char[]. Each char represents a Chinese char or
	/// other symbol. Each frequency is put into an int. These two arrays
	/// correspond to each other one-to-one. Therefore, one can use
	/// wordItem_charArrayTable[i][j] to look up word from lexicon, and
	/// wordItem_frequencyTable[i][j] to look up the corresponding frequency.
	/// </summary>
	private char[][][] wordItem_charArrayTable;

	// Frequencies parallel to wordItem_charArrayTable: [i][j] is the frequency of
	// the word stored at wordItem_charArrayTable[i][j].
	private int[][] wordItem_frequencyTable;

	// static Logger log = Logger.getLogger(WordDictionary.class);

	// Monitor taken by GetInstance() around creation of the singleton.
	private static readonly object syncLock = new object();

	/// <summary>
	/// Get the singleton dictionary instance, creating and loading it on first use.
	/// </summary>
	/// <remarks>
	/// The instance is published to <c>singleInstance</c> only after loading has
	/// completed. If loading throws, the exception propagates and the next call
	/// retries, instead of returning a dictionary whose tables were never populated.
	/// </remarks>
	/// <returns>singleton</returns>
	public static WordDictionary GetInstance()
	{
	    lock (syncLock)
	    {
	        if (singleInstance == null)
	        {
	            // Build into a local so a failed load cannot leave a
	            // half-initialized object in the static field.
	            WordDictionary instance = new WordDictionary();

	            // LUCENENET specific
	            // LUCENE-1817: https://issues.apache.org/jira/browse/LUCENE-1817
	            // This issue still existed as of 4.8.0. Here is the fix - we only
	            // load from a directory if the actual directory exists (AnalyzerProfile
	            // ensures it is an empty string if it is not available).
	            string dictRoot = AnalyzerProfile.ANALYSIS_DATA_DIR;
	            if (string.IsNullOrEmpty(dictRoot))
	            {
	                instance.Load();
	            }
	            else
	            {
	                instance.Load(dictRoot);
	            }

	            singleInstance = instance;
	        }
	        return singleInstance;
	    }
	}

	/// <summary>
	/// Load the dictionary from a directory: use the cached <c>coredict.mem</c>
	/// when it exists, otherwise build the tables from <c>coredict.dct</c> and
	/// then write the cache via <c>SaveToObj</c>.
	/// </summary>
	/// <param name="dctFileRoot">path to dictionary directory</param>
	public virtual void Load(string dctFileRoot)
	{
	    string rawDictionaryPath = System.IO.Path.Combine(dctFileRoot, "coredict.dct");
	    FileInfo cacheFile = new FileInfo(System.IO.Path.Combine(dctFileRoot, "coredict.mem"));

	    // Fast path: the serialized tables are already on disk.
	    if (cacheFile.Exists && LoadFromObj(cacheFile))
	    {
	        return;
	    }

	    try
	    {
	        wordIndexTable = new short[PRIME_INDEX_LENGTH];
	        // A freshly allocated char[] is already filled with (char)0, which is
	        // the "empty slot" marker, so only wordIndexTable needs explicit values.
	        charIndexTable = new char[PRIME_INDEX_LENGTH];
	        for (int slot = 0; slot < PRIME_INDEX_LENGTH; slot++)
	        {
	            wordIndexTable[slot] = -1;
	        }
	        wordItem_charArrayTable = new char[GB2312_CHAR_NUM][][];
	        wordItem_frequencyTable = new int[GB2312_CHAR_NUM][];

	        LoadMainDataFromFile(rawDictionaryPath);
	        ExpandDelimiterData();
	        MergeSameWords();
	        SortEachItems();
	    }
	    catch (IOException e)
	    {
	        throw new Exception(e.ToString(), e);
	    }

	    // Persist the freshly built tables for the next load.
	    SaveToObj(cacheFile);
	}

	/// <summary>
	/// Load the dictionary from the <c>coredict.mem</c> manifest resource located
	/// through this instance's runtime type.
	/// </summary>
	/// <exception cref="IOException">If there is a low-level I/O error.</exception>
	public virtual void Load()
	{
	    using (Stream resource = this.GetType().FindAndGetManifestResourceStream("coredict.mem"))
	    {
	        LoadFromObjectInputStream(resource);
	    }
	}

	// Reads the serialized tables from the given file. Returns true on success;
	// any failure is rethrown wrapped in an Exception (false is never returned).
	private bool LoadFromObj(FileInfo serialObj)
	{
	    try
	    {
	        using Stream cacheStream = new FileStream(serialObj.FullName, FileMode.Open, FileAccess.Read);
	        LoadFromObjectInputStream(cacheStream);
	    }
	    catch (Exception cause)
	    {
	        throw new Exception(cause.ToString(), cause);
	    }
	    return true;
	}

	// LUCENENET conversion note:
	// The data in Lucene is stored in a proprietary binary format (similar to
	// .NET's BinarySerializer) that cannot be read back in .NET. Therefore, the
	// data was extracted using Java's DataOutputStream using the following Java code.
	// It can then be read in using the LoadFromObjectInputStream method below
	// (using a DataInputStream instead of a BinaryReader), and saved
	// in the correct (BinaryWriter) format by calling the SaveToObj method.
	// Alternatively, the data can be loaded from disk using the files attached to
	// LUCENE-1629 (https://issues.apache.org/jira/browse/LUCENE-1629) in the
	// analysis.data.zip file, which will automatically produce the .mem files.

	//public void saveToOutputStream(java.io.DataOutputStream stream) throws IOException
	//{
	// // save bigramHashTable
	// int bhLen = bigramHashTable.length;
	// stream.writeInt(bhLen);
	// for (int i = 0; i<bhLen; i++)
	// {
	// stream.writeLong(bigramHashTable[i]);
	// }

	// // save frequencyTable
	// int fLen = frequencyTable.length;
	// stream.writeInt(fLen);
	// for (int i = 0; i<fLen; i++)
	// {
	// stream.writeInt(frequencyTable[i]);
	// }
	//}

	// Populates the four dictionary tables from a stream in the layout written by
	// SaveToObj: each array is preceded by its Int32 length, and a length below
	// zero means "null" (the field or element is left untouched).
	// The BinaryReader is disposed on exit.
	private void LoadFromObjectInputStream(Stream serialObjectInputStream)
	{
	    using (var reader = new BinaryReader(serialObjectInputStream))
	    {
	        // wordIndexTable
	        int slotCount = reader.ReadInt32();
	        wordIndexTable = new short[slotCount];
	        for (int slot = 0; slot < slotCount; slot++)
	        {
	            wordIndexTable[slot] = reader.ReadInt16();
	        }

	        // charIndexTable
	        int charCount = reader.ReadInt32();
	        charIndexTable = new char[charCount];
	        for (int slot = 0; slot < charCount; slot++)
	        {
	            charIndexTable[slot] = reader.ReadChar();
	        }

	        // wordItem_charArrayTable
	        int tableCount = reader.ReadInt32();
	        if (tableCount >= 0)
	        {
	            wordItem_charArrayTable = new char[tableCount][][];
	            for (int table = 0; table < tableCount; table++)
	            {
	                int wordCount = reader.ReadInt32();
	                if (wordCount < 0)
	                {
	                    continue; // null word list
	                }

	                char[][] words = new char[wordCount][];
	                wordItem_charArrayTable[table] = words;
	                for (int w = 0; w < wordCount; w++)
	                {
	                    int wordLength = reader.ReadInt32();
	                    if (wordLength < 0)
	                    {
	                        continue; // null word
	                    }

	                    char[] word = new char[wordLength];
	                    words[w] = word;
	                    for (int pos = 0; pos < wordLength; pos++)
	                    {
	                        word[pos] = reader.ReadChar();
	                    }
	                }
	            }
	        }

	        // wordItem_frequencyTable
	        int freqTableCount = reader.ReadInt32();
	        if (freqTableCount >= 0)
	        {
	            wordItem_frequencyTable = new int[freqTableCount][];
	            for (int table = 0; table < freqTableCount; table++)
	            {
	                int freqCount = reader.ReadInt32();
	                if (freqCount < 0)
	                {
	                    continue; // null frequency list
	                }

	                int[] freqs = new int[freqCount];
	                wordItem_frequencyTable[table] = freqs;
	                for (int f = 0; f < freqCount; f++)
	                {
	                    freqs[f] = reader.ReadInt32();
	                }
	            }
	        }
	    }
	}

	// Best-effort serialization of the four tables to the given file, in the
	// layout LoadFromObjectInputStream reads: each array is preceded by its Int32
	// length, with -1 standing for a null array. Any exception is swallowed.
	private void SaveToObj(FileInfo serialObj)
	{
	    try
	    {
	        using (Stream stream = new FileStream(serialObj.FullName, FileMode.Create, FileAccess.Write))
	        using (var writer = new BinaryWriter(stream))
	        {
	            // wordIndexTable
	            writer.Write(wordIndexTable.Length);
	            foreach (short tableIndex in wordIndexTable)
	            {
	                writer.Write(tableIndex);
	            }

	            // charIndexTable
	            writer.Write(charIndexTable.Length);
	            foreach (char symbol in charIndexTable)
	            {
	                writer.Write(symbol);
	            }

	            // wordItem_charArrayTable
	            if (wordItem_charArrayTable == null)
	            {
	                writer.Write(-1);
	            }
	            else
	            {
	                writer.Write(wordItem_charArrayTable.Length);
	                foreach (char[][] words in wordItem_charArrayTable)
	                {
	                    if (words == null)
	                    {
	                        writer.Write(-1);
	                        continue;
	                    }

	                    writer.Write(words.Length);
	                    foreach (char[] word in words)
	                    {
	                        if (word == null)
	                        {
	                            writer.Write(-1);
	                            continue;
	                        }

	                        writer.Write(word.Length);
	                        foreach (char symbol in word)
	                        {
	                            writer.Write(symbol);
	                        }
	                    }
	                }
	            }

	            // wordItem_frequencyTable
	            if (wordItem_frequencyTable == null)
	            {
	                writer.Write(-1);
	            }
	            else
	            {
	                writer.Write(wordItem_frequencyTable.Length);
	                foreach (int[] freqs in wordItem_frequencyTable)
	                {
	                    if (freqs == null)
	                    {
	                        writer.Write(-1);
	                        continue;
	                    }

	                    writer.Write(freqs.Length);
	                    foreach (int freq in freqs)
	                    {
	                        writer.Write(freq);
	                    }
	                }
	            }
	        }
	    }
	    catch (Exception)
	    {
	        // Deliberately ignored: writing the cache file is optional.
	    }
	}

	/// <summary>
	/// Load the datafile into this <see cref="WordDictionary"/>
	/// </summary>
	/// <remarks>
	/// For every GB2312 id covered by the file this reads a little-endian entry
	/// count, then that many records of (frequency, length, handle, word bytes).
	/// Every read is completed in full: <see cref="Stream.Read(byte[], int, int)"/>
	/// may return fewer bytes than requested, so short reads are retried and a
	/// premature end of file raises <see cref="EndOfStreamException"/> instead of
	/// silently decoding stale buffer contents.
	/// </remarks>
	/// <param name="dctFilePath">path to word dictionary (coredict.dct)</param>
	/// <returns>number of words read</returns>
	/// <exception cref="IOException">If there is a low-level I/O error, including a truncated file.</exception>
	private int LoadMainDataFromFile(string dctFilePath)
	{
	    byte[] intBuffer = new byte[4];

	    // Fills target completely, looping over short reads.
	    void ReadFully(Stream source, byte[] target)
	    {
	        int filled = 0;
	        while (filled < target.Length)
	        {
	            int read = source.Read(target, filled, target.Length - filled);
	            if (read <= 0)
	            {
	                throw new EndOfStreamException("Unexpected end of dictionary file: " + dctFilePath);
	            }
	            filled += read;
	        }
	    }

	    // Reads one 32-bit integer; the file stores integers little-endian.
	    int ReadInt32(Stream source)
	    {
	        ReadFully(source, intBuffer);
	        return ByteBuffer.Wrap(intBuffer).SetOrder(ByteOrder.LittleEndian).GetInt32();
	    }

	    int total = 0;
	    // Resolved on the first non-empty word and then reused, rather than
	    // looking the encoding up again for every word.
	    Encoding gb2312 = null;

	    // The file only counted 6763 Chinese characters plus 5 reserved slots 3756~3760.
	    // The 3756th is used (as a header) to store information.
	    using (var dctFile = new FileStream(dctFilePath, FileMode.Open, FileAccess.Read))
	    {
	        // GB2312 characters 0 - 6768
	        for (int i = GB2312_FIRST_CHAR; i < GB2312_FIRST_CHAR + CHAR_NUM_IN_FILE; i++)
	        {
	            int cnt = ReadInt32(dctFile);
	            if (cnt <= 0)
	            {
	                // No words for this character; note the hash slot is not set either.
	                wordItem_charArrayTable[i] = null;
	                wordItem_frequencyTable[i] = null;
	                continue;
	            }

	            wordItem_charArrayTable[i] = new char[cnt][];
	            wordItem_frequencyTable[i] = new int[cnt];
	            total += cnt;

	            for (int j = 0; j < cnt; j++)
	            {
	                int frequency = ReadInt32(dctFile);
	                int length = ReadInt32(dctFile);
	                ReadInt32(dctFile); // handle: unused, but must be consumed to stay aligned

	                wordItem_frequencyTable[i][j] = frequency;

	                if (length > 0)
	                {
	                    byte[] lchBuffer = new byte[length];
	                    ReadFully(dctFile, lchBuffer);
	                    if (gb2312 == null)
	                    {
	                        gb2312 = Encoding.GetEncoding("GB2312");
	                    }
	                    wordItem_charArrayTable[i][j] = gb2312.GetString(lchBuffer).ToCharArray();
	                }
	                else
	                {
	                    wordItem_charArrayTable[i][j] = null;
	                }
	            }

	            // Register the character for this id in the hash tables.
	            string str = GetCCByGB2312Id(i);
	            SetTableIndex(str[0], i);
	        }
	    }
	    return total;
	}

	/// <summary>
	/// The original lexicon puts all entries that start with punctuation into a
	/// single table (index 3755 + GB2312_FIRST_CHAR). This redistributes them:
	/// each run of consecutive entries sharing the same first character is moved,
	/// with that first character stripped, into the table of that character's
	/// GB2312 id, and the character is registered via <c>SetTableIndex</c>.
	/// Finally the combined table is cleared.
	/// </summary>
	/// <remarks>
	/// If the target table for an entry already exists, the entry is skipped.
	/// Without that step the scan position would never advance and the loop
	/// would not terminate.
	/// </remarks>
	private void ExpandDelimiterData()
	{
	    int delimiterIndex = 3755 + GB2312_FIRST_CHAR;
	    int i = 0;
	    while (i < wordItem_charArrayTable[delimiterIndex].Length)
	    {
	        char c = wordItem_charArrayTable[delimiterIndex][i][0];
	        int j = GetGB2312Id(c); // the id value of the punctuation

	        if (wordItem_charArrayTable[j] != null)
	        {
	            // Target table already populated: skip this entry so the scan
	            // always makes forward progress.
	            i++;
	            continue;
	        }

	        // Starting from i, find the end of the run of entries that begin with c.
	        int k = i;
	        while (k < wordItem_charArrayTable[delimiterIndex].Length
	            && wordItem_charArrayTable[delimiterIndex][k][0] == c)
	        {
	            k++;
	        }

	        // The entry at i itself starts with c, so the run length is at least 1.
	        int cnt = k - i;
	        wordItem_charArrayTable[j] = new char[cnt][];
	        wordItem_frequencyTable[j] = new int[cnt];

	        // Copy each entry of the run, dropping its leading punctuation character.
	        for (k = 0; k < cnt; k++, i++)
	        {
	            wordItem_frequencyTable[j][k] = wordItem_frequencyTable[delimiterIndex][i];
	            wordItem_charArrayTable[j][k] = new char[wordItem_charArrayTable[delimiterIndex][i].Length - 1];
	            System.Array.Copy(wordItem_charArrayTable[delimiterIndex][i], 1,
	                wordItem_charArrayTable[j][k], 0,
	                wordItem_charArrayTable[j][k].Length);
	        }
	        SetTableIndex(c, j);
	    }

	    // Delete the original combined punctuation table.
	    wordItem_charArrayTable[delimiterIndex] = null;
	    wordItem_frequencyTable[delimiterIndex] = null;
	}

	/// <summary>
	/// Collapse adjacent duplicate words in every table, summing their
	/// frequencies (POS tagging is not done, so entries that differ only by POS
	/// are treated as one word).
	/// </summary>
	private void MergeSameWords()
	{
	    for (int table = 0; table < GB2312_FIRST_CHAR + CHAR_NUM_IN_FILE; table++)
	    {
	        char[][] words = wordItem_charArrayTable[table];
	        if (words == null)
	        {
	            continue;
	        }

	        // Count the entries that differ from their predecessor.
	        int distinct = 1;
	        for (int j = 1; j < words.Length; j++)
	        {
	            if (Utility.CompareArray(words[j], 0, words[j - 1], 0) != 0)
	            {
	                distinct++;
	            }
	        }

	        // Nothing to merge for this table.
	        if (distinct >= words.Length)
	        {
	            continue;
	        }

	        char[][] mergedWords = new char[distinct][];
	        int[] mergedFreqs = new int[distinct];
	        int[] freqs = wordItem_frequencyTable[table];
	        mergedWords[0] = words[0];
	        mergedFreqs[0] = freqs[0];

	        int last = 0; // index of the most recently emitted word
	        for (int j = 1; j < words.Length; j++)
	        {
	            if (Utility.CompareArray(words[j], 0, mergedWords[last], 0) == 0)
	            {
	                // Same word again: fold its frequency into the emitted entry.
	                mergedFreqs[last] += freqs[j];
	            }
	            else
	            {
	                last++;
	                mergedWords[last] = words[j];
	                mergedFreqs[last] = freqs[j];
	            }
	        }

	        wordItem_charArrayTable[table] = mergedWords;
	        wordItem_frequencyTable[table] = mergedFreqs;
	    }
	}

	// Sorts the words of every table into ascending Utility.CompareArray order
	// with an in-place exchange sort, keeping each frequency paired with its word.
	private void SortEachItems()
	{
	    for (int table = 0; table < wordItem_charArrayTable.Length; table++)
	    {
	        char[][] words = wordItem_charArrayTable[table];
	        if (words == null || words.Length <= 1)
	        {
	            continue;
	        }

	        for (int left = 0; left < words.Length - 1; left++)
	        {
	            for (int right = left + 1; right < words.Length; right++)
	            {
	                if (Utility.CompareArray(words[left], 0, words[right], 0) <= 0)
	                {
	                    continue; // already in order
	                }

	                // Swap the two words and, in lockstep, their frequencies.
	                int[] freqs = wordItem_frequencyTable[table];
	                char[] heldWord = words[left];
	                int heldFreq = freqs[left];
	                words[left] = words[right];
	                freqs[left] = freqs[right];
	                words[right] = heldWord;
	                freqs[right] = heldFreq;
	            }
	        }
	    }
	}

	/// <summary>
	/// Find the hash slot for character <paramref name="c"/> and record in it both
	/// the character and the table index <paramref name="j"/>.
	/// </summary>
	/// <returns><c>true</c> if a slot was found and written; <c>false</c> otherwise.</returns>
	private bool SetTableIndex(char c, int j)
	{
	    int slot = GetAvaliableTableIndex(c);
	    if (slot == -1)
	    {
	        return false;
	    }

	    charIndexTable[slot] = c;
	    wordIndexTable[slot] = (short)j;
	    return true;
	}

	// Finds the hash slot in which character c is, or can be, stored.
	// Probing starts at Hash1(c) and steps by multiples of Hash2(c), both reduced
	// modulo PRIME_INDEX_LENGTH, until it reaches an empty slot (value 0) or a
	// slot already holding c.
	// Returns the slot index, or -1 when the probe limit is reached first.
	private short GetAvaliableTableIndex(char c)
	{
	    int hash1 = (int)(Hash1(c) % PRIME_INDEX_LENGTH);
	    int hash2 = Hash2(c) % PRIME_INDEX_LENGTH;
	    // The remainder of a negative value is negative in C#; fold it back into range.
	    if (hash1 < 0)
	    {
	        hash1 = PRIME_INDEX_LENGTH + hash1;
	    }
	    if (hash2 < 0)
	    {
	        hash2 = PRIME_INDEX_LENGTH + hash2;
	    }

	    int index = hash1;
	    int i = 1;
	    while (charIndexTable[index] != 0 && charIndexTable[index] != c
	        && i < PRIME_INDEX_LENGTH)
	    {
	        index = (hash1 + i * hash2) % PRIME_INDEX_LENGTH;
	        i++;
	    }

	    if (i < PRIME_INDEX_LENGTH
	        && (charIndexTable[index] == 0 || charIndexTable[index] == c))
	    {
	        return (short)index;
	    }

	    return -1;
	}

	// Looks up the hash slot that currently holds character c, following the
	// Hash1/Hash2 probe sequence until it meets c or an empty slot (value 0).
	// Returns the slot index, or -1 when c is not present.
	private short GetWordItemTableIndex(char c)
	{
	    int home = (int)(Hash1(c) % PRIME_INDEX_LENGTH);
	    if (home < 0)
	    {
	        home += PRIME_INDEX_LENGTH;
	    }
	    int stride = Hash2(c) % PRIME_INDEX_LENGTH;
	    if (stride < 0)
	    {
	        stride += PRIME_INDEX_LENGTH;
	    }

	    int slot = home;
	    int attempt = 1;
	    while (true)
	    {
	        char occupant = charIndexTable[slot];
	        if (occupant == 0 || occupant == c || attempt >= PRIME_INDEX_LENGTH)
	        {
	            break;
	        }
	        slot = (home + attempt * stride) % PRIME_INDEX_LENGTH;
	        attempt++;
	    }

	    bool found = attempt < PRIME_INDEX_LENGTH && charIndexTable[slot] == c;
	    return found ? (short)slot : (short)-1;
	}

	/// <summary>
	/// Binary-search the word list selected by <paramref name="knownHashIndex"/>
	/// for an entry equal to <paramref name="charArray"/> without its first
	/// character, and return that entry's position.
	/// </summary>
	/// <param name="knownHashIndex">
	/// hash slot of the first character <c>charArray[0]</c>, as returned by
	/// <c>GetWordItemTableIndex</c>; it is used directly as an index into
	/// <c>wordIndexTable</c>.
	/// </param>
	/// <param name="charArray">look up the char array corresponding with the word.</param>
	/// <returns>word location in word array. If not found, then return -1.</returns>
	private int FindInTable(short knownHashIndex, char[] charArray)
	{
	    if (charArray == null || charArray.Length == 0)
	    {
	        return -1;
	    }

	    char[][] items = wordItem_charArrayTable[wordIndexTable[knownHashIndex]];
	    int start = 0, end = items.Length - 1;

	    // Binary search: stored entries are compared from their offset 0 against
	    // charArray from offset 1.
	    while (start <= end)
	    {
	        int mid = (start + end) / 2;
	        int cmpResult = Utility.CompareArray(items[mid], 0, charArray, 1);

	        if (cmpResult == 0)
	        {
	            return mid; // found it
	        }

	        if (cmpResult < 0)
	        {
	            start = mid + 1;
	        }
	        else
	        {
	            end = mid - 1;
	        }
	    }
	    return -1;
	}

	/// <summary>
	/// Find the first dictionary word that starts with the supplied prefix,
	/// searching from position 0.
	/// </summary>
	/// <param name="charArray">input prefix</param>
	/// <returns>index of word, or -1 if not found</returns>
	/// <seealso cref="GetPrefixMatch(char[], int)"/>
	public virtual int GetPrefixMatch(char[] charArray) => GetPrefixMatch(charArray, 0);

	/// <summary>
	/// Binary-search the word list of <c>charArray[0]</c>, starting at
	/// <paramref name="knownStart"/>, for an entry that has the rest of
	/// <paramref name="charArray"/> as a prefix; on a hit, walk backwards to the
	/// first entry of that consecutive run of matches.
	/// </summary>
	/// <param name="charArray">input prefix</param>
	/// <param name="knownStart">relative position in the dictionary to start</param>
	/// <returns>index of word, or -1 if not found</returns>
	/// <seealso cref="GetPrefixMatch(char[])"/>
	public virtual int GetPrefixMatch(char[] charArray, int knownStart)
	{
	    short slot = GetWordItemTableIndex(charArray[0]);
	    if (slot == -1)
	    {
	        return -1;
	    }

	    char[][] candidates = wordItem_charArrayTable[wordIndexTable[slot]];
	    int low = knownStart;
	    int high = candidates.Length - 1;

	    while (low <= high)
	    {
	        int probe = (low + high) / 2;
	        int order = Utility.CompareArrayByPrefix(charArray, 1, candidates[probe], 0);

	        if (order < 0)
	        {
	            high = probe - 1;
	        }
	        else if (order > 0)
	        {
	            low = probe + 1;
	        }
	        else
	        {
	            // Step back over every consecutive match to reach the first one.
	            int first = probe;
	            while (first >= 0
	                && Utility.CompareArrayByPrefix(charArray, 1, candidates[first], 0) == 0)
	            {
	                first--;
	            }
	            return first + 1;
	        }
	    }
	    return -1;
	}

	/// <summary>
	/// Look up how often a word occurs according to the dictionary.
	/// </summary>
	/// <param name="charArray">input word</param>
	/// <returns>word frequency, or zero if the word is not found</returns>
	public virtual int GetFrequency(char[] charArray)
	{
	    short hashIndex = GetWordItemTableIndex(charArray[0]);
	    if (hashIndex == -1)
	    {
	        return 0; // first character is not in the dictionary
	    }

	    int itemIndex = FindInTable(hashIndex, charArray);
	    return itemIndex == -1
	        ? 0
	        : wordItem_frequencyTable[wordIndexTable[hashIndex]][itemIndex];
	}

	/// <summary>
	/// Return <c>true</c> if the dictionary entry at itemIndex for table charArray[0] is charArray
	/// </summary>
	/// <remarks>
	/// When <c>charArray[0]</c> has no table in the dictionary, the result is
	/// <c>false</c> rather than an index-out-of-range failure.
	/// </remarks>
	/// <param name="charArray">input word</param>
	/// <param name="itemIndex">item index for table charArray[0]</param>
	/// <returns><c>true</c> if the entry exists</returns>
	public virtual bool IsEqual(char[] charArray, int itemIndex)
	{
	    // GetWordItemTableIndex returns -1 when charArray[0] has no slot in
	    // charIndexTable. Using -1 to index wordIndexTable would throw, so an
	    // unknown first character is reported as "not equal" instead.
	    short hashIndex = GetWordItemTableIndex(charArray[0]);
	    if (hashIndex == -1)
	    {
	        return false;
	    }

	    // charArray[0] only selects the table; the stored entry is compared
	    // from its offset 0 against charArray from offset 1.
	    return Utility.CompareArray(charArray, 1,
	        wordItem_charArrayTable[wordIndexTable[hashIndex]][itemIndex], 0) == 0;
	}
	}
	}