src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs - lucenenet - Git at Google

 using J2N.Text;
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Util;
 using System;

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 /*
 This file was partially derived from the
 original CIIR University of Massachusetts Amherst version of KStemmer.java (license for
 the original shown below)
  */

 /*
  Copyright © 2003,
  Center for Intelligent Information Retrieval,
  University of Massachusetts, Amherst.
  All rights reserved.

  Redistribution and use in source and binary forms, with or without modification,
  are permitted provided that the following conditions are met:

  1. Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.

  2. Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

  3. The names "Center for Intelligent Information Retrieval" and
  "University of Massachusetts" must not be used to endorse or promote products
  derived from this software without prior written permission. To obtain
  permission, contact info@ciir.cs.umass.edu.

  THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF MASSACHUSETTS AND OTHER CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  SUCH DAMAGE.
  */
 namespace Lucene.Net.Analysis.En
 {
     /// <summary>
     /// This class implements the Kstem algorithm
     /// </summary>
     /// <remarks>
     /// <para>Title: Kstemmer</para>
     /// <para>Description: This is a java version of Bob Krovetz' kstem stemmer</para>
     /// <para>Copyright: Copyright 2008, Luicid Imagination, Inc. </para>
     /// <para>Copyright: Copyright 2003, CIIR University of Massachusetts Amherst (http://ciir.cs.umass.edu) </para>
     /// </remarks>
     public class KStemmer
     {
         private const int MaxWordLen = 50;

         private static readonly string[] exceptionWords = new string[] { "aide", "bathe", "caste",
             "cute", "dame", "dime", "doge", "done", "dune", "envelope", "gage",
             "grille", "grippe", "lobe", "mane", "mare", "nape", "node", "pane",
             "pate", "plane", "pope", "programme", "quite", "ripe", "rote", "rune",
             "sage", "severe", "shoppe", "sine", "slime", "snipe", "steppe", "suite",
             "swinge", "tare", "tine", "tope", "tripe", "twine"
         };

         private static readonly string[][] directConflations = new string[][]
         {
             new string[] {"aging", "age"},
             new string[] {"going", "go"},
             new string[] {"goes", "go"},
             new string[] {"lying", "lie"},
             new string[] {"using", "use"},
             new string[] {"owing", "owe"},
             new string[] {"suing", "sue"},
             new string[] {"dying", "die"},
             new string[] {"tying", "tie"},
             new string[] {"vying", "vie"},
             new string[] {"aged", "age"},
             new string[] {"used", "use"},
             new string[] {"vied", "vie"},
             new string[] {"cued", "cue"},
             new string[] {"died", "die"},
             new string[] {"eyed", "eye"},
             new string[] {"hued", "hue"},
             new string[] {"iced", "ice"},
             new string[] {"lied", "lie"},
             new string[] {"owed", "owe"},
             new string[] {"sued", "sue"},
             new string[] {"toed", "toe"},
             new string[] {"tied", "tie"},
             new string[] {"does", "do"},
             new string[] {"doing", "do"},
             new string[] {"aeronautical", "aeronautics"},
             new string[] {"mathematical", "mathematics"},
             new string[] {"political", "politics"},
             new string[] {"metaphysical", "metaphysics"},
             new string[] {"cylindrical", "cylinder"},
             new string[] {"nazism", "nazi"},
             new string[] {"ambiguity", "ambiguous"},
             new string[] {"barbarity", "barbarous"},
             new string[] {"credulity", "credulous"},
             new string[] {"generosity", "generous"},
             new string[] {"spontaneity", "spontaneous"},
             new string[] {"unanimity", "unanimous"},
             new string[] {"voracity", "voracious"},
             new string[] {"fled", "flee"},
             new string[] {"miscarriage", "miscarry"}
         };

         private static readonly string[][] countryNationality = new string[][]
         {
             new string[] {"afghan", "afghanistan"},
             new string[] {"african", "africa"},
             new string[] {"albanian", "albania"},
             new string[] {"algerian", "algeria"},
             new string[] {"american", "america"},
             new string[] {"andorran", "andorra"},
             new string[] {"angolan", "angola"},
             new string[] {"arabian", "arabia"},
             new string[] {"argentine", "argentina"},
             new string[] {"armenian", "armenia"},
             new string[] {"asian", "asia"},
             new string[] {"australian", "australia"},
             new string[] {"austrian", "austria"},
             new string[] {"azerbaijani", "azerbaijan"},
             new string[] {"azeri", "azerbaijan"},
             new string[] {"bangladeshi", "bangladesh"},
             new string[] {"belgian", "belgium"},
             new string[] {"bermudan", "bermuda"},
             new string[] {"bolivian", "bolivia"},
             new string[] {"bosnian", "bosnia"},
             new string[] {"botswanan", "botswana"},
             new string[] {"brazilian", "brazil"},
             new string[] {"british", "britain"},
             new string[] {"bulgarian", "bulgaria"},
             new string[] {"burmese", "burma"},
             new string[] {"californian", "california"},
             new string[] {"cambodian", "cambodia"},
             new string[] {"canadian", "canada"},
             new string[] {"chadian", "chad"},
             new string[] {"chilean", "chile"},
             new string[] {"chinese", "china"},
             new string[] {"colombian", "colombia"},
             new string[] {"croat", "croatia"},
             new string[] {"croatian", "croatia"},
             new string[] {"cuban", "cuba"},
             new string[] {"cypriot", "cyprus"},
             new string[] {"czechoslovakian", "czechoslovakia"},
             new string[] {"danish", "denmark"},
             new string[] {"egyptian", "egypt"},
             new string[] {"equadorian", "equador"},
             new string[] {"eritrean", "eritrea"},
             new string[] {"estonian", "estonia"},
             new string[] {"ethiopian", "ethiopia"},
             new string[] {"european", "europe"},
             new string[] {"fijian", "fiji"},
             new string[] {"filipino", "philippines"},
             new string[] {"finnish", "finland"},
             new string[] {"french", "france"},
             new string[] {"gambian", "gambia"},
             new string[] {"georgian", "georgia"},
             new string[] {"german", "germany"},
             new string[] {"ghanian", "ghana"},
             new string[] {"greek", "greece"},
             new string[] {"grenadan", "grenada"},
             new string[] {"guamian", "guam"},
             new string[] {"guatemalan", "guatemala"},
             new string[] {"guinean", "guinea"},
             new string[] {"guyanan", "guyana"},
             new string[] {"haitian", "haiti"},
             new string[] {"hawaiian", "hawaii"},
             new string[] {"holland", "dutch"},
             new string[] {"honduran", "honduras"},
             new string[] {"hungarian", "hungary"},
             new string[] {"icelandic", "iceland"},
             new string[] {"indonesian", "indonesia"},
             new string[] {"iranian", "iran"},
             new string[] {"iraqi", "iraq"},
             new string[] {"iraqui", "iraq"},
             new string[] {"irish", "ireland"},
             new string[] {"israeli", "israel"},
             new string[] {"italian", "italy"},
             new string[] {"jamaican", "jamaica"},
             new string[] {"japanese", "japan"},
             new string[] {"jordanian", "jordan"},
             new string[] {"kampuchean", "cambodia"},
             new string[] {"kenyan", "kenya"},
             new string[] {"korean", "korea"},
             new string[] {"kuwaiti", "kuwait"},
             new string[] {"lankan", "lanka"},
             new string[] {"laotian", "laos"},
             new string[] {"latvian", "latvia"},
             new string[] {"lebanese", "lebanon"},
             new string[] {"liberian", "liberia"},
             new string[] {"libyan", "libya"},
             new string[] {"lithuanian", "lithuania"},
             new string[] {"macedonian", "macedonia"},
             new string[] {"madagascan", "madagascar"},
             new string[] {"malaysian", "malaysia"},
             new string[] {"maltese", "malta"},
             new string[] {"mauritanian", "mauritania"},
             new string[] {"mexican", "mexico"},
             new string[] {"micronesian", "micronesia"},
             new string[] {"moldovan", "moldova"},
             new string[] {"monacan", "monaco"},
             new string[] {"mongolian", "mongolia"},
             new string[] {"montenegran", "montenegro"},
             new string[] {"moroccan", "morocco"},
             new string[] {"myanmar", "burma"},
             new string[] {"namibian", "namibia"},
             new string[] {"nepalese", "nepal"},
             new string[] {"nicaraguan", "nicaragua"},
             new string[] {"nigerian", "nigeria"},
             new string[] {"norwegian", "norway"},
             new string[] {"omani", "oman"},
             new string[] {"pakistani", "pakistan"},
             new string[] {"panamanian", "panama"},
             new string[] {"papuan", "papua"},
             new string[] {"paraguayan", "paraguay"},
             new string[] {"peruvian", "peru"},
             new string[] {"portuguese", "portugal"},
             new string[] {"romanian", "romania"},
             new string[] {"rumania", "romania"},
             new string[] {"rumanian", "romania"},
             new string[] {"russian", "russia"},
             new string[] {"rwandan", "rwanda"},
             new string[] {"samoan", "samoa"},
             new string[] {"scottish", "scotland"},
             new string[] {"serb", "serbia"},
             new string[] {"serbian", "serbia"},
             new string[] {"siam", "thailand"},
             new string[] {"siamese", "thailand"},
             new string[] {"slovakia", "slovak"},
             new string[] {"slovakian", "slovak"},
             new string[] {"slovenian", "slovenia"},
             new string[] {"somali", "somalia"},
             new string[] {"somalian", "somalia"},
             new string[] {"spanish", "spain"},
             new string[] {"swedish", "sweden"},
             new string[] {"swiss", "switzerland"},
             new string[] {"syrian", "syria"},
             new string[] {"taiwanese", "taiwan"},
             new string[] {"tanzanian", "tanzania"},
             new string[] {"texan", "texas"},
             new string[] {"thai", "thailand"},
             new string[] {"tunisian", "tunisia"},
             new string[] {"turkish", "turkey"},
             new string[] {"ugandan", "uganda"},
             new string[] {"ukrainian", "ukraine"},
             new string[] {"uruguayan", "uruguay"},
             new string[] {"uzbek", "uzbekistan"},
             new string[] {"venezuelan", "venezuela"},
             new string[] {"vietnamese", "viet"},
             new string[] {"virginian", "virginia"},
             new string[] {"yemeni", "yemen"},
             new string[] {"yugoslav", "yugoslavia"},
             new string[] {"yugoslavian", "yugoslavia"},
             new string[] {"zambian", "zambia"},
             new string[] {"zealander", "zealand"},
             new string[] {"zimbabwean", "zimbabwe"}
         };

         private static readonly string[] supplementDict = new string[] { "aids", "applicator",
             "capacitor", "digitize", "electromagnet", "ellipsoid", "exosphere",
             "extensible", "ferromagnet", "graphics", "hydromagnet", "polygraph",
             "toroid", "superconduct", "backscatter", "connectionism"};

         private static readonly string[] properNouns = new string[] { "abrams", "achilles",
             "acropolis", "adams", "agnes", "aires", "alexander", "alexis", "alfred",
             "algiers", "alps", "amadeus", "ames", "amos", "andes", "angeles",
             "annapolis", "antilles", "aquarius", "archimedes", "arkansas", "asher",
             "ashly", "athens", "atkins", "atlantis", "avis", "bahamas", "bangor",
             "barbados", "barger", "bering", "brahms", "brandeis", "brussels",
             "bruxelles", "cairns", "camoros", "camus", "carlos", "celts", "chalker",
             "charles", "cheops", "ching", "christmas", "cocos", "collins",
             "columbus", "confucius", "conners", "connolly", "copernicus", "cramer",
             "cyclops", "cygnus", "cyprus", "dallas", "damascus", "daniels", "davies",
             "davis", "decker", "denning", "dennis", "descartes", "dickens", "doris",
             "douglas", "downs", "dreyfus", "dukakis", "dulles", "dumfries",
             "ecclesiastes", "edwards", "emily", "erasmus", "euphrates", "evans",
             "everglades", "fairbanks", "federales", "fisher", "fitzsimmons",
             "fleming", "forbes", "fowler", "france", "francis", "goering",
             "goodling", "goths", "grenadines", "guiness", "hades", "harding",
             "harris", "hastings", "hawkes", "hawking", "hayes", "heights",
             "hercules", "himalayas", "hippocrates", "hobbs", "holmes", "honduras",
             "hopkins", "hughes", "humphreys", "illinois", "indianapolis",
             "inverness", "iris", "iroquois", "irving", "isaacs", "italy", "james",
             "jarvis", "jeffreys", "jesus", "jones", "josephus", "judas", "julius",
             "kansas", "keynes", "kipling", "kiwanis", "lansing", "laos", "leeds",
             "levis", "leviticus", "lewis", "louis", "maccabees", "madras",
             "maimonides", "maldive", "massachusetts", "matthews", "mauritius",
             "memphis", "mercedes", "midas", "mingus", "minneapolis", "mohammed",
             "moines", "morris", "moses", "myers", "myknos", "nablus", "nanjing",
             "nantes", "naples", "neal", "netherlands", "nevis", "nostradamus",
             "oedipus", "olympus", "orleans", "orly", "papas", "paris", "parker",
             "pauling", "peking", "pershing", "peter", "peters", "philippines",
             "phineas", "pisces", "pryor", "pythagoras", "queens", "rabelais",
             "ramses", "reynolds", "rhesus", "rhodes", "richards", "robins",
             "rodgers", "rogers", "rubens", "sagittarius", "seychelles", "socrates",
             "texas", "thames", "thomas", "tiberias", "tunis", "venus", "vilnius",
             "wales", "warner", "wilkins", "williams", "wyoming", "xmas", "yonkers",
             "zeus", "frances", "aarhus", "adonis", "andrews", "angus", "antares",
             "aquinas", "arcturus", "ares", "artemis", "augustus", "ayers",
             "barnabas", "barnes", "becker", "bejing", "biggs", "billings", "boeing",
             "boris", "borroughs", "briggs", "buenos", "calais", "caracas", "cassius",
             "cerberus", "ceres", "cervantes", "chantilly", "chartres", "chester",
             "connally", "conner", "coors", "cummings", "curtis", "daedalus",
             "dionysus", "dobbs", "dolores", "edmonds"};

         internal class DictEntry
         {
             internal bool exception;
             internal string root;

             internal DictEntry(string root, bool isException)
             {
                 this.root = root;
                 this.exception = isException;
             }
         }

         private static readonly CharArrayMap<DictEntry> dict_ht = InitializeDictHash();

         // caching off
         //
         // private int maxCacheSize; private CharArrayMap{String} cache =
         // null; private static final String SAME = "SAME"; // use if stemmed form is
         // the same

         private readonly OpenStringBuilder word = new OpenStringBuilder();
         private int j; // index of final letter in stem (within word)
         /// <summary>
         /// INDEX of final letter in word. You must add 1 to k to get
         /// the current length of word. When you want the length of
         /// word, use the method wordLength, which returns (k+1).
         /// </summary>
         private int k;

         // private void initializeStemHash() { if (maxCacheSize > 0) cache = new
         // CharArrayMap<String>(maxCacheSize,false); }

         private char FinalChar
         {
             get { return word[k]; }
         }

         private char PenultChar
         {
             get { return word[k - 1]; }
         }

         private bool IsVowel(int index)
         {
             return !IsCons(index);
         }

         private bool IsCons(int index)
         {
             char ch;

             ch = word[index];

             if ((ch == 'a') || (ch == 'e') || (ch == 'i') || (ch == 'o') || (ch == 'u'))
             {
                 return false;
             }
             if ((ch != 'y') || (index == 0))
             {
                 return true;
             }
             else
             {
                 return (!IsCons(index - 1));
             }
         }

         private static CharArrayMap<DictEntry> InitializeDictHash()
         {
             DictEntry defaultEntry;
             DictEntry entry;

 #pragma warning disable 612, 618
             CharArrayMap<DictEntry> d = new CharArrayMap<DictEntry>(LuceneVersion.LUCENE_CURRENT, 1000, false);
 #pragma warning restore 612, 618
             for (int i = 0; i < exceptionWords.Length; i++)
             {
                 if (!d.ContainsKey(exceptionWords[i]))
                 {
                     entry = new DictEntry(exceptionWords[i], true);
                     d.Put(exceptionWords[i], entry);
                 }
                 else
                 {
                     throw new Exception("Warning: Entry [" + exceptionWords[i] + "] already in dictionary 1");
                 }
             }

             for (int i = 0; i < directConflations.Length; i++)
             {
                 if (!d.ContainsKey(directConflations[i][0]))
                 {
                     entry = new DictEntry(directConflations[i][1], false);
                     d.Put(directConflations[i][0], entry);
                 }
                 else
                 {
                     throw new Exception("Warning: Entry [" + directConflations[i][0] + "] already in dictionary 2");
                 }
             }

             for (int i = 0; i < countryNationality.Length; i++)
             {
                 if (!d.ContainsKey(countryNationality[i][0]))
                 {
                     entry = new DictEntry(countryNationality[i][1], false);
                     d.Put(countryNationality[i][0], entry);
                 }
                 else
                 {
                     throw new Exception("Warning: Entry [" + countryNationality[i][0] + "] already in dictionary 3");
                 }
             }

             defaultEntry = new DictEntry(null, false);

             string[] array;
             array = KStemData1.data;

             for (int i = 0; i < array.Length; i++)
             {
                 if (!d.ContainsKey(array[i]))
                 {
                     d.Put(array[i], defaultEntry);
                 }
                 else
                 {
                     throw new Exception("Warning: Entry [" + array[i] + "] already in dictionary 4");
                 }
             }

             array = KStemData2.data;
             for (int i = 0; i < array.Length; i++)
             {
                 if (!d.ContainsKey(array[i]))
                 {
                     d.Put(array[i], defaultEntry);
                 }
                 else
                 {
                     throw new Exception("Warning: Entry [" + array[i] + "] already in dictionary 4");
                 }
             }

             array = KStemData3.data;
             for (int i = 0; i < array.Length; i++)
             {
                 if (!d.ContainsKey(array[i]))
                 {
                     d.Put(array[i], defaultEntry);
                 }
                 else
                 {
                     throw new Exception("Warning: Entry [" + array[i] + "] already in dictionary 4");
                 }
             }

             array = KStemData4.data;
             for (int i = 0; i < array.Length; i++)
             {
                 if (!d.ContainsKey(array[i]))
                 {
                     d.Put(array[i], defaultEntry);
                 }
                 else
                 {
                     throw new Exception("Warning: Entry [" + array[i] + "] already in dictionary 4");
                 }
             }

             array = KStemData5.data;
             for (int i = 0; i < array.Length; i++)
             {
                 if (!d.ContainsKey(array[i]))
                 {
                     d.Put(array[i], defaultEntry);
                 }
                 else
                 {
                     throw new Exception("Warning: Entry [" + array[i] + "] already in dictionary 4");
                 }
             }

             array = KStemData6.data;
             for (int i = 0; i < array.Length; i++)
             {
                 if (!d.ContainsKey(array[i]))
                 {
                     d.Put(array[i], defaultEntry);
                 }
                 else
                 {
                     throw new Exception("Warning: Entry [" + array[i] + "] already in dictionary 4");
                 }
             }

             array = KStemData7.data;
             for (int i = 0; i < array.Length; i++)
             {
                 if (!d.ContainsKey(array[i]))
                 {
                     d.Put(array[i], defaultEntry);
                 }
                 else
                 {
                     throw new Exception("Warning: Entry [" + array[i] + "] already in dictionary 4");
                 }
             }

             for (int i = 0; i < KStemData8.data.Length; i++)
             {
                 if (!d.ContainsKey(KStemData8.data[i]))
                 {
                     d.Put(KStemData8.data[i], defaultEntry);
                 }
                 else
                 {
                     throw new Exception("Warning: Entry [" + KStemData8.data[i] + "] already in dictionary 4");
                 }
             }

             for (int i = 0; i < supplementDict.Length; i++)
             {
                 if (!d.ContainsKey(supplementDict[i]))
                 {
                     d.Put(supplementDict[i], defaultEntry);
                 }
                 else
                 {
                     throw new Exception("Warning: Entry [" + supplementDict[i] + "] already in dictionary 5");
                 }
             }

             for (int i = 0; i < properNouns.Length; i++)
             {
                 if (!d.ContainsKey(properNouns[i]))
                 {
                     d.Put(properNouns[i], defaultEntry);
                 }
                 else
                 {
                     throw new Exception("Warning: Entry [" + properNouns[i] + "] already in dictionary 6");
                 }
             }

             return d;
         }

         private bool IsAlpha(char ch)
         {
             return ch >= 'a' && ch <= 'z'; // terms must be lowercased already
         }

         /// <summary>length of stem within word</summary>
         private int StemLength
         {
             get { return j + 1; }
         }

         private bool EndsIn(char[] s)
         {
             if (s.Length > k)
             {
                 return false;
             }

             int r = word.Length - s.Length; // length of word before this suffix
             j = k;
             for (int r1 = r, i = 0; i < s.Length; i++, r1++)
             {
                 if (s[i] != word[r1])
                 {
                     return false;
                 }
             }
             j = r - 1; // index of the character BEFORE the posfix
             return true;
         }

         private bool EndsIn(char a, char b)
         {
             if (2 > k)
             {
                 return false;
             }
             // check left to right since the endings have often already matched
             if (word[k - 1] == a && word[k] == b)
             {
                 j = k - 2;
                 return true;
             }
             return false;
         }

         private bool EndsIn(char a, char b, char c)
         {
             if (3 > k)
             {
                 return false;
             }
             if (word[k - 2] == a && word[k - 1] == b && word[k] == c)
             {
                 j = k - 3;
                 return true;
             }
             return false;
         }

         private bool EndsIn(char a, char b, char c, char d)
         {
             if (4 > k)
             {
                 return false;
             }
             if (word[k - 3] == a && word[k - 2] == b && word[k - 1] == c && word[k] == d)
             {
                 j = k - 4;
                 return true;
             }
             return false;
         }

         private DictEntry WordInDict()
         {
             // if (matchedEntry != null) { if (dict_ht.get(word.getArray(), 0,
             // word.size()) != matchedEntry) {
             // System.out.println("Uh oh... cached entry doesn't match"); } return
             // matchedEntry; }

             if (matchedEntry != null)
             {
                 return matchedEntry;
             }
             DictEntry e = dict_ht.Get(word.Array, 0, word.Length);
             if (e != null && !e.exception)
             {
                 matchedEntry = e; // only cache if it's not an exception.
             }
             // lookups.add(word.toString());
             return e;
         }

         /// <summary>Convert plurals to singular form, and '-ies' to 'y'</summary>
         private void Plural()
         {
             if (word[k] == 's')
             {
                 if (EndsIn('i', 'e', 's'))
                 {
                     word.Length = j + 3;
                     k--;
                     if (Lookup()) // ensure calories -> calorie
                     {
                         return;
                     }
                     k++;
                     word.UnsafeWrite('s');
                     SetSuffix("y");
                     Lookup();
                 }
                 else if (EndsIn('e', 's'))
                 {
                     /* try just removing the "s" */
                     word.Length = j + 2;
                     k--;

                     /*
                      * note: don't check for exceptions here. So, `aides' -> `aide', but
                      * `aided' -> `aid'. The exception for double s is used to prevent
                      * crosses -> crosse. This is actually correct if crosses is a plural
                      * noun (a type of racket used in lacrosse), but the verb is much more
                      * common
                      */


                     //**
                     // YCS: this was the one place where lookup was not followed by return.
                     // So restructure it. if ((j>0)&&(lookup(word.toString())) &&
                     // !((word.CharAt(j) == 's') && (word.CharAt(j-1) == 's'))) return;
                     // ****

                     bool tryE = j > 0 && !((word[j] == 's') && (word[j - 1] == 's'));
                     if (tryE && Lookup())
                     {
                         return;
                     }

                     /* try removing the "es" */

                     word.Length = j + 1;
                     k--;
                     if (Lookup())
                     {
                         return;
                     }

                     /* the default is to retain the "e" */
                     word.UnsafeWrite('e');
                     k++;

                     if (!tryE) // if we didn't try the "e" ending before
                     {
                         Lookup();
                     }
                     return;
                 }
                 else
                 {
                     if (word.Length > 3 && PenultChar != 's' && !EndsIn('o', 'u', 's'))
                     {
                         /* unless the word ends in "ous" or a double "s", remove the final "s" */

                         word.Length = k;
                         k--;
                         Lookup();
                     }
                 }
             }
         }

         private void SetSuffix(string s)
         {
             SetSuff(s, s.Length);
         }

         /// <summary>replace old suffix with s</summary>
         private void SetSuff(string s, int len)
         {
             word.Length = j + 1;
             for (int l = 0; l < len; l++)
             {
                 word.UnsafeWrite(s[l]);
             }
             k = j + len;
         }

         /* Returns true if the word is found in the dictionary */
         // almost all uses of Lookup() return immediately and are
         // followed by another lookup in the dict. Store the match
         // to avoid this double lookup.
         internal DictEntry matchedEntry = null;

         private bool Lookup()
         {
             // debugging code String thisLookup = word.toString(); boolean added =
             // lookups.add(thisLookup); if (!added) {
             // System.out.println("######extra lookup:" + thisLookup); // occaasional
             // extra lookups aren't necessarily errors... could happen by diff
             // manipulations // throw new RuntimeException("######extra lookup:" +
             // thisLookup); } else { // System.out.println("new lookup:" + thisLookup);
             // }

             matchedEntry = dict_ht.Get(word.Array, 0, word.Length);
             return matchedEntry != null;
         }

         // Set<String> lookups = new HashSet<>();

         /// <summary>convert past tense (-ed) to present, and `-ied' to `y'</summary>
         private void PastTense()
         {
             /*
              * Handle words less than 5 letters with a direct mapping This prevents
              * (fled -> fl).
              */
             if (word.Length <= 4)
             {
                 return;
             }

             if (EndsIn('i', 'e', 'd'))
             {
                 word.Length = j + 3;
                 k--;
                 if (Lookup()) // we almost always want to convert -ied to -y, but
                 {
                     return; // this isn't true for short words (died->die)
                 }
                 k++; // I don't know any long words that this applies to,
                 word.UnsafeWrite('d'); // but just in case...
                 SetSuffix("y");
                 Lookup();
                 return;
             }

             /* the vowelInStem() is necessary so we don't stem acronyms */
             if (EndsIn('e', 'd') && VowelInStem())
             {
                 /* see if the root ends in `e' */
                 word.Length = j + 2;
                 k = j + 1;

                 DictEntry entry = WordInDict();
                 if (entry != null)
                 {
                     if (!entry.exception)
                     {
                         // if it's in the dictionary and
                         // not an exception
                         return;
                     }
                 }

                 /* try removing the "ed" */
                 word.Length = j + 1;
                 k = j;
                 if (Lookup())
                 {
                     return;
                 }

                 /*
                  * try removing a doubled consonant. if the root isn't found in the
                  * dictionary, the default is to leave it doubled. This will correctly
                  * capture `backfilled' -> `backfill' instead of `backfill' ->
                  * `backfille', and seems correct most of the time
                  */

                 if (DoubleC(k))
                 {
                     word.Length = k;
                     k--;
                     if (Lookup())
                     {
                         return;
                     }
                     word.UnsafeWrite(word[k]);
                     k++;
                     Lookup();
                     return;
                 }

                 /* if we have a `un-' prefix, then leave the word alone */
                 /* (this will sometimes screw up with `under-', but we */
                 /* will take care of that later) */

                 if ((word[0] == 'u') && (word[1] == 'n'))
                 {
                     word.UnsafeWrite('e');
                     word.UnsafeWrite('d');
                     k = k + 2;
                     // nolookup()
                     return;
                 }

                 /*
                  * it wasn't found by just removing the `d' or the `ed', so prefer to end
                  * with an `e' (e.g., `microcoded' -> `microcode').
                  */

                 word.Length = j + 1;
                 word.UnsafeWrite('e');
                 k = j + 1;
                 // nolookup() - we already tried the "e" ending
                 return;
             }
         }

         /// <summary>return TRUE if word ends with a double consonant</summary>
         private bool DoubleC(int i)
         {
             if (i < 1)
             {
                 return false;
             }

             if (word[i] != word[i - 1])
             {
                 return false;
             }
             return (IsCons(i));
         }

         private bool VowelInStem()
         {
             for (int i = 0; i < StemLength; i++)
             {
                 if (IsVowel(i))
                 {
                     return true;
                 }
             }
             return false;
         }

         /// <summary>handle `-ing' endings</summary>
         private void Aspect()
         {
             /*
              * handle short words (aging -> age) via a direct mapping. This prevents
              * (thing -> the) in the version of this routine that ignores inflectional
              * variants that are mentioned in the dictionary (when the root is also
              * present)
              */

             if (word.Length <= 5)
             {
                 return;
             }

             /* the vowelinstem() is necessary so we don't stem acronyms */
             if (EndsIn('i', 'n', 'g') && VowelInStem())
             {

                 /* try adding an `e' to the stem and check against the dictionary */
                 word[j + 1] = 'e';
                 word.Length = j + 2;
                 k = j + 1;

                 DictEntry entry = WordInDict();
                 if (entry != null)
                 {
                     if (!entry.exception) // if it's in the dictionary and not an exception
                     {
                         return;
                     }
                 }

                 /* adding on the `e' didn't work, so remove it */
                 word.Length = k;
                 k--; // note that `ing' has also been removed

                 if (Lookup())
                 {
                     return;
                 }

                 /* if I can remove a doubled consonant and get a word, then do so */
                 if (DoubleC(k))
                 {
                     k--;
                     word.Length = k + 1;
                     if (Lookup())
                     {
                         return;
                     }
                     word.UnsafeWrite(word[k]); // restore the doubled consonant

                     /* the default is to leave the consonant doubled */
                     /* (e.g.,`fingerspelling' -> `fingerspell'). Unfortunately */
                     /* `bookselling' -> `booksell' and `mislabelling' -> `mislabell'). */
                     /* Without making the algorithm significantly more complicated, this */
                     /* is the best I can do */
                     k++;
                     Lookup();
                     return;
                 }

                 /*
                  * the word wasn't in the dictionary after removing the stem, and then
                  * checking with and without a final `e'. The default is to add an `e'
                  * unless the word ends in two consonants, so `microcoding' ->
                  * `microcode'. The two consonants restriction wouldn't normally be
                  * necessary, but is needed because we don't try to deal with prefixes and
                  * compounds, and most of the time it is correct (e.g., footstamping ->
                  * footstamp, not footstampe; however, decoupled -> decoupl). We can
                  * prevent almost all of the incorrect stems if we try to do some prefix
                  * analysis first
                  */

                 if ((j > 0) && IsCons(j) && IsCons(j - 1))
                 {
                     k = j;
                     word.Length = k + 1;
                     // nolookup() because we already did according to the comment
                     return;
                 }

                 word.Length = j + 1;
                 word.UnsafeWrite('e');
                 k = j + 1;
                 // nolookup(); we already tried an 'e' ending
                 return;
             }
         }

         /// <summary>
         /// this routine deals with -ity endings. It accepts -ability, -ibility, and
         /// -ality, even without checking the dictionary because they are so
         /// productive. The first two are mapped to -ble, and the -ity is remove for
         /// the latter
         /// </summary>
         private void ItyEndings()
         {
             int old_k = k;

             if (EndsIn('i', 't', 'y'))
             {
                 word.Length = j + 1; // try just removing -ity
                 k = j;
                 if (Lookup())
                 {
                     return;
                 }
                 word.UnsafeWrite('e'); // try removing -ity and adding -e
                 k = j + 1;
                 if (Lookup())
                 {
                     return;
                 }
                 word[j + 1] = 'i';
                 word.Append("ty");
                 k = old_k;
                 /*
                  * the -ability and -ibility endings are highly productive, so just accept
                  * them
                  */
                 if ((j > 0) && (word[j - 1] == 'i') && (word[j] == 'l'))
                 {
                     word.Length = j - 1;
                     word.Append("le"); // convert to -ble
                     k = j;
                     Lookup();
                     return;
                 }

                 /* ditto for -ivity */
                 if ((j > 0) && (word[j - 1] == 'i') && (word[j] == 'v'))
                 {
                     word.Length = j + 1;
                     word.UnsafeWrite('e'); // convert to -ive
                     k = j + 1;
                     Lookup();
                     return;
                 }
                 /* ditto for -ality */
                 if ((j > 0) && (word[j - 1] == 'a') && (word[j] == 'l'))
                 {
                     word.Length = j + 1;
                     k = j;
                     Lookup();
                     return;
                 }

                 /*
                  * if the root isn't in the dictionary, and the variant *is* there, then
                  * use the variant. This allows `immunity'->`immune', but prevents
                  * `capacity'->`capac'. If neither the variant nor the root form are in
                  * the dictionary, then remove the ending as a default
                  */

                 if (Lookup())
                 {
                     return;
                 }

                 /* the default is to remove -ity altogether */
                 word.Length = j + 1;
                 k = j;
                 // nolookup(), we already did it.
                 return;
             }
         }

         /// <summary>handle -ence and -ance</summary>
         private void NceEndings()
         {
             int old_k = k;
             char word_char;

             if (EndsIn('n', 'c', 'e'))
             {
                 word_char = word[j];
                 if (!((word_char == 'e') || (word_char == 'a')))
                 {
                     return;
                 }
                 word.Length = j;
                 word.UnsafeWrite('e'); // try converting -e/ance to -e (adherance/adhere)
                 k = j;
                 if (Lookup())
                 {
                     return;
                 }
                 word.Length = j; /*
                               * try removing -e/ance altogether
                               * (disappearance/disappear)
                               */
                 k = j - 1;
                 if (Lookup())
                 {
                     return;
                 }
                 word.UnsafeWrite(word_char); // restore the original ending
                 word.Append("nce");
                 k = old_k;
                 // nolookup() because we restored the original ending
             }
             return;
         }

         /// <summary>handle -ness</summary>
         private void NessEndings()
         {
             if (EndsIn('n', 'e', 's', 's'))
             {
                 /*
                                                    * this is a very productive endings, so
                                                    * just accept it
                                                    */
                 word.Length = j + 1;
                 k = j;
                 if (word[j] == 'i')
                 {
                     word[j] = 'y';
                 }
                 Lookup();
             }
             return;
         }

         /// <summary>handle -ism</summary>
         private void IsmEndings()
         {
             if (EndsIn('i', 's', 'm'))
             {
                 /*
                                               * this is a very productive ending, so just
                                               * accept it
                                               */
                 word.Length = j + 1;
                 k = j;
                 Lookup();
             }
             return;
         }

         /// <summary>this routine deals with -ment endings.</summary>
         private void MentEndings()
         {
             int old_k = k;

             if (EndsIn('m', 'e', 'n', 't'))
             {
                 word.Length = j + 1;
                 k = j;
                 if (Lookup())
                 {
                     return;
                 }
                 word.Append("ment");
                 k = old_k;
                 // nolookup
             }
             return;
         }

         /// <summary>this routine deals with -ize endings.</summary>
         private void IzeEndings()
         {
             int old_k = k;

             if (EndsIn('i', 'z', 'e'))
             {
                 word.Length = j + 1; // try removing -ize entirely
                 k = j;
                 if (Lookup())
                 {
                     return;
                 }
                 word.UnsafeWrite('i');

                 if (DoubleC(j)) // allow for a doubled consonant
                 {
                     word.Length = j;
                     k = j - 1;
                     if (Lookup())
                     {
                         return;
                     }
                     word.UnsafeWrite(word[j - 1]);
                 }

                 word.Length = j + 1;
                 word.UnsafeWrite('e'); // try removing -ize and adding -e
                 k = j + 1;
                 if (Lookup())
                 {
                     return;
                 }
                 word.Length = j + 1;
                 word.Append("ize");
                 k = old_k;
                 // nolookup()
             }
             return;
         }

         /// <summary>handle -ency and -ancy</summary>
         private void NcyEndings()
         {
             if (EndsIn('n', 'c', 'y'))
             {
                 if (!((word[j] == 'e') || (word[j] == 'a')))
                 {
                     return;
                 }
                 word[j + 2] = 't'; // try converting -ncy to -nt
                 word.Length = j + 3;
                 k = j + 2;

                 if (Lookup())
                 {
                     return;
                 }

                 word[j + 2] = 'c'; // the default is to convert it to -nce
                 word.UnsafeWrite('e');
                 k = j + 3;
                 Lookup();
             }
             return;
         }

         /// <summary>handle -able and -ible</summary>
         private void BleEndings()
         {
             int old_k = k;
             char word_char;

             if (EndsIn('b', 'l', 'e'))
             {
                 if (!((word[j] == 'a') || (word[j] == 'i')))
                 {
                     return;
                 }
                 word_char = word[j];
                 word.Length = j; // try just removing the ending
                 k = j - 1;
                 if (Lookup())
                 {
                     return;
                 }
                 if (DoubleC(k)) // allow for a doubled consonant
                 {
                     word.Length = k;
                     k--;
                     if (Lookup())
                     {
                         return;
                     }
                     k++;
                     word.UnsafeWrite(word[k - 1]);
                 }
                 word.Length = j;
                 word.UnsafeWrite('e'); // try removing -a/ible and adding -e
                 k = j;
                 if (Lookup())
                 {
                     return;
                 }
                 word.Length = j;
                 word.Append("ate"); // try removing -able and adding -ate
                                     /* (e.g., compensable/compensate) */
                 k = j + 2;
                 if (Lookup())
                 {
                     return;
                 }
                 word.Length = j;
                 word.UnsafeWrite(word_char); // restore the original values
                 word.Append("ble");
                 k = old_k;
                 // nolookup()
             }
             return;
         }

         /// <summary>
         /// handle -ic endings. This is fairly straightforward, but this is also the
         /// only place we try *expanding* an ending, -ic -> -ical. This is to handle
         /// cases like `canonic' -> `canonical'
         /// </summary>
         private void IcEndings()
         {
             if (EndsIn('i', 'c'))
             {
                 word.Length = j + 3;
                 word.Append("al"); // try converting -ic to -ical
                 k = j + 4;
                 if (Lookup())
                 {
                     return;
                 }

                 word[j + 1] = 'y'; // try converting -ic to -y
                 word.Length = j + 2;
                 k = j + 1;
                 if (Lookup())
                 {
                     return;
                 }

                 word[j + 1] = 'e'; // try converting -ic to -e
                 if (Lookup())
                 {
                     return;
                 }

                 word.Length = j + 1; // try removing -ic altogether
                 k = j;
                 if (Lookup())
                 {
                     return;
                 }
                 word.Append("ic"); // restore the original ending
                 k = j + 2;
                 // nolookup()
             }
             return;
         }

         private static char[] ization = "ization".ToCharArray();
         private static char[] ition = "ition".ToCharArray();
         private static char[] ation = "ation".ToCharArray();
         private static char[] ication = "ication".ToCharArray();

         /* handle some derivational endings */

         /// <summary>
         /// this routine deals with -ion, -ition, -ation, -ization, and -ication. The
         /// -ization ending is always converted to -ize
         /// </summary>
         private void IonEndings()
         {
             int old_k = k;
             if (!EndsIn('i', 'o', 'n'))
             {
                 return;
             }

             if (EndsIn(ization))
             {
                 /*
                                         * the -ize ending is very productive, so simply
                                         * accept it as the root
                                         */
                 word.Length = j + 3;
                 word.UnsafeWrite('e');
                 k = j + 3;
                 Lookup();
                 return;
             }

             if (EndsIn(ition))
             {
                 word.Length = j + 1;
                 word.UnsafeWrite('e');
                 k = j + 1;
                 if (Lookup()) /*
                          * remove -ition and add `e', and check against the
                          * dictionary
                          */
                 {
                     return; // (e.g., definition->define, opposition->oppose)
                 }

                 /* restore original values */
                 word.Length = j + 1;
                 word.Append("ition");
                 k = old_k;
                 // nolookup()
             }
             else if (EndsIn(ation))
             {
                 word.Length = j + 3;
                 word.UnsafeWrite('e');
                 k = j + 3;
                 if (Lookup()) // remove -ion and add `e', and check against the dictionary
                 {
                     return; // (elmination -> eliminate)
                 }

                 word.Length = j + 1;
                 word.UnsafeWrite('e'); /*
                                   * remove -ation and add `e', and check against the
                                   * dictionary
                                   */
                 k = j + 1;
                 if (Lookup())
                 {
                     return;
                 }

                 word.Length = j + 1; /*
                                  * just remove -ation (resignation->resign) and
                                  * check dictionary
                                  */
                 k = j;
                 if (Lookup())
                 {
                     return;
                 }

                 /* restore original values */
                 word.Length = j + 1;
                 word.Append("ation");
                 k = old_k;
                 // nolookup()

             }

             /*
              * test -ication after -ation is attempted (e.g., `complication->complicate'
              * rather than `complication->comply')
              */

             if (EndsIn(ication))
             {
                 word.Length = j + 1;
                 word.UnsafeWrite('y');
                 k = j + 1;
                 if (Lookup()) /*
                          * remove -ication and add `y', and check against the
                          * dictionary
                          */
                 {
                     return; // (e.g., amplification -> amplify)
                 }

                 /* restore original values */
                 word.Length = j + 1;
                 word.Append("ication");
                 k = old_k;
                 // nolookup()
             }

             // if (EndsIn(ion)) {
             if (true) // we checked for this earlier... just need to set "j"
             {
                 j = k - 3; // YCS

                 word.Length = j + 1;
                 word.UnsafeWrite('e');
                 k = j + 1;
                 if (Lookup()) // remove -ion and add `e', and check against the dictionary
                 {
                     return;
                 }

                 word.Length = j + 1;
                 k = j;
                 if (Lookup()) // remove -ion, and if it's found, treat that as the root
                 {
                     return;
                 }

                 /* restore original values */
                 word.Length = j + 1;
                 word.Append("ion");
                 k = old_k;
                 // nolookup()
             }

             // nolookup(); all of the other paths restored original values
             return;
         }

         /// <summary>
         /// this routine deals with -er, -or, -ier, and -eer. The -izer ending is
         /// always converted to -ize
         /// </summary>
         private void ErAndOrEndings()
         {
             int old_k = k;

             if (word[k] != 'r') // YCS
             {
                 return;
             }

             char word_char; // so we can remember if it was -er or -or

             if (EndsIn('i', 'z', 'e', 'r'))
             {
                 /*
                                                    * -ize is very productive, so accept it
                                                    * as the root
                                                    */
                 word.Length = j + 4;
                 k = j + 3;
                 Lookup();
                 return;
             }

             if (EndsIn('e', 'r') || EndsIn('o', 'r'))
             {
                 word_char = word[j + 1];
                 if (DoubleC(j))
                 {
                     word.Length = j;
                     k = j - 1;
                     if (Lookup())
                     {
                         return;
                     }
                     word.UnsafeWrite(word[j - 1]); // restore the doubled consonant
                 }

                 if (word[j] == 'i') // do we have a -ier ending?
                 {
                     word[j] = 'y';
                     word.Length = j + 1;
                     k = j;
                     if (Lookup()) // yes, so check against the dictionary
                     {
                         return;
                     }
                     word[j] = 'i'; // restore the endings
                     word.UnsafeWrite('e');
                 }

                 if (word[j] == 'e') // handle -eer
                 {
                     word.Length = j;
                     k = j - 1;
                     if (Lookup())
                     {
                         return;
                     }
                     word.UnsafeWrite('e');
                 }

                 word.Length = j + 2; // remove the -r ending
                 k = j + 1;
                 if (Lookup())
                 {
                     return;
                 }
                 word.Length = j + 1; // try removing -er/-or
                 k = j;
                 if (Lookup())
                 {
                     return;
                 }
                 word.UnsafeWrite('e'); // try removing -or and adding -e
                 k = j + 1;
                 if (Lookup())
                 {
                     return;
                 }
                 word.Length = j + 1;
                 word.UnsafeWrite(word_char);
                 word.UnsafeWrite('r'); // restore the word to the way it was
                 k = old_k;
                 // nolookup()
             }

         }

         /// <summary>
         /// this routine deals with -ly endings. The -ally ending is always converted
         /// to -al Sometimes this will temporarily leave us with a non-word (e.g.,
         /// heuristically maps to heuristical), but then the -al is removed in the next
         /// step.
         /// </summary>
         private void LyEndings()
         {
             int old_k = k;

             if (EndsIn('l', 'y'))
             {

                 word[j + 2] = 'e'; // try converting -ly to -le

                 if (Lookup())
                 {
                     return;
                 }
                 word[j + 2] = 'y';

                 word.Length = j + 1; // try just removing the -ly
                 k = j;

                 if (Lookup())
                 {
                     return;
                 }

                 if ((j > 0) && (word[j - 1] == 'a') && (word[j] == 'l')) /*
                                                                                   * always
                                                                                   * convert
                                                                                   * -
                                                                                   * ally
                                                                                   * to
                                                                                   * -
                                                                                   * al
                                                                                   */
                 {
                     return;
                 }
                 word.Append("ly");
                 k = old_k;

                 if ((j > 0) && (word[j - 1] == 'a') && (word[j] == 'b'))
                 {
                     /*
                                                                                               * always
                                                                                               * convert
                                                                                               * -
                                                                                               * ably
                                                                                               * to
                                                                                               * -
                                                                                               * able
                                                                                               */
                     word[j + 2] = 'e';
                     k = j + 2;
                     return;
                 }

                 if (word[j] == 'i') // e.g., militarily -> military
                 {
                     word.Length = j;
                     word.UnsafeWrite('y');
                     k = j;
                     if (Lookup())
                     {
                         return;
                     }
                     word.Length = j;
                     word.Append("ily");
                     k = old_k;
                 }

                 word.Length = j + 1; // the default is to remove -ly

                 k = j;
                 // nolookup()... we already tried removing the "ly" variant
             }
             return;
         }

         /// <summary>
         /// this routine deals with -al endings. Some of the endings from the previous
         /// routine are finished up here.
         /// </summary>
         private void AlEndings()
         {
             int old_k = k;

             if (word.Length < 4)
             {
                 return;
             }
             if (EndsIn('a', 'l'))
             {
                 word.Length = j + 1;
                 k = j;
                 if (Lookup()) // try just removing the -al
                 {
                     return;
                 }

                 if (DoubleC(j)) // allow for a doubled consonant
                 {
                     word.Length = j;
                     k = j - 1;
                     if (Lookup())
                     {
                         return;
                     }
                     word.UnsafeWrite(word[j - 1]);
                 }

                 word.Length = j + 1;
                 word.UnsafeWrite('e'); // try removing the -al and adding -e
                 k = j + 1;
                 if (Lookup())
                 {
                     return;
                 }

                 word.Length = j + 1;
                 word.Append("um"); // try converting -al to -um
                                    /* (e.g., optimal - > optimum ) */
                 k = j + 2;
                 if (Lookup())
                 {
                     return;
                 }

                 word.Length = j + 1;
                 word.Append("al"); // restore the ending to the way it was
                 k = old_k;

                 if ((j > 0) && (word[j - 1] == 'i') && (word[j] == 'c'))
                 {
                     word.Length = j - 1; // try removing -ical
                     k = j - 2;
                     if (Lookup())
                     {
                         return;
                     }

                     word.Length = j - 1;
                     word.UnsafeWrite('y'); // try turning -ical to -y (e.g., bibliographical)
                     k = j - 1;
                     if (Lookup())
                     {
                         return;
                     }

                     word.Length = j - 1;
                     word.Append("ic"); // the default is to convert -ical to -ic
                     k = j;
                     // nolookup() ... converting ical to ic means removing "al" which we
                     // already tried
                     // ERROR
                     Lookup();
                     return;
                 }

                 if (word[j] == 'i') // sometimes -ial endings should be removed
                 {
                     word.Length = j; // (sometimes it gets turned into -y, but we
                     k = j - 1; // aren't dealing with that case for now)
                     if (Lookup())
                     {
                         return;
                     }
                     word.Append("ial");
                     k = old_k;
                     Lookup();
                 }

             }
             return;
         }

         /// <summary>
         /// this routine deals with -ive endings. It normalizes some of the -ative
         /// endings directly, and also maps some -ive endings to -ion.
         /// </summary>
         private void IveEndings()
         {
             int old_k = k;

             if (EndsIn('i', 'v', 'e'))
             {
                 word.Length = j + 1; // try removing -ive entirely
                 k = j;
                 if (Lookup())
                 {
                     return;
                 }

                 word.UnsafeWrite('e'); // try removing -ive and adding -e
                 k = j + 1;
                 if (Lookup())
                 {
                     return;
                 }
                 word.Length = j + 1;
                 word.Append("ive");
                 if ((j > 0) && (word[j - 1] == 'a') && (word[j] == 't'))
                 {
                     word[j - 1] = 'e'; // try removing -ative and adding -e
                     word.Length = j; // (e.g., determinative -> determine)
                     k = j - 1;
                     if (Lookup())
                     {
                         return;
                     }
                     word.Length = j - 1; // try just removing -ative
                     if (Lookup())
                     {
                         return;
                     }

                     word.Append("ative");
                     k = old_k;
                 }

                 /* try mapping -ive to -ion (e.g., injunctive/injunction) */
                 word[j + 2] = 'o';
                 word[j + 3] = 'n';
                 if (Lookup())
                 {
                     return;
                 }

                 word[j + 2] = 'v'; // restore the original values
                 word[j + 3] = 'e';
                 k = old_k;
                 // nolookup()
             }
             return;
         }

         internal KStemmer()
         {
         }

         internal virtual string Stem(string term)
         {
             bool changed = Stem(term.ToCharArray(), term.Length);
             if (!changed)
             {
                 return term;
             }
             return AsString();
         }

         /// <summary>
         /// Returns the result of the stem (assuming the word was changed) as a <see cref="string"/>.
         /// </summary>
         internal virtual string AsString()
         {
             string s = String;
             if (s != null)
             {
                 return s;
             }
             return word.ToString();
         }

         internal virtual ICharSequence AsCharSequence()
         {
             return result != null ? (ICharSequence)new CharsRef(result) : word;
         }

         internal virtual string String
         {
             get
             {
                 return result;
             }
         }

         internal virtual char[] Chars
         {
             get
             {
                 return word.Array;
             }
         }

         internal virtual int Length
         {
             get
             {
                 return word.Length;
             }
         }

         internal string result;

         private bool IsMatched
         {
             get
             {

                 //*
                 // if (!lookups.contains(word.toString())) { throw new
                 // RuntimeException("didn't look up "+word.toString()+" prev="+prevLookup);
                 // }
                 // **

                 // lookup();
                 return matchedEntry != null;
             }
         }

         /// <summary>
         /// Stems the text in the token. Returns true if changed.
         /// </summary>
         internal virtual bool Stem(char[] term, int len)
         {

             result = null;

             k = len - 1;
             if ((k <= 1) || (k >= MaxWordLen - 1))
             {
                 return false; // don't stem
             }

             // first check the stemmer dictionaries, and avoid using the
             // cache if it's in there.
             DictEntry entry = dict_ht.Get(term, 0, len);
             if (entry != null)
             {
                 if (entry.root != null)
                 {
                     result = entry.root;
                     return true;
                 }
                 return false;
             }

             //*
             // caching off is normally faster if (cache == null) initializeStemHash();
             //
             // // now check the cache, before we copy chars to "word" if (cache != null)
             // { String val = cache.get(term, 0, len); if (val != null) { if (val !=
             // SAME) { result = val; return true; } return false; } }
             // **

             word.Reset();
             // allocate enough space so that an expansion is never needed
             word.EnsureCapacity(len + 10);
             for (int i = 0; i < len; i++)
             {
                 char ch = term[i];
                 if (!IsAlpha(ch)) // don't stem
                 {
                     return false;
                 }
                 // don't lowercase... it's a requirement that lowercase filter be
                 // used before this stemmer.
                 word.UnsafeWrite(ch);
             }

             matchedEntry = null;

             //*
             // lookups.clear(); lookups.add(word.toString());
             // **


             /*
              * This while loop will never be executed more than one time; it is here
              * only to allow the break statement to be used to escape as soon as a word
              * is recognized
              */
             while (true)
             {
                 // YCS: extra lookup()s were inserted so we don't need to
                 // do an extra wordInDict() here.
                 Plural();
                 if (IsMatched)
                 {
                     break;
                 }
                 PastTense();
                 if (IsMatched)
                 {
                     break;
                 }
                 Aspect();
                 if (IsMatched)
                 {
                     break;
                 }
                 ItyEndings();
                 if (IsMatched)
                 {
                     break;
                 }
                 NessEndings();
                 if (IsMatched)
                 {
                     break;
                 }
                 IonEndings();
                 if (IsMatched)
                 {
                     break;
                 }
                 ErAndOrEndings();
                 if (IsMatched)
                 {
                     break;
                 }
                 LyEndings();
                 if (IsMatched)
                 {
                     break;
                 }
                 AlEndings();
                 if (IsMatched)
                 {
                     break;
                 }
                 entry = WordInDict();
                 IveEndings();
                 if (IsMatched)
                 {
                     break;
                 }
                 IzeEndings();
                 if (IsMatched)
                 {
                     break;
                 }
                 MentEndings();
                 if (IsMatched)
                 {
                     break;
                 }
                 BleEndings();
                 if (IsMatched)
                 {
                     break;
                 }
                 IsmEndings();
                 if (IsMatched)
                 {
                     break;
                 }
                 IcEndings();
                 if (IsMatched)
                 {
                     break;
                 }
                 NcyEndings();
                 if (IsMatched)
                 {
                     break;
                 }
                 NceEndings();
                 bool foo = IsMatched;
                 break;
             }

             /*
              * try for a direct mapping (allows for cases like `Italian'->`Italy' and
              * `Italians'->`Italy')
              */
             entry = matchedEntry;
             if (entry != null)
             {
                 result = entry.root; // may be null, which means that "word" is the stem
             }

             //*
             // caching off is normally faster if (cache != null && cache.size() <
             // maxCacheSize) { char[] key = new char[len]; System.arraycopy(term, 0,
             // key, 0, len); if (result != null) { cache.put(key, result); } else {
             // cache.put(key, word.toString()); } }
             // **

             //*
             // if (entry == null) { if (!word.toString().equals(new String(term,0,len), StringComparison.Ordinal))
             // { System.out.println("CASE:" + word.toString() + "," + new
             // String(term,0,len));
             //
             // } }
             // **

             // no entry matched means result is "word"
             return true;
         }
     }
 }