src/contrib/Analyzers/En/KStemmer.cs - lucenenet - Git at Google

 using System;
 using System.Collections.Generic;
 using System.Diagnostics;
 using Lucene.Net.Analysis.En;

 namespace Lucene.Net.Analysis.En
 {
     public class KStemmer
     {
         private const int MaxWordLen = 50;

         static private readonly String[] exceptionWords = {"aide", "bathe", "caste",
       "cute", "dame", "dime", "doge", "done", "dune", "envelope", "gage",
       "grille", "grippe", "lobe", "mane", "mare", "nape", "node", "pane",
       "pate", "plane", "pope", "programme", "quite", "ripe", "rote", "rune",
       "sage", "severe", "shoppe", "sine", "slime", "snipe", "steppe", "suite",
       "swinge", "tare", "tine", "tope", "tripe", "twine"};

         static private readonly String[][] directConflations = { new[] {"aging", "age"},
       new[] {"going", "go"}, new[] {"goes", "go"}, new[] {"lying", "lie"}, new[] {"using", "use"},
       new[] {"owing", "owe"}, new[] {"suing", "sue"}, new[] {"dying", "die"}, new[] {"tying", "tie"},
       new[] {"vying", "vie"}, new[] {"aged", "age"}, new[] {"used", "use"},new[] {"vied", "vie"},
       new[] {"cued", "cue"}, new[] {"died", "die"}, new[] {"eyed", "eye"}, new[] {"hued", "hue"},
       new[] {"iced", "ice"}, new[] {"lied", "lie"}, new[] {"owed", "owe"}, new[] {"sued", "sue"},
       new[] {"toed", "toe"}, new[] {"tied", "tie"}, new[] {"does", "do"}, new[] {"doing", "do"},
       new[] {"aeronautical", "aeronautics"}, new[] {"mathematical", "mathematics"},
       new[] {"political", "politics"}, new[] {"metaphysical", "metaphysics"},
       new[] {"cylindrical", "cylinder"}, new[] {"nazism", "nazi"},
       new[] {"ambiguity", "ambiguous"}, new[] {"barbarity", "barbarous"},
       new[] {"credulity", "credulous"}, new[] {"generosity", "generous"},
       new[] {"spontaneity", "spontaneous"}, new[] {"unanimity", "unanimous"},
       new[] {"voracity", "voracious"}, new[] {"fled", "flee"}, new[] {"miscarriage", "miscarry"}};

         static private readonly String[][] countryNationality = {
       new[] {"afghan", "afghanistan"}, new[] {"african", "africa"},
       new[] {"albanian", "albania"}, new[] {"algerian", "algeria"},
       new[] {"american", "america"}, new[] {"andorran", "andorra"}, new[] {"angolan", "angola"},
       new[] {"arabian", "arabia"}, new[] {"argentine", "argentina"},
       new[] {"armenian", "armenia"}, new[] {"asian", "asia"}, new[] {"australian", "australia"},
       new[] {"austrian", "austria"}, new[] {"azerbaijani", "azerbaijan"},
       new[] {"azeri", "azerbaijan"}, new[] {"bangladeshi", "bangladesh"},
       new[] {"belgian", "belgium"}, new[] {"bermudan", "bermuda"}, new[] {"bolivian", "bolivia"},
       new[] {"bosnian", "bosnia"}, new[] {"botswanan", "botswana"},
       new[] {"brazilian", "brazil"}, new[] {"british", "britain"},
       new[] {"bulgarian", "bulgaria"}, new[] {"burmese", "burma"},
       new[] {"californian", "california"}, new[] {"cambodian", "cambodia"},
       new[] {"canadian", "canada"}, new[] {"chadian", "chad"}, new[] {"chilean", "chile"},
       new[] {"chinese", "china"}, new[] {"colombian", "colombia"}, new[] {"croat", "croatia"},
       new[] {"croatian", "croatia"}, new[] {"cuban", "cuba"}, new[] {"cypriot", "cyprus"},
       new[] {"czechoslovakian", "czechoslovakia"}, new[] {"danish", "denmark"},
       new[] {"egyptian", "egypt"}, new[] {"equadorian", "equador"},
       new[] {"eritrean", "eritrea"}, new[] {"estonian", "estonia"},
       new[] {"ethiopian", "ethiopia"}, new[] {"european", "europe"}, new[] {"fijian", "fiji"},
       new[] {"filipino", "philippines"}, new[] {"finnish", "finland"},
       new[] {"french", "france"}, new[] {"gambian", "gambia"}, new[] {"georgian", "georgia"},
       new[] {"german", "germany"}, new[] {"ghanian", "ghana"}, new[] {"greek", "greece"},
       new[] {"grenadan", "grenada"}, new[] {"guamian", "guam"},
       new[] {"guatemalan", "guatemala"}, new[] {"guinean", "guinea"},
       new[] {"guyanan", "guyana"}, new[] {"haitian", "haiti"}, new[] {"hawaiian", "hawaii"},
       new[] {"holland", "dutch"}, new[] {"honduran", "honduras"}, new[] {"hungarian", "hungary"},
       new[] {"icelandic", "iceland"}, new[] {"indonesian", "indonesia"},
       new[] {"iranian", "iran"}, new[] {"iraqi", "iraq"}, new[] {"iraqui", "iraq"},
       new[] {"irish", "ireland"}, new[] {"israeli", "israel"},
       new[] {"italian", "italy"},
       new[] {"jamaican", "jamaica"},
       new[] {"japanese", "japan"},
       new[] {"jordanian", "jordan"},
       new[] {"kampuchean", "cambodia"},
       new[] {"kenyan", "kenya"},
       new[] {"korean", "korea"},
       new[] {"kuwaiti", "kuwait"},
       new[] {"lankan", "lanka"},
       new[] {"laotian", "laos"},
       new[] {"latvian", "latvia"},
       new[] {"lebanese", "lebanon"},
       new[] {"liberian", "liberia"},
       new[] {"libyan", "libya"},
       new[] {"lithuanian", "lithuania"},
       new[] {"macedonian", "macedonia"},
       new[] {"madagascan", "madagascar"},
       new[] {"malaysian", "malaysia"},
       new[] {"maltese", "malta"},
       new[] {"mauritanian", "mauritania"},
       new[] {"mexican", "mexico"},
       new[] {"micronesian", "micronesia"},
       new[] {"moldovan", "moldova"},
       new[] {"monacan", "monaco"},
       new[] {"mongolian", "mongolia"},
       new[] {"montenegran", "montenegro"},
       new[] {"moroccan", "morocco"},
       new[] {"myanmar", "burma"},
       new[] {"namibian", "namibia"},
       new[] {"nepalese", "nepal"},
       //new[]  {"netherlands", "dutch"},
       new[] {"nicaraguan", "nicaragua"}, new[] {"nigerian", "nigeria"},
       new[] {"norwegian", "norway"}, new[] {"omani", "oman"}, new[] {"pakistani", "pakistan"},
       new[] {"panamanian", "panama"}, new[] {"papuan", "papua"},
       new[] {"paraguayan", "paraguay"}, new[] {"peruvian", "peru"},
       new[] {"portuguese", "portugal"}, new[] {"romanian", "romania"},
       new[] {"rumania", "romania"}, new[] {"rumanian", "romania"}, new[] {"russian", "russia"},
       new[] {"rwandan", "rwanda"}, new[] {"samoan", "samoa"}, new[] {"scottish", "scotland"},
       new[] {"serb", "serbia"}, new[] {"serbian", "serbia"}, new[] {"siam", "thailand"},
       new[] {"siamese", "thailand"}, new[] {"slovakia", "slovak"}, new[] {"slovakian", "slovak"},
       new[] {"slovenian", "slovenia"}, new[] {"somali", "somalia"},
       new[] {"somalian", "somalia"}, new[] {"spanish", "spain"}, new[] {"swedish", "sweden"},
       new[] {"swiss", "switzerland"}, new[] {"syrian", "syria"}, new[] {"taiwanese", "taiwan"},
       new[] {"tanzanian", "tanzania"}, new[] {"texan", "texas"}, new[] {"thai", "thailand"},
       new[] {"tunisian", "tunisia"}, new[] {"turkish", "turkey"}, new[] {"ugandan", "uganda"},
       new[] {"ukrainian", "ukraine"}, new[] {"uruguayan", "uruguay"},
       new[] {"uzbek", "uzbekistan"}, new[] {"venezuelan", "venezuela"},
       new[] {"vietnamese", "viet"}, new[] {"virginian", "virginia"}, new[] {"yemeni", "yemen"},
       new[] {"yugoslav", "yugoslavia"}, new[] {"yugoslavian", "yugoslavia"},
       new[] {"zambian", "zambia"}, new[] {"zealander", "zealand"},
       new[] {"zimbabwean", "zimbabwe"}};

         static private readonly String[] supplementDict = {"aids", "applicator",
       "capacitor", "digitize", "electromagnet", "ellipsoid", "exosphere",
       "extensible", "ferromagnet", "graphics", "hydromagnet", "polygraph",
       "toroid", "superconduct", "backscatter", "connectionism"};

         static private readonly String[] properNouns = {"abrams", "achilles",
       "acropolis", "adams", "agnes", "aires", "alexander", "alexis", "alfred",
       "algiers", "alps", "amadeus", "ames", "amos", "andes", "angeles",
       "annapolis", "antilles", "aquarius", "archimedes", "arkansas", "asher",
       "ashly", "athens", "atkins", "atlantis", "avis", "bahamas", "bangor",
       "barbados", "barger", "bering", "brahms", "brandeis", "brussels",
       "bruxelles", "cairns", "camoros", "camus", "carlos", "celts", "chalker",
       "charles", "cheops", "ching", "christmas", "cocos", "collins",
       "columbus", "confucius", "conners", "connolly", "copernicus", "cramer",
       "cyclops", "cygnus", "cyprus", "dallas", "damascus", "daniels", "davies",
       "davis", "decker", "denning", "dennis", "descartes", "dickens", "doris",
       "douglas", "downs", "dreyfus", "dukakis", "dulles", "dumfries",
       "ecclesiastes", "edwards", "emily", "erasmus", "euphrates", "evans",
       "everglades", "fairbanks", "federales", "fisher", "fitzsimmons",
       "fleming", "forbes", "fowler", "france", "francis", "goering",
       "goodling", "goths", "grenadines", "guiness", "hades", "harding",
       "harris", "hastings", "hawkes", "hawking", "hayes", "heights",
       "hercules", "himalayas", "hippocrates", "hobbs", "holmes", "honduras",
       "hopkins", "hughes", "humphreys", "illinois", "indianapolis",
       "inverness", "iris", "iroquois", "irving", "isaacs", "italy", "james",
       "jarvis", "jeffreys", "jesus", "jones", "josephus", "judas", "julius",
       "kansas", "keynes", "kipling", "kiwanis", "lansing", "laos", "leeds",
       "levis", "leviticus", "lewis", "louis", "maccabees", "madras",
       "maimonides", "maldive", "massachusetts", "matthews", "mauritius",
       "memphis", "mercedes", "midas", "mingus", "minneapolis", "mohammed",
       "moines", "morris", "moses", "myers", "myknos", "nablus", "nanjing",
       "nantes", "naples", "neal", "netherlands", "nevis", "nostradamus",
       "oedipus", "olympus", "orleans", "orly", "papas", "paris", "parker",
       "pauling", "peking", "pershing", "peter", "peters", "philippines",
       "phineas", "pisces", "pryor", "pythagoras", "queens", "rabelais",
       "ramses", "reynolds", "rhesus", "rhodes", "richards", "robins",
       "rodgers", "rogers", "rubens", "sagittarius", "seychelles", "socrates",
       "texas", "thames", "thomas", "tiberias", "tunis", "venus", "vilnius",
       "wales", "warner", "wilkins", "williams", "wyoming", "xmas", "yonkers",
       "zeus", "frances", "aarhus", "adonis", "andrews", "angus", "antares",
       "aquinas", "arcturus", "ares", "artemis", "augustus", "ayers",
       "barnabas", "barnes", "becker", "bejing", "biggs", "billings", "boeing",
       "boris", "borroughs", "briggs", "buenos", "calais", "caracas", "cassius",
       "cerberus", "ceres", "cervantes", "chantilly", "chartres", "chester",
       "connally", "conner", "coors", "cummings", "curtis", "daedalus",
       "dionysus", "dobbs", "dolores", "edmonds"};

         private class DictEntry
         {
             public readonly bool exception;
             public readonly String root;

             public DictEntry(String root, bool isException)
             {
                 this.root = root;
                 this.exception = isException;
             }
         }

         private static readonly IDictionary<string, DictEntry> dict_ht = initializeDictHash();

         /***
          * caching off private int maxCacheSize; private CharArrayMap<String> cache =
          * null; private static final String SAME = "SAME"; // use if stemmed form is
          * the same
          ***/

         private readonly OpenStringBuilder word = new OpenStringBuilder(); // OpenStringBuilder
         private int j; /* index of final letter in stem (within word) */
         private int k; /*
                   * INDEX of final letter in word. You must add 1 to k to get
                   * the current length of word. When you want the length of
                   * word, use the method wordLength, which returns (k+1).
                   */

         /***
          * private void initializeStemHash() { if (maxCacheSize > 0) cache = new
          * CharArrayMap<String>(maxCacheSize,false); }
          ***/

         private char finalChar()
         {
             return word.charAt(k);
         }

         private char penultChar()
         {
             return word.charAt(k - 1);
         }

         private bool isVowel(int index)
         {
             return !isCons(index);
         }

         private bool isCons(int index)
         {
             var ch = word.charAt(index);
             if ((ch == 'a') || (ch == 'e') || (ch == 'i') || (ch == 'o') || (ch == 'u')) return false;
             if ((ch != 'y') || (index == 0)) return true;
             else return (!isCons(index - 1));
         }

         private static IDictionary<string, DictEntry> initializeDictHash()
         {
             DictEntry defaultEntry;
             DictEntry entry;

             IDictionary<string, DictEntry> d = new Dictionary<string, DictEntry>(1000);

             for (int i = 0; i < exceptionWords.Length; i++)
             {
                 if (!d.ContainsKey(exceptionWords[i]))
                 {
                     entry = new DictEntry(exceptionWords[i], true);
                     d.Add(exceptionWords[i], entry);
                 }
                 else
                 {
                     Debug.Write("Warning: Entry [" + exceptionWords[i]
                         + "] already in dictionary 1");
                 }
             }

             for (int i = 0; i < directConflations.Length; i++)
             {
                 if (!d.ContainsKey(directConflations[i][0]))
                 {
                     entry = new DictEntry(directConflations[i][1], false);
                     d.Add(directConflations[i][0], entry);
                 }
                 else
                 {
                     Debug.Write("Warning: Entry [" + directConflations[i][0]
                         + "] already in dictionary 2");
                 }
             }

             for (int i = 0; i < countryNationality.Length; i++)
             {
                 if (!d.ContainsKey(countryNationality[i][0]))
                 {
                     entry = new DictEntry(countryNationality[i][1], false);
                     d.Add(countryNationality[i][0], entry);
                 }
                 else
                 {
                     Debug.Write("Warning: Entry [" + countryNationality[i][0]
                         + "] already in dictionary 3");
                 }
             }

             defaultEntry = new DictEntry(null, false);

             string[] array = KStemData1.data;

             for (int i = 0; i < array.Length; i++)
             {
                 if (!d.ContainsKey(array[i]))
                 {
                     d.Add(array[i], defaultEntry);
                 }
                 else
                 {
                     Debug.Write("Warning: Entry [" + array[i]
                         + "] already in dictionary 4");
                 }
             }

             array = KStemData2.data;
             for (int i = 0; i < array.Length; i++)
             {
                 if (!d.ContainsKey(array[i]))
                 {
                     d.Add(array[i], defaultEntry);
                 }
                 else
                 {
                     Debug.Write("Warning: Entry [" + array[i]
                         + "] already in dictionary 4");
                 }
             }

             array = KStemData3.data;
             for (int i = 0; i < array.Length; i++)
             {
                 if (!d.ContainsKey(array[i]))
                 {
                     d.Add(array[i], defaultEntry);
                 }
                 else
                 {
                     Debug.Write("Warning: Entry [" + array[i]
                         + "] already in dictionary 4");
                 }
             }

             array = KStemData4.data;
             for (int i = 0; i < array.Length; i++)
             {
                 if (!d.ContainsKey(array[i]))
                 {
                     d.Add(array[i], defaultEntry);
                 }
                 else
                 {
                     Debug.Write("Warning: Entry [" + array[i]
                         + "] already in dictionary 4");
                 }
             }

             array = KStemData5.data;
             for (int i = 0; i < array.Length; i++)
             {
                 if (!d.ContainsKey(array[i]))
                 {
                     d.Add(array[i], defaultEntry);
                 }
                 else
                 {
                     Debug.Write("Warning: Entry [" + array[i]
                         + "] already in dictionary 4");
                 }
             }

             array = KStemData6.data;
             for (int i = 0; i < array.Length; i++)
             {
                 if (!d.ContainsKey(array[i]))
                 {
                     d.Add(array[i], defaultEntry);
                 }
                 else
                 {
                     Debug.Write("Warning: Entry [" + array[i]
                         + "] already in dictionary 4");
                 }
             }

             array = KStemData7.data;
             for (int i = 0; i < array.Length; i++)
             {
                 if (!d.ContainsKey(array[i]))
                 {
                     d.Add(array[i], defaultEntry);
                 }
                 else
                 {
                     Debug.Write("Warning: Entry [" + array[i]
                         + "] already in dictionary 4");
                 }
             }

             for (int i = 0; i < KStemData8.data.Length; i++)
             {
                 if (!d.ContainsKey(KStemData8.data[i]))
                 {
                     d.Add(KStemData8.data[i], defaultEntry);
                 }
                 else
                 {
                     Debug.Write("Warning: Entry [" + KStemData8.data[i]
                         + "] already in dictionary 4");
                 }
             }

             for (int i = 0; i < supplementDict.Length; i++)
             {
                 if (!d.ContainsKey(supplementDict[i]))
                 {
                     d.Add(supplementDict[i], defaultEntry);
                 }
                 else
                 {
                     Debug.Write("Warning: Entry [" + supplementDict[i]
                         + "] already in dictionary 5");
                 }
             }

             for (int i = 0; i < properNouns.Length; i++)
             {
                 if (!d.ContainsKey(properNouns[i]))
                 {
                     d.Add(properNouns[i], defaultEntry);
                 }
                 else
                 {
                     Debug.Write("Warning: Entry [" + properNouns[i]
                         + "] already in dictionary 6");
                 }
             }

             return d;
         }

         private bool isAlpha(char ch)
         {
             return ch >= 'a' && ch <= 'z'; // terms must be lowercased already
         }

         /* length of stem within word */
         private int stemLength()
         {
             return j + 1;
         }

         private bool endsIn(char[] s)
         {
             if (s.Length > k) return false;

             int r = word.length() - s.Length; /* length of word before this suffix */
             j = k;
             for (int r1 = r, i = 0; i < s.Length; i++, r1++)
             {
                 if (s[i] != word.charAt(r1)) return false;
             }
             j = r - 1; /* index of the character BEFORE the posfix */
             return true;
         }

         private bool endsIn(char a, char b)
         {
             if (2 > k) return false;
             // check left to right since the endings have often already matched
             if (word.charAt(k - 1) == a && word.charAt(k) == b)
             {
                 j = k - 2;
                 return true;
             }
             return false;
         }

         private bool endsIn(char a, char b, char c)
         {
             if (3 > k) return false;
             if (word.charAt(k - 2) == a && word.charAt(k - 1) == b
                 && word.charAt(k) == c)
             {
                 j = k - 3;
                 return true;
             }
             return false;
         }

         private bool endsIn(char a, char b, char c, char d)
         {
             if (4 > k) return false;
             if (word.charAt(k - 3) == a && word.charAt(k - 2) == b
                 && word.charAt(k - 1) == c && word.charAt(k) == d)
             {
                 j = k - 4;
                 return true;
             }
             return false;
         }

         private DictEntry wordInDict()
         {
             /***
              * if (matchedEntry != null) { if (dict_ht.get(word.getArray(), 0,
              * word.size()) != matchedEntry) {
              * System.out.println("Uh oh... cached entry doesn't match"); } return
              * matchedEntry; }
              ***/
             if (matchedEntry != null) return matchedEntry;
             String w = word.toString();
             DictEntry e = dict_ht.ContainsKey(w) ? dict_ht[w] : null;
             if (e != null && !e.exception)
             {
                 matchedEntry = e; // only cache if it's not an exception.
             }
             // lookups.add(word.toString());
             return e;
         }

         /* Convert plurals to singular form, and '-ies' to 'y' */
         private void plural()
         {
             if (word.charAt(k) == 's')
             {
                 if (endsIn('i', 'e', 's'))
                 {
                     word.setLength(j + 3);
                     k--;
                     if (lookup()) /* ensure calories -> calorie */
                         return;
                     k++;
                     word.unsafeWrite('s');
                     setSuffix("y");
                     lookup();
                 }
                 else if (endsIn('e', 's'))
                 {
                     /* try just removing the "s" */
                     word.setLength(j + 2);
                     k--;

                     /*
                      * note: don't check for exceptions here. So, `aides' -> `aide', but
                      * `aided' -> `aid'. The exception for double s is used to prevent
                      * crosses -> crosse. This is actually correct if crosses is a plural
                      * noun (a type of racket used in lacrosse), but the verb is much more
                      * common
                      */

                     /****
                      * YCS: this was the one place where lookup was not followed by return.
                      * So restructure it. if ((j>0)&&(lookup(word.toString())) &&
                      * !((word.charAt(j) == 's') && (word.charAt(j-1) == 's'))) return;
                      *****/
                     bool tryE = j > 0
                         && !((word.charAt(j) == 's') && (word.charAt(j - 1) == 's'));
                     if (tryE && lookup()) return;

                     /* try removing the "es" */

                     word.setLength(j + 1);
                     k--;
                     if (lookup()) return;

                     /* the default is to retain the "e" */
                     word.unsafeWrite('e');
                     k++;

                     if (!tryE) lookup(); // if we didn't try the "e" ending before
                     return;
                 }
                 else
                 {
                     if (word.length() > 3 && penultChar() != 's' && !endsIn('o', 'u', 's'))
                     {
                         /* unless the word ends in "ous" or a double "s", remove the final "s" */

                         word.setLength(k);
                         k--;
                         lookup();
                     }
                 }
             }
         }

         private void setSuffix(String s)
         {
             setSuff(s, s.Length);
         }

         /* replace old suffix with s */
         private void setSuff(String s, int len)
         {
             word.setLength(j + 1);
             for (int l = 0; l < len; l++)
             {
                 word.unsafeWrite(s[l]);
             }
             k = j + len;
         }

         /* Returns true if the word is found in the dictionary */
         // almost all uses of lookup() return immediately and are
         // followed by another lookup in the dict. Store the match
         // to avoid this double lookup.
         DictEntry matchedEntry = null;

         private bool lookup()
         {
             /******
              * debugging code String thisLookup = word.toString(); bool added =
              * lookups.add(thisLookup); if (!added) {
              * System.out.println("######extra lookup:" + thisLookup); // occaasional
              * extra lookups aren't necessarily errors... could happen by diff
              * manipulations // throw new RuntimeException("######extra lookup:" +
              * thisLookup); } else { // System.out.println("new lookup:" + thisLookup);
              * }
              ******/
             String w = word.toString();
             matchedEntry = dict_ht.ContainsKey(w) ? dict_ht[w] : null;
             return matchedEntry != null;
         }

         // Set<String> lookups = new HashSet<String>();

         /* convert past tense (-ed) to present, and `-ied' to `y' */
         private void pastTense()
         {
             /*
              * Handle words less than 5 letters with a direct mapping This prevents
              * (fled -> fl).
              */
             if (word.length() <= 4) return;

             if (endsIn('i', 'e', 'd'))
             {
                 word.setLength(j + 3);
                 k--;
                 if (lookup()) /* we almost always want to convert -ied to -y, but */
                     return; /* this isn't true for short words (died->die) */
                 k++; /* I don't know any long words that this applies to, */
                 word.unsafeWrite('d'); /* but just in case... */
                 setSuffix("y");
                 lookup();
                 return;
             }

             /* the vowelInStem() is necessary so we don't stem acronyms */
             if (endsIn('e', 'd') && vowelInStem())
             {
                 /* see if the root ends in `e' */
                 word.setLength(j + 2);
                 k = j + 1;

                 DictEntry entry = wordInDict();
                 if (entry != null) if (!entry.exception) /*
                                                 * if it's in the dictionary and
                                                 * not an exception
                                                 */
                         return;

                 /* try removing the "ed" */
                 word.setLength(j + 1);
                 k = j;
                 if (lookup()) return;

                 /*
                  * try removing a doubled consonant. if the root isn't found in the
                  * dictionary, the default is to leave it doubled. This will correctly
                  * capture `backfilled' -> `backfill' instead of `backfill' ->
                  * `backfille', and seems correct most of the time
                  */

                 if (doubleC(k))
                 {
                     word.setLength(k);
                     k--;
                     if (lookup()) return;
                     word.unsafeWrite(word.charAt(k));
                     k++;
                     lookup();
                     return;
                 }

                 /* if we have a `un-' prefix, then leave the word alone */
                 /* (this will sometimes screw up with `under-', but we */
                 /* will take care of that later) */

                 if ((word.charAt(0) == 'u') && (word.charAt(1) == 'n'))
                 {
                     word.unsafeWrite('e');
                     word.unsafeWrite('d');
                     k = k + 2;
                     // nolookup()
                     return;
                 }

                 /*
                  * it wasn't found by just removing the `d' or the `ed', so prefer to end
                  * with an `e' (e.g., `microcoded' -> `microcode').
                  */

                 word.setLength(j + 1);
                 word.unsafeWrite('e');
                 k = j + 1;
                 // nolookup() - we already tried the "e" ending
                 return;
             }
         }

         /* return TRUE if word ends with a double consonant */
         private bool doubleC(int i)
         {
             if (i < 1) return false;

             if (word.charAt(i) != word.charAt(i - 1)) return false;
             return (isCons(i));
         }

         private bool vowelInStem()
         {
             for (int i = 0; i < stemLength(); i++)
             {
                 if (isVowel(i)) return true;
             }
             return false;
         }

         /* handle `-ing' endings */
         private void aspect()
         {
             /*
              * handle short words (aging -> age) via a direct mapping. This prevents
              * (thing -> the) in the version of this routine that ignores inflectional
              * variants that are mentioned in the dictionary (when the root is also
              * present)
              */

             if (word.length() <= 5) return;

             /* the vowelinstem() is necessary so we don't stem acronyms */
             if (endsIn('i', 'n', 'g') && vowelInStem())
             {

                 /* try adding an `e' to the stem and check against the dictionary */
                 word.setCharAt(j + 1, 'e');
                 word.setLength(j + 2);
                 k = j + 1;

                 DictEntry entry = wordInDict();
                 if (entry != null)
                 {
                     if (!entry.exception) /* if it's in the dictionary and not an exception */
                         return;
                 }

                 /* adding on the `e' didn't work, so remove it */
                 word.setLength(k);
                 k--; /* note that `ing' has also been removed */

                 if (lookup()) return;

                 /* if I can remove a doubled consonant and get a word, then do so */
                 if (doubleC(k))
                 {
                     k--;
                     word.setLength(k + 1);
                     if (lookup()) return;
                     word.unsafeWrite(word.charAt(k)); /* restore the doubled consonant */

                     /* the default is to leave the consonant doubled */
                     /* (e.g.,`fingerspelling' -> `fingerspell'). Unfortunately */
                     /* `bookselling' -> `booksell' and `mislabelling' -> `mislabell'). */
                     /* Without making the algorithm significantly more complicated, this */
                     /* is the best I can do */
                     k++;
                     lookup();
                     return;
                 }

                 /*
                  * the word wasn't in the dictionary after removing the stem, and then
                  * checking with and without a final `e'. The default is to add an `e'
                  * unless the word ends in two consonants, so `microcoding' ->
                  * `microcode'. The two consonants restriction wouldn't normally be
                  * necessary, but is needed because we don't try to deal with prefixes and
                  * compounds, and most of the time it is correct (e.g., footstamping ->
                  * footstamp, not footstampe; however, decoupled -> decoupl). We can
                  * prevent almost all of the incorrect stems if we try to do some prefix
                  * analysis first
                  */

                 if ((j > 0) && isCons(j) && isCons(j - 1))
                 {
                     k = j;
                     word.setLength(k + 1);
                     // nolookup() because we already did according to the comment
                     return;
                 }

                 word.setLength(j + 1);
                 word.unsafeWrite('e');
                 k = j + 1;
                 // nolookup(); we already tried an 'e' ending
                 return;
             }
         }

         /*
          * this routine deals with -ity endings. It accepts -ability, -ibility, and
          * -ality, even without checking the dictionary because they are so
          * productive. The first two are mapped to -ble, and the -ity is remove for
          * the latter
          */
         private void ityEndings()
         {
             int old_k = k;

             if (endsIn('i', 't', 'y'))
             {
                 word.setLength(j + 1); /* try just removing -ity */
                 k = j;
                 if (lookup()) return;
                 word.unsafeWrite('e'); /* try removing -ity and adding -e */
                 k = j + 1;
                 if (lookup()) return;
                 word.setCharAt(j + 1, 'i');
                 word.append("ty");
                 k = old_k;
                 /*
                  * the -ability and -ibility endings are highly productive, so just accept
                  * them
                  */
                 if ((j > 0) && (word.charAt(j - 1) == 'i') && (word.charAt(j) == 'l'))
                 {
                     word.setLength(j - 1);
                     word.append("le"); /* convert to -ble */
                     k = j;
                     lookup();
                     return;
                 }

                 /* ditto for -ivity */
                 if ((j > 0) && (word.charAt(j - 1) == 'i') && (word.charAt(j) == 'v'))
                 {
                     word.setLength(j + 1);
                     word.unsafeWrite('e'); /* convert to -ive */
                     k = j + 1;
                     lookup();
                     return;
                 }
                 /* ditto for -ality */
                 if ((j > 0) && (word.charAt(j - 1) == 'a') && (word.charAt(j) == 'l'))
                 {
                     word.setLength(j + 1);
                     k = j;
                     lookup();
                     return;
                 }

                 /*
                  * if the root isn't in the dictionary, and the variant *is* there, then
                  * use the variant. This allows `immunity'->`immune', but prevents
                  * `capacity'->`capac'. If neither the variant nor the root form are in
                  * the dictionary, then remove the ending as a default
                  */

                 if (lookup()) return;

                 /* the default is to remove -ity altogether */
                 word.setLength(j + 1);
                 k = j;
                 // nolookup(), we already did it.
                 return;
             }
         }

         /* handle -ence and -ance */
         private void nceEndings()
         {
             int old_k = k;
             char word_char;

             if (endsIn('n', 'c', 'e'))
             {
                 word_char = word.charAt(j);
                 if (!((word_char == 'e') || (word_char == 'a'))) return;
                 word.setLength(j);
                 word.unsafeWrite('e'); /* try converting -e/ance to -e (adherance/adhere) */
                 k = j;
                 if (lookup()) return;
                 word.setLength(j); /*
                           * try removing -e/ance altogether
                           * (disappearance/disappear)
                           */
                 k = j - 1;
                 if (lookup()) return;
                 word.unsafeWrite(word_char); /* restore the original ending */
                 word.append("nce");
                 k = old_k;
                 // nolookup() because we restored the original ending
             }
             return;
         }

         /* handle -ness */
         private void nessEndings()
         {
             if (endsIn('n', 'e', 's', 's'))
             { /*
                                        * this is a very productive endings, so
                                        * just accept it
                                        */
                 word.setLength(j + 1);
                 k = j;
                 if (word.charAt(j) == 'i') word.setCharAt(j, 'y');
                 lookup();
             }
             return;
         }

         /* handle -ism */
         private void ismEndings()
         {
             if (endsIn('i', 's', 'm'))
             { /*
                                   * this is a very productive ending, so just
                                   * accept it
                                   */
                 word.setLength(j + 1);
                 k = j;
                 lookup();
             }
             return;
         }

         /* this routine deals with -ment endings. */
         private void mentEndings()
         {
             int old_k = k;

             if (endsIn('m', 'e', 'n', 't'))
             {
                 word.setLength(j + 1);
                 k = j;
                 if (lookup()) return;
                 word.append("ment");
                 k = old_k;
                 // nolookup
             }
             return;
         }

         /* this routine deals with -ize endings. */
         private void izeEndings()
         {
             int old_k = k;

             if (endsIn('i', 'z', 'e'))
             {
                 word.setLength(j + 1); /* try removing -ize entirely */
                 k = j;
                 if (lookup()) return;
                 word.unsafeWrite('i');

                 if (doubleC(j))
                 { /* allow for a doubled consonant */
                     word.setLength(j);
                     k = j - 1;
                     if (lookup()) return;
                     word.unsafeWrite(word.charAt(j - 1));
                 }

                 word.setLength(j + 1);
                 word.unsafeWrite('e'); /* try removing -ize and adding -e */
                 k = j + 1;
                 if (lookup()) return;
                 word.setLength(j + 1);
                 word.append("ize");
                 k = old_k;
                 // nolookup()
             }
             return;
         }

         /* handle -ency and -ancy */
         private void ncyEndings()
         {
             if (endsIn('n', 'c', 'y'))
             {
                 if (!((word.charAt(j) == 'e') || (word.charAt(j) == 'a'))) return;
                 word.setCharAt(j + 2, 't'); /* try converting -ncy to -nt */
                 word.setLength(j + 3);
                 k = j + 2;

                 if (lookup()) return;

                 word.setCharAt(j + 2, 'c'); /* the default is to convert it to -nce */
                 word.unsafeWrite('e');
                 k = j + 3;
                 lookup();
             }
             return;
         }

         /* handle -able and -ible */
         private void bleEndings()
         {
             int old_k = k;
             char word_char;

             if (endsIn('b', 'l', 'e'))
             {
                 if (!((word.charAt(j) == 'a') || (word.charAt(j) == 'i'))) return;
                 word_char = word.charAt(j);
                 word.setLength(j); /* try just removing the ending */
                 k = j - 1;
                 if (lookup()) return;
                 if (doubleC(k))
                 { /* allow for a doubled consonant */
                     word.setLength(k);
                     k--;
                     if (lookup()) return;
                     k++;
                     word.unsafeWrite(word.charAt(k - 1));
                 }
                 word.setLength(j);
                 word.unsafeWrite('e'); /* try removing -a/ible and adding -e */
                 k = j;
                 if (lookup()) return;
                 word.setLength(j);
                 word.append("ate"); /* try removing -able and adding -ate */
                 /* (e.g., compensable/compensate) */
                 k = j + 2;
                 if (lookup()) return;
                 word.setLength(j);
                 word.unsafeWrite(word_char); /* restore the original values */
                 word.append("ble");
                 k = old_k;
                 // nolookup()
             }
             return;
         }

         /*
          * handle -ic endings. This is fairly straightforward, but this is also the
          * only place we try *expanding* an ending, -ic -> -ical. This is to handle
          * cases like `canonic' -> `canonical'
          */
         private void icEndings()
         {
             if (endsIn('i', 'c'))
             {
                 word.setLength(j + 3);
                 word.append("al"); /* try converting -ic to -ical */
                 k = j + 4;
                 if (lookup()) return;

                 word.setCharAt(j + 1, 'y'); /* try converting -ic to -y */
                 word.setLength(j + 2);
                 k = j + 1;
                 if (lookup()) return;

                 word.setCharAt(j + 1, 'e'); /* try converting -ic to -e */
                 if (lookup()) return;

                 word.setLength(j + 1); /* try removing -ic altogether */
                 k = j;
                 if (lookup()) return;
                 word.append("ic"); /* restore the original ending */
                 k = j + 2;
                 // nolookup()
             }
             return;
         }

         private static readonly char[] ization = "ization".ToCharArray();
         private static readonly char[] ition = "ition".ToCharArray();
         private static readonly char[] ation = "ation".ToCharArray();
         private static readonly char[] ication = "ication".ToCharArray();

         /* handle some derivational endings */
         /*
          * this routine deals with -ion, -ition, -ation, -ization, and -ication. The
          * -ization ending is always converted to -ize
          */
         private void ionEndings()
         {
             int old_k = k;
             if (!endsIn('i', 'o', 'n'))
             {
                 return;
             }

             if (endsIn(ization))
             { /*
                             * the -ize ending is very productive, so simply
                             * accept it as the root
                             */
                 word.setLength(j + 3);
                 word.unsafeWrite('e');
                 k = j + 3;
                 lookup();
                 return;
             }

             if (endsIn(ition))
             {
                 word.setLength(j + 1);
                 word.unsafeWrite('e');
                 k = j + 1;
                 if (lookup()) /*
                      * remove -ition and add `e', and check against the
                      * dictionary
                      */
                     return; /* (e.g., definition->define, opposition->oppose) */

                 /* restore original values */
                 word.setLength(j + 1);
                 word.append("ition");
                 k = old_k;
                 // nolookup()
             }
             else if (endsIn(ation))
             {
                 word.setLength(j + 3);
                 word.unsafeWrite('e');
                 k = j + 3;
                 if (lookup()) /* remove -ion and add `e', and check against the dictionary */
                     return; /* (elmination -> eliminate) */

                 word.setLength(j + 1);
                 word.unsafeWrite('e'); /*
                               * remove -ation and add `e', and check against the
                               * dictionary
                               */
                 k = j + 1;
                 if (lookup()) return;

                 word.setLength(j + 1);/*
                              * just remove -ation (resignation->resign) and
                              * check dictionary
                              */
                 k = j;
                 if (lookup()) return;

                 /* restore original values */
                 word.setLength(j + 1);
                 word.append("ation");
                 k = old_k;
                 // nolookup()

             }

             /*
              * test -ication after -ation is attempted (e.g., `complication->complicate'
              * rather than `complication->comply')
              */

             if (endsIn(ication))
             {
                 word.setLength(j + 1);
                 word.unsafeWrite('y');
                 k = j + 1;
                 if (lookup()) /*
                      * remove -ication and add `y', and check against the
                      * dictionary
                      */
                     return; /* (e.g., amplification -> amplify) */

                 /* restore original values */
                 word.setLength(j + 1);
                 word.append("ication");
                 k = old_k;
                 // nolookup()
             }

             // if (endsIn(ion)) {
             if (true)
             { // we checked for this earlier... just need to set "j"
                 j = k - 3; // YCS

                 word.setLength(j + 1);
                 word.unsafeWrite('e');
                 k = j + 1;
                 if (lookup()) /* remove -ion and add `e', and check against the dictionary */
                     return;

                 word.setLength(j + 1);
                 k = j;
                 if (lookup()) /* remove -ion, and if it's found, treat that as the root */
                     return;

                 /* restore original values */
                 word.setLength(j + 1);
                 word.append("ion");
                 k = old_k;
                 // nolookup()
             }

             // nolookup(); all of the other paths restored original values
             return;
         }

         /*
          * this routine deals with -er, -or, -ier, and -eer. The -izer ending is
          * always converted to -ize
          */
         private void erAndOrEndings()
         {
             int old_k = k;

             if (word.charAt(k) != 'r') return; // YCS

             char word_char; /* so we can remember if it was -er or -or */

             if (endsIn('i', 'z', 'e', 'r'))
             { /*
                                        * -ize is very productive, so accept it
                                        * as the root
                                        */
                 word.setLength(j + 4);
                 k = j + 3;
                 lookup();
                 return;
             }

             if (endsIn('e', 'r') || endsIn('o', 'r'))
             {
                 word_char = word.charAt(j + 1);
                 if (doubleC(j))
                 {
                     word.setLength(j);
                     k = j - 1;
                     if (lookup()) return;
                     word.unsafeWrite(word.charAt(j - 1)); /* restore the doubled consonant */
                 }

                 if (word.charAt(j) == 'i')
                 { /* do we have a -ier ending? */
                     word.setCharAt(j, 'y');
                     word.setLength(j + 1);
                     k = j;
                     if (lookup()) /* yes, so check against the dictionary */
                         return;
                     word.setCharAt(j, 'i'); /* restore the endings */
                     word.unsafeWrite('e');
                 }

                 if (word.charAt(j) == 'e')
                 { /* handle -eer */
                     word.setLength(j);
                     k = j - 1;
                     if (lookup()) return;
                     word.unsafeWrite('e');
                 }

                 word.setLength(j + 2); /* remove the -r ending */
                 k = j + 1;
                 if (lookup()) return;
                 word.setLength(j + 1); /* try removing -er/-or */
                 k = j;
                 if (lookup()) return;
                 word.unsafeWrite('e'); /* try removing -or and adding -e */
                 k = j + 1;
                 if (lookup()) return;
                 word.setLength(j + 1);
                 word.unsafeWrite(word_char);
                 word.unsafeWrite('r'); /* restore the word to the way it was */
                 k = old_k;
                 // nolookup()
             }

         }

         /*
          * this routine deals with -ly endings. The -ally ending is always converted
          * to -al Sometimes this will temporarily leave us with a non-word (e.g.,
          * heuristically maps to heuristical), but then the -al is removed in the next
          * step.
          */
         private void lyEndings()
         {
             int old_k = k;

             if (endsIn('l', 'y'))
             {

                 word.setCharAt(j + 2, 'e'); /* try converting -ly to -le */

                 if (lookup()) return;
                 word.setCharAt(j + 2, 'y');

                 word.setLength(j + 1); /* try just removing the -ly */
                 k = j;

                 if (lookup()) return;

                 if ((j > 0) && (word.charAt(j - 1) == 'a') && (word.charAt(j) == 'l')) /*
                                                                               * always
                                                                               * convert
                                                                               * -
                                                                               * ally
                                                                               * to
                                                                               * -
                                                                               * al
                                                                               */
                     return;
                 word.append("ly");
                 k = old_k;

                 if ((j > 0) && (word.charAt(j - 1) == 'a') && (word.charAt(j) == 'b'))
                 { /*
                                                                                 * always
                                                                                 * convert
                                                                                 * -
                                                                                 * ably
                                                                                 * to
                                                                                 * -
                                                                                 * able
                                                                                 */
                     word.setCharAt(j + 2, 'e');
                     k = j + 2;
                     return;
                 }

                 if (word.charAt(j) == 'i')
                 { /* e.g., militarily -> military */
                     word.setLength(j);
                     word.unsafeWrite('y');
                     k = j;
                     if (lookup()) return;
                     word.setLength(j);
                     word.append("ily");
                     k = old_k;
                 }

                 word.setLength(j + 1); /* the default is to remove -ly */

                 k = j;
                 // nolookup()... we already tried removing the "ly" variant
             }
             return;
         }

         /*
          * this routine deals with -al endings. Some of the endings from the previous
          * routine are finished up here.
          */
         private void alEndings()
         {
             int old_k = k;

             if (word.length() < 4) return;
             if (endsIn('a', 'l'))
             {
                 word.setLength(j + 1);
                 k = j;
                 if (lookup()) /* try just removing the -al */
                     return;

                 if (doubleC(j))
                 { /* allow for a doubled consonant */
                     word.setLength(j);
                     k = j - 1;
                     if (lookup()) return;
                     word.unsafeWrite(word.charAt(j - 1));
                 }

                 word.setLength(j + 1);
                 word.unsafeWrite('e'); /* try removing the -al and adding -e */
                 k = j + 1;
                 if (lookup()) return;

                 word.setLength(j + 1);
                 word.append("um"); /* try converting -al to -um */
                 /* (e.g., optimal - > optimum ) */
                 k = j + 2;
                 if (lookup()) return;

                 word.setLength(j + 1);
                 word.append("al"); /* restore the ending to the way it was */
                 k = old_k;

                 if ((j > 0) && (word.charAt(j - 1) == 'i') && (word.charAt(j) == 'c'))
                 {
                     word.setLength(j - 1); /* try removing -ical */
                     k = j - 2;
                     if (lookup()) return;

                     word.setLength(j - 1);
                     word.unsafeWrite('y');/* try turning -ical to -y (e.g., bibliographical) */
                     k = j - 1;
                     if (lookup()) return;

                     word.setLength(j - 1);
                     word.append("ic"); /* the default is to convert -ical to -ic */
                     k = j;
                     // nolookup() ... converting ical to ic means removing "al" which we
                     // already tried
                     // ERROR
                     lookup();
                     return;
                 }

                 if (word.charAt(j) == 'i')
                 { /* sometimes -ial endings should be removed */
                     word.setLength(j); /* (sometimes it gets turned into -y, but we */
                     k = j - 1; /* aren't dealing with that case for now) */
                     if (lookup()) return;
                     word.append("ial");
                     k = old_k;
                     lookup();
                 }

             }
             return;
         }

         /*
          * this routine deals with -ive endings. It normalizes some of the -ative
          * endings directly, and also maps some -ive endings to -ion.
          */
         private void iveEndings()
         {
             int old_k = k;

             if (endsIn('i', 'v', 'e'))
             {
                 word.setLength(j + 1); /* try removing -ive entirely */
                 k = j;
                 if (lookup()) return;

                 word.unsafeWrite('e'); /* try removing -ive and adding -e */
                 k = j + 1;
                 if (lookup()) return;
                 word.setLength(j + 1);
                 word.append("ive");
                 if ((j > 0) && (word.charAt(j - 1) == 'a') && (word.charAt(j) == 't'))
                 {
                     word.setCharAt(j - 1, 'e'); /* try removing -ative and adding -e */
                     word.setLength(j); /* (e.g., determinative -> determine) */
                     k = j - 1;
                     if (lookup()) return;
                     word.setLength(j - 1); /* try just removing -ative */
                     if (lookup()) return;

                     word.append("ative");
                     k = old_k;
                 }

                 /* try mapping -ive to -ion (e.g., injunctive/injunction) */
                 word.setCharAt(j + 2, 'o');
                 word.setCharAt(j + 3, 'n');
                 if (lookup()) return;

                 word.setCharAt(j + 2, 'v'); /* restore the original values */
                 word.setCharAt(j + 3, 'e');
                 k = old_k;
                 // nolookup()
             }
             return;
         }

         public KStemmer() { }

         public String stem(String term)
         {
             bool changed = stem(term, term.Length);
             if (!changed) return term;
             return asString();
         }

         /**
          * Returns the result of the stem (assuming the word was changed) as a String.
          */
         public String asString()
         {
             String s = getString();
             if (s != null) return s;
             return word.toString();
         }

 //          public CharSequence asCharSequence() {
 //            return result != null ? result : word;
 //          }

         String getString()
         {
             return result;
         }

         char[] getChars()
         {
             return word.getArray();
         }

         int getLength()
         {
             return word.length();
         }

         String result;

         private bool matched()
         {
             /***
              * if (!lookups.contains(word.toString())) { throw new
              * RuntimeException("didn't look up "+word.toString()+" prev="+prevLookup);
              * }
              ***/
             // lookup();
             return matchedEntry != null;
         }

         /**
          * Stems the text in the token. Returns true if changed.
          */
         public bool stem(String term, int len)
         {
             len = term.Length; // HACK for API compatibility
             result = null;

             k = len - 1;
             if ((k <= 1) || (k >= MaxWordLen - 1))
             {
                 return false; // don't stem
             }

             // first check the stemmer dictionaries, and avoid using the
             // cache if it's in there.
             DictEntry entry = dict_ht.ContainsKey(term) ? dict_ht[term] : null;
             if (entry != null)
             {
                 if (entry.root != null)
                 {
                     result = entry.root;
                     return true;
                 }
                 return false;
             }

             /***
              * caching off is normally faster if (cache == null) initializeStemHash();
              *
              * // now check the cache, before we copy chars to "word" if (cache != null)
              * { String val = cache.get(term, 0, len); if (val != null) { if (val !=
              * SAME) { result = val; return true; } return false; } }
              ***/

             word.reset();
             // allocate enough space so that an expansion is never needed
             word.reserve(len + 10);
             for (int i = 0; i < len; i++)
             {
                 char ch = term[i];
                 if (!isAlpha(ch)) return false; // don't stem
                 // don't lowercase... it's a requirement that lowercase filter be
                 // used before this stemmer.
                 word.unsafeWrite(ch);
             }

             matchedEntry = null;
             /***
              * lookups.clear(); lookups.add(word.toString());
              ***/

             /*
              * This while loop will never be executed more than one time; it is here
              * only to allow the break statement to be used to escape as soon as a word
              * is recognized
              */
             while (true)
             {
                 // YCS: extra lookup()s were inserted so we don't need to
                 // do an extra wordInDict() here.
                 plural();
                 if (matched()) break;
                 pastTense();
                 if (matched()) break;
                 aspect();
                 if (matched()) break;
                 ityEndings();
                 if (matched()) break;
                 nessEndings();
                 if (matched()) break;
                 ionEndings();
                 if (matched()) break;
                 erAndOrEndings();
                 if (matched()) break;
                 lyEndings();
                 if (matched()) break;
                 alEndings();
                 if (matched()) break;
                 entry = wordInDict();
                 iveEndings();
                 if (matched()) break;
                 izeEndings();
                 if (matched()) break;
                 mentEndings();
                 if (matched()) break;
                 bleEndings();
                 if (matched()) break;
                 ismEndings();
                 if (matched()) break;
                 icEndings();
                 if (matched()) break;
                 ncyEndings();
                 if (matched()) break;
                 nceEndings();
                 matched();
                 break;
             }

             /*
              * try for a direct mapping (allows for cases like `Italian'->`Italy' and
              * `Italians'->`Italy')
              */
             entry = matchedEntry;
             if (entry != null)
             {
                 result = entry.root; // may be null, which means that "word" is the stem
             }

             /***
              * caching off is normally faster if (cache != null && cache.size() <
              * maxCacheSize) { char[] key = new char[len]; System.arraycopy(term, 0,
              * key, 0, len); if (result != null) { cache.put(key, result); } else {
              * cache.put(key, word.toString()); } }
              ***/

             /***
              * if (entry == null) { if (!word.toString().equals(new String(term,0,len)))
              * { System.out.println("CASE:" + word.toString() + "," + new
              * String(term,0,len));
              *
              * } }
              ***/

             // no entry matched means result is "word"
             return true;
         }
     }

     internal class OpenStringBuilder
     {
         protected char[] buf;
         protected int len;

         public OpenStringBuilder()
             : this(32)
         {
         }

         public OpenStringBuilder(int size)
         {
             buf = new char[size];
         }

         public OpenStringBuilder(char[] arr, int len)
         {
             set(arr, len);
         }

         public void setLength(int len) { this.len = len; }

         public void set(char[] arr, int end)
         {
             this.buf = arr;
             this.len = end;
         }

         public char[] getArray() { return buf; }
         public int size() { return len; }
         public int length() { return len; }
         public int capacity() { return buf.Length; }

         public OpenStringBuilder append(String csq)
         {
             return append(csq, 0, csq.Length);
         }

         public OpenStringBuilder append(String csq, int start, int end)
         {
             reserve(end - start);
             for (int i = start; i < end; i++)
             {
                 unsafeWrite(csq[i]);
             }
             return this;
         }

         public OpenStringBuilder append(char c)
         {
             write(c);
             return this;
         }

         public char charAt(int index)
         {
             return buf[index];
         }

         public void setCharAt(int index, char ch)
         {
             buf[index] = ch;
         }

         public void unsafeWrite(char b)
         {
             buf[len++] = b;
         }

         public void unsafeWrite(int b) { unsafeWrite((char)b); }

         public void unsafeWrite(char[] b, int off, int len)
         {
             Array.Copy(b, off, buf, this.len, len);
             this.len += len;
         }

         protected void resize(int len)
         {
             char[] newbuf = new char[Math.Max(buf.Length << 1, len)];
             Array.Copy(buf, newbuf, size());
             buf = newbuf;
         }

         public void reserve(int num)
         {
             if (len + num > buf.Length) resize(len + num);
         }

         public void write(char b)
         {
             if (len >= buf.Length)
             {
                 resize(len + 1);
             }
             unsafeWrite(b);
         }

         public void write(int b) { write((char)b); }

         public void write(char[] b)
         {
             write(b, 0, b.Length);
         }

         public void write(char[] b, int off, int len)
         {
             reserve(len);
             unsafeWrite(b, off, len);
         }

         public void write(OpenStringBuilder arr)
         {
             write(arr.buf, 0, len);
         }

         //  public void write(String s) {
         //    reserve(s.Length);
         //    s.GetChars(0,s.Length,buf, len);
         //    len +=s.length();
         //  }

         public void flush()
         {
         }

         public void reset()
         {
             len = 0;
         }

         public char[] toCharArray()
         {
             char[] newbuf = new char[size()];
             Array.Copy(buf, newbuf, size());
             return newbuf;
         }

         public String toString()
         {
             return new String(buf, 0, size());
         }
     }
 }