lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemmer.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.analysis.en;

 /*

    Porter stemmer in Java. The original paper is in

        Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
        no. 3, pp 130-137,

    See also http://www.tartarus.org/~martin/PorterStemmer/index.html

    Bug 1 (reported by Gonzalo Parra 16/10/99) fixed as marked below.
    The words 'aed', 'eed', 'oed' leave k at 'a' for step 3, and b[k-1]
    is then outside the bounds of b.

    Similarly,

    Bug 2 (reported by Steve Dyrdahl 22/2/00) fixed as marked below.
    'ion' by itself leaves j = -1 in the test for 'ion' in step 5, and
    b[j] is then outside the bounds of b.

    Release 3.

    [ This version is derived from Release 3, modified by Brian Goetz to
      optimize for fewer object creations.  ]

 */

 import org.apache.lucene.util.ArrayUtil;

 /**
  * Stemmer implementing the Porter stemming algorithm.
  *
  * <p>A word can be supplied in two ways: one character at a time through add(char), followed by a
  * call to stem() (with reset() before the next word), or in a single call to one of the
  * stem(...) overloads that accept a String or a char[]. The outcome is available from toString(),
  * or without copying from getResultBuffer() together with getResultLength().
  */
 class PorterStemmer {
   private static final int INITIAL_SIZE = 50;

   /* buffer holding the word that is being stemmed */
   private char[] b;
   /* number of characters of b in use; after stemming, the length of the result */
   private int i;
   /* general offset into b; ends() leaves it on the character just before the matched suffix */
   private int j;
   /* index of the last character of the word in its current state */
   private int k;
   /* index of the first character of the word */
   private int k0;
   /* set once stemming has modified the word */
   private boolean dirty = false;

   public PorterStemmer() {
     this.b = new char[INITIAL_SIZE];
     this.i = 0;
   }

   /**
    * Empties the buffer and clears the "modified" flag so that another word can be stemmed. This
    * must be called between words when they are supplied through add(char) followed by stem().
    */
   public void reset() {
     dirty = false;
     i = 0;
   }

   /**
    * Appends one character to the word under construction, growing the buffer when it is full.
    * Call stem() once every character of the word has been added.
    */
   public void add(char ch) {
     if (i >= b.length) {
       b = ArrayUtil.grow(b, i + 1);
     }
     b[i] = ch;
     i++;
   }

   /**
    * Returns the current buffer content (the stemmed word, once stem has run) as a new String.
    * getResultBuffer() plus getResultLength() give the same data without the copy.
    */
   @Override
   public String toString() {
     return new String(b, 0, i);
   }

   /** Returns the number of characters in the word left by the stemming process. */
   public int getResultLength() {
     return i;
   }

   /**
    * Returns the internal character buffer itself, not a copy. Only the first getResultLength()
    * characters belong to the result.
    */
   public char[] getResultBuffer() {
     return b;
   }

   /* cons(pos) is true <=> b[pos] is a consonant. 'y' is a consonant at the start of the word
      and after a vowel, and a vowel after a consonant. */

   private boolean cons(int pos) {
     char c = b[pos];
     if (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u') {
       return false;
     }
     if (c == 'y') {
       return pos == k0 || !cons(pos - 1);
     }
     return true;
   }

   /* m() counts the vowel-consonant transitions ("vc" pairs) in k0..j. With c a consonant
      sequence, v a vowel sequence and <..> marking optional presence:

           <c><v>       gives 0
           <c>vc<v>     gives 1
           <c>vcvc<v>   gives 2
           <c>vcvcvc<v> gives 3
           ....
   */

   private int m() {
     int count = 0;
     int pos = k0;
     // skip the optional leading consonant run
     while (pos <= j && cons(pos)) {
       pos++;
     }
     if (pos > j) {
       return count;
     }
     pos++;
     while (true) {
       // rest of the vowel run
       while (pos <= j && !cons(pos)) {
         pos++;
       }
       if (pos > j) {
         return count;
       }
       pos++;
       count++; // a vowel run has just been followed by a consonant
       // rest of the consonant run
       while (pos <= j && cons(pos)) {
         pos++;
       }
       if (pos > j) {
         return count;
       }
       pos++;
     }
   }

   /* vowelinstem() is true <=> k0,...j contains a vowel */

   private boolean vowelinstem() {
     for (int pos = k0; pos <= j; pos++) {
       if (!cons(pos)) {
         return true;
       }
     }
     return false;
   }

   /* doublec(pos) is true <=> pos,(pos-1) hold the same consonant twice. */

   private boolean doublec(int pos) {
     return pos >= k0 + 1 && b[pos] == b[pos - 1] && cons(pos);
   }

   /* cvc(pos) is true <=> pos-2,pos-1,pos has the form consonant - vowel - consonant
      and the final consonant is not w, x or y. It is used when deciding whether to
      restore an e at the end of a short word, e.g.

           cav(e), lov(e), hop(e), crim(e), but
           snow, box, tray.

   */

   private boolean cvc(int pos) {
     if (pos < k0 + 2) {
       return false;
     }
     if (!cons(pos) || cons(pos - 1) || !cons(pos - 2)) {
       return false;
     }
     char last = b[pos];
     return last != 'w' && last != 'x' && last != 'y';
   }

   /* ends(s) is true <=> k0,...k ends with the string s. On success j is moved to the
      character just before the suffix; on failure j is left alone. */

   private boolean ends(String s) {
     int len = s.length();
     int start = k - len + 1;
     if (start < k0) {
       return false;
     }
     for (int n = 0; n < len; n++) {
       if (b[start + n] != s.charAt(n)) {
         return false;
       }
     }
     j = k - len;
     return true;
   }

   /* setto(s) sets (j+1),...k to the characters in the string s, readjusting
   k, and marks the word as modified. */

   void setto(String s) {
     int len = s.length();
     int start = j + 1;
     for (int n = 0; n < len; n++) {
       b[start + n] = s.charAt(n);
     }
     k = j + len;
     dirty = true;
   }

   /* r(s) replaces the suffix found by the last ends() with s, but only when the
      remaining stem has m() > 0. */

   void r(String s) {
     if (m() > 0) {
       setto(s);
     }
   }

   /* step1() gets rid of plurals and -ed or -ing. e.g.

            caresses  ->  caress
            ponies    ->  poni
            ties      ->  ti
            caress    ->  caress
            cats      ->  cat

            feed      ->  feed
            agreed    ->  agree
            disabled  ->  disable

            matting   ->  mat
            mating    ->  mate
            meeting   ->  meet
            milling   ->  mill
            messing   ->  mess

            meetings  ->  meet

   */

   private void step1() {
     if (b[k] == 's') {
       if (ends("sses")) {
         k -= 2;
       } else if (ends("ies")) {
         setto("i");
       } else if (b[k - 1] != 's') {
         k--;
       }
     }
     if (ends("eed")) {
       if (m() > 0) {
         k--;
       }
       return;
     }
     boolean suffixStripped = (ends("ed") || ends("ing")) && vowelinstem();
     if (!suffixStripped) {
       return;
     }
     k = j;
     if (ends("at")) {
       setto("ate");
     } else if (ends("bl")) {
       setto("ble");
     } else if (ends("iz")) {
       setto("ize");
     } else if (doublec(k)) {
       // undouble the final consonant unless it is l, s or z
       char last = b[k];
       if (last != 'l' && last != 's' && last != 'z') {
         k--;
       }
     } else if (m() == 1 && cvc(k)) {
       setto("e");
     }
   }

   /* step2() turns terminal y to i when there is another vowel in the stem. */

   private void step2() {
     if (ends("y") && vowelinstem()) {
       dirty = true;
       b[k] = 'i';
     }
   }

   /* step3() maps double suffices to single ones. so -ization ( = -ize plus
   -ation) maps to -ize etc. note that the string before the suffix must give
   m() > 0. The switch is on the penultimate character. */

   private void step3() {
     if (k == k0) {
       return; /* For Bug 1 */
     }
     switch (b[k - 1]) {
       case 'a':
         if (ends("ational")) {
           r("ate");
         } else if (ends("tional")) {
           r("tion");
         }
         break;
       case 'c':
         if (ends("enci")) {
           r("ence");
         } else if (ends("anci")) {
           r("ance");
         }
         break;
       case 'e':
         if (ends("izer")) {
           r("ize");
         }
         break;
       case 'l':
         if (ends("bli")) {
           r("ble");
         } else if (ends("alli")) {
           r("al");
         } else if (ends("entli")) {
           r("ent");
         } else if (ends("eli")) {
           r("e");
         } else if (ends("ousli")) {
           r("ous");
         }
         break;
       case 'o':
         if (ends("ization")) {
           r("ize");
         } else if (ends("ation")) {
           r("ate");
         } else if (ends("ator")) {
           r("ate");
         }
         break;
       case 's':
         if (ends("alism")) {
           r("al");
         } else if (ends("iveness")) {
           r("ive");
         } else if (ends("fulness")) {
           r("ful");
         } else if (ends("ousness")) {
           r("ous");
         }
         break;
       case 't':
         if (ends("aliti")) {
           r("al");
         } else if (ends("iviti")) {
           r("ive");
         } else if (ends("biliti")) {
           r("ble");
         }
         break;
       case 'g':
         if (ends("logi")) {
           r("log");
         }
         break;
       default:
         break;
     }
   }

   /* step4() deals with -ic-, -full, -ness etc. similar strategy to step3,
      switching on the last character. */

   private void step4() {
     switch (b[k]) {
       case 'e':
         if (ends("icate")) {
           r("ic");
         } else if (ends("ative")) {
           r("");
         } else if (ends("alize")) {
           r("al");
         }
         break;
       case 'i':
         if (ends("iciti")) {
           r("ic");
         }
         break;
       case 'l':
         if (ends("ical")) {
           r("ic");
         } else if (ends("ful")) {
           r("");
         }
         break;
       case 's':
         if (ends("ness")) {
           r("");
         }
         break;
       default:
         break;
     }
   }

   /* step5() takes off -ant, -ence etc., in context <c>vcvc<v>. */

   private void step5() {
     if (k == k0) {
       return; /* for Bug 1 */
     }
     boolean suffixFound;
     switch (b[k - 1]) {
       case 'a':
         suffixFound = ends("al");
         break;
       case 'c':
         suffixFound = ends("ance") || ends("ence");
         break;
       case 'e':
         suffixFound = ends("er");
         break;
       case 'i':
         suffixFound = ends("ic");
         break;
       case 'l':
         suffixFound = ends("able") || ends("ible");
         break;
       case 'n':
         /* -ement is tried before -ment and -ent: element etc. not stripped before the m */
         suffixFound = ends("ant") || ends("ement") || ends("ment") || ends("ent");
         break;
       case 'o':
         /* -ion only counts after s or t; j >= 0 fixes Bug 2 */
         suffixFound = (ends("ion") && j >= 0 && (b[j] == 's' || b[j] == 't')) || ends("ou");
         break;
       case 's':
         suffixFound = ends("ism");
         break;
       case 't':
         suffixFound = ends("ate") || ends("iti");
         break;
       case 'u':
         /* takes care of -ous */
         suffixFound = ends("ous");
         break;
       case 'v':
         suffixFound = ends("ive");
         break;
       case 'z':
         suffixFound = ends("ize");
         break;
       default:
         suffixFound = false;
         break;
     }
     if (suffixFound && m() > 1) {
       k = j;
     }
   }

   /* step6() removes a final -e if m() > 1, or if m() == 1 and the stem does not end
      cvc; it then reduces a final -ll to -l if m() > 1. */

   private void step6() {
     j = k;
     if (b[k] == 'e') {
       int measure = m();
       boolean dropFinalE = measure > 1 || (measure == 1 && !cvc(k - 1));
       if (dropFinalE) {
         k--;
       }
     }
     if (b[k] == 'l' && doublec(k) && m() > 1) {
       k--;
     }
   }

   /**
    * Stems a word given as a String. Returns the stemmed word, or the argument itself when
    * stemming left the word unchanged.
    */
   public String stem(String s) {
     return stem(s.toCharArray(), s.length()) ? toString() : s;
   }

   /**
    * Stems the word held in the whole of a char[]. Returns true if the result differs from the
    * input. Read the result with getResultLength()/getResultBuffer() or toString().
    */
   public boolean stem(char[] word) {
     return stem(word, word.length);
   }

   /**
    * Stems the word held in wordLen characters of a char[], starting at offset. The characters
    * are copied into the internal buffer; the caller's array is not modified. Returns true if the
    * result differs from the input. Read the result with getResultLength()/getResultBuffer() or
    * toString().
    */
   public boolean stem(char[] wordBuffer, int offset, int wordLen) {
     reset();
     if (wordLen > b.length) {
       b = new char[ArrayUtil.oversize(wordLen, Character.BYTES)];
     }
     System.arraycopy(wordBuffer, offset, b, 0, wordLen);
     i = wordLen;
     return stem(0);
   }

   /**
    * Stems the word held in the first wordLen characters of a char[]. Returns true if the result
    * differs from the input. Read the result with getResultLength()/getResultBuffer() or
    * toString().
    */
   public boolean stem(char[] word, int wordLen) {
     return stem(word, 0, wordLen);
   }

   /**
    * Stems the word that was placed in the buffer through calls to add(). Returns true if the
    * result differs from the input. Read the result with getResultLength()/getResultBuffer() or
    * toString().
    */
   public boolean stem() {
     return stem(0);
   }

   /**
    * Stems the characters currently in the buffer, treating index i0 as the first character of
    * the word. Words of fewer than three characters are left as they are. Returns true if the
    * word was modified.
    */
   public boolean stem(int i0) {
     k0 = i0;
     k = i - 1;
     if (k > k0 + 1) {
       step1();
       step2();
       step3();
       step4();
       step5();
       step6();
     }
     // Also, a word is considered dirty if we lopped off letters
     // Thanks to Ifigenia Vairelles for pointing this out.
     if (i != k + 1) {
       dirty = true;
     }
     i = k + 1;
     return dirty;
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.analysis.en;

	/*

	Porter stemmer in Java. The original paper is in

	Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
	no. 3, pp 130-137,

	See also http://www.tartarus.org/~martin/PorterStemmer/index.html

	Bug 1 (reported by Gonzalo Parra 16/10/99) fixed as marked below.
	The words 'aed', 'eed', 'oed' leave k at 'a' for step 3, and b[k-1]
	is then outside the bounds of b.

	Similarly,

	Bug 2 (reported by Steve Dyrdahl 22/2/00) fixed as marked below.
	'ion' by itself leaves j = -1 in the test for 'ion' in step 5, and
	b[j] is then outside the bounds of b.

	Release 3.

	[ This version is derived from Release 3, modified by Brian Goetz to
	optimize for fewer object creations. ]

	*/

	import org.apache.lucene.util.ArrayUtil;

	/**
	* Stemmer, implementing the Porter Stemming Algorithm
	*
	* <p>The Stemmer class transforms a word into its root form. The input word can be provided a
	* character at time (by calling add()), or at once by calling one of the various stem(something)
	* methods.
	*/
	class PorterStemmer {
	private char[] b;
	private int i, /* offset into b */ j, k, k0;
	private boolean dirty = false;
	private static final int INITIAL_SIZE = 50;

	public PorterStemmer() {
	b = new char[INITIAL_SIZE];
	i = 0;
	}

	/**
	* reset() resets the stemmer so it can stem another word. If you invoke the stemmer by calling
	* add(char) and then stem(), you must call reset() before starting another word.
	*/
	public void reset() {
	i = 0;
	dirty = false;
	}

	/**
	* Add a character to the word being stemmed. When you are finished adding characters, you can
	* call stem(void) to process the word.
	*/
	public void add(char ch) {
	if (b.length <= i) {
	b = ArrayUtil.grow(b, i + 1);
	}
	b[i++] = ch;
	}

	/**
	* After a word has been stemmed, it can be retrieved by toString(), or a reference to the
	* internal buffer can be retrieved by getResultBuffer and getResultLength (which is generally
	* more efficient.)
	*/
	@Override
	public String toString() {
	return new String(b, 0, i);
	}

	/** Returns the length of the word resulting from the stemming process. */
	public int getResultLength() {
	return i;
	}

	/**
	* Returns a reference to a character buffer containing the results of the stemming process. You
	* also need to consult getResultLength() to determine the length of the result.
	*/
	public char[] getResultBuffer() {
	return b;
	}

	/* cons(i) is true <=> b[i] is a consonant. */

	private final boolean cons(int i) {
	switch (b[i]) {
	case 'a':
	case 'e':
	case 'i':
	case 'o':
	case 'u':
	return false;
	case 'y':
	return (i == k0) ? true : !cons(i - 1);
	default:
	return true;
	}
	}

	/* m() measures the number of consonant sequences between k0 and j. if c is
	a consonant sequence and v a vowel sequence, and <..> indicates arbitrary
	presence,

	<c><v> gives 0
	<c>vc<v> gives 1
	<c>vcvc<v> gives 2
	<c>vcvcvc<v> gives 3
	....
	*/

	private final int m() {
	int n = 0;
	int i = k0;
	while (true) {
	if (i > j) return n;
	if (!cons(i)) break;
	i++;
	}
	i++;
	while (true) {
	while (true) {
	if (i > j) return n;
	if (cons(i)) break;
	i++;
	}
	i++;
	n++;
	while (true) {
	if (i > j) return n;
	if (!cons(i)) break;
	i++;
	}
	i++;
	}
	}

	/* vowelinstem() is true <=> k0,...j contains a vowel */

	private final boolean vowelinstem() {
	int i;
	for (i = k0; i <= j; i++) if (!cons(i)) return true;
	return false;
	}

	/* doublec(j) is true <=> j,(j-1) contain a double consonant. */

	private final boolean doublec(int j) {
	if (j < k0 + 1) return false;
	if (b[j] != b[j - 1]) return false;
	return cons(j);
	}

	/* cvc(i) is true <=> i-2,i-1,i has the form consonant - vowel - consonant
	and also if the second c is not w,x or y. this is used when trying to
	restore an e at the end of a short word. e.g.

	cav(e), lov(e), hop(e), crim(e), but
	snow, box, tray.

	*/

	private final boolean cvc(int i) {
	if (i < k0 + 2 \|\| !cons(i) \|\| cons(i - 1) \|\| !cons(i - 2)) return false;
	else {
	int ch = b[i];
	if (ch == 'w' \|\| ch == 'x' \|\| ch == 'y') return false;
	}
	return true;
	}

	private final boolean ends(String s) {
	int l = s.length();
	int o = k - l + 1;
	if (o < k0) return false;
	for (int i = 0; i < l; i++) if (b[o + i] != s.charAt(i)) return false;
	j = k - l;
	return true;
	}

	/* setto(s) sets (j+1),...k to the characters in the string s, readjusting
	k. */

	void setto(String s) {
	int l = s.length();
	int o = j + 1;
	for (int i = 0; i < l; i++) b[o + i] = s.charAt(i);
	k = j + l;
	dirty = true;
	}

	/* r(s) is used further down. */

	void r(String s) {
	if (m() > 0) setto(s);
	}

	/* step1() gets rid of plurals and -ed or -ing. e.g.

	caresses -> caress
	ponies -> poni
	ties -> ti
	caress -> caress
	cats -> cat

	feed -> feed
	agreed -> agree
	disabled -> disable

	matting -> mat
	mating -> mate
	meeting -> meet
	milling -> mill
	messing -> mess

	meetings -> meet

	*/

	private final void step1() {
	if (b[k] == 's') {
	if (ends("sses")) k -= 2;
	else if (ends("ies")) setto("i");
	else if (b[k - 1] != 's') k--;
	}
	if (ends("eed")) {
	if (m() > 0) k--;
	} else if ((ends("ed") \|\| ends("ing")) && vowelinstem()) {
	k = j;
	if (ends("at")) setto("ate");
	else if (ends("bl")) setto("ble");
	else if (ends("iz")) setto("ize");
	else if (doublec(k)) {
	int ch = b[k--];
	if (ch == 'l' \|\| ch == 's' \|\| ch == 'z') k++;
	} else if (m() == 1 && cvc(k)) setto("e");
	}
	}

	/* step2() turns terminal y to i when there is another vowel in the stem. */

	private final void step2() {
	if (ends("y") && vowelinstem()) {
	b[k] = 'i';
	dirty = true;
	}
	}

	/* step3() maps double suffices to single ones. so -ization ( = -ize plus
	-ation) maps to -ize etc. note that the string before the suffix must give
	m() > 0. */

	private final void step3() {
	if (k == k0) return; /* For Bug 1 */
	switch (b[k - 1]) {
	case 'a':
	if (ends("ational")) {
	r("ate");
	break;
	}
	if (ends("tional")) {
	r("tion");
	break;
	}
	break;
	case 'c':
	if (ends("enci")) {
	r("ence");
	break;
	}
	if (ends("anci")) {
	r("ance");
	break;
	}
	break;
	case 'e':
	if (ends("izer")) {
	r("ize");
	break;
	}
	break;
	case 'l':
	if (ends("bli")) {
	r("ble");
	break;
	}
	if (ends("alli")) {
	r("al");
	break;
	}
	if (ends("entli")) {
	r("ent");
	break;
	}
	if (ends("eli")) {
	r("e");
	break;
	}
	if (ends("ousli")) {
	r("ous");
	break;
	}
	break;
	case 'o':
	if (ends("ization")) {
	r("ize");
	break;
	}
	if (ends("ation")) {
	r("ate");
	break;
	}
	if (ends("ator")) {
	r("ate");
	break;
	}
	break;
	case 's':
	if (ends("alism")) {
	r("al");
	break;
	}
	if (ends("iveness")) {
	r("ive");
	break;
	}
	if (ends("fulness")) {
	r("ful");
	break;
	}
	if (ends("ousness")) {
	r("ous");
	break;
	}
	break;
	case 't':
	if (ends("aliti")) {
	r("al");
	break;
	}
	if (ends("iviti")) {
	r("ive");
	break;
	}
	if (ends("biliti")) {
	r("ble");
	break;
	}
	break;
	case 'g':
	if (ends("logi")) {
	r("log");
	break;
	}
	}
	}

	/* step4() deals with -ic-, -full, -ness etc. similar strategy to step3. */

	private final void step4() {
	switch (b[k]) {
	case 'e':
	if (ends("icate")) {
	r("ic");
	break;
	}
	if (ends("ative")) {
	r("");
	break;
	}
	if (ends("alize")) {
	r("al");
	break;
	}
	break;
	case 'i':
	if (ends("iciti")) {
	r("ic");
	break;
	}
	break;
	case 'l':
	if (ends("ical")) {
	r("ic");
	break;
	}
	if (ends("ful")) {
	r("");
	break;
	}
	break;
	case 's':
	if (ends("ness")) {
	r("");
	break;
	}
	break;
	}
	}

	/* step5() takes off -ant, -ence etc., in context <c>vcvc<v>. */

	private final void step5() {
	if (k == k0) return; /* for Bug 1 */
	switch (b[k - 1]) {
	case 'a':
	if (ends("al")) break;
	return;
	case 'c':
	if (ends("ance")) break;
	if (ends("ence")) break;
	return;
	case 'e':
	if (ends("er")) break;
	return;
	case 'i':
	if (ends("ic")) break;
	return;
	case 'l':
	if (ends("able")) break;
	if (ends("ible")) break;
	return;
	case 'n':
	if (ends("ant")) break;
	if (ends("ement")) break;
	if (ends("ment")) break;
	/* element etc. not stripped before the m */
	if (ends("ent")) break;
	return;
	case 'o':
	if (ends("ion") && j >= 0 && (b[j] == 's' \|\| b[j] == 't')) break;
	/* j >= 0 fixes Bug 2 */
	if (ends("ou")) break;
	return;
	/* takes care of -ous */
	case 's':
	if (ends("ism")) break;
	return;
	case 't':
	if (ends("ate")) break;
	if (ends("iti")) break;
	return;
	case 'u':
	if (ends("ous")) break;
	return;
	case 'v':
	if (ends("ive")) break;
	return;
	case 'z':
	if (ends("ize")) break;
	return;
	default:
	return;
	}
	if (m() > 1) k = j;
	}

	/* step6() removes a final -e if m() > 1. */

	private final void step6() {
	j = k;
	if (b[k] == 'e') {
	int a = m();
	if (a > 1 \|\| a == 1 && !cvc(k - 1)) k--;
	}
	if (b[k] == 'l' && doublec(k) && m() > 1) k--;
	}

	/** Stem a word provided as a String. Returns the result as a String. */
	public String stem(String s) {
	if (stem(s.toCharArray(), s.length())) return toString();
	else return s;
	}

	/**
	* Stem a word contained in a char[]. Returns true if the stemming process resulted in a word
	* different from the input. You can retrieve the result with getResultLength()/getResultBuffer()
	* or toString().
	*/
	public boolean stem(char[] word) {
	return stem(word, word.length);
	}

	/**
	* Stem a word contained in a portion of a char[] array. Returns true if the stemming process
	* resulted in a word different from the input. You can retrieve the result with
	* getResultLength()/getResultBuffer() or toString().
	*/
	public boolean stem(char[] wordBuffer, int offset, int wordLen) {
	reset();
	if (b.length < wordLen) {
	b = new char[ArrayUtil.oversize(wordLen, Character.BYTES)];
	}
	System.arraycopy(wordBuffer, offset, b, 0, wordLen);
	i = wordLen;
	return stem(0);
	}

	/**
	* Stem a word contained in a leading portion of a char[] array. Returns true if the stemming
	* process resulted in a word different from the input. You can retrieve the result with
	* getResultLength()/getResultBuffer() or toString().
	*/
	public boolean stem(char[] word, int wordLen) {
	return stem(word, 0, wordLen);
	}

	/**
	* Stem the word placed into the Stemmer buffer through calls to add(). Returns true if the
	* stemming process resulted in a word different from the input. You can retrieve the result with
	* getResultLength()/getResultBuffer() or toString().
	*/
	public boolean stem() {
	return stem(0);
	}

	public boolean stem(int i0) {
	k = i - 1;
	k0 = i0;
	if (k > k0 + 1) {
	step1();
	step2();
	step3();
	step4();
	step5();
	step6();
	}
	// Also, a word is considered dirty if we lopped off letters
	// Thanks to Ifigenia Vairelles for pointing this out.
	if (i != k + 1) dirty = true;
	i = k + 1;
	return dirty;
	}

	/* Test program for demonstrating the Stemmer. It reads a file and
	* stems each word, writing the result to standard out.
	* Usage: Stemmer file-name
	public static void main(String[] args) {
	PorterStemmer s = new PorterStemmer();

	for (int i = 0; i < args.length; i++) {
	try {
	InputStream in = new FileInputStream(args[i]);
	byte[] buffer = new byte[1024];
	int bufferLen, offset, ch;

	bufferLen = in.read(buffer);
	offset = 0;
	s.reset();

	while(true) {
	if (offset < bufferLen)
	ch = buffer[offset++];
	else {
	bufferLen = in.read(buffer);
	offset = 0;
	if (bufferLen < 0)
	ch = -1;
	else
	ch = buffer[offset++];
	}

	if (Character.isLetter((char) ch)) {
	s.add(Character.toLowerCase((char) ch));
	}
	else {
	s.stem();
	System.out.print(s.toString());
	s.reset();
	if (ch < 0)
	break;
	else {
	System.out.print((char) ch);
	}
	}
	}

	in.close();
	}
	catch (IOException e) {
	System.out.println("error reading " + args[i]);
	}
	}
	}*/
	}