// lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemmer.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.analysis.en;


 /*

    Porter stemmer in Java. The original paper is in

        Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
        no. 3, pp 130-137,

    See also http://www.tartarus.org/~martin/PorterStemmer/index.html

    Bug 1 (reported by Gonzalo Parra 16/10/99) fixed as marked below.
    The words 'aed', 'eed', 'oed' leave k at 'a' for step 3, and b[k-1]
    is then outside the bounds of b.

    Similarly,

    Bug 2 (reported by Steve Dyrdahl 22/2/00) fixed as marked below.
    'ion' by itself leaves j = -1 in the test for 'ion' in step 5, and
    b[j] is then outside the bounds of b.

    Release 3.

    [ This version is derived from Release 3, modified by Brian Goetz to
      optimize for fewer object creations.  ]

 */


 import org.apache.lucene.util.ArrayUtil;

 /**
  * Porter stemmer: reduces a word to its stem by running the suffix-stripping
  * steps of the Porter Stemming Algorithm over an internal character buffer.
  *
  * <p>A word can be supplied one character at a time with {@link #add(char)}
  * and then processed with {@link #stem()}, or handed over in a single call
  * through one of the {@code stem(...)} overloads taking a {@code String} or a
  * {@code char[]}.  The outcome is read back with {@link #toString()}, or
  * without building a String through {@link #getResultBuffer()} together with
  * {@link #getResultLength()}.
  *
  * <p>Character-at-a-time usage: call {@code add(ch)} for every letter of the
  * word, then {@code stem()}, then read the result, then {@code reset()}
  * before feeding the next word.
  *
  * <p>Every step reads and updates the instance fields declared below, so an
  * instance works on a single word at a time.
  */
 class PorterStemmer {
   /** Initial capacity of the working buffer, in chars. */
   private static final int INITIAL_SIZE = 50;

   /** Working buffer holding the word being stemmed. */
   private char[] b;

   /** Number of characters currently held in {@link #b}; add() writes at this index. */
   private int i;

   /** Index of the last stem character before the suffix matched by the latest successful ends(). */
   private int j;

   /** Index of the last character of the word in its current, partially stemmed form. */
   private int k;

   /** Index of the first character of the word. */
   private int k0;

   /** Becomes true as soon as a step has changed the word. */
   private boolean dirty = false;

   /** Creates a stemmer with an empty buffer of {@code INITIAL_SIZE} chars. */
   public PorterStemmer() {
     b = new char[INITIAL_SIZE];
     i = 0;
   }

   /**
    * Prepares the stemmer for a new word by emptying the buffer and clearing
    * the "changed" flag.  Required between words when the word is supplied
    * through add(char) followed by stem().
    */
   public void reset() {
     i = 0;
     dirty = false;
   }

   /**
    * Appends one character to the word being assembled, growing the buffer
    * when it is full.  Call stem() once the whole word has been added.
    */
   public void add(char ch) {
     if (i >= b.length) {
       b = ArrayUtil.grow(b, i + 1);
     }
     b[i] = ch;
     i++;
   }

   /**
    * Returns the current buffer content as a new String.  After stemming this
    * is the stem; getResultBuffer() and getResultLength() expose the same
    * data without allocating.
    */
   @Override
   public String toString() {
     return new String(b, 0, i);
   }

   /**
    * Returns the number of characters in the stemmed word.
    */
   public int getResultLength() {
     return i;
   }

   /**
    * Returns the internal buffer itself (not a copy).  Only the first
    * getResultLength() characters belong to the result.
    */
   public char[] getResultBuffer() {
     return b;
   }

   /* cons(pos) is true <=> b[pos] is a consonant.  a, e, i, o, u are vowels;
      y is a consonant at the start of the word and otherwise the opposite of
      the character before it. */

   private boolean cons(int pos) {
     final char c = b[pos];
     if (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u') {
       return false;
     }
     if (c == 'y') {
       return pos == k0 || !cons(pos - 1);
     }
     return true;
   }

   /* m() is the measure of b[k0..j]: the number of vowel-sequence to
      consonant-sequence transitions.  With c a consonant sequence, v a vowel
      sequence and <..> optional:

           <c><v>       gives 0
           <c>vc<v>     gives 1
           <c>vcvc<v>   gives 2
           <c>vcvcvc<v> gives 3
           ....
   */

   private int m() {
     int count = 0;
     int pos = k0;

     // Skip the optional leading consonant run.
     while (pos <= j && cons(pos)) {
       pos++;
     }
     if (pos > j) {
       return count;
     }
     pos++;

     for (;;) {
       // Rest of the vowel run.
       while (pos <= j && !cons(pos)) {
         pos++;
       }
       if (pos > j) {
         return count;
       }
       pos++;
       count++;

       // Rest of the consonant run that just completed a "vc" pair.
       while (pos <= j && cons(pos)) {
         pos++;
       }
       if (pos > j) {
         return count;
       }
       pos++;
     }
   }

   /* vowelinstem() is true <=> b[k0..j] contains a vowel. */

   private boolean vowelinstem() {
     int pos = k0;
     while (pos <= j) {
       if (!cons(pos)) {
         return true;
       }
       pos++;
     }
     return false;
   }

   /* doublec(pos) is true <=> b[pos-1], b[pos] are the same consonant. */

   private boolean doublec(int pos) {
     return pos >= k0 + 1 && b[pos] == b[pos - 1] && cons(pos);
   }

   /* cvc(pos) is true <=> b[pos-2..pos] reads consonant - vowel - consonant
      and the closing consonant is not w, x or y.  Used to decide whether a
      short word gets its final e back, e.g.

           cav(e), lov(e), hop(e), crim(e), but
           snow, box, tray.
   */

   private boolean cvc(int pos) {
     final boolean shaped =
         pos >= k0 + 2 && cons(pos) && !cons(pos - 1) && cons(pos - 2);
     if (!shaped) {
       return false;
     }
     final char last = b[pos];
     return last != 'w' && last != 'x' && last != 'y';
   }

   /* ends(s) is true <=> b[k0..k] ends with s.  On success j is moved to the
      character just before the suffix; on failure j is left alone. */

   private boolean ends(String s) {
     final int len = s.length();
     final int start = k - len + 1;
     if (start < k0) {
       return false;
     }
     int n = 0;
     while (n < len) {
       if (s.charAt(n) != b[start + n]) {
         return false;
       }
       n++;
     }
     j = k - len;
     return true;
   }

   /* setto(s) writes s into the buffer starting at j+1, moves k to the end of
      what was written and marks the word as changed. */

   void setto(String s) {
     final int len = s.length();
     int dest = j + 1;
     for (int n = 0; n < len; n++, dest++) {
       b[dest] = s.charAt(n);
     }
     k = j + len;
     dirty = true;
   }

   /* r(s) performs setto(s) only when the stem b[k0..j] has measure > 0. */

   void r(String s) {
     if (m() > 0) {
       setto(s);
     }
   }

   /* swap(suffix, replacement): when the word ends with suffix, hands the
      replacement to r().  The return value reports whether the suffix was
      present, not whether r() actually rewrote it. */

   private boolean swap(String suffix, String replacement) {
     if (!ends(suffix)) {
       return false;
     }
     r(replacement);
     return true;
   }

   /* step1() removes plurals and -ed / -ing, e.g.

            caresses  ->  caress
            ponies    ->  poni
            ties      ->  ti
            caress    ->  caress
            cats      ->  cat

            feed      ->  feed
            agreed    ->  agree
            disabled  ->  disable

            matting   ->  mat
            mating    ->  mate
            meeting   ->  meet
            milling   ->  mill
            messing   ->  mess

            meetings  ->  meet
   */

   private void step1() {
     // Plural endings.
     if (b[k] == 's') {
       if (ends("sses")) {
         k -= 2;
       } else if (ends("ies")) {
         setto("i");
       } else if (b[k - 1] != 's') {
         k--;
       }
     }

     // -eed loses only its d, and only when the stem has measure > 0.
     if (ends("eed")) {
       if (m() > 0) {
         k--;
       }
       return;
     }

     // -ed / -ing are removed only when the remaining stem has a vowel.
     final boolean removable = (ends("ed") || ends("ing")) && vowelinstem();
     if (!removable) {
       return;
     }
     k = j;
     if (ends("at")) {
       setto("ate");
     } else if (ends("bl")) {
       setto("ble");
     } else if (ends("iz")) {
       setto("ize");
     } else if (doublec(k)) {
       // Undouble the final consonant unless it is l, s or z.
       final char last = b[k];
       if (last != 'l' && last != 's' && last != 'z') {
         k--;
       }
     } else if (m() == 1 && cvc(k)) {
       setto("e");
     }
   }

   /* step2() replaces a final y by i when the stem before it has a vowel. */

   private void step2() {
     if (!ends("y")) {
       return;
     }
     if (!vowelinstem()) {
       return;
     }
     b[k] = 'i';
     dirty = true;
   }

   /* step3() maps double suffixes to single ones, e.g. -ization (= -ize plus
      -ation) becomes -ize.  The stem before the suffix must have m() > 0,
      which r() checks.  The switch is on the penultimate character. */

   private void step3() {
     if (k == k0) {
       return; /* For Bug 1: b[k-1] would lie before the word */
     }
     switch (b[k - 1]) {
       case 'a':
         if (!swap("ational", "ate")) {
           swap("tional", "tion");
         }
         break;
       case 'c':
         if (!swap("enci", "ence")) {
           swap("anci", "ance");
         }
         break;
       case 'e':
         swap("izer", "ize");
         break;
       case 'l':
         if (!swap("bli", "ble")
             && !swap("alli", "al")
             && !swap("entli", "ent")
             && !swap("eli", "e")) {
           swap("ousli", "ous");
         }
         break;
       case 'o':
         if (!swap("ization", "ize") && !swap("ation", "ate")) {
           swap("ator", "ate");
         }
         break;
       case 's':
         if (!swap("alism", "al")
             && !swap("iveness", "ive")
             && !swap("fulness", "ful")) {
           swap("ousness", "ous");
         }
         break;
       case 't':
         if (!swap("aliti", "al") && !swap("iviti", "ive")) {
           swap("biliti", "ble");
         }
         break;
       case 'g':
         swap("logi", "log");
         break;
       default:
         break;
     }
   }

   /* step4() handles -ic-, -full, -ness etc. with the same strategy as
      step3; the switch is on the last character. */

   private void step4() {
     switch (b[k]) {
       case 'e':
         if (!swap("icate", "ic") && !swap("ative", "")) {
           swap("alize", "al");
         }
         break;
       case 'i':
         swap("iciti", "ic");
         break;
       case 'l':
         if (!swap("ical", "ic")) {
           swap("ful", "");
         }
         break;
       case 's':
         swap("ness", "");
         break;
       default:
         break;
     }
   }

   /* step5() removes -ant, -ence etc. in context <c>vcvc<v>, i.e. when the
      stem before the suffix has m() > 1. */

   private void step5() {
     if (k == k0) {
       return; /* for Bug 1 */
     }
     final boolean matched;
     switch (b[k - 1]) {
       case 'a':
         matched = ends("al");
         break;
       case 'c':
         matched = ends("ance") || ends("ence");
         break;
       case 'e':
         matched = ends("er");
         break;
       case 'i':
         matched = ends("ic");
         break;
       case 'l':
         matched = ends("able") || ends("ible");
         break;
       case 'n':
         /* "ement" and "ment" are tried before "ent", so element etc. are
            not stripped before the m */
         matched = ends("ant") || ends("ement") || ends("ment") || ends("ent");
         break;
       case 'o':
         /* j >= 0 fixes Bug 2; "ou" takes care of -ous */
         matched = (ends("ion") && j >= 0 && (b[j] == 's' || b[j] == 't'))
             || ends("ou");
         break;
       case 's':
         matched = ends("ism");
         break;
       case 't':
         matched = ends("ate") || ends("iti");
         break;
       case 'u':
         matched = ends("ous");
         break;
       case 'v':
         matched = ends("ive");
         break;
       case 'z':
         matched = ends("ize");
         break;
       default:
         matched = false;
         break;
     }
     if (matched && m() > 1) {
       k = j;
     }
   }

   /* step6() removes a final -e if m() > 1 (or m() == 1 and the stem is not
      cvc), and reduces a final -ll to -l if m() > 1. */

   private void step6() {
     j = k;
     if (b[k] == 'e') {
       final int measure = m();
       final boolean dropE = measure > 1 || (measure == 1 && !cvc(k - 1));
       if (dropE) {
         k--;
       }
     }
     if (b[k] == 'l' && doublec(k) && m() > 1) {
       k--;
     }
   }


   /**
    * Stems a word given as a String.  Returns the stem as a new String when
    * stemming changed the word, otherwise the argument itself.
    */
   public String stem(String s) {
     return stem(s.toCharArray(), s.length()) ? toString() : s;
   }

   /**
    * Stems the word occupying the whole of the given array.  Returns true
    * when the result differs from the input; fetch the result with
    * getResultLength()/getResultBuffer() or toString().
    */
   public boolean stem(char[] word) {
     final int wordLen = word.length;
     return stem(word, wordLen);
   }

   /**
    * Stems the word stored in {@code wordLen} characters of
    * {@code wordBuffer} starting at {@code offset}.  The characters are
    * copied into the internal buffer, which is replaced by a larger one when
    * too small.  Returns true when the result differs from the input; fetch
    * the result with getResultLength()/getResultBuffer() or toString().
    */
   public boolean stem(char[] wordBuffer, int offset, int wordLen) {
     reset();
     if (wordLen > b.length) {
       b = new char[ArrayUtil.oversize(wordLen, Character.BYTES)];
     }
     System.arraycopy(wordBuffer, offset, b, 0, wordLen);
     i = wordLen;
     return stem(0);
   }

   /**
    * Stems the word stored in the first {@code wordLen} characters of the
    * given array.  Returns true when the result differs from the input;
    * fetch the result with getResultLength()/getResultBuffer() or toString().
    */
   public boolean stem(char[] word, int wordLen) {
     return stem(word, 0, wordLen);
   }

   /**
    * Stems the word that was placed in the buffer through add().  Returns
    * true when the result differs from the input; fetch the result with
    * getResultLength()/getResultBuffer() or toString().
    */
   public boolean stem() {
     return stem(0);
   }

   /**
    * Runs the six steps over the buffer, treating index {@code i0} as the
    * first character of the word.  The steps are skipped unless the word
    * spans at least three characters.  Afterwards the result length is set
    * from k.  Returns true when a step rewrote characters or the length
    * changed.
    */
   public boolean stem(int i0) {
     k0 = i0;
     k = i - 1;
     if (k > k0 + 1) {
       step1();
       step2();
       step3();
       step4();
       step5();
       step6();
     }
     // Lopping off letters also counts as a change
     // (thanks to Ifigenia Vairelles for pointing this out).
     final int newLength = k + 1;
     if (newLength != i) {
       dirty = true;
     }
     i = newLength;
     return dirty;
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.analysis.en;


	/*

	Porter stemmer in Java. The original paper is in

	Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
	no. 3, pp 130-137,

	See also http://www.tartarus.org/~martin/PorterStemmer/index.html

	Bug 1 (reported by Gonzalo Parra 16/10/99) fixed as marked below.
	The words 'aed', 'eed', 'oed' leave k at 'a' for step 3, and b[k-1]
	is then outside the bounds of b.

	Similarly,

	Bug 2 (reported by Steve Dyrdahl 22/2/00) fixed as marked below.
	'ion' by itself leaves j = -1 in the test for 'ion' in step 5, and
	b[j] is then outside the bounds of b.

	Release 3.

	[ This version is derived from Release 3, modified by Brian Goetz to
	optimize for fewer object creations. ]

	*/


	import org.apache.lucene.util.ArrayUtil;

	/**
	*
	* Stemmer, implementing the Porter Stemming Algorithm
	*
	* The Stemmer class transforms a word into its root form. The input
	* word can be provided a character at time (by calling add()), or at once
	* by calling one of the various stem(something) methods.
	*/

	class PorterStemmer
	{
	private char[] b;
	private int i, /* offset into b */
	j, k, k0;
	private boolean dirty = false;
	private static final int INITIAL_SIZE = 50;

	public PorterStemmer() {
	b = new char[INITIAL_SIZE];
	i = 0;
	}

	/**
	* reset() resets the stemmer so it can stem another word. If you invoke
	* the stemmer by calling add(char) and then stem(), you must call reset()
	* before starting another word.
	*/
	public void reset() { i = 0; dirty = false; }

	/**
	* Add a character to the word being stemmed. When you are finished
	* adding characters, you can call stem(void) to process the word.
	*/
	public void add(char ch) {
	if (b.length <= i) {
	b = ArrayUtil.grow(b, i+1);
	}
	b[i++] = ch;
	}

	/**
	* After a word has been stemmed, it can be retrieved by toString(),
	* or a reference to the internal buffer can be retrieved by getResultBuffer
	* and getResultLength (which is generally more efficient.)
	*/
	@Override
	public String toString() { return new String(b,0,i); }

	/**
	* Returns the length of the word resulting from the stemming process.
	*/
	public int getResultLength() { return i; }

	/**
	* Returns a reference to a character buffer containing the results of
	* the stemming process. You also need to consult getResultLength()
	* to determine the length of the result.
	*/
	public char[] getResultBuffer() { return b; }

	/* cons(i) is true <=> b[i] is a consonant. */

	private final boolean cons(int i) {
	switch (b[i]) {
	case 'a': case 'e': case 'i': case 'o': case 'u':
	return false;
	case 'y':
	return (i==k0) ? true : !cons(i-1);
	default:
	return true;
	}
	}

	/* m() measures the number of consonant sequences between k0 and j. if c is
	a consonant sequence and v a vowel sequence, and <..> indicates arbitrary
	presence,

	<c><v> gives 0
	<c>vc<v> gives 1
	<c>vcvc<v> gives 2
	<c>vcvcvc<v> gives 3
	....
	*/

	private final int m() {
	int n = 0;
	int i = k0;
	while(true) {
	if (i > j)
	return n;
	if (! cons(i))
	break;
	i++;
	}
	i++;
	while(true) {
	while(true) {
	if (i > j)
	return n;
	if (cons(i))
	break;
	i++;
	}
	i++;
	n++;
	while(true) {
	if (i > j)
	return n;
	if (! cons(i))
	break;
	i++;
	}
	i++;
	}
	}

	/* vowelinstem() is true <=> k0,...j contains a vowel */

	private final boolean vowelinstem() {
	int i;
	for (i = k0; i <= j; i++)
	if (! cons(i))
	return true;
	return false;
	}

	/* doublec(j) is true <=> j,(j-1) contain a double consonant. */

	private final boolean doublec(int j) {
	if (j < k0+1)
	return false;
	if (b[j] != b[j-1])
	return false;
	return cons(j);
	}

	/* cvc(i) is true <=> i-2,i-1,i has the form consonant - vowel - consonant
	and also if the second c is not w,x or y. this is used when trying to
	restore an e at the end of a short word. e.g.

	cav(e), lov(e), hop(e), crim(e), but
	snow, box, tray.

	*/

	private final boolean cvc(int i) {
	if (i < k0+2 \|\| !cons(i) \|\| cons(i-1) \|\| !cons(i-2))
	return false;
	else {
	int ch = b[i];
	if (ch == 'w' \|\| ch == 'x' \|\| ch == 'y') return false;
	}
	return true;
	}

	private final boolean ends(String s) {
	int l = s.length();
	int o = k-l+1;
	if (o < k0)
	return false;
	for (int i = 0; i < l; i++)
	if (b[o+i] != s.charAt(i))
	return false;
	j = k-l;
	return true;
	}

	/* setto(s) sets (j+1),...k to the characters in the string s, readjusting
	k. */

	void setto(String s) {
	int l = s.length();
	int o = j+1;
	for (int i = 0; i < l; i++)
	b[o+i] = s.charAt(i);
	k = j+l;
	dirty = true;
	}

	/* r(s) is used further down. */

	void r(String s) { if (m() > 0) setto(s); }

	/* step1() gets rid of plurals and -ed or -ing. e.g.

	caresses -> caress
	ponies -> poni
	ties -> ti
	caress -> caress
	cats -> cat

	feed -> feed
	agreed -> agree
	disabled -> disable

	matting -> mat
	mating -> mate
	meeting -> meet
	milling -> mill
	messing -> mess

	meetings -> meet

	*/

	private final void step1() {
	if (b[k] == 's') {
	if (ends("sses")) k -= 2;
	else if (ends("ies")) setto("i");
	else if (b[k-1] != 's') k--;
	}
	if (ends("eed")) {
	if (m() > 0)
	k--;
	}
	else if ((ends("ed") \|\| ends("ing")) && vowelinstem()) {
	k = j;
	if (ends("at")) setto("ate");
	else if (ends("bl")) setto("ble");
	else if (ends("iz")) setto("ize");
	else if (doublec(k)) {
	int ch = b[k--];
	if (ch == 'l' \|\| ch == 's' \|\| ch == 'z')
	k++;
	}
	else if (m() == 1 && cvc(k))
	setto("e");
	}
	}

	/* step2() turns terminal y to i when there is another vowel in the stem. */

	private final void step2() {
	if (ends("y") && vowelinstem()) {
	b[k] = 'i';
	dirty = true;
	}
	}

	/* step3() maps double suffices to single ones. so -ization ( = -ize plus
	-ation) maps to -ize etc. note that the string before the suffix must give
	m() > 0. */

	private final void step3() {
	if (k == k0) return; /* For Bug 1 */
	switch (b[k-1]) {
	case 'a':
	if (ends("ational")) { r("ate"); break; }
	if (ends("tional")) { r("tion"); break; }
	break;
	case 'c':
	if (ends("enci")) { r("ence"); break; }
	if (ends("anci")) { r("ance"); break; }
	break;
	case 'e':
	if (ends("izer")) { r("ize"); break; }
	break;
	case 'l':
	if (ends("bli")) { r("ble"); break; }
	if (ends("alli")) { r("al"); break; }
	if (ends("entli")) { r("ent"); break; }
	if (ends("eli")) { r("e"); break; }
	if (ends("ousli")) { r("ous"); break; }
	break;
	case 'o':
	if (ends("ization")) { r("ize"); break; }
	if (ends("ation")) { r("ate"); break; }
	if (ends("ator")) { r("ate"); break; }
	break;
	case 's':
	if (ends("alism")) { r("al"); break; }
	if (ends("iveness")) { r("ive"); break; }
	if (ends("fulness")) { r("ful"); break; }
	if (ends("ousness")) { r("ous"); break; }
	break;
	case 't':
	if (ends("aliti")) { r("al"); break; }
	if (ends("iviti")) { r("ive"); break; }
	if (ends("biliti")) { r("ble"); break; }
	break;
	case 'g':
	if (ends("logi")) { r("log"); break; }
	}
	}

	/* step4() deals with -ic-, -full, -ness etc. similar strategy to step3. */

	private final void step4() {
	switch (b[k]) {
	case 'e':
	if (ends("icate")) { r("ic"); break; }
	if (ends("ative")) { r(""); break; }
	if (ends("alize")) { r("al"); break; }
	break;
	case 'i':
	if (ends("iciti")) { r("ic"); break; }
	break;
	case 'l':
	if (ends("ical")) { r("ic"); break; }
	if (ends("ful")) { r(""); break; }
	break;
	case 's':
	if (ends("ness")) { r(""); break; }
	break;
	}
	}

	/* step5() takes off -ant, -ence etc., in context <c>vcvc<v>. */

	private final void step5() {
	if (k == k0) return; /* for Bug 1 */
	switch (b[k-1]) {
	case 'a':
	if (ends("al")) break;
	return;
	case 'c':
	if (ends("ance")) break;
	if (ends("ence")) break;
	return;
	case 'e':
	if (ends("er")) break; return;
	case 'i':
	if (ends("ic")) break; return;
	case 'l':
	if (ends("able")) break;
	if (ends("ible")) break; return;
	case 'n':
	if (ends("ant")) break;
	if (ends("ement")) break;
	if (ends("ment")) break;
	/* element etc. not stripped before the m */
	if (ends("ent")) break;
	return;
	case 'o':
	if (ends("ion") && j >= 0 && (b[j] == 's' \|\| b[j] == 't')) break;
	/* j >= 0 fixes Bug 2 */
	if (ends("ou")) break;
	return;
	/* takes care of -ous */
	case 's':
	if (ends("ism")) break;
	return;
	case 't':
	if (ends("ate")) break;
	if (ends("iti")) break;
	return;
	case 'u':
	if (ends("ous")) break;
	return;
	case 'v':
	if (ends("ive")) break;
	return;
	case 'z':
	if (ends("ize")) break;
	return;
	default:
	return;
	}
	if (m() > 1)
	k = j;
	}

	/* step6() removes a final -e if m() > 1. */

	private final void step6() {
	j = k;
	if (b[k] == 'e') {
	int a = m();
	if (a > 1 \|\| a == 1 && !cvc(k-1))
	k--;
	}
	if (b[k] == 'l' && doublec(k) && m() > 1)
	k--;
	}


	/**
	* Stem a word provided as a String. Returns the result as a String.
	*/
	public String stem(String s) {
	if (stem(s.toCharArray(), s.length()))
	return toString();
	else
	return s;
	}

	/** Stem a word contained in a char[]. Returns true if the stemming process
	* resulted in a word different from the input. You can retrieve the
	* result with getResultLength()/getResultBuffer() or toString().
	*/
	public boolean stem(char[] word) {
	return stem(word, word.length);
	}

	/** Stem a word contained in a portion of a char[] array. Returns
	* true if the stemming process resulted in a word different from
	* the input. You can retrieve the result with
	* getResultLength()/getResultBuffer() or toString().
	*/
	public boolean stem(char[] wordBuffer, int offset, int wordLen) {
	reset();
	if (b.length < wordLen) {
	b = new char[ArrayUtil.oversize(wordLen, Character.BYTES)];
	}
	System.arraycopy(wordBuffer, offset, b, 0, wordLen);
	i = wordLen;
	return stem(0);
	}

	/** Stem a word contained in a leading portion of a char[] array.
	* Returns true if the stemming process resulted in a word different
	* from the input. You can retrieve the result with
	* getResultLength()/getResultBuffer() or toString().
	*/
	public boolean stem(char[] word, int wordLen) {
	return stem(word, 0, wordLen);
	}

	/** Stem the word placed into the Stemmer buffer through calls to add().
	* Returns true if the stemming process resulted in a word different
	* from the input. You can retrieve the result with
	* getResultLength()/getResultBuffer() or toString().
	*/
	public boolean stem() {
	return stem(0);
	}

	public boolean stem(int i0) {
	k = i - 1;
	k0 = i0;
	if (k > k0+1) {
	step1(); step2(); step3(); step4(); step5(); step6();
	}
	// Also, a word is considered dirty if we lopped off letters
	// Thanks to Ifigenia Vairelles for pointing this out.
	if (i != k+1)
	dirty = true;
	i = k+1;
	return dirty;
	}

	/* Test program for demonstrating the Stemmer. It reads a file and
	* stems each word, writing the result to standard out.
	* Usage: Stemmer file-name
	public static void main(String[] args) {
	PorterStemmer s = new PorterStemmer();

	for (int i = 0; i < args.length; i++) {
	try {
	InputStream in = new FileInputStream(args[i]);
	byte[] buffer = new byte[1024];
	int bufferLen, offset, ch;

	bufferLen = in.read(buffer);
	offset = 0;
	s.reset();

	while(true) {
	if (offset < bufferLen)
	ch = buffer[offset++];
	else {
	bufferLen = in.read(buffer);
	offset = 0;
	if (bufferLen < 0)
	ch = -1;
	else
	ch = buffer[offset++];
	}

	if (Character.isLetter((char) ch)) {
	s.add(Character.toLowerCase((char) ch));
	}
	else {
	s.stem();
	System.out.print(s.toString());
	s.reset();
	if (ch < 0)
	break;
	else {
	System.out.print((char) ch);
	}
	}
	}

	in.close();
	}
	catch (IOException e) {
	System.out.println("error reading " + args[i]);
	}
	}
	}*/
	}