blob: 17d61489eae06a99b1c358c40e32d7515559d3f7 [file] [log] [blame]
package org.apache.lucene.analysis.de;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import java.io.IOException;
import java.util.Hashtable;
/**
 * A filter that stems German words. It supports a table of words that should
 * not be stemmed at all.
 *
 * @author Gerhard Schwarz
 * @version $Id$
 */
public final class GermanStemFilter extends TokenFilter {
    /**
     * The token most recently read from the input stream.
     */
    private Token token = null;
    /** Stemmer applied to every token that is not excluded. */
    private GermanStemmer stemmer = null;
    /**
     * Words that must be passed through unstemmed, or null if no exclusion
     * table was supplied.
     */
    private Hashtable exclusions = null;

    /**
     * Builds a GermanStemFilter that stems every token of the given stream.
     *
     * @param in the token stream to read from
     */
    public GermanStemFilter( TokenStream in ) {
        stemmer = new GermanStemmer();
        input = in;
    }

    /**
     * Builds a GermanStemFilter that uses an exclusion table.
     *
     * @param in the token stream to read from
     * @param exclusiontable table of words that should not be stemmed; may be
     *        null, in which case every token is stemmed
     */
    public GermanStemFilter( TokenStream in, Hashtable exclusiontable ) {
        this( in );
        // Assign the parameter; the previous self-assignment of the field
        // left the exclusion table permanently null.
        this.exclusions = exclusiontable;
    }

    /**
     * Reads the next token from the input and stems it unless it is listed in
     * the exclusion table.
     *
     * @return Returns the next token in the stream, or null at EOS.
     * @throws IOException if reading from the underlying stream fails
     */
    public final Token next()
        throws IOException {
        token = input.next();
        if ( token == null ) {
            return null;
        }

        String term = token.termText();

        // Check the exclusion table.
        // NOTE(review): Hashtable.contains() matches against the table's
        // VALUES, not its keys. This is kept as-is; it presumably relies on
        // tables that map each word to itself - confirm against the callers
        // before switching to containsKey().
        if ( exclusions != null && exclusions.contains( term ) ) {
            return token;
        }

        String stemmed = stemmer.stem( term );
        // If not stemmed, don't waste the time creating a new token.
        if ( stemmed.equals( term ) ) {
            return token;
        }
        // Keep the offsets of the original token so the stemmed token still
        // points at the right span of the source text.
        return new Token( stemmed, token.startOffset(), token.endOffset(), token.type() );
    }
}