blob: 762d0f333958b122adf46363e90a4704529cf84b [file] [log] [blame]
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java (revision 1406601)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java (working copy)
@@ -57,13 +57,13 @@
public void testKeywordAttribute() throws IOException {
MockTokenizer tokenizer = new MockTokenizer(new StringReader("lucene is awesome"), MockTokenizer.WHITESPACE, true);
tokenizer.setEnableChecks(true);
- HunspellStemFilter filter = new HunspellStemFilter(tokenizer, DICTIONARY);
+ HunspellStemFilter filter = new HunspellStemFilter(tokenizer, DICTIONARY, 2);
assertTokenStreamContents(filter, new String[]{"lucene", "lucen", "is", "awesome"}, new int[] {1, 0, 1, 1});
// assert with keywork marker
tokenizer = new MockTokenizer(new StringReader("lucene is awesome"), MockTokenizer.WHITESPACE, true);
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList("Lucene"), true);
- filter = new HunspellStemFilter(new KeywordMarkerFilter(tokenizer, set), DICTIONARY);
+ filter = new HunspellStemFilter(new KeywordMarkerFilter(tokenizer, set), DICTIONARY, 2);
assertTokenStreamContents(filter, new String[]{"lucene", "is", "awesome"}, new int[] {1, 1, 1});
}
@@ -74,7 +74,7 @@
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
- return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, DICTIONARY));
+ return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, DICTIONARY, 2));
}
};
checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
@@ -85,7 +85,7 @@
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new KeywordTokenizer(reader);
- return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, DICTIONARY));
+ return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, DICTIONARY, 2));
}
};
checkOneTermReuse(a, "", "");
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilter.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilter.java (revision 1406601)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilter.java (working copy)
@@ -49,9 +49,10 @@
*
* @param input TokenStream whose tokens will be stemmed
* @param dictionary HunspellDictionary containing the affix rules and words that will be used to stem the tokens
+ * @param recursionCap maximum depth of recursive affix stripping the stemmer may perform for a token
*/
- public HunspellStemFilter(TokenStream input, HunspellDictionary dictionary) {
- this(input, dictionary, true);
+ public HunspellStemFilter(TokenStream input, HunspellDictionary dictionary, int recursionCap) {
+ this(input, dictionary, true, recursionCap);
}
/**
@@ -61,11 +62,12 @@
* @param input TokenStream whose tokens will be stemmed
* @param dictionary HunspellDictionary containing the affix rules and words that will be used to stem the tokens
* @param dedup true if only unique terms should be output.
+ * @param recursionCap maximum depth of recursive affix stripping the stemmer may perform for a token
*/
- public HunspellStemFilter(TokenStream input, HunspellDictionary dictionary, boolean dedup) {
+ public HunspellStemFilter(TokenStream input, HunspellDictionary dictionary, boolean dedup, int recursionCap) {
super(input);
this.dedup = dedup;
- this.stemmer = new HunspellStemmer(dictionary);
+ this.stemmer = new HunspellStemmer(dictionary, recursionCap);
}
/**
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilterFactory.java (revision 1406601)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilterFactory.java (working copy)
@@ -56,11 +56,13 @@
private static final String PARAM_AFFIX = "affix";
private static final String PARAM_IGNORE_CASE = "ignoreCase";
private static final String PARAM_STRICT_AFFIX_PARSING = "strictAffixParsing";
+ private static final String PARAM_RECURSION_CAP = "recursionCap";
private static final String TRUE = "true";
private static final String FALSE = "false";
private HunspellDictionary dictionary;
private boolean ignoreCase = false;
+ private int recursionCap = 2;
/**
* Loads the hunspell dictionary and affix files defined in the configuration
@@ -90,6 +92,11 @@
else throw new IllegalArgumentException("Unknown value for " + PARAM_STRICT_AFFIX_PARSING + ": " + strictAffixParsingParam + ". Must be true or false");
}
+ String recursionCap = args.get(PARAM_RECURSION_CAP);
+ if (recursionCap != null) {
+ this.recursionCap = Integer.parseInt(recursionCap);
+ }
+
InputStream affix = null;
List<InputStream> dictionaries = new ArrayList<InputStream>();
@@ -117,6 +124,6 @@
* @return HunspellStemFilter that filters the TokenStream
*/
public TokenStream create(TokenStream tokenStream) {
- return new HunspellStemFilter(tokenStream, dictionary);
+ return new HunspellStemFilter(tokenStream, dictionary, recursionCap);
}
}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemmer.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemmer.java (revision 1406601)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemmer.java (working copy)
@@ -37,9 +37,7 @@
* conforms to the algorithm in the original hunspell algorithm, including recursive suffix stripping.
*/
public class HunspellStemmer {
-
- private static final int RECURSION_CAP = 2;
-
+ private int recursionCap = 2;
private final HunspellDictionary dictionary;
private final StringBuilder segment = new StringBuilder();
private CharacterUtils charUtils = CharacterUtils.getInstance(Version.LUCENE_40);
@@ -54,6 +52,17 @@
}
/**
+ * Constructs a new HunspellStemmer which will use the provided HunspellDictionary to create its stems
+ *
+ * @param dictionary HunspellDictionary that will be used to create the stems
+ * @param recursionCap upper bound on the recursion depth when stemming through cross-product affixes
+ */
+ public HunspellStemmer(HunspellDictionary dictionary, int recursionCap) {
+ this(dictionary);
+ this.recursionCap = recursionCap;
+ }
+
+ /**
* Find the stem(s) of the provided word
*
* @param word Word to find the stems for
@@ -197,7 +206,7 @@
}
}
- if (affix.isCrossProduct() && recursionDepth < RECURSION_CAP) {
+ if (affix.isCrossProduct() && recursionDepth < recursionCap) {
stems.addAll(stem(strippedWord, length, affix.getAppendFlags(), ++recursionDepth));
}