LUCENE-10312: Revert changes in PersianAnalyzer from 9x (#904)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
index afc4123..f0202db 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
@@ -29,7 +29,6 @@
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
import org.apache.lucene.analysis.core.DecimalDigitFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.IOUtils;
@@ -87,8 +86,6 @@
}
}
- private final CharArraySet stemExclusionSet;
-
/** Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */
public PersianAnalyzer() {
this(DefaultSetHolder.DEFAULT_STOP_SET);
@@ -100,19 +97,7 @@
* @param stopwords a stopword set
*/
public PersianAnalyzer(CharArraySet stopwords) {
- this(stopwords, CharArraySet.EMPTY_SET);
- }
-
- /**
- * Builds an analyzer with the given stop word. If a none-empty stem exclusion set is provided
- * this analyzer will add a {@link SetKeywordMarkerFilter} before {@link PersianStemFilter}.
- *
- * @param stopwords a stopword set
- * @param stemExclusionSet a set of terms not to be stemmed
- */
- public PersianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -136,11 +121,7 @@
* the order here is important: the stopword list is normalized with the
* above!
*/
- result = new StopFilter(result, stopwords);
- if (!stemExclusionSet.isEmpty()) {
- result = new SetKeywordMarkerFilter(result, stemExclusionSet);
- }
- return new TokenStreamComponents(source, new PersianStemFilter(result));
+ return new TokenStreamComponents(source, new StopFilter(result, stopwords));
}
@Override
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java
index 983dd07..5b7716c 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java
@@ -32,7 +32,14 @@
@Override
public void setUp() throws Exception {
super.setUp();
- a = new PersianAnalyzer();
+ a =
+ new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName) {
+ final Tokenizer source = new MockTokenizer();
+ return new TokenStreamComponents(source, new PersianStemFilter(source));
+ }
+ };
}
@Override