LUCENE-10312: Revert changes in PersianAnalyzer from 9x (#904)

commit: ba8c3a806ada3d7b3c34d408e449a92376a8481b [log] [tgz]
author: Tomoko Uchida <tomoko.uchida.1111@gmail.com> Thu May 19 21:38:52 2022 +0900
committer: Tomoko Uchida <tomoko.uchida.1111@gmail.com> Thu May 19 21:44:58 2022 +0900
tree: d260211c1463a72d5a225523f038879058851485
parent: 978eef5459c7683038ddcca4ec56e4baa63715d0 [diff]
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
index afc4123..f0202db 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java

@@ -29,7 +29,6 @@
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
 import org.apache.lucene.analysis.core.DecimalDigitFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;
 
@@ -87,8 +86,6 @@
     }
   }
 
-  private final CharArraySet stemExclusionSet;
-
   /** Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */
   public PersianAnalyzer() {
     this(DefaultSetHolder.DEFAULT_STOP_SET);
@@ -100,19 +97,7 @@
    * @param stopwords a stopword set
    */
   public PersianAnalyzer(CharArraySet stopwords) {
-    this(stopwords, CharArraySet.EMPTY_SET);
-  }
-
-  /**
-   * Builds an analyzer with the given stop word. If a none-empty stem exclusion set is provided
-   * this analyzer will add a {@link SetKeywordMarkerFilter} before {@link PersianStemFilter}.
-   *
-   * @param stopwords a stopword set
-   * @param stemExclusionSet a set of terms not to be stemmed
-   */
-  public PersianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
     super(stopwords);
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
   }
 
   /**
@@ -136,11 +121,7 @@
      * the order here is important: the stopword list is normalized with the
      * above!
      */
-    result = new StopFilter(result, stopwords);
-    if (!stemExclusionSet.isEmpty()) {
-      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
-    }
-    return new TokenStreamComponents(source, new PersianStemFilter(result));
+    return new TokenStreamComponents(source, new StopFilter(result, stopwords));
   }
 
   @Override

diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java
index 983dd07..5b7716c 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java

@@ -32,7 +32,14 @@
   @Override
   public void setUp() throws Exception {
     super.setUp();
-    a = new PersianAnalyzer();
+    a =
+        new Analyzer() {
+          @Override
+          protected TokenStreamComponents createComponents(String fieldName) {
+            final Tokenizer source = new MockTokenizer();
+            return new TokenStreamComponents(source, new PersianStemFilter(source));
+          }
+        };
   }
 
   @Override
commit	ba8c3a806ada3d7b3c34d408e449a92376a8481b	[log] [tgz]
author	Tomoko Uchida <tomoko.uchida.1111@gmail.com>	Thu May 19 21:38:52 2022 +0900
committer	Tomoko Uchida <tomoko.uchida.1111@gmail.com>	Thu May 19 21:44:58 2022 +0900
tree	d260211c1463a72d5a225523f038879058851485
parent	978eef5459c7683038ddcca4ec56e4baa63715d0 [diff]