LUCENE-9785: Hunspell: don't check case in compound middle and end (#2398)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java
index 1f1f9ba..76db921 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java
@@ -160,6 +160,7 @@
private Root<CharsRef> findStem(
char[] wordChars, int offset, int length, WordCase originalCase, WordContext context) {
checkCanceled.run();
+ boolean checkCase = context != COMPOUND_MIDDLE && context != COMPOUND_END;
@SuppressWarnings({"rawtypes", "unchecked"})
Root<CharsRef>[] result = new Root[1];
stemmer.doStem(
@@ -168,7 +169,7 @@
length,
context,
(stem, formID, morphDataId) -> {
- if (!acceptCase(originalCase, formID, stem)) {
+ if (checkCase && !acceptCase(originalCase, formID, stem)) {
return dictionary.hasFlag(formID, Dictionary.HIDDEN_FLAG);
}
if (acceptsStem(formID)) {
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.aff b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.aff
index 4b56950..215e53c 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.aff
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.aff
@@ -4,3 +4,5 @@
SFX X Y 1
SFX X 0 s . +s
+
+COMPOUNDFLAG C
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.dic b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.dic
index 4d497dc..07c826d 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.dic
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.dic
@@ -1,4 +1,4 @@
-9
+11
drink/X
walk/XZ
test/Z
@@ -8,3 +8,6 @@
Quux./Z
way/X
ways/Z
+tvv/ZC
+school/C
+uni/ZC
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.good b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.good
index 795112e..bb8c788 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.good
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.good
@@ -5,4 +5,8 @@
way
Way
WAY
-ways
\ No newline at end of file
+ways
+schooltvv
+Schooltvv
+SCHOOLTVV
+unitvv
\ No newline at end of file
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.sug b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.sug
index e4cb7fa..3052b2d 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.sug
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.sug
@@ -6,4 +6,6 @@
baz.
Quux.
Quux.
-Way
\ No newline at end of file
+Way
+unitvv, Uni tvv, uni
+unitvv, UNI TVV, uni
\ No newline at end of file
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.wrong b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.wrong
index 1986f6d..9807992 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.wrong
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.wrong
@@ -7,3 +7,5 @@
quux.
QUUX.
Ways
+Unitvv
+UNITVV