LUCENE-9805: Hunspell: fix space + mixed case heuristics on suggestions (#2420)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ModifyingSuggester.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ModifyingSuggester.java
index 86e34c7..286d1ee 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ModifyingSuggester.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ModifyingSuggester.java
@@ -70,18 +70,18 @@
}
List<String> adjusted =
- result.stream().map(s -> capitalizeAfterSpace(low, s)).collect(Collectors.toList());
+ result.stream().map(s -> capitalizeAfterSpace(word, s)).collect(Collectors.toList());
result.clear();
result.addAll(adjusted);
}
}
// aNew -> "a New" (instead of "a new")
- private String capitalizeAfterSpace(String lowMisspelled, String candidate) {
+ private String capitalizeAfterSpace(String misspelled, String candidate) {
int space = candidate.indexOf(' ');
int tail = candidate.length() - space - 1;
if (space > 0
- && lowMisspelled.regionMatches(lowMisspelled.length() - tail, candidate, space + 1, tail)) {
+ && !misspelled.regionMatches(misspelled.length() - tail, candidate, space + 1, tail)) {
return candidate.substring(0, space + 1)
+ Character.toUpperCase(candidate.charAt(space + 1))
+ candidate.substring(space + 2);
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.dic b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.dic
index 6725561..6ea06f1 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.dic
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.dic
@@ -15,4 +15,8 @@
inspire
Saiph
sahib
-ship
\ No newline at end of file
+ship
+ESP
+esp
+s
+S
\ No newline at end of file
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.sug b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.sug
index 4595756..e7a52c4 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.sug
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.sug
@@ -13,3 +13,4 @@
permanent
in, in a
Saiph, Ship, Sahib
+ESP, ESP s, Esp, Esp s
\ No newline at end of file
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.wrong b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.wrong
index d8875d8..356a08d 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.wrong
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.wrong
@@ -14,4 +14,5 @@
permanent-vacation
permqnent-vacation
ina
-Sahip
\ No newline at end of file
+Sahip
+ESPs
\ No newline at end of file