blob: 5870d505b1561c3d5ca1a83c501e3d75e38378d7 [file] [log] [blame]
Index: src/java/org/apache/lucene/index/TermsHashPerField.java
===================================================================
--- src/java/org/apache/lucene/index/TermsHashPerField.java (revision 831090)
+++ src/java/org/apache/lucene/index/TermsHashPerField.java (working copy)
@@ -361,8 +361,8 @@
char ch = tokenText[--downto];
if (ch >= UnicodeUtil.UNI_SUR_LOW_START && ch <= UnicodeUtil.UNI_SUR_LOW_END) {
- if (0 == downto) {
- // Unpaired
+ if (0 == downto || ch >= 0xdffe) {
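+ // Unpaired (index 0), or ch >= 0xdffe treated as process-internal: replace. NOTE(review): also matches the low half of valid pairs (e.g. U+1F3FE = D83C DFFE) -- confirm intended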
ch = tokenText[downto] = UnicodeUtil.UNI_REPLACEMENT_CHAR;
} else {
final char ch2 = tokenText[downto-1];
@@ -378,8 +378,8 @@
}
}
} else if (ch >= UnicodeUtil.UNI_SUR_HIGH_START && (ch <= UnicodeUtil.UNI_SUR_HIGH_END ||
- ch == 0xffff)) {
- // Unpaired or 0xffff
+ ch == 0xffff || ch == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef))) {
+ // Unpaired high surrogate, or a Unicode noncharacter (0xffff, 0xfffe, 0xfdd0-0xfdef): replace
ch = tokenText[downto] = UnicodeUtil.UNI_REPLACEMENT_CHAR;
}