| Index: src/java/org/apache/lucene/index/TermsHashPerField.java
|
| ===================================================================
|
| --- src/java/org/apache/lucene/index/TermsHashPerField.java (revision 831090)
|
| +++ src/java/org/apache/lucene/index/TermsHashPerField.java (working copy)
|
| @@ -361,8 +361,8 @@
|
| char ch = tokenText[--downto]; |
| |
| if (ch >= UnicodeUtil.UNI_SUR_LOW_START && ch <= UnicodeUtil.UNI_SUR_LOW_END) { |
| - if (0 == downto) { |
| - // Unpaired |
| + if (0 == downto || ch >= 0xdffe) { |
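| + // Unpaired low surrogate, or low surrogate 0xDFFE/0xDFFF (trailing unit of the noncharacters U+nFFFE/U+nFFFF, reserved for process-internal use): treat as unpaired and replace. NOTE(review): the check ignores the preceding high surrogate, so pairs such as D800 DFFE (U+103FE, not a noncharacter) are replaced too -- confirm intended |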
| ch = tokenText[downto] = UnicodeUtil.UNI_REPLACEMENT_CHAR; |
| } else { |
| final char ch2 = tokenText[downto-1]; |
| @@ -378,8 +378,8 @@
|
| } |
| } |
| } else if (ch >= UnicodeUtil.UNI_SUR_HIGH_START && (ch <= UnicodeUtil.UNI_SUR_HIGH_END || |
| - ch == 0xffff)) { |
| - // Unpaired or 0xffff |
| + ch == 0xffff || ch == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef))) { |
| + // Unpaired high surrogate, or a BMP noncharacter: U+FFFF, U+FFFE, or U+FDD0..U+FDEF |
| ch = tokenText[downto] = UnicodeUtil.UNI_REPLACEMENT_CHAR; |
| } |
| |