| Index: lucene/JRE_VERSION_MIGRATION.txt
|
| ===================================================================
|
| --- lucene/JRE_VERSION_MIGRATION.txt (revision 1697082)
|
| +++ lucene/JRE_VERSION_MIGRATION.txt (working copy)
|
| @@ -17,6 +17,7 @@
|
| * Java 6, Unicode 4.0 |
| * Java 7, Unicode 6.0 |
| * Java 8, Unicode 6.2 |
| + * Java 9 (not yet released / officially supported by Lucene), Unicode 7.0 |
| |
| In general, whether or not you need to re-index largely depends upon the data that |
| you are searching, and what was changed in any given Unicode version. For example, |
| Index: lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java
|
| ===================================================================
|
| --- lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java (revision 1697082)
|
| +++ lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java (working copy)
|
| @@ -35,6 +35,7 @@
|
| import java.util.HashMap; |
| import java.util.Iterator; |
| import java.util.List; |
| +import java.util.Locale; |
| import java.util.Map; |
| import java.util.NoSuchElementException; |
| import java.util.Random; |
| @@ -1188,7 +1189,7 @@
|
| int offset = nextInt(r, 0, WHITESPACE_CHARACTERS.length-1); |
| char c = WHITESPACE_CHARACTERS[offset]; |
| // sanity check |
| - Assert.assertTrue("Not really whitespace? (@"+offset+"): " + c, Character.isWhitespace(c)); |
| + assert Character.isWhitespace(c) : String.format(Locale.ENGLISH, "Not really whitespace? WHITESPACE_CHARACTERS[%d] is '\\u%04X'", offset, (int) c); |
| out.append(c); |
| } |
| return out.toString(); |
| @@ -1307,9 +1308,9 @@
|
| '\u001E', |
| '\u001F', |
| '\u0020', |
| - // '\u0085', faild sanity check? |
| + // '\u0085', failed sanity check? |
| '\u1680', |
| - '\u180E', |
| + // '\u180E', no longer whitespace in Unicode 7.0 (Java 9)! |
| '\u2000', |
| '\u2001', |
| '\u2002', |