| Index: modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java
|
| ===================================================================
|
| --- modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java (revision 991049)
|
| +++ modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java (working copy)
|
| @@ -68,5 +68,12 @@
|
| |
| // ascii-folding-filter type stuff |
| assertAnalyzesTo(a, "đis is cræzy", new String[] { "dis", "is", "craezy" }); |
| + |
| + // proper downcasing of Turkish dotted-capital I |
| + // (according to default case folding rules) |
| + assertAnalyzesTo(a, "ELİF", new String[] { "elif" }); |
| + |
| + // handling of decomposed combining-dot-above |
| + assertAnalyzesTo(a, "eli\u0307f", new String[] { "elif" }); |
| } |
| } |
| Index: modules/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm
|
| ===================================================================
|
| Cannot display: file marked as a binary type.
|
| svn:mime-type = application/octet-stream
|
| Index: modules/analysis/icu/build.xml
|
| ===================================================================
|
| --- modules/analysis/icu/build.xml (revision 991049)
|
| +++ modules/analysis/icu/build.xml (working copy)
|
| @@ -65,17 +65,27 @@
|
| <property name="gennorm2.src.dir" value="src/data/utr30"/> |
| <property name="gennorm2.src.files" |
| value="nfkc.txt nfkc_cf.txt BasicFoldings.txt DiacriticFolding.txt DingbatFolding.txt HanRadicalFolding.txt NativeDigitFolding.txt"/> |
| + <property name="gennorm2.tmp" value="${build.dir}/gennorm2/utr30.tmp"/> |
| <property name="gennorm2.dst" value="src/resources/org/apache/lucene/analysis/icu/utr30.nrm"/> |
| <target name="gennorm2"> |
| - <echo>Warning: only works on a big-endian platform!</echo> |
| + <echo>Note that the gennorm2 and icupkg tools must be on your PATH. These tools |
| +are part of the ICU4C package. See http://site.icu-project.org/ </echo> |
| + <mkdir dir="${build.dir}/gennorm2"/> |
| <exec executable="gennorm2" failonerror="true"> |
| <arg value="-v"/> |
| <arg value="-s"/> |
| <arg value="${gennorm2.src.dir}"/> |
| - <arg value="${gennorm2.src.files}"/> |
| + <arg line="${gennorm2.src.files}"/> |
| <arg value="-o"/> |
| + <arg value="${gennorm2.tmp}"/> |
| + </exec> |
| + <!-- now convert binary file to big-endian --> |
| + <exec executable="icupkg" failonerror="true"> |
| + <arg value="-tb"/> |
| + <arg value="${gennorm2.tmp}"/> |
| <arg value="${gennorm2.dst}"/> |
| </exec> |
| + <delete file="${gennorm2.tmp}"/> |
| </target> |
| |
| <property name="rbbi.src.dir" location="src/data/uax29"/> |
| Index: lucene/contrib/CHANGES.txt
|
| ===================================================================
|
| --- lucene/contrib/CHANGES.txt (revision 990885)
|
| +++ lucene/contrib/CHANGES.txt (working copy)
|
| @@ -117,6 +117,11 @@
|
| * LUCENE-2615: Fix DirectIOLinuxDirectory to not assign bogus |
| permissions to newly created files, and to not silently hardwire |
| buffer size to 1 MB. (Mark Miller, Robert Muir, Mike McCandless) |
| + |
| +* LUCENE-2629: Fix gennorm2 task for generating ICUFoldingFilter's .nrm file. This allows |
| + you to customize its normalization/folding by editing the source data files in |
| + src/data/utr30 and regenerating the .nrm file with 'ant gennorm2'. (David Bowen via Robert Muir) |
| + |
| |
| API Changes |
| |