blob: 8a07482e6a4dff9db386f1b4134424f3636f4865 [file] [log] [blame]
Index: modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java
===================================================================
--- modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java (revision 991049)
+++ modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java (working copy)
@@ -68,5 +68,12 @@
// ascii-folding-filter type stuff
assertAnalyzesTo(a, "đis is cræzy", new String[] { "dis", "is", "craezy" });
+
+ // proper downcasing of Turkish dotted-capital I
+ // (according to default case folding rules)
+ assertAnalyzesTo(a, "ELİF", new String[] { "elif" });
+
+ // handling of decomposed combining-dot-above
+ assertAnalyzesTo(a, "eli\u0307f", new String[] { "elif" });
}
}
Index: modules/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Index: modules/analysis/icu/build.xml
===================================================================
--- modules/analysis/icu/build.xml (revision 991049)
+++ modules/analysis/icu/build.xml (working copy)
@@ -65,17 +65,27 @@
<property name="gennorm2.src.dir" value="src/data/utr30"/>
<property name="gennorm2.src.files"
value="nfkc.txt nfkc_cf.txt BasicFoldings.txt DiacriticFolding.txt DingbatFolding.txt HanRadicalFolding.txt NativeDigitFolding.txt"/>
+ <property name="gennorm2.tmp" value="${build.dir}/gennorm2/utr30.tmp"/>
<property name="gennorm2.dst" value="src/resources/org/apache/lucene/analysis/icu/utr30.nrm"/>
<target name="gennorm2">
- <echo>Warning: only works on a big-endian platform!</echo>
+ <echo>Note that the gennorm2 and icupkg tools must be on your PATH. These tools
+are part of the ICU4C package. See http://site.icu-project.org/ </echo>
+ <mkdir dir="${build.dir}/gennorm2"/>
<exec executable="gennorm2" failonerror="true">
<arg value="-v"/>
<arg value="-s"/>
<arg value="${gennorm2.src.dir}"/>
- <arg value="${gennorm2.src.files}"/>
+ <arg line="${gennorm2.src.files}"/>
<arg value="-o"/>
+ <arg value="${gennorm2.tmp}"/>
+ </exec>
+ <!-- now convert binary file to big-endian -->
+ <exec executable="icupkg" failonerror="true">
+ <arg value="-tb"/>
+ <arg value="${gennorm2.tmp}"/>
<arg value="${gennorm2.dst}"/>
</exec>
+ <delete file="${gennorm2.tmp}"/>
</target>
<property name="rbbi.src.dir" location="src/data/uax29"/>
Index: lucene/contrib/CHANGES.txt
===================================================================
--- lucene/contrib/CHANGES.txt (revision 990885)
+++ lucene/contrib/CHANGES.txt (working copy)
@@ -117,6 +117,11 @@
* LUCENE-2615: Fix DirectIOLinuxDirectory to not assign bogus
permissions to newly created files, and to not silently hardwire
buffer size to 1 MB. (Mark Miller, Robert Muir, Mike McCandless)
+
+* LUCENE-2629: Fix gennorm2 task for generating ICUFoldingFilter's .nrm file. This allows
+ you to customize its normalization/folding by editing the source data files in
+ src/data/utr30 and regenerating the .nrm with 'ant gennorm2'. (David Bowen via Robert Muir)
+
API Changes