blob: ac6ac1eb05ba0d07d6490e3b16972c8b5099b86c [file] [log] [blame]
# Copyright 2001-2010 Unicode, Inc.
#
# Disclaimer
#
# This source code is provided as is by Unicode, Inc. No claims are
# made as to fitness for any particular purpose. No warranties of any
# kind are expressed or implied. The recipient agrees to determine
# applicability of information provided. If this file has been
# purchased on magnetic or optical media from Unicode, Inc., the
# sole remedy for any claim will be exchange of defective media
# within 90 days of receipt.
#
# Limitations on Rights to Redistribute This Code
#
# Unicode, Inc. hereby grants the right to freely use the information
# supplied in this file in the creation of products supporting the
# Unicode Standard, and to make copies of this file in any form
# for internal or external distribution as long as this notice
# remains attached.
### Custom Normalization mappings for UTR#30
### (http://www.unicode.org/reports/tr30/tr30-4.html)
###
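### Created from Unicode 5.2 UCD (the [:Diacritic:] list below also contains
### code points assigned in later Unicode versions, e.g. 1E944..1E946)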
###
#### WARNING ####
#### Rule: lines direct content generation.
#### All non-comments will be REMOVED when this file's contents
#### are generated by 'ant gen-utr30-data-files'.
#### Use "# Rule: verbatim" to keep non-comments up until
#### the next "# Rule:" line.
#### WARNING ####
## Remove diacritics
# Rule: [:Diacritic:] >
005E>
0060>
00A8>
00AF>
00B4>
00B7..00B8>
02B0..034E>
0350..0357>
035D..0362>
0374..0375>
037A>
0384..0385>
0483..0487>
0559>
0591..05A1>
05A3..05BD>
05BF>
05C1..05C2>
05C4>
064B..0652>
0657..0658>
06DF..06E0>
06E5..06E6>
06EA..06EC>
0730..074A>
07A6..07B0>
07EB..07F5>
0818..0819>
08E3..08FE>
093C>
094D>
0951..0954>
0971>
09BC>
09CD>
0A3C>
0A4D>
0ABC>
0ACD>
0AFD..0AFF>
0B3C>
0B4D>
0BCD>
0C4D>
0CBC>
0CCD>
0D3B..0D3C>
0D4D>
0DCA>
0E47..0E4C>
0E4E>
0EC8..0ECC>
0F18..0F19>
0F35>
0F37>
0F39>
0F3E..0F3F>
0F82..0F84>
0F86..0F87>
0FC6>
1037>
1039..103A>
1087..108D>
108F>
109A..109B>
17C9..17D3>
17DD>
1939..193B>
1A75..1A7C>
1A7F>
1AB0..1ABD>
1B34>
1B44>
1B6B..1B73>
1BAA..1BAB>
1C36..1C37>
1C78..1C7D>
1CD0..1CE8>
1CED>
1CF4>
1CF7..1CF9>
1D2C..1D6A>
1DC4..1DCF>
1DF5..1DF9>
1DFD..1DFF>
1FBD>
1FBF..1FC1>
1FCD..1FCF>
1FDD..1FDF>
1FED..1FEF>
1FFD..1FFE>
2CEF..2CF1>
2E2F>
302A..302F>
3099..309C>
30FC>
A66F>
A67C..A67D>
A67F>
A69C..A69D>
A6F0..A6F1>
A717..A721>
A788>
A7F8..A7F9>
A8C4>
A8E0..A8F1>
A92B..A92E>
A953>
A9B3>
A9C0>
A9E5>
AA7B..AA7D>
AABF..AAC2>
AAF6>
AB5B..AB5F>
ABEC..ABED>
FB1E>
FE20..FE2F>
FF3E>
FF40>
FF70>
FF9E..FF9F>
FFE3>
102E0>
10AE5..10AE6>
10D22..10D27>
10F46..10F50>
110B9..110BA>
11133..11134>
11173>
111C0>
111CA..111CC>
11235..11236>
112E9..112EA>
1133C>
1134D>
11366..1136C>
11370..11374>
11442>
11446>
114C2..114C3>
115BF..115C0>
1163F>
116B6..116B7>
1172B>
11839..1183A>
11A34>
11A47>
11A99>
11C3F>
11D42>
11D44..11D45>
11D97>
16AF0..16AF4>
16F8F..16F9F>
1D167..1D169>
1D16D..1D172>
1D17B..1D182>
1D185..1D18B>
1D1AA..1D1AD>
1E8D0..1E8D6>
1E944..1E946>
1E948..1E94A>
# Latin script "composed" characters that do not decompose further, so decompose them here
# These are from ASCIIFoldingFilter
# Rule: verbatim
00E6>0061 0065
00F0>0064
00F8>006F
00FE>0074 0068
0111>0064
0127>0068
0131>0069
0138>0071
0142>006C
014B>006E
0153>006F 0065
0167>0074
0180>0062
0183>0062
0185>0062
0188>0063
018C>0064
018D>0064
0192>0066
0195>0068 0076
0199>006B
019A>006C
#019B>
019E>006E
#01A3>
01A5>0070
#01A8>
#01AA>
01AB>0074
01AD>0074
01B4>0079
01B6>007A
#01B9>
#01BA>
01BB>0032
01BD>0035
#01BE>
01BF>0077
01C0>007C
01C1>007C 007C
#01C2>
01C3>0021
01DD>0065
# U+01E5 (small g with stroke) folds to lowercase g, like the other small letters here
01E5>0067
021D>007A
0221>0064
0223>006F 0075
0225>007A
0234>006C
0235>006E
0236>0074
0237>006A
0238>0064 0062
0239>0071 0070
023C>0063
023F>0073
0240>007A
#0242>
0247>0065
0249>006A
024B>0071
024D>0072
024F>0079
0250>0061
0251>0061
0252>0061
0253>0062
0254>006F
0255>0063
0256>0064
0257>0064
0258>0065
0259>0061
025A>0061
025B>0065
025C>0065
025D>0065
025E>0065
025F>006A
0260>0067
0261>0067
0262>0047
#0263>
#0264>
0265>0068
0266>0068
#0267>
0268>0069
0269>0069
026A>0049
026B>006C
026C>006C
026D>006C
#026E>
026F>006D
0270>006D
0271>006D
0272>006E
0273>006E
0274>004E
0275>006F
0276>004F 0045
#0277>
#0278>
#0279>
#027A>
#027B>
027C>0072
027D>0072
027E>0072
027F>0072
0280>0052
0281>0052
0282>0073
#0283>
0284>006A
#0285>
#0286>
0287>0074
0288>0074
0289>0075
#028A>
028B>0076
028C>0076
028D>0077
028E>0079
028F>0059
0290>007A
0291>007A
#0292>
#0293>
#0294>
#0295>
#0296>
0297>0043
0298>006F
0299>0042
029A>0065
029B>0047
029C>0048
029D>006A
029E>006B
029F>004C
02A0>0071
#02A1>
#02A2>
02A3>0064 007A
#02A4>
02A5>0064 007A
02A6>0074 0073
#02A7>
02A8>0074 0063
02A9>0066 006E
02AA>006C 0073
02AB>006C 007A
02AC>0077 0077
#02AD>
02AE>0068
02AF>0068
1D00>0041
1D01>0041 0045
1D02>0061 0065
1D03>0042
1D04>0043
1D05>0044
1D06>0044
1D07>0045
1D08>0065
1D09>0069
1D0A>004A
1D0B>004B
1D0C>004C
1D0D>004D
1D0E>004E
1D0F>004F
1D10>004F
1D11>006F
#1D12>
1D13>006F
1D14>006F 0065
1D15>004F 0055
1D16>006F
1D17>006F
1D18>0050
1D19>0052
1D1A>0052
1D1B>0054
1D1C>0055
1D1D>0075
1D1E>0075
1D1F>006D
1D20>0056
1D21>0057
1D22>005A
#1D23>
#1D24>
#1D25>
1D6B>0075 0065
1D6C>0062
1D6D>0064
1D6E>0066
1D6F>006D
1D70>006E
1D71>0070
1D72>0072
1D73>0072
1D74>0073
1D75>0074
1D76>007A
1D77>0067
1D79>0067
1D7A>0074 0068
1D7B>0049
1D7C>0069
1D7D>0070
1D7E>0055
#1D7F>
1D80>0062
1D81>0064
1D82>0066
1D83>0067
1D84>006B
1D85>006C
1D86>006D
1D87>006E
1D88>0070
1D89>0072
1D8A>0073
#1D8B>
1D8C>0076
1D8D>0078
1D8E>007A
1D8F>0061
1D90>0061
1D91>0064
1D92>0065
1D93>0065
1D94>0065
1D95>0061
1D96>0069
1D97>006F
#1D98>
1D99>0075
#1D9A>
1E9C>0073
1E9D>0073
1E9F>0064
1EFB>006C 006C
1EFD>0076
1EFF>0079
214E>0066
#2180>
#2181>
#2182>
2184>0063
#2185>
#2186>
#2187>
#2188>
2C61>006C
2C65>0061
2C66>0074
2C68>0068
2C6A>006B
2C6C>007A
2C71>0076
2C73>0077
2C74>0076
2C76>0068
#2C77>
2C78>0065
#2C79>
2C7A>006F
2C7B>0045
#A723>
#A725>
#A727>
A729>0074 007A
#A72B>
#A72D>
#A72F>
A730>0046
A731>0053
A733>0061 0061
A735>0061 006F
A737>0061 0075
A739>0061 0076
A73B>0061 0076
A73D>0061 0079
A73F>0063
A741>006B
A743>006B
A745>006B
A747>006C
A749>006C
A74B>006F
A74D>006F
A74F>006F 006F
A751>0070
A753>0070
A755>0070
A757>0071
A759>0071
A75B>0072
#A75D>
A75F>0076
A761>0076 0079
A763>007A
A765>0074 0068
A767>0074 0068
A769>0076
#A76B>
#A76D>
#A76F>
#A771>
#A772>
#A773>
#A774>
#A775>
#A776>
#A777>
#A778>
A77A>0064
A77C>0066
A77F>0067
A781>006C
A783>0072
# NOTE(review): A785 is a small letter (insular s) but folds to capital S (0053), while the
# neighbouring insular letters fold to lowercase - confirm against ASCIIFoldingFilter
A785>0053
A787>0074
A78C>0027
A7FB>0046
A7FC>0070
A7FD>004D
A7FE>0049
A7FF>004D
# Cyrillic script "composed" characters that do not decompose further, so decompose them here
# These are from UTR#30 DiacriticFolding.txt
# Rule: verbatim
047D>0461
048B>0439
048F>0440
0491>0433
0493>0433
0495>0433
0497>0436
0499>0437
049B>043A
049D>043A
049F>043A
04A3>043D
04A7>043F
04AB>0441
04AD>0442
04B1>04AF
# U+04B3 (small ha with descender) folds to small ha U+0445, not capital ha U+0425
04B3>0445
# NOTE(review): 04B7, 04BF and 04CC below fold to U+04BC (capital Abkhasian che),
# whereas 04B9 folds to small che U+0447 - confirm these targets against the UTR#30 data
04B7>04BC
04B9>0447
04BF>04BC
04C4>043A
04C6>043B
04C8>043D
04CA>043D
04CC>04BC
04CE>043C
# Additional signs and diacritics, from examination of [:Mark:]&[:Lm:]
# Rule: verbatim
0358..035C>
05A2>
05C5>
05C7>
0610..061A>
0640>
06D6..06DE>
06E1..06E4>
06E7..06E9>
06ED>
0653..0656>
0659..065F>
0670>
0711>
07FA>
0816..0817>
081B..0823>
0825..0827>
0829>
082A..082D>
0900>0901
1714>
1734>
1DC0..1DC3>
1DD0..1DE6>
20D0..20F0>
2DE0..2DFF>
A670..A672>
A802>
10A3F>
11046>
1D165..1D166>
1D242..1D244>
# Additional Arabic/Hebrew decompositions
# Rule: verbatim
05F3>0027
05F4>0022
0629>0647
0649>064A
06A9>0643
06CC>064A