| # |
| # Copyright (C) 2002-2009, International Business Machines Corporation and others. |
| # All Rights Reserved. |
| # |
| # file: char.txt |
| # |
| # ICU Character Break Rules, also known as Grapheme Cluster Boundaries |
| # See Unicode Standard Annex #29. |
| # These rules are based on TR29 Revision 13, for Unicode Version 5.1, |
| # with additional per-script rules that keep Indic letter + virama sequences together. |
| # |
| |
| # |
| # Character Class Definitions. |
| # |
| # Classes taken directly from the Grapheme_Cluster_Break property values. |
| $CR = [\p{Grapheme_Cluster_Break = CR}]; |
| $LF = [\p{Grapheme_Cluster_Break = LF}]; |
| $Control = [\p{Grapheme_Cluster_Break = Control}]; |
| $Prepend = [\p{Grapheme_Cluster_Break = Prepend}]; |
| $Extend = [\p{Grapheme_Cluster_Break = Extend}]; |
| $SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}]; |
| # |
| # Per-script classes used by the letter + virama rules in the forward and |
| # reverse sections. Unlike the classes above, these are written as explicit |
| # code point ranges rather than property lookups. Each $<Script>Letter set is |
| # paired with that script's virama, which is a single code point. |
| # NOTE(review): the letter ranges are contiguous spans and may include code |
| # points that are unassigned in the script block -- confirm against the |
| # Unicode code charts before editing them. |
| # |
| $BengaliLetter = [\u0985-\u09B9 \u09CE \u09DC-\u09E1 \u09F0-\u09F1]; |
| $BengaliSignVirama = \u09CD; |
| $GujaratiLetter = [\u0A85-\u0A8C \u0A8F-\u0A90 \u0A93-\u0AB9 \u0AE0-\u0AE1]; |
| $GujaratiSignVirama = \u0ACD; |
| $DevanagariLetter = [\u0904-\u0939 \u0958-\u0961 \u0972-\u097F]; |
| $DevanagariSignVirama = \u094D; |
| $KannadaLetter = [\u0C85-\u0CB9 \u0CDE-\u0CE1]; |
| $KannadaSignVirama = \u0CCD; |
| $MalayalamLetter = [\u0D05-\u0D39 \u0D60-\u0D61 \u0D7A-\u0D7F]; |
| $MalayalamSignVirama = \u0D4D; |
| $OriyaLetter = [\u0B05-\u0B39 \u0B5C-\u0B61 \u0B71]; |
| $OriyaSignVirama = \u0B4D; |
| $GurmukhiLetter = [\u0A05-\u0A39 \u0A59-\u0A5E]; |
| $GurmukhiSignVirama = \u0A4D; |
| $TamilLetter = [\u0B85-\u0BB9]; |
| $TamilSignVirama = \u0BCD; |
| $TeluguLetter = [\u0C05-\u0C39 \u0C58-\u0C61]; |
| $TeluguSignVirama = \u0C4D; |
| |
| # |
| # Korean Syllable Definitions |
| # |
| # Hangul classes, each taken from the Grapheme_Cluster_Break property. |
| # Per UAX #29: L = leading consonant jamo, V = vowel jamo, |
| # T = trailing consonant jamo. |
| $L = [\p{Grapheme_Cluster_Break = L}]; |
| $V = [\p{Grapheme_Cluster_Break = V}]; |
| $T = [\p{Grapheme_Cluster_Break = T}]; |
| |
| # Precomposed syllables: LV = leading consonant + vowel, |
| # LVT = leading consonant + vowel + trailing consonant (per UAX #29). |
| $LV = [\p{Grapheme_Cluster_Break = LV}]; |
| $LVT = [\p{Grapheme_Cluster_Break = LVT}]; |
| |
| |
| ## ------------------------------------------------- |
| # Enable rule chaining: a match may continue from the last character of one |
| # rule into the start of another, so the short rules below combine to cover |
| # clusters of any length. |
| !!chain; |
| |
| # Rules used when iterating forward through the text. Each rule lists |
| # characters that must stay together; a boundary falls wherever no rule |
| # (or chain of rules) spans the position. |
| !!forward; |
| |
| # Keep CR LF together as a single cluster (UAX #29 rule GB3). |
| $CR $LF; |
| |
| # Per-script extension: a letter followed by one or more groups of |
| # (virama, optional letter) from the same script is kept as one cluster. |
| $BengaliLetter ($BengaliSignVirama $BengaliLetter?)+; |
| $GujaratiLetter ($GujaratiSignVirama $GujaratiLetter?)+; |
| $DevanagariLetter ($DevanagariSignVirama $DevanagariLetter?)+; |
| $KannadaLetter ($KannadaSignVirama $KannadaLetter?)+; |
| $MalayalamLetter ($MalayalamSignVirama $MalayalamLetter?)+; |
| $OriyaLetter ($OriyaSignVirama $OriyaLetter?)+; |
| $GurmukhiLetter ($GurmukhiSignVirama $GurmukhiLetter?)+; |
| $TamilLetter ($TamilSignVirama $TamilLetter?)+; |
| $TeluguLetter ($TeluguSignVirama $TeluguLetter?)+; |
| |
| # Do not break inside Hangul syllable sequences (UAX #29 rules GB6, GB7, GB8). |
| $L ($L | $V | $LV | $LVT); |
| ($LV | $V) ($V | $T); |
| ($LVT | $T) $T; |
| |
| # Do not break before an Extend character (GB9). The preceding character may |
| # be anything except Control, CR or LF. |
| [^$Control $CR $LF] $Extend; |
| |
| # Do not break before a SpacingMark (GB9a) or after a Prepend character (GB9b), |
| # again excluding Control, CR and LF on the other side. |
| [^$Control $CR $LF] $SpacingMark; |
| $Prepend [^$Control $CR $LF]; |
| |
| |
| ## ------------------------------------------------- |
| |
| # Rules used when iterating backward through the text. Each rule is the |
| # matching forward rule with its elements written in reverse order, because |
| # the characters are seen last-to-first. |
| !!reverse; |
| # Mirror of the forward CR LF rule. |
| $LF $CR; |
| # Mirrors of the per-script letter + virama rules: one or more groups of |
| # (optional letter, virama), then the letter that starts the cluster. |
| ($BengaliLetter? $BengaliSignVirama)+ $BengaliLetter; |
| ($GujaratiLetter? $GujaratiSignVirama)+ $GujaratiLetter; |
| ($DevanagariLetter? $DevanagariSignVirama)+ $DevanagariLetter; |
| ($KannadaLetter? $KannadaSignVirama)+ $KannadaLetter; |
| ($MalayalamLetter? $MalayalamSignVirama)+ $MalayalamLetter; |
| ($OriyaLetter? $OriyaSignVirama)+ $OriyaLetter; |
| ($GurmukhiLetter? $GurmukhiSignVirama)+ $GurmukhiLetter; |
| ($TamilLetter? $TamilSignVirama)+ $TamilLetter; |
| ($TeluguLetter? $TeluguSignVirama)+ $TeluguLetter; |
| # Mirrors of the three forward Hangul syllable rules. |
| ($L | $V | $LV | $LVT) $L; |
| ($V | $T) ($LV | $V); |
| $T ($LVT | $T); |
| |
| # Mirrors of the forward Extend, SpacingMark and Prepend rules. |
| $Extend [^$Control $CR $LF]; |
| $SpacingMark [^$Control $CR $LF]; |
| [^$Control $CR $LF] $Prepend; |
| |
| |
| ## ------------------------------------------------- |
| |
| # Safe-reverse section: intentionally contains no rules. |
| # NOTE(review): how the rule compiler handles an empty safe section depends on |
| # the ICU version in use -- confirm before relying on random-access iteration. |
| !!safe_reverse; |
| |
| |
| ## ------------------------------------------------- |
| |
| # Safe-forward section: intentionally contains no rules (see review note on |
| # version-dependent handling of empty safe sections). |
| !!safe_forward; |
| |