| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // Format of each rule entry in the table: |
| // "pattern" "left context" "right context" "phonetic" |
| // where |
| // pattern is a sequence of characters that might appear in the word to be transliterated |
| // left context is the context that precedes the pattern |
| // right context is the context that follows the pattern |
| // phonetic is the result that this rule generates |
| // |
| // Note that both left context and right context can be regular expressions, |
| // e.g.: a left context of ^ means start of word |
| // a left context of [aeiouy] means following a vowel |
| // a right context of [^aeiouy] means preceding a consonant |
| // a right context of e$ means preceding a final e |
| |
| //GENERIC |
| |
| // CONVERTING FEMININE TO MASCULINE |
| // Every rule in this group has right context "$", so it only applies to a word-final suffix. |
| // Longer suffixes are listed ahead of the shorter suffixes they end in (e.g. "lova" and "kova" before "ova"). |
| // NOTE(review): presumably the first applicable rule is used -- confirm before reordering. |
| "yna" "" "$" "(in[russian]|ina)" |
| "ina" "" "$" "(in[russian]|ina)" |
| "liova" "" "$" "(lova|lof[russian]|lef[russian])" |
| "lova" "" "$" "(lova|lof[russian]|lef[russian]|l[czech]|el[czech])" |
| "kova" "" "$" "(kova|kof[russian]|k[czech]|ek[czech])" |
| "ova" "" "$" "(ova|of[russian]|[czech])" |
| "ová" "" "$" "(ova|[czech])" |
| "eva" "" "$" "(eva|ef[russian])" |
| "aia" "" "$" "(aja|i[russian])" |
| "aja" "" "$" "(aja|i[russian])" |
| "aya" "" "$" "(aja|i[russian])" |
| |
| "lowa" "" "$" "(lova|lof[polish]|l[polish]|el[polish])" |
| "kowa" "" "$" "(kova|kof[polish]|k[polish]|ek[polish])" |
| // NOTE(review): the last alternative below is empty and carries no language tag, unlike the |
| // empty alternatives of "ova" ([czech]) and "owna" ([polish]) -- confirm this is intended. |
| "owa" "" "$" "(ova|of[polish]|)" |
| "lowna" "" "$" "(lovna|levna|l[polish]|el[polish])" |
| "kowna" "" "$" "(kovna|k[polish]|ek[polish])" |
| "owna" "" "$" "(ovna|[polish])" |
| "lówna" "" "$" "(l|el)" // polish |
| "kówna" "" "$" "(k|ek)" // polish |
| "ówna" "" "$" "" // polish |
| "á" "" "$" "(a|i[czech])" |
| "a" "" "$" "(a|i[polish+czech])" |
| |
| // CONSONANTS |
| // Where a pattern appears more than once (e.g. "m", "ex"), the entries differ only in their left/right context. |
| "pf" "" "" "(pf|p|f)" |
| "que" "" "$" "(k[french]|ke|kve)" |
| "qu" "" "" "(kv|k)" |
| |
| "m" "" "[bfpv]" "(m|n)" |
| "m" "[aeiouy]" "[aeiouy]" "m" |
| "m" "[aeiouy]" "" "(m|n[french+portuguese])" // nasal |
| |
| "ly" "" "[au]" "l" |
| "li" "" "[au]" "l" |
| "lio" "" "" "(lo|le[russian])" |
| "lyo" "" "" "(lo|le[russian])" |
| // Disabled Argentinian rule: "ll" "" "" "(l|J[spanish])" |
| "lt" "u" "$" "(lt|[french])" |
| |
| "v" "^" "" "(v|f[german]|b[spanish])" |
| |
| "ex" "" "[aáuiíoóeéêy]" "(ez[portuguese]|eS[portuguese]|eks|egz)" |
| "ex" "" "[cs]" "(e[portuguese]|ek)" |
| "x" "u" "$" "(ks|[french])" |
| |
| "ck" "" "" "(k|tsk[polish+czech])" |
| "cz" "" "" "(tS|tsz[czech])" // Polish |
| |
| //Processing of "h" in various combinations |
| // Digraphs and clusters containing "h" come first; the bare "h" rules are at the end of this group. |
| "rh" "^" "" "r" |
| "dh" "^" "" "d" |
| "bh" "^" "" "b" |
| |
| "ph" "" "" "(ph|f)" |
| "kh" "" "" "(x[russian+english]|kh)" |
| |
| "lh" "" "" "(lh|l[portuguese])" |
| "nh" "" "" "(nh|nj[portuguese])" |
| |
| "ssch" "" "" "S" // german |
| "chsch" "" "" "xS" // german |
| "tsch" "" "" "tS" // german |
| |
| // The three word-initial "des..." rules below are commented out (disabled). |
| ///"desch" "^" "" "deS" |
| ///"desh" "^" "" "(dES|de[french])" |
| ///"des" "^" "[^aeiouy]" "(dEs|de[french])" |
| |
| "sch" "[aeiouy]" "[ei]" "(S|StS[russian]|sk[romanian+italian])" |
| "sch" "[aeiouy]" "" "(S|StS[russian])" |
| "sch" "" "[ei]" "(sk[romanian+italian]|S|StS[russian])" |
| "sch" "" "" "(S|StS[russian])" |
| "ssh" "" "" "S" |
| |
| "sh" "" "[äöü]" "sh" // german |
| "sh" "" "[aeiou]" "(S[russian+english]|sh)" |
| "sh" "" "" "S" |
| |
| "zh" "" "" "(Z[english+russian]|zh|tsh[german])" |
| |
| "chs" "" "" "(ks[german]|xs|tSs[russian+english])" |
| "ch" "" "[ei]" "(x|tS[spanish+english+russian]|k[romanian+italian]|S[portuguese+french])" |
| "ch" "" "" "(x|tS[spanish+english+russian]|S[portuguese+french])" |
| |
| "th" "^" "" "t" // english+german+greeklatin |
| "th" "" "[äöüaeiou]" "(t[english+german+greeklatin]|th)" |
| "th" "" "" "t" // english+german+greeklatin |
| |
| "gh" "" "[ei]" "(g[romanian+italian+greeklatin]|gh)" |
| |
| "ouh" "" "[aioe]" "(v[french]|uh)" |
| "uh" "" "[aioe]" "(v|uh)" |
| "h" "." "$" "" // match h at the end of words, but not as a single-letter word; this differs from the original version |
| "h" "[aeiouyäöü]" "" "" // german |
| "h" "^" "" "(h|x[romanian+greeklatin]|H[english+romanian+polish+french+portuguese+italian+spanish])" |
| |
| //Processing of "ci" "ce" & "cy" |
| // The "ci" + vowel entries give a [polish]-tagged "tS..." reading first and an untagged "ts..." reading second. |
| "cia" "" "" "(tSa[polish]|tsa)" // Polish |
| // NOTE(review): the entry below is the only one in this group whose first alternative has no [polish] tag |
| // (compare "cię" before [bp], and the matching "sią"/"zią" entries) -- confirm whether that is intended. |
| "cią" "" "[bp]" "(tSom|tsom)" // Polish |
| "cią" "" "" "(tSon[polish]|tson)" // Polish |
| "cię" "" "[bp]" "(tSem[polish]|tsem)" // Polish |
| "cię" "" "" "(tSen[polish]|tsen)" // Polish |
| "cie" "" "" "(tSe[polish]|tse)" // Polish |
| "cio" "" "" "(tSo[polish]|tso)" // Polish |
| "ciu" "" "" "(tSu[polish]|tsu)" // Polish |
| |
| "sci" "" "$" "(Si[italian]|stsi[polish+czech]|dZi[turkish]|tSi[polish+romanian]|tS[romanian]|si)" |
| "sc" "" "[ei]" "(S[italian]|sts[polish+czech]|dZ[turkish]|tS[polish+romanian]|s)" |
| "ci" "" "$" "(tsi[polish+czech]|dZi[turkish]|tSi[polish+romanian]|tS[romanian]|si)" |
| "cy" "" "" "(si|tsi[polish])" |
| "c" "" "[ei]" "(ts[polish+czech]|dZ[turkish]|tS[polish+romanian]|k[greeklatin]|s)" |
| |
| //Processing of "s" |
| // Entries for the same pattern run from the most constrained context to the unconstrained one |
| // (e.g. "sz" at word start, at word end, then anywhere). |
| "sç" "" "[aeiou]" "(s|stS[turkish])" |
| "ssz" "" "" "S" // polish |
| "sz" "^" "" "(S|s[hungarian])" // polish |
| "sz" "" "$" "(S|s[hungarian])" // polish |
| "sz" "" "" "(S|s[hungarian]|sts[german])" // polish |
| "ssp" "" "" "(Sp[german]|sp)" |
| "sp" "" "" "(Sp[german]|sp)" |
| "sst" "" "" "(St[german]|st)" |
| "st" "" "" "(St[german]|st)" |
| "ss" "" "" "s" |
| "sj" "^" "" "S" // dutch |
| "sj" "" "$" "S" // dutch |
| "sj" "" "" "(sj|S[dutch]|sx[spanish]|sZ[romanian+turkish])" |
| |
| "sia" "" "" "(Sa[polish]|sa[polish]|sja)" |
| "sią" "" "[bp]" "(Som[polish]|som)" // polish |
| "sią" "" "" "(Son[polish]|son)" // polish |
| "się" "" "[bp]" "(Sem[polish]|sem)" // polish |
| "się" "" "" "(Sen[polish]|sen)" // polish |
| "sie" "" "" "(se|sje|Se[polish]|zi[german])" |
| |
| "sio" "" "" "(So[polish]|so)" |
| "siu" "" "" "(Su[polish]|sju)" |
| |
| // Single "s" / "si": the output depends on whether a vowel precedes and which letter follows. |
| "si" "[äöëaáuiíoóeéêy]" "" "(Si[polish]|si|zi[portuguese+french+italian+german])" |
| "si" "" "" "(Si[polish]|si|zi[german])" |
| "s" "[aáuiíoóeéêy]" "[aáuíoóeéêy]" "(s|z[portuguese+french+italian+german])" |
| "s" "" "[aeouäöë]" "(s|z[german])" |
| "s" "[aeiouy]" "[dglmnrv]" "(s|z|Z[portuguese]|[french])" // e.g. "Groslot" |
| "s" "" "[dglmnrv]" "(s|z|Z[portuguese])" |
| |
| //Processing of "g" |
| "gue" "" "$" "(k[french]|gve)" // portuguese+spanish |
| "gu" "" "[ei]" "(g[french]|gv[portuguese+spanish])" // portuguese+spanish |
| "gu" "" "[ao]" "gv" // portuguese+spanish |
| "guy" "" "" "gi" // french |
| |
| "gli" "" "" "(glI|l[italian])" |
| "gni" "" "" "(gnI|ni[italian+french])" |
| "gn" "" "[aeou]" "(n[italian+french]|nj[italian+french]|gn)" |
| |
| // NOTE(review): "ggi" with right context "[aou]" appears three times in this group: just below with an |
| // empty left context, then with left context "[yaeiou]", then again with an empty left context. |
| // The first and last of these have identical pattern and contexts but different output; if the first |
| // applicable rule wins, the two later entries can never fire -- confirm which output is intended. |
| "ggie" "" "" "(je[greeklatin]|dZe)" // dZ is Italian |
| "ggi" "" "[aou]" "(j[greeklatin]|dZ)" // dZ is Italian |
| |
| "ggi" "[yaeiou]" "[aou]" "(gI|dZ[italian]|j[greeklatin])" |
| "gge" "[yaeiou]" "" "(gE|xe[spanish]|gZe[portuguese+french]|dZe[english+romanian+italian+spanish]|je[greeklatin])" |
| "ggi" "[yaeiou]" "" "(gI|xi[spanish]|gZi[portuguese+french]|dZi[english+romanian+italian+spanish]|i[greeklatin])" |
| "ggi" "" "[aou]" "(gI|dZ[italian]|j[greeklatin])" |
| |
| "gie" "" "$" "(ge|gi[german]|ji[french]|dZe[italian])" |
| "gie" "" "" "(ge|gi[german]|dZe[italian]|je[greeklatin])" |
| "gi" "" "[aou]" "(i[greeklatin]|dZ)" // dZ is Italian |
| |
| // "ge"/"gi": the entries with a preceding vowel come before the entries with no left context. |
| "ge" "[yaeiou]" "" "(gE|xe[spanish]|Ze[portuguese+french]|dZe[english+romanian+italian+spanish])" |
| "gi" "[yaeiou]" "" "(gI|xi[spanish]|Zi[portuguese+french]|dZi[english+romanian+italian+spanish])" |
| "ge" "" "" "(gE|xe[spanish]|hE[russian]|je[greeklatin]|Ze[portuguese+french]|dZe[english+romanian+italian+spanish])" |
| "gi" "" "" "(gI|xi[spanish]|hI[russian]|i[greeklatin]|Zi[portuguese+french]|dZi[english+romanian+italian+spanish])" |
| "gy" "" "[aeouáéóúüöőű]" "(gi|dj[hungarian])" |
| "gy" "" "" "(gi|d[hungarian])" |
| "g" "[yaeiou]" "[aouyei]" "g" |
| "g" "" "[aouei]" "(g|h[russian])" |
| |
| //Processing of "j" |
| // "ij" anywhere, and "j" when followed by one of the vowels a, o, e, i, u, y. |
| "ij" "" "" "(i|ej[dutch]|ix[spanish]|iZ[french+romanian+turkish+portuguese])" |
| "j" "" "[aoeiuy]" "(j|dZ[english]|x[spanish]|Z[french+romanian+turkish+portuguese])" |
| |
| //Processing of "z" |
| // Clusters "rz" and "tz" first, then "zi" + vowel, then single "z" by position / following consonant. |
| "rz" "t" "" "(S[polish]|r)" // polish |
| "rz" "" "" "(rz|rts[german]|Z[polish]|r[polish]|rZ[polish])" |
| |
| "tz" "" "$" "(ts|tS[english+german])" |
| "tz" "^" "" "(ts[english+german+russian]|tS[english+german])" |
| "tz" "" "" "(ts[english+german+russian]|tz)" |
| |
| // "zia"/"zie" each have a variant restricted to a following consonant from the listed class, then an unrestricted one. |
| "zia" "" "[bcdgkpstwzż]" "(Za[polish]|za[polish]|zja)" |
| "zia" "" "" "(Za[polish]|zja)" |
| "zią" "" "[bp]" "(Zom[polish]|zom)" // polish |
| "zią" "" "" "(Zon[polish]|zon)" // polish |
| "zię" "" "[bp]" "(Zem[polish]|zem)" // polish |
| "zię" "" "" "(Zen[polish]|zen)" // polish |
| "zie" "" "[bcdgkpstwzż]" "(Ze[polish]|ze[polish]|ze|tsi[german])" |
| "zie" "" "" "(ze|Ze[polish]|tsi[german])" |
| "zio" "" "" "(Zo[polish]|zo)" |
| "ziu" "" "" "(Zu[polish]|zju)" |
| "zi" "" "" "(Zi[polish]|zi|tsi[german]|dzi[italian]|tsi[italian]|si[spanish])" |
| |
| "z" "" "$" "(s|ts[german]|ts[italian]|S[portuguese])" // ts It, s/S/Z Port, s in Sp, z Fr |
| "z" "" "[bdgv]" "(z|dz[italian]|Z[portuguese])" // dz It, Z/z Port, z Sp & Fr |
| "z" "" "[ptckf]" "(s|ts[italian]|S[portuguese])" // ts It, s/S/z Port, z/s Sp |
| |
| // VOWELS |
| // Three-letter vowel sequences first, then two-letter sequences, then context-dependent single vowels. |
| "aue" "" "" "aue" |
| "oue" "" "" "(oue|ve[french])" |
| "eau" "" "" "o" // French |
| |
| "ae" "" "" "(Y[german]|aje[russian]|ae)" |
| "ai" "" "" "aj" |
| "au" "" "" "(au|o[french])" |
| "ay" "" "" "aj" |
| "ão" "" "" "(au|an)" // Port |
| "ãe" "" "" "(aj|an)" // Port |
| "ãi" "" "" "(aj|an)" // Port |
| "ea" "" "" "(ea|ja[romanian])" |
| "ee" "" "" "(i[english]|aje[russian]|e)" |
| "ei" "" "" "(aj|ej)" |
| "eu" "" "" "(eu|Yj[german]|ej[german]|oj[german]|Y[dutch])" |
| "ey" "" "" "(aj|ej)" |
| "ia" "" "" "ja" |
| "ie" "" "" "(i[german]|e[polish]|ije[russian]|Q[dutch]|je)" |
| "ii" "" "$" "i" // russian |
| "io" "" "" "(jo|e[russian])" |
| "iu" "" "" "ju" |
| "iy" "" "$" "i" // russian |
| "oe" "" "" "(Y[german]|oje[russian]|u[dutch]|oe)" |
| "oi" "" "" "oj" |
| "oo" "" "" "(u[english]|o)" |
| "ou" "" "" "(ou|u[french+greeklatin]|au[dutch])" |
| "où" "" "" "u" // french |
| "oy" "" "" "oj" |
| "õe" "" "" "(oj|on)" // Port |
| "ua" "" "" "va" |
| "ue" "" "" "(Q[german]|uje[russian]|ve)" |
| "ui" "" "" "(uj|vi|Y[dutch])" |
| "uu" "" "" "(u|Q[dutch])" |
| "uo" "" "" "(vo|o)" |
| "uy" "" "" "uj" |
| "ya" "" "" "ja" |
| "ye" "" "" "(je|ije[russian])" |
| "yi" "^" "" "i" |
| "yi" "" "$" "i" // russian |
| "yo" "" "" "(jo|e[russian])" |
| "yu" "" "" "ju" |
| "yy" "" "$" "i" // russian |
| |
| // "i"/"y" directly after one of the accented vowels á, ó, é, ê become "j". |
| "i" "[áóéê]" "" "j" |
| "y" "[áóéê]" "" "j" |
| |
| // "e" at the start of a word and "e" at the end of a word. |
| "e" "^" "" "(e|je[russian])" |
| "e" "" "$" "(e|EE[english+french])" |
| |
| // LANGUAGE SPECIFIC CHARACTERS |
| // One entry per accented/special character; only "ą" and "ę" have an extra variant for a following b or p. |
| "ą" "" "[bp]" "om" // polish |
| "ą" "" "" "on" // polish |
| "ä" "" "" "(Y|e)" |
| "á" "" "" "a" // Port & Sp |
| "à" "" "" "a" |
| "â" "" "" "a" |
| "ã" "" "" "(a|an)" // Port |
| "ă" "" "" "(e[romanian]|a)" // romanian |
| "č" "" "" "tS" // czech |
| "ć" "" "" "(tS[polish]|ts)" // polish |
| "ç" "" "" "(s|tS[turkish])" |
| "ď" "" "" "(d|dj[czech])" |
| "ę" "" "[bp]" "em" // polish |
| "ę" "" "" "en" // polish |
| "é" "" "" "e" |
| "è" "" "" "e" |
| "ê" "" "" "e" |
| "ě" "" "" "(e|je[czech])" |
| "ğ" "" "" "" // turkish |
| "í" "" "" "i" |
| "î" "" "" "i" |
| "ı" "" "" "(i|e[turkish]|[turkish])" |
| "ł" "" "" "l" |
| "ń" "" "" "(n|nj[polish])" // polish |
| "ñ" "" "" "(n|nj[spanish])" |
| "ó" "" "" "(u[polish]|o)" |
| "ô" "" "" "o" // Port & Fr |
| "õ" "" "" "(o|on[portuguese]|Y[hungarian])" |
| "ò" "" "" "o" // Sp & It |
| "ö" "" "" "Y" |
| "ř" "" "" "(r|rZ[czech])" |
| "ś" "" "" "(S[polish]|s)" |
| "ş" "" "" "S" // romanian+turkish |
| "š" "" "" "S" // czech |
| "ţ" "" "" "ts" // romanian |
| "ť" "" "" "(t|tj[czech])" |
| "ű" "" "" "Q" // hungarian |
| "ü" "" "" "(Q|u[portuguese+spanish])" |
| "ú" "" "" "u" |
| "ů" "" "" "u" // czech |
| "ù" "" "" "u" // french |
| "ý" "" "" "i" // czech |
| "ż" "" "" "Z" // polish |
| "ź" "" "" "(Z[polish]|z)" |
| |
| "ß" "" "" "s" // german |
| // The apostrophe and the double quote map to an empty phonetic string. |
| "'" "" "" "" // russian |
| "\"" "" "" "" // russian |
| |
| // "o" followed by one of the listed consonants. |
| "o" "" "[bcćdgklłmnńrsśtwzźż]" "(O|P[polish])" |
| |
| // LATIN ALPHABET |
| // Single-letter rules with empty left and right context, one per letter a-z. |
| "a" "" "" "A" |
| "b" "" "" "B" |
| "c" "" "" "(k|ts[polish+czech]|dZ[turkish])" |
| "d" "" "" "d" |
| "e" "" "" "E" |
| "f" "" "" "f" |
| // Dutch sound disabled: "g" "" "" "(g|x[dutch])" |
| "g" "" "" "g" |
| "h" "" "" "(h|x[romanian]|H[french+portuguese+italian+spanish])" |
| "i" "" "" "I" |
| "j" "" "" "(j|x[spanish]|Z[french+romanian+turkish+portuguese])" |
| "k" "" "" "k" |
| "l" "" "" "l" |
| "m" "" "" "m" |
| "n" "" "" "n" |
| "o" "" "" "O" |
| "p" "" "" "p" |
| "q" "" "" "k" |
| "r" "" "" "r" |
| "s" "" "" "(s|S[portuguese])" |
| "t" "" "" "t" |
| "u" "" "" "U" |
| "v" "" "" "V" |
| "w" "" "" "(v|w[english+dutch])" |
| "x" "" "" "(ks|gz|S[portuguese+spanish])" // S/ks Port & Sp, gz Sp, It only ks |
| "y" "" "" "i" |
| "z" "" "" "(z|ts[german]|dz[italian]|ts[italian]|s[spanish])" // ts/dz It, z Port & Fr, z/s Sp |