| /************************************************************** |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * |
| *************************************************************/ |
| |
| |
| |
| // MARKER(update_precomp.py): autogen include statement, do not remove |
| #include "precompiled_i18npool.hxx" |
| |
| // prevent internal compiler error with MSVC6SP3 |
| #include <utility> |
| |
| #define TRANSLITERATION_ProlongedSoundMark_ja_JP |
| #include <transliteration_Ignore.hxx> |
| |
| using namespace com::sun::star::uno; |
| using namespace com::sun::star::lang; |
| using namespace rtl; |
| |
| namespace com { namespace sun { namespace star { namespace i18n { |
| |
| static sal_Unicode table_normalwidth[] = { |
| // 0x0000, // 0x3040 |
| 0x3041, // 0x3041 HIRAGANA LETTER SMALL A |
| 0x3042, // 0x3042 HIRAGANA LETTER A |
| 0x3043, // 0x3043 HIRAGANA LETTER SMALL I |
| 0x3044, // 0x3044 HIRAGANA LETTER I |
| 0x3045, // 0x3045 HIRAGANA LETTER SMALL U |
| 0x3046, // 0x3046 HIRAGANA LETTER U |
| 0x3047, // 0x3047 HIRAGANA LETTER SMALL E |
| 0x3048, // 0x3048 HIRAGANA LETTER E |
| 0x3049, // 0x3049 HIRAGANA LETTER SMALL O |
| 0x304a, // 0x304a HIRAGANA LETTER O |
| 0x3042, // 0x304b HIRAGANA LETTER KA |
| 0x3042, // 0x304c HIRAGANA LETTER GA |
| 0x3044, // 0x304d HIRAGANA LETTER KI |
| 0x3044, // 0x304e HIRAGANA LETTER GI |
| 0x3046, // 0x304f HIRAGANA LETTER KU |
| 0x3046, // 0x3050 HIRAGANA LETTER GU |
| 0x3048, // 0x3051 HIRAGANA LETTER KE |
| 0x3048, // 0x3052 HIRAGANA LETTER GE |
| 0x304a, // 0x3053 HIRAGANA LETTER KO |
| 0x304a, // 0x3054 HIRAGANA LETTER GO |
| 0x3042, // 0x3055 HIRAGANA LETTER SA |
| 0x3042, // 0x3056 HIRAGANA LETTER ZA |
| 0x3044, // 0x3057 HIRAGANA LETTER SI |
| 0x3044, // 0x3058 HIRAGANA LETTER ZI |
| 0x3046, // 0x3059 HIRAGANA LETTER SU |
| 0x3046, // 0x305a HIRAGANA LETTER ZU |
| 0x3048, // 0x305b HIRAGANA LETTER SE |
| 0x3048, // 0x305c HIRAGANA LETTER ZE |
| 0x304a, // 0x305d HIRAGANA LETTER SO |
| 0x304a, // 0x305e HIRAGANA LETTER ZO |
| 0x3042, // 0x305f HIRAGANA LETTER TA |
| 0x3042, // 0x3060 HIRAGANA LETTER DA |
| 0x3044, // 0x3061 HIRAGANA LETTER TI |
| 0x3044, // 0x3062 HIRAGANA LETTER DI |
| 0x3045, // 0x3063 HIRAGANA LETTER SMALL TU |
| 0x3046, // 0x3064 HIRAGANA LETTER TU |
| 0x3046, // 0x3065 HIRAGANA LETTER DU |
| 0x3048, // 0x3066 HIRAGANA LETTER TE |
| 0x3048, // 0x3067 HIRAGANA LETTER DE |
| 0x304a, // 0x3068 HIRAGANA LETTER TO |
| 0x304a, // 0x3069 HIRAGANA LETTER DO |
| 0x3042, // 0x306a HIRAGANA LETTER NA |
| 0x3044, // 0x306b HIRAGANA LETTER NI |
| 0x3046, // 0x306c HIRAGANA LETTER NU |
| 0x3048, // 0x306d HIRAGANA LETTER NE |
| 0x304a, // 0x306e HIRAGANA LETTER NO |
| 0x3042, // 0x306f HIRAGANA LETTER HA |
| 0x3042, // 0x3070 HIRAGANA LETTER BA |
| 0x3042, // 0x3071 HIRAGANA LETTER PA |
| 0x3044, // 0x3072 HIRAGANA LETTER HI |
| 0x3044, // 0x3073 HIRAGANA LETTER BI |
| 0x3044, // 0x3074 HIRAGANA LETTER PI |
| 0x3046, // 0x3075 HIRAGANA LETTER HU |
| 0x3046, // 0x3076 HIRAGANA LETTER BU |
| 0x3046, // 0x3077 HIRAGANA LETTER PU |
| 0x3048, // 0x3078 HIRAGANA LETTER HE |
| 0x3048, // 0x3079 HIRAGANA LETTER BE |
| 0x3048, // 0x307a HIRAGANA LETTER PE |
| 0x304a, // 0x307b HIRAGANA LETTER HO |
| 0x304a, // 0x307c HIRAGANA LETTER BO |
| 0x304a, // 0x307d HIRAGANA LETTER PO |
| 0x3042, // 0x307e HIRAGANA LETTER MA |
| 0x3044, // 0x307f HIRAGANA LETTER MI |
| 0x3046, // 0x3080 HIRAGANA LETTER MU |
| 0x3048, // 0x3081 HIRAGANA LETTER ME |
| 0x304a, // 0x3082 HIRAGANA LETTER MO |
| 0x3041, // 0x3083 HIRAGANA LETTER SMALL YA |
| 0x3042, // 0x3084 HIRAGANA LETTER YA |
| 0x3045, // 0x3085 HIRAGANA LETTER SMALL YU |
| 0x3046, // 0x3086 HIRAGANA LETTER YU |
| 0x3049, // 0x3087 HIRAGANA LETTER SMALL YO |
| 0x304a, // 0x3088 HIRAGANA LETTER YO |
| 0x3042, // 0x3089 HIRAGANA LETTER RA |
| 0x3044, // 0x308a HIRAGANA LETTER RI |
| 0x3046, // 0x308b HIRAGANA LETTER RU |
| 0x3048, // 0x308c HIRAGANA LETTER RE |
| 0x304a, // 0x308d HIRAGANA LETTER RO |
| 0x3041, // 0x308e HIRAGANA LETTER SMALL WA |
| 0x3042, // 0x308f HIRAGANA LETTER WA |
| 0x3044, // 0x3090 HIRAGANA LETTER WI |
| 0x3048, // 0x3091 HIRAGANA LETTER WE |
| 0x304a, // 0x3092 HIRAGANA LETTER WO |
| 0x0000, // 0x3093 HIRAGANA LETTER N |
| 0x3046, // 0x3094 HIRAGANA LETTER VU |
| 0x0000, // 0x3095 |
| 0x0000, // 0x3096 |
| 0x0000, // 0x3097 |
| 0x0000, // 0x3098 |
| 0x0000, // 0x3099 COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK |
| 0x0000, // 0x309a COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK |
| 0x0000, // 0x309b KATAKANA-HIRAGANA VOICED SOUND MARK |
| 0x0000, // 0x309c KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK |
| 0x0000, // 0x309d HIRAGANA ITERATION MARK |
| 0x0000, // 0x309e HIRAGANA VOICED ITERATION MARK |
| 0x0000, // 0x309f |
| 0x0000, // 0x30a0 |
| 0x30a1, // 0x30a1 KATAKANA LETTER SMALL A |
| 0x30a2, // 0x30a2 KATAKANA LETTER A |
| 0x30a3, // 0x30a3 KATAKANA LETTER SMALL I |
| 0x30a4, // 0x30a4 KATAKANA LETTER I |
| 0x30a5, // 0x30a5 KATAKANA LETTER SMALL U |
| 0x30a6, // 0x30a6 KATAKANA LETTER U |
| 0x30a7, // 0x30a7 KATAKANA LETTER SMALL E |
| 0x30a8, // 0x30a8 KATAKANA LETTER E |
| 0x30a9, // 0x30a9 KATAKANA LETTER SMALL O |
| 0x30aa, // 0x30aa KATAKANA LETTER O |
| 0x30a2, // 0x30ab KATAKANA LETTER KA |
| 0x30a2, // 0x30ac KATAKANA LETTER GA |
| 0x30a4, // 0x30ad KATAKANA LETTER KI |
| 0x30a4, // 0x30ae KATAKANA LETTER GI |
| 0x30a6, // 0x30af KATAKANA LETTER KU |
| 0x30a6, // 0x30b0 KATAKANA LETTER GU |
| 0x30a8, // 0x30b1 KATAKANA LETTER KE |
| 0x30a8, // 0x30b2 KATAKANA LETTER GE |
| 0x30aa, // 0x30b3 KATAKANA LETTER KO |
| 0x30aa, // 0x30b4 KATAKANA LETTER GO |
| 0x30a2, // 0x30b5 KATAKANA LETTER SA |
| 0x30a2, // 0x30b6 KATAKANA LETTER ZA |
| 0x30a4, // 0x30b7 KATAKANA LETTER SI |
| 0x30a4, // 0x30b8 KATAKANA LETTER ZI |
| 0x30a6, // 0x30b9 KATAKANA LETTER SU |
| 0x30a6, // 0x30ba KATAKANA LETTER ZU |
| 0x30a8, // 0x30bb KATAKANA LETTER SE |
| 0x30a8, // 0x30bc KATAKANA LETTER ZE |
| 0x30aa, // 0x30bd KATAKANA LETTER SO |
| 0x30aa, // 0x30be KATAKANA LETTER ZO |
| 0x30a2, // 0x30bf KATAKANA LETTER TA |
| 0x30a2, // 0x30c0 KATAKANA LETTER DA |
| 0x30a4, // 0x30c1 KATAKANA LETTER TI |
| 0x30a4, // 0x30c2 KATAKANA LETTER DI |
| 0x30a5, // 0x30c3 KATAKANA LETTER SMALL TU |
| 0x30a6, // 0x30c4 KATAKANA LETTER TU |
| 0x30a6, // 0x30c5 KATAKANA LETTER DU |
| 0x30a8, // 0x30c6 KATAKANA LETTER TE |
| 0x30a8, // 0x30c7 KATAKANA LETTER DE |
| 0x30aa, // 0x30c8 KATAKANA LETTER TO |
| 0x30aa, // 0x30c9 KATAKANA LETTER DO |
| 0x30a2, // 0x30ca KATAKANA LETTER NA |
| 0x30a4, // 0x30cb KATAKANA LETTER NI |
| 0x30a6, // 0x30cc KATAKANA LETTER NU |
| 0x30a8, // 0x30cd KATAKANA LETTER NE |
| 0x30aa, // 0x30ce KATAKANA LETTER NO |
| 0x30a2, // 0x30cf KATAKANA LETTER HA |
| 0x30a2, // 0x30d0 KATAKANA LETTER BA |
| 0x30a2, // 0x30d1 KATAKANA LETTER PA |
| 0x30a4, // 0x30d2 KATAKANA LETTER HI |
| 0x30a4, // 0x30d3 KATAKANA LETTER BI |
| 0x30a4, // 0x30d4 KATAKANA LETTER PI |
| 0x30a6, // 0x30d5 KATAKANA LETTER HU |
| 0x30a6, // 0x30d6 KATAKANA LETTER BU |
| 0x30a6, // 0x30d7 KATAKANA LETTER PU |
| 0x30a8, // 0x30d8 KATAKANA LETTER HE |
| 0x30a8, // 0x30d9 KATAKANA LETTER BE |
| 0x30a8, // 0x30da KATAKANA LETTER PE |
| 0x30aa, // 0x30db KATAKANA LETTER HO |
| 0x30aa, // 0x30dc KATAKANA LETTER BO |
| 0x30aa, // 0x30dd KATAKANA LETTER PO |
| 0x30a2, // 0x30de KATAKANA LETTER MA |
| 0x30a4, // 0x30df KATAKANA LETTER MI |
| 0x30a6, // 0x30e0 KATAKANA LETTER MU |
| 0x30a8, // 0x30e1 KATAKANA LETTER ME |
| 0x30aa, // 0x30e2 KATAKANA LETTER MO |
| 0x30a1, // 0x30e3 KATAKANA LETTER SMALL YA |
| 0x30a2, // 0x30e4 KATAKANA LETTER YA |
| 0x30a5, // 0x30e5 KATAKANA LETTER SMALL YU |
| 0x30a6, // 0x30e6 KATAKANA LETTER YU |
| 0x30a9, // 0x30e7 KATAKANA LETTER SMALL YO |
| 0x30aa, // 0x30e8 KATAKANA LETTER YO |
| 0x30a2, // 0x30e9 KATAKANA LETTER RA |
| 0x30a4, // 0x30ea KATAKANA LETTER RI |
| 0x30a6, // 0x30eb KATAKANA LETTER RU |
| 0x30a8, // 0x30ec KATAKANA LETTER RE |
| 0x30aa, // 0x30ed KATAKANA LETTER RO |
| 0x30a1, // 0x30ee KATAKANA LETTER SMALL WA |
| 0x30a2, // 0x30ef KATAKANA LETTER WA |
| 0x30a4, // 0x30f0 KATAKANA LETTER WI |
| 0x30a8, // 0x30f1 KATAKANA LETTER WE |
| 0x30aa, // 0x30f2 KATAKANA LETTER WO |
| 0x0000, // 0x30f3 KATAKANA LETTER N |
| 0x30a6, // 0x30f4 KATAKANA LETTER VU |
| 0x30a1, // 0x30f5 KATAKANA LETTER SMALL KA |
| 0x30a7, // 0x30f6 KATAKANA LETTER SMALL KE |
| 0x30a2, // 0x30f7 KATAKANA LETTER VA |
| 0x30a4, // 0x30f8 KATAKANA LETTER VI |
| 0x30a8, // 0x30f9 KATAKANA LETTER VE |
| 0x30aa // 0x30fa KATAKANA LETTER VO |
| // 0x0000, // 0x30fb KATAKANA MIDDLE DOT |
| // 0x0000, // 0x30fc KATAKANA-HIRAGANA PROLONGED SOUND MARK |
| // 0x0000, // 0x30fd KATAKANA ITERATION MARK |
| // 0x0000, // 0x30fe KATAKANA VOICED ITERATION MARK |
| // 0x0000 // 0x30ff |
| }; |
| |
| static sal_Unicode table_halfwidth[] = { |
| // 0x0000, // 0xff61 HALFWIDTH IDEOGRAPHIC FULL STOP |
| // 0x0000, // 0xff62 HALFWIDTH LEFT CORNER BRACKET |
| // 0x0000, // 0xff63 HALFWIDTH RIGHT CORNER BRACKET |
| // 0x0000, // 0xff64 HALFWIDTH IDEOGRAPHIC COMMA |
| // 0x0000, // 0xff65 HALFWIDTH KATAKANA MIDDLE DOT |
| 0xff75, // 0xff66 HALFWIDTH KATAKANA LETTER WO |
| 0xff67, // 0xff67 HALFWIDTH KATAKANA LETTER SMALL A |
| 0xff68, // 0xff68 HALFWIDTH KATAKANA LETTER SMALL I |
| 0xff69, // 0xff69 HALFWIDTH KATAKANA LETTER SMALL U |
| 0xff6a, // 0xff6a HALFWIDTH KATAKANA LETTER SMALL E |
| 0xff6b, // 0xff6b HALFWIDTH KATAKANA LETTER SMALL O |
| 0xff67, // 0xff6c HALFWIDTH KATAKANA LETTER SMALL YA |
| 0xff69, // 0xff6d HALFWIDTH KATAKANA LETTER SMALL YU |
| 0xff6b, // 0xff6e HALFWIDTH KATAKANA LETTER SMALL YO |
| 0xff69, // 0xff6f HALFWIDTH KATAKANA LETTER SMALL TU |
| 0x0000, // 0xff70 HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK |
| 0xff71, // 0xff71 HALFWIDTH KATAKANA LETTER A |
| 0xff72, // 0xff72 HALFWIDTH KATAKANA LETTER I |
| 0xff73, // 0xff73 HALFWIDTH KATAKANA LETTER U |
| 0xff74, // 0xff74 HALFWIDTH KATAKANA LETTER E |
| 0xff75, // 0xff75 HALFWIDTH KATAKANA LETTER O |
| 0xff71, // 0xff76 HALFWIDTH KATAKANA LETTER KA |
| 0xff72, // 0xff77 HALFWIDTH KATAKANA LETTER KI |
| 0xff73, // 0xff78 HALFWIDTH KATAKANA LETTER KU |
| 0xff74, // 0xff79 HALFWIDTH KATAKANA LETTER KE |
| 0xff75, // 0xff7a HALFWIDTH KATAKANA LETTER KO |
| 0xff71, // 0xff7b HALFWIDTH KATAKANA LETTER SA |
| 0xff72, // 0xff7c HALFWIDTH KATAKANA LETTER SI |
| 0xff73, // 0xff7d HALFWIDTH KATAKANA LETTER SU |
| 0xff74, // 0xff7e HALFWIDTH KATAKANA LETTER SE |
| 0xff75, // 0xff7f HALFWIDTH KATAKANA LETTER SO |
| 0xff71, // 0xff80 HALFWIDTH KATAKANA LETTER TA |
| 0xff72, // 0xff81 HALFWIDTH KATAKANA LETTER TI |
| 0xff73, // 0xff82 HALFWIDTH KATAKANA LETTER TU |
| 0xff74, // 0xff83 HALFWIDTH KATAKANA LETTER TE |
| 0xff75, // 0xff84 HALFWIDTH KATAKANA LETTER TO |
| 0xff71, // 0xff85 HALFWIDTH KATAKANA LETTER NA |
| 0xff72, // 0xff86 HALFWIDTH KATAKANA LETTER NI |
| 0xff73, // 0xff87 HALFWIDTH KATAKANA LETTER NU |
| 0xff74, // 0xff88 HALFWIDTH KATAKANA LETTER NE |
| 0xff75, // 0xff89 HALFWIDTH KATAKANA LETTER NO |
| 0xff71, // 0xff8a HALFWIDTH KATAKANA LETTER HA |
| 0xff72, // 0xff8b HALFWIDTH KATAKANA LETTER HI |
| 0xff73, // 0xff8c HALFWIDTH KATAKANA LETTER HU |
| 0xff74, // 0xff8d HALFWIDTH KATAKANA LETTER HE |
| 0xff75, // 0xff8e HALFWIDTH KATAKANA LETTER HO |
| 0xff71, // 0xff8f HALFWIDTH KATAKANA LETTER MA |
| 0xff72, // 0xff90 HALFWIDTH KATAKANA LETTER MI |
| 0xff73, // 0xff91 HALFWIDTH KATAKANA LETTER MU |
| 0xff74, // 0xff92 HALFWIDTH KATAKANA LETTER ME |
| 0xff75, // 0xff93 HALFWIDTH KATAKANA LETTER MO |
| 0xff71, // 0xff94 HALFWIDTH KATAKANA LETTER YA |
| 0xff73, // 0xff95 HALFWIDTH KATAKANA LETTER YU |
| 0xff75, // 0xff96 HALFWIDTH KATAKANA LETTER YO |
| 0xff71, // 0xff97 HALFWIDTH KATAKANA LETTER RA |
| 0xff72, // 0xff98 HALFWIDTH KATAKANA LETTER RI |
| 0xff73, // 0xff99 HALFWIDTH KATAKANA LETTER RU |
| 0xff74, // 0xff9a HALFWIDTH KATAKANA LETTER RE |
| 0xff75, // 0xff9b HALFWIDTH KATAKANA LETTER RO |
| 0xff71 // 0xff9c HALFWIDTH KATAKANA LETTER WA |
| // 0x0000, // 0xff9d HALFWIDTH KATAKANA LETTER N |
| // 0x0000, // 0xff9e HALFWIDTH KATAKANA VOICED SOUND MARK |
| // 0x0000 // 0xff9f HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK |
| }; |
| |
| |
| OUString SAL_CALL |
| ignoreProlongedSoundMark_ja_JP::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) |
| throw(RuntimeException) |
| { |
| // Create a string buffer which can hold nCount + 1 characters. |
| // The reference count is 0 now. |
| rtl_uString * newStr = x_rtl_uString_new_WithLength( nCount ); // defined in x_rtl_ustring.h |
| sal_Unicode * dst = newStr->buffer; |
| const sal_Unicode * src = inStr.getStr() + startPos; |
| |
| sal_Int32 *p = 0; |
| sal_Int32 position = 0; |
| |
| if (useOffset) { |
| // Allocate nCount length to offset argument. |
| offset.realloc( nCount ); |
| p = offset.getArray(); |
| position = startPos; |
| } |
| |
| // |
| sal_Unicode previousChar = *src ++; |
| sal_Unicode currentChar; |
| |
| // Conversion |
| while (-- nCount > 0) { |
| currentChar = *src ++; |
| |
| if (currentChar == 0x30fc || // KATAKANA-HIRAGANA PROLONGED SOUND MARK |
| currentChar == 0xff70) { // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK |
| |
| if (0x3041 <= previousChar && previousChar <= 0x30fa) { |
| currentChar = table_normalwidth[ previousChar - 0x3041 ]; |
| } |
| else if (0xff66 <= previousChar && previousChar <= 0xff9c) { |
| currentChar = table_halfwidth[ previousChar - 0xff66 ]; |
| } |
| } |
| |
| if (useOffset) |
| *p ++ = position ++; |
| *dst ++ = previousChar; |
| previousChar = currentChar; |
| } |
| |
| if (nCount == 0) { |
| if (useOffset) |
| *p = position; |
| *dst ++ = previousChar; |
| } |
| |
| *dst = (sal_Unicode) 0; |
| |
| newStr->length = sal_Int32(dst - newStr->buffer); |
| if (useOffset) |
| offset.realloc(newStr->length); |
| return OUString( newStr, SAL_NO_ACQUIRE ); // take over ownership of <newStr> |
| |
| } |
| |
| } } } } |