| /************************************************************** |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * |
| *************************************************************/ |
| |
| |
| |
| // MARKER(update_precomp.py): autogen include statement, do not remove |
| #include "precompiled_i18npool.hxx" |
| |
| #include <cclass_unicode.hxx> |
| #include <unicode/uchar.h> |
| #include <rtl/math.hxx> |
| #include <rtl/ustring.hxx> |
| #include <com/sun/star/i18n/KParseTokens.hpp> |
| #include <com/sun/star/i18n/KParseType.hpp> |
| #include <com/sun/star/i18n/UnicodeType.hpp> |
| #include <com/sun/star/i18n/XLocaleData.hpp> |
| #include <com/sun/star/i18n/NativeNumberMode.hpp> |
| |
| #include <string.h> // memcpy() |
| |
| using namespace ::com::sun::star::uno; |
| using namespace ::com::sun::star::lang; |
| using namespace ::rtl; |
| |
| namespace com { namespace sun { namespace star { namespace i18n { |
| |
| const UPT_FLAG_TYPE cclass_Unicode::TOKEN_ILLEGAL = 0x00000000; |
| const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR = 0x00000001; |
| const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_BOOL = 0x00000002; |
| const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_WORD = 0x00000004; |
| const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_VALUE = 0x00000008; |
| const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_STRING = 0x00000010; |
| const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_DONTCARE= 0x00000020; |
| const UPT_FLAG_TYPE cclass_Unicode::TOKEN_BOOL = 0x00000040; |
| const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD = 0x00000080; |
| const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD_SEP = 0x00000100; |
| const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE = 0x00000200; |
| const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SEP = 0x00000400; |
| const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP = 0x00000800; |
| const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SIGN = 0x00001000; |
| const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP_VALUE = 0x00002000; |
| const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_DIGIT = 0x00004000; |
| const UPT_FLAG_TYPE cclass_Unicode::TOKEN_NAME_SEP = 0x20000000; |
| const UPT_FLAG_TYPE cclass_Unicode::TOKEN_STRING_SEP = 0x40000000; |
| const UPT_FLAG_TYPE cclass_Unicode::TOKEN_EXCLUDED = 0x80000000; |
| |
| #define TOKEN_DIGIT_FLAGS (TOKEN_CHAR_VALUE | TOKEN_VALUE | TOKEN_VALUE_EXP | TOKEN_VALUE_EXP_VALUE | TOKEN_VALUE_DIGIT) |
| |
| // Default identifier/name specification is [A-Za-z_][A-Za-z0-9_]* |
| |
| const sal_uInt8 cclass_Unicode::nDefCnt = 128; |
| const UPT_FLAG_TYPE cclass_Unicode::pDefaultParserTable[ nDefCnt ] = |
| { |
| // (...) == Calc formula compiler specific, commented out and modified |
| |
| /* \0 */ TOKEN_EXCLUDED, |
| TOKEN_ILLEGAL, |
| TOKEN_ILLEGAL, |
| TOKEN_ILLEGAL, |
| TOKEN_ILLEGAL, |
| TOKEN_ILLEGAL, |
| TOKEN_ILLEGAL, |
| TOKEN_ILLEGAL, |
| TOKEN_ILLEGAL, |
| /* 9 \t */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL) |
| TOKEN_ILLEGAL, |
| /* 11 \v */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL) |
| TOKEN_ILLEGAL, |
| TOKEN_ILLEGAL, |
| TOKEN_ILLEGAL, |
| TOKEN_ILLEGAL, |
| TOKEN_ILLEGAL, |
| TOKEN_ILLEGAL, |
| TOKEN_ILLEGAL, |
| TOKEN_ILLEGAL, |
| TOKEN_ILLEGAL, |
| TOKEN_ILLEGAL, |
| TOKEN_ILLEGAL, |
| TOKEN_ILLEGAL, |
| TOKEN_ILLEGAL, |
| TOKEN_ILLEGAL, |
| TOKEN_ILLEGAL, |
| TOKEN_ILLEGAL, |
| TOKEN_ILLEGAL, |
| TOKEN_ILLEGAL, |
| TOKEN_ILLEGAL, |
| TOKEN_ILLEGAL, |
| /* 32 */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, |
| /* 33 ! */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, |
| /* 34 " */ TOKEN_CHAR_STRING | TOKEN_STRING_SEP, |
| /* 35 # */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD_SEP) |
| /* 36 $ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_WORD | TOKEN_WORD) |
| /* 37 % */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_VALUE) |
| /* 38 & */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, |
| /* 39 ' */ TOKEN_NAME_SEP, |
| /* 40 ( */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, |
| /* 41 ) */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, |
| /* 42 * */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, |
| /* 43 + */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN, |
| /* 44 , */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_VALUE | TOKEN_VALUE) |
| /* 45 - */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN, |
| /* 46 . */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD | TOKEN_CHAR_VALUE | TOKEN_VALUE) |
| /* 47 / */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, |
| //for ( i = 48; i < 58; i++ ) |
| /* 48 0 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, |
| /* 49 1 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, |
| /* 50 2 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, |
| /* 51 3 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, |
| /* 52 4 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, |
| /* 53 5 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, |
| /* 54 6 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, |
| /* 55 7 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, |
| /* 56 8 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, |
| /* 57 9 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, |
| /* 58 : */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD) |
| /* 59 ; */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, |
| /* 60 < */ TOKEN_CHAR_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, |
| /* 61 = */ TOKEN_CHAR | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, |
| /* 62 > */ TOKEN_CHAR_BOOL | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, |
| /* 63 ? */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_WORD | TOKEN_WORD) |
| /* 64 @ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) |
| //for ( i = 65; i < 91; i++ ) |
| /* 65 A */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 66 B */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 67 C */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 68 D */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 69 E */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 70 F */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 71 G */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 72 H */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 73 I */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 74 J */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 75 K */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 76 L */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 77 M */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 78 N */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 79 O */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 80 P */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 81 Q */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 82 R */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 83 S */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 84 T */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 85 U */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 86 V */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 87 W */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 88 X */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 89 Y */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 90 Z */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 91 [ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) |
| /* 92 \ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) |
| /* 93 ] */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) |
| /* 94 ^ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, |
| /* 95 _ */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 96 ` */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) |
| //for ( i = 97; i < 123; i++ ) |
| /* 97 a */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 98 b */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 99 c */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 100 d */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 101 e */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 102 f */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 103 g */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 104 h */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 105 i */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 106 j */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 107 k */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 108 l */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 109 m */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 110 n */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 111 o */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 112 p */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 113 q */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 114 r */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 115 s */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 116 t */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 117 u */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 118 v */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 119 w */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 120 x */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 121 y */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 122 z */ TOKEN_CHAR_WORD | TOKEN_WORD, |
| /* 123 { */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) |
| /* 124 | */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) |
| /* 125 } */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) |
| /* 126 ~ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) |
| /* 127 */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP // (TOKEN_ILLEGAL // UNUSED) |
| }; |
| |
| |
| const sal_Int32 cclass_Unicode::pParseTokensType[ nDefCnt ] = |
| { |
| /* \0 */ KParseTokens::ASC_OTHER, |
| KParseTokens::ASC_CONTROL, |
| KParseTokens::ASC_CONTROL, |
| KParseTokens::ASC_CONTROL, |
| KParseTokens::ASC_CONTROL, |
| KParseTokens::ASC_CONTROL, |
| KParseTokens::ASC_CONTROL, |
| KParseTokens::ASC_CONTROL, |
| KParseTokens::ASC_CONTROL, |
| /* 9 \t */ KParseTokens::ASC_CONTROL, |
| KParseTokens::ASC_CONTROL, |
| /* 11 \v */ KParseTokens::ASC_CONTROL, |
| KParseTokens::ASC_CONTROL, |
| KParseTokens::ASC_CONTROL, |
| KParseTokens::ASC_CONTROL, |
| KParseTokens::ASC_CONTROL, |
| KParseTokens::ASC_CONTROL, |
| KParseTokens::ASC_CONTROL, |
| KParseTokens::ASC_CONTROL, |
| KParseTokens::ASC_CONTROL, |
| KParseTokens::ASC_CONTROL, |
| KParseTokens::ASC_CONTROL, |
| KParseTokens::ASC_CONTROL, |
| KParseTokens::ASC_CONTROL, |
| KParseTokens::ASC_CONTROL, |
| KParseTokens::ASC_CONTROL, |
| KParseTokens::ASC_CONTROL, |
| KParseTokens::ASC_CONTROL, |
| KParseTokens::ASC_CONTROL, |
| KParseTokens::ASC_CONTROL, |
| KParseTokens::ASC_CONTROL, |
| KParseTokens::ASC_CONTROL, |
| /* 32 */ KParseTokens::ASC_OTHER, |
| /* 33 ! */ KParseTokens::ASC_OTHER, |
| /* 34 " */ KParseTokens::ASC_OTHER, |
| /* 35 # */ KParseTokens::ASC_OTHER, |
| /* 36 $ */ KParseTokens::ASC_DOLLAR, |
| /* 37 % */ KParseTokens::ASC_OTHER, |
| /* 38 & */ KParseTokens::ASC_OTHER, |
| /* 39 ' */ KParseTokens::ASC_OTHER, |
| /* 40 ( */ KParseTokens::ASC_OTHER, |
| /* 41 ) */ KParseTokens::ASC_OTHER, |
| /* 42 * */ KParseTokens::ASC_OTHER, |
| /* 43 + */ KParseTokens::ASC_OTHER, |
| /* 44 , */ KParseTokens::ASC_OTHER, |
| /* 45 - */ KParseTokens::ASC_OTHER, |
| /* 46 . */ KParseTokens::ASC_DOT, |
| /* 47 / */ KParseTokens::ASC_OTHER, |
| //for ( i = 48; i < 58; i++ ) |
| /* 48 0 */ KParseTokens::ASC_DIGIT, |
| /* 49 1 */ KParseTokens::ASC_DIGIT, |
| /* 50 2 */ KParseTokens::ASC_DIGIT, |
| /* 51 3 */ KParseTokens::ASC_DIGIT, |
| /* 52 4 */ KParseTokens::ASC_DIGIT, |
| /* 53 5 */ KParseTokens::ASC_DIGIT, |
| /* 54 6 */ KParseTokens::ASC_DIGIT, |
| /* 55 7 */ KParseTokens::ASC_DIGIT, |
| /* 56 8 */ KParseTokens::ASC_DIGIT, |
| /* 57 9 */ KParseTokens::ASC_DIGIT, |
| /* 58 : */ KParseTokens::ASC_COLON, |
| /* 59 ; */ KParseTokens::ASC_OTHER, |
| /* 60 < */ KParseTokens::ASC_OTHER, |
| /* 61 = */ KParseTokens::ASC_OTHER, |
| /* 62 > */ KParseTokens::ASC_OTHER, |
| /* 63 ? */ KParseTokens::ASC_OTHER, |
| /* 64 @ */ KParseTokens::ASC_OTHER, |
| //for ( i = 65; i < 91; i++ ) |
| /* 65 A */ KParseTokens::ASC_UPALPHA, |
| /* 66 B */ KParseTokens::ASC_UPALPHA, |
| /* 67 C */ KParseTokens::ASC_UPALPHA, |
| /* 68 D */ KParseTokens::ASC_UPALPHA, |
| /* 69 E */ KParseTokens::ASC_UPALPHA, |
| /* 70 F */ KParseTokens::ASC_UPALPHA, |
| /* 71 G */ KParseTokens::ASC_UPALPHA, |
| /* 72 H */ KParseTokens::ASC_UPALPHA, |
| /* 73 I */ KParseTokens::ASC_UPALPHA, |
| /* 74 J */ KParseTokens::ASC_UPALPHA, |
| /* 75 K */ KParseTokens::ASC_UPALPHA, |
| /* 76 L */ KParseTokens::ASC_UPALPHA, |
| /* 77 M */ KParseTokens::ASC_UPALPHA, |
| /* 78 N */ KParseTokens::ASC_UPALPHA, |
| /* 79 O */ KParseTokens::ASC_UPALPHA, |
| /* 80 P */ KParseTokens::ASC_UPALPHA, |
| /* 81 Q */ KParseTokens::ASC_UPALPHA, |
| /* 82 R */ KParseTokens::ASC_UPALPHA, |
| /* 83 S */ KParseTokens::ASC_UPALPHA, |
| /* 84 T */ KParseTokens::ASC_UPALPHA, |
| /* 85 U */ KParseTokens::ASC_UPALPHA, |
| /* 86 V */ KParseTokens::ASC_UPALPHA, |
| /* 87 W */ KParseTokens::ASC_UPALPHA, |
| /* 88 X */ KParseTokens::ASC_UPALPHA, |
| /* 89 Y */ KParseTokens::ASC_UPALPHA, |
| /* 90 Z */ KParseTokens::ASC_UPALPHA, |
| /* 91 [ */ KParseTokens::ASC_OTHER, |
| /* 92 \ */ KParseTokens::ASC_OTHER, |
| /* 93 ] */ KParseTokens::ASC_OTHER, |
| /* 94 ^ */ KParseTokens::ASC_OTHER, |
| /* 95 _ */ KParseTokens::ASC_UNDERSCORE, |
| /* 96 ` */ KParseTokens::ASC_OTHER, |
| //for ( i = 97; i < 123; i++ ) |
| /* 97 a */ KParseTokens::ASC_LOALPHA, |
| /* 98 b */ KParseTokens::ASC_LOALPHA, |
| /* 99 c */ KParseTokens::ASC_LOALPHA, |
| /* 100 d */ KParseTokens::ASC_LOALPHA, |
| /* 101 e */ KParseTokens::ASC_LOALPHA, |
| /* 102 f */ KParseTokens::ASC_LOALPHA, |
| /* 103 g */ KParseTokens::ASC_LOALPHA, |
| /* 104 h */ KParseTokens::ASC_LOALPHA, |
| /* 105 i */ KParseTokens::ASC_LOALPHA, |
| /* 106 j */ KParseTokens::ASC_LOALPHA, |
| /* 107 k */ KParseTokens::ASC_LOALPHA, |
| /* 108 l */ KParseTokens::ASC_LOALPHA, |
| /* 109 m */ KParseTokens::ASC_LOALPHA, |
| /* 110 n */ KParseTokens::ASC_LOALPHA, |
| /* 111 o */ KParseTokens::ASC_LOALPHA, |
| /* 112 p */ KParseTokens::ASC_LOALPHA, |
| /* 113 q */ KParseTokens::ASC_LOALPHA, |
| /* 114 r */ KParseTokens::ASC_LOALPHA, |
| /* 115 s */ KParseTokens::ASC_LOALPHA, |
| /* 116 t */ KParseTokens::ASC_LOALPHA, |
| /* 117 u */ KParseTokens::ASC_LOALPHA, |
| /* 118 v */ KParseTokens::ASC_LOALPHA, |
| /* 119 w */ KParseTokens::ASC_LOALPHA, |
| /* 120 x */ KParseTokens::ASC_LOALPHA, |
| /* 121 y */ KParseTokens::ASC_LOALPHA, |
| /* 122 z */ KParseTokens::ASC_LOALPHA, |
| /* 123 { */ KParseTokens::ASC_OTHER, |
| /* 124 | */ KParseTokens::ASC_OTHER, |
| /* 125 } */ KParseTokens::ASC_OTHER, |
| /* 126 ~ */ KParseTokens::ASC_OTHER, |
| /* 127 */ KParseTokens::ASC_OTHER |
| }; |
| |
| |
| // static |
| const sal_Unicode* cclass_Unicode::StrChr( const sal_Unicode* pStr, sal_Unicode c ) |
| { |
| if ( !pStr ) |
| return NULL; |
| while ( *pStr ) |
| { |
| if ( *pStr == c ) |
| return pStr; |
| pStr++; |
| } |
| return NULL; |
| } |
| |
| |
| sal_Int32 cclass_Unicode::getParseTokensType( const sal_Unicode* aStr, sal_Int32 nPos ) |
| { |
| sal_Unicode c = aStr[nPos]; |
| if ( c < nDefCnt ) |
| return pParseTokensType[ sal_uInt8(c) ]; |
| else |
| { |
| |
| //! all KParseTokens::UNI_... must be matched |
| switch ( u_charType( (sal_uInt32) c ) ) |
| { |
| case U_UPPERCASE_LETTER : |
| return KParseTokens::UNI_UPALPHA; |
| case U_LOWERCASE_LETTER : |
| return KParseTokens::UNI_LOALPHA; |
| case U_TITLECASE_LETTER : |
| return KParseTokens::UNI_TITLE_ALPHA; |
| case U_MODIFIER_LETTER : |
| return KParseTokens::UNI_MODIFIER_LETTER; |
| case U_OTHER_LETTER : |
| // Non_Spacing_Mark could not be as leading character |
| if (nPos == 0) break; |
| // fall through, treat it as Other_Letter. |
| case U_NON_SPACING_MARK : |
| return KParseTokens::UNI_OTHER_LETTER; |
| case U_DECIMAL_DIGIT_NUMBER : |
| return KParseTokens::UNI_DIGIT; |
| case U_LETTER_NUMBER : |
| return KParseTokens::UNI_LETTER_NUMBER; |
| case U_OTHER_NUMBER : |
| return KParseTokens::UNI_OTHER_NUMBER; |
| } |
| |
| return KParseTokens::UNI_OTHER; |
| } |
| } |
| |
| sal_Bool cclass_Unicode::setupInternational( const Locale& rLocale ) |
| { |
| sal_Bool bChanged = (aParserLocale.Language != rLocale.Language |
| || aParserLocale.Country != rLocale.Country |
| || aParserLocale.Variant != rLocale.Variant); |
| if ( bChanged ) |
| { |
| aParserLocale.Language = rLocale.Language; |
| aParserLocale.Country = rLocale.Country; |
| aParserLocale.Variant = rLocale.Variant; |
| } |
| if ( !xLocaleData.is() && xMSF.is() ) |
| { |
| Reference < |
| XInterface > xI = |
| xMSF->createInstance( OUString( |
| RTL_CONSTASCII_USTRINGPARAM( "com.sun.star.i18n.LocaleData" ) ) ); |
| if ( xI.is() ) |
| { |
| Any x = xI->queryInterface( getCppuType((const Reference< XLocaleData>*)0) ); |
| x >>= xLocaleData; |
| } |
| } |
| return bChanged; |
| } |
| |
| |
| void cclass_Unicode::setupParserTable( const Locale& rLocale, sal_Int32 startCharTokenType, |
| const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType, |
| const OUString& userDefinedCharactersCont ) |
| { |
| bool bIntlEqual = (rLocale.Language == aParserLocale.Language && |
| rLocale.Country == aParserLocale.Country && |
| rLocale.Variant == aParserLocale.Variant); |
| if ( !pTable || !bIntlEqual || |
| startCharTokenType != nStartTypes || |
| contCharTokenType != nContTypes || |
| userDefinedCharactersStart != aStartChars || |
| userDefinedCharactersCont != aContChars ) |
| initParserTable( rLocale, startCharTokenType, userDefinedCharactersStart, |
| contCharTokenType, userDefinedCharactersCont ); |
| } |
| |
| |
| void cclass_Unicode::initParserTable( const Locale& rLocale, sal_Int32 startCharTokenType, |
| const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType, |
| const OUString& userDefinedCharactersCont ) |
| { |
| // (Re)Init |
| setupInternational( rLocale ); |
| // Memory of pTable is reused. |
| if ( !pTable ) |
| pTable = new UPT_FLAG_TYPE[nDefCnt]; |
| memcpy( pTable, pDefaultParserTable, sizeof(UPT_FLAG_TYPE) * nDefCnt ); |
| // Start and cont tables only need reallocation if different length. |
| if ( pStart && userDefinedCharactersStart.getLength() != aStartChars.getLength() ) |
| { |
| delete [] pStart; |
| pStart = NULL; |
| } |
| if ( pCont && userDefinedCharactersCont.getLength() != aContChars.getLength() ) |
| { |
| delete [] pCont; |
| pCont = NULL; |
| } |
| nStartTypes = startCharTokenType; |
| nContTypes = contCharTokenType; |
| aStartChars = userDefinedCharactersStart; |
| aContChars = userDefinedCharactersCont; |
| |
| // specials |
| if( xLocaleData.is() ) |
| { |
| LocaleDataItem aItem = |
| xLocaleData->getLocaleItem( aParserLocale ); |
| //!TODO: theoretically separators may be a string, adjustment would have to be |
| //! done here and in parsing and in ::rtl::math::stringToDouble() |
| cGroupSep = aItem.thousandSeparator.getStr()[0]; |
| cDecimalSep = aItem.decimalSeparator.getStr()[0]; |
| } |
| |
| if ( cGroupSep < nDefCnt ) |
| pTable[cGroupSep] |= TOKEN_VALUE; |
| if ( cDecimalSep < nDefCnt ) |
| pTable[cDecimalSep] |= TOKEN_CHAR_VALUE | TOKEN_VALUE; |
| |
| // Modify characters according to KParseTokens definitions. |
| { |
| using namespace KParseTokens; |
| sal_uInt8 i; |
| |
| if ( !(nStartTypes & ASC_UPALPHA) ) |
| for ( i = 65; i < 91; i++ ) |
| pTable[i] &= ~TOKEN_CHAR_WORD; // not allowed as start character |
| if ( !(nContTypes & ASC_UPALPHA) ) |
| for ( i = 65; i < 91; i++ ) |
| pTable[i] &= ~TOKEN_WORD; // not allowed as cont character |
| |
| if ( !(nStartTypes & ASC_LOALPHA) ) |
| for ( i = 97; i < 123; i++ ) |
| pTable[i] &= ~TOKEN_CHAR_WORD; // not allowed as start character |
| if ( !(nContTypes & ASC_LOALPHA) ) |
| for ( i = 97; i < 123; i++ ) |
| pTable[i] &= ~TOKEN_WORD; // not allowed as cont character |
| |
| if ( nStartTypes & ASC_DIGIT ) |
| for ( i = 48; i < 58; i++ ) |
| pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character |
| if ( !(nContTypes & ASC_DIGIT) ) |
| for ( i = 48; i < 58; i++ ) |
| pTable[i] &= ~TOKEN_WORD; // not allowed as cont character |
| |
| if ( !(nStartTypes & ASC_UNDERSCORE) ) |
| pTable[95] &= ~TOKEN_CHAR_WORD; // not allowed as start character |
| if ( !(nContTypes & ASC_UNDERSCORE) ) |
| pTable[95] &= ~TOKEN_WORD; // not allowed as cont character |
| |
| if ( nStartTypes & ASC_DOLLAR ) |
| pTable[36] |= TOKEN_CHAR_WORD; // allowed as start character |
| if ( nContTypes & ASC_DOLLAR ) |
| pTable[36] |= TOKEN_WORD; // allowed as cont character |
| |
| if ( nStartTypes & ASC_DOT ) |
| pTable[46] |= TOKEN_CHAR_WORD; // allowed as start character |
| if ( nContTypes & ASC_DOT ) |
| pTable[46] |= TOKEN_WORD; // allowed as cont character |
| |
| if ( nStartTypes & ASC_COLON ) |
| pTable[58] |= TOKEN_CHAR_WORD; // allowed as start character |
| if ( nContTypes & ASC_COLON ) |
| pTable[58] |= TOKEN_WORD; // allowed as cont character |
| |
| if ( nStartTypes & ASC_CONTROL ) |
| for ( i = 1; i < 32; i++ ) |
| pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character |
| if ( nContTypes & ASC_CONTROL ) |
| for ( i = 1; i < 32; i++ ) |
| pTable[i] |= TOKEN_WORD; // allowed as cont character |
| |
| if ( nStartTypes & ASC_ANY_BUT_CONTROL ) |
| for ( i = 32; i < nDefCnt; i++ ) |
| pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character |
| if ( nContTypes & ASC_ANY_BUT_CONTROL ) |
| for ( i = 32; i < nDefCnt; i++ ) |
| pTable[i] |= TOKEN_WORD; // allowed as cont character |
| |
| } |
| |
| // Merge in (positively override with) user defined characters. |
| // StartChars |
| sal_Int32 nLen = aStartChars.getLength(); |
| if ( nLen ) |
| { |
| if ( !pStart ) |
| pStart = new UPT_FLAG_TYPE[ nLen ]; |
| const sal_Unicode* p = aStartChars.getStr(); |
| for ( sal_Int32 j=0; j<nLen; j++, p++ ) |
| { |
| pStart[j] = TOKEN_CHAR_WORD; |
| if ( *p < nDefCnt ) |
| pTable[*p] |= TOKEN_CHAR_WORD; |
| } |
| } |
| // ContChars |
| nLen = aContChars.getLength(); |
| if ( nLen ) |
| { |
| if ( !pCont ) |
| pCont = new UPT_FLAG_TYPE[ nLen ]; |
| const sal_Unicode* p = aContChars.getStr(); |
| for ( sal_Int32 j=0; j<nLen; j++ ) |
| { |
| pCont[j] = TOKEN_WORD; |
| if ( *p < nDefCnt ) |
| pTable[*p] |= TOKEN_WORD; |
| } |
| } |
| } |
| |
| |
| void cclass_Unicode::destroyParserTable() |
| { |
| if ( pCont ) |
| delete [] pCont; |
| if ( pStart ) |
| delete [] pStart; |
| if ( pTable ) |
| delete [] pTable; |
| } |
| |
| |
| UPT_FLAG_TYPE cclass_Unicode::getFlags( const sal_Unicode* aStr, sal_Int32 nPos ) |
| { |
| UPT_FLAG_TYPE nMask; |
| sal_Unicode c = aStr[nPos]; |
| if ( c < nDefCnt ) |
| nMask = pTable[ sal_uInt8(c) ]; |
| else |
| nMask = getFlagsExtended( aStr, nPos ); |
| switch ( eState ) |
| { |
| case ssGetChar : |
| case ssRewindFromValue : |
| case ssIgnoreLeadingInRewind : |
| case ssGetWordFirstChar : |
| if ( !(nMask & TOKEN_CHAR_WORD) ) |
| { |
| nMask |= getStartCharsFlags( c ); |
| if ( nMask & TOKEN_CHAR_WORD ) |
| nMask &= ~TOKEN_EXCLUDED; |
| } |
| break; |
| case ssGetValue : |
| case ssGetWord : |
| if ( !(nMask & TOKEN_WORD) ) |
| { |
| nMask |= getContCharsFlags( c ); |
| if ( nMask & TOKEN_WORD ) |
| nMask &= ~TOKEN_EXCLUDED; |
| } |
| break; |
| default: |
| ; // other cases aren't needed, no compiler warning |
| } |
| return nMask; |
| } |
| |
| |
| UPT_FLAG_TYPE cclass_Unicode::getFlagsExtended( const sal_Unicode* aStr, sal_Int32 nPos ) |
| { |
| sal_Unicode c = aStr[nPos]; |
| if ( c == cGroupSep ) |
| return TOKEN_VALUE; |
| else if ( c == cDecimalSep ) |
| return TOKEN_CHAR_VALUE | TOKEN_VALUE; |
| using namespace i18n; |
| bool bStart = (eState == ssGetChar || eState == ssGetWordFirstChar || |
| eState == ssRewindFromValue || eState == ssIgnoreLeadingInRewind); |
| sal_Int32 nTypes = (bStart ? nStartTypes : nContTypes); |
| |
| //! all KParseTokens::UNI_... must be matched |
| switch ( u_charType( (sal_uInt32) c ) ) |
| { |
| case U_UPPERCASE_LETTER : |
| return (nTypes & KParseTokens::UNI_UPALPHA) ? |
| (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : |
| TOKEN_ILLEGAL; |
| case U_LOWERCASE_LETTER : |
| return (nTypes & KParseTokens::UNI_LOALPHA) ? |
| (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : |
| TOKEN_ILLEGAL; |
| case U_TITLECASE_LETTER : |
| return (nTypes & KParseTokens::UNI_TITLE_ALPHA) ? |
| (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : |
| TOKEN_ILLEGAL; |
| case U_MODIFIER_LETTER : |
| return (nTypes & KParseTokens::UNI_MODIFIER_LETTER) ? |
| (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : |
| TOKEN_ILLEGAL; |
| case U_NON_SPACING_MARK : |
| case U_COMBINING_SPACING_MARK : |
| // Non_Spacing_Mark can't be a leading character, |
| // nor can a spacing combining mark. |
| if (bStart) |
| return TOKEN_ILLEGAL; |
| // fall through, treat it as Other_Letter. |
| case U_OTHER_LETTER : |
| return (nTypes & KParseTokens::UNI_OTHER_LETTER) ? |
| (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : |
| TOKEN_ILLEGAL; |
| case U_DECIMAL_DIGIT_NUMBER : |
| return ((nTypes & KParseTokens::UNI_DIGIT) ? |
| (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : |
| TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS; |
| case U_LETTER_NUMBER : |
| return ((nTypes & KParseTokens::UNI_LETTER_NUMBER) ? |
| (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : |
| TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS; |
| case U_OTHER_NUMBER : |
| return ((nTypes & KParseTokens::UNI_OTHER_NUMBER) ? |
| (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : |
| TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS; |
| case U_SPACE_SEPARATOR : |
| return ((nTypes & KParseTokens::IGNORE_LEADING_WS) ? |
| TOKEN_CHAR_DONTCARE : (bStart ? TOKEN_CHAR_WORD : (TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP) )); |
| } |
| |
| return TOKEN_ILLEGAL; |
| } |
| |
| |
| UPT_FLAG_TYPE cclass_Unicode::getStartCharsFlags( sal_Unicode c ) |
| { |
| if ( pStart ) |
| { |
| const sal_Unicode* pStr = aStartChars.getStr(); |
| const sal_Unicode* p = StrChr( pStr, c ); |
| if ( p ) |
| return pStart[ p - pStr ]; |
| } |
| return TOKEN_ILLEGAL; |
| } |
| |
| |
| UPT_FLAG_TYPE cclass_Unicode::getContCharsFlags( sal_Unicode c ) |
| { |
| if ( pCont ) |
| { |
| const sal_Unicode* pStr = aContChars.getStr(); |
| const sal_Unicode* p = StrChr( pStr, c ); |
| if ( p ) |
| return pCont[ p - pStr ]; |
| } |
| return TOKEN_ILLEGAL; |
| } |
| |
| |
| void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32 nPos, sal_Int32 nTokenType ) |
| { |
| using namespace i18n; |
| const sal_Unicode* const pTextStart = rText.getStr() + nPos; |
| eState = ssGetChar; |
| |
| //! All the variables below (plus ParseResult) have to be resetted on ssRewindFromValue! |
| const sal_Unicode* pSym = pTextStart; |
| const sal_Unicode* pSrc = pSym; |
| OUString aSymbol; |
| sal_Unicode c = *pSrc; |
| sal_Unicode cLast = 0; |
| int nDecSeps = 0; |
| bool bQuote = false; |
| bool bMightBeWord = true; |
| bool bMightBeWordLast = true; |
| //! All the variables above (plus ParseResult) have to be resetted on ssRewindFromValue! |
| |
| while ( (c != 0) && (eState != ssStop) ) |
| { |
| UPT_FLAG_TYPE nMask = getFlags( pTextStart, pSrc - pTextStart ); |
| if ( nMask & TOKEN_EXCLUDED ) |
| eState = ssBounce; |
| if ( bMightBeWord ) |
| { // only relevant for ssGetValue fall back |
| if ( eState == ssGetChar || eState == ssRewindFromValue || |
| eState == ssIgnoreLeadingInRewind ) |
| bMightBeWord = ((nMask & TOKEN_CHAR_WORD) != 0); |
| else |
| bMightBeWord = ((nMask & TOKEN_WORD) != 0); |
| } |
| sal_Int32 nParseTokensType = getParseTokensType( pTextStart, pSrc - pTextStart ); |
| pSrc++; |
| switch (eState) |
| { |
| case ssGetChar : |
| case ssRewindFromValue : |
| case ssIgnoreLeadingInRewind : |
| { |
| if ( (nMask & TOKEN_CHAR_VALUE) && eState != ssRewindFromValue |
| && eState != ssIgnoreLeadingInRewind ) |
| { //! must be first, may fall back to ssGetWord via bMightBeWord |
| eState = ssGetValue; |
| if ( nMask & TOKEN_VALUE_DIGIT ) |
| { |
| if ( 128 <= c ) |
| r.TokenType = KParseType::UNI_NUMBER; |
| else |
| r.TokenType = KParseType::ASC_NUMBER; |
| } |
| else if ( c == cDecimalSep ) |
| { |
| if ( *pSrc ) |
| ++nDecSeps; |
| else |
| eState = ssRewindFromValue; |
| // retry for ONE_SINGLE_CHAR or others |
| } |
| } |
| else if ( nMask & TOKEN_CHAR_WORD ) |
| { |
| eState = ssGetWord; |
| r.TokenType = KParseType::IDENTNAME; |
| } |
| else if ( nMask & TOKEN_NAME_SEP ) |
| { |
| eState = ssGetWordFirstChar; |
| bQuote = true; |
| pSym++; |
| nParseTokensType = 0; // will be taken of first real character |
| r.TokenType = KParseType::SINGLE_QUOTE_NAME; |
| } |
| else if ( nMask & TOKEN_CHAR_STRING ) |
| { |
| eState = ssGetString; |
| pSym++; |
| nParseTokensType = 0; // will be taken of first real character |
| r.TokenType = KParseType::DOUBLE_QUOTE_STRING; |
| } |
| else if ( nMask & TOKEN_CHAR_DONTCARE ) |
| { |
| if ( nStartTypes & KParseTokens::IGNORE_LEADING_WS ) |
| { |
| if (eState == ssRewindFromValue) |
| eState = ssIgnoreLeadingInRewind; |
| r.LeadingWhiteSpace++; |
| pSym++; |
| nParseTokensType = 0; // wait until real character |
| bMightBeWord = true; |
| } |
| else |
| eState = ssBounce; |
| } |
| else if ( nMask & TOKEN_CHAR_BOOL ) |
| { |
| eState = ssGetBool; |
| r.TokenType = KParseType::BOOLEAN; |
| } |
| else if ( nMask & TOKEN_CHAR ) |
| { //! must be last |
| eState = ssStop; |
| r.TokenType = KParseType::ONE_SINGLE_CHAR; |
| } |
| else |
| eState = ssBounce; // not known |
| } |
| break; |
| case ssGetValue : |
| { |
| if ( nMask & TOKEN_VALUE_DIGIT ) |
| { |
| if ( 128 <= c ) |
| r.TokenType = KParseType::UNI_NUMBER; |
| else if ( r.TokenType != KParseType::UNI_NUMBER ) |
| r.TokenType = KParseType::ASC_NUMBER; |
| } |
| if ( nMask & TOKEN_VALUE ) |
| { |
| if ( c == cDecimalSep && ++nDecSeps > 1 ) |
| { |
| if ( pSrc - pTextStart == 2 ) |
| eState = ssRewindFromValue; |
| // consecutive separators |
| else |
| eState = ssStopBack; |
| } |
| // else keep it going |
| } |
| else if ( c == 'E' || c == 'e' ) |
| { |
| UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart ); |
| if ( nNext & TOKEN_VALUE_EXP ) |
| ; // keep it going |
| else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) ) |
| { // might be a numerical name (1.2efg) |
| eState = ssGetWord; |
| r.TokenType = KParseType::IDENTNAME; |
| } |
| else |
| eState = ssStopBack; |
| } |
| else if ( nMask & TOKEN_VALUE_SIGN ) |
| { |
| if ( (cLast == 'E') || (cLast == 'e') ) |
| { |
| UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart ); |
| if ( nNext & TOKEN_VALUE_EXP_VALUE ) |
| ; // keep it going |
| else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) ) |
| { // might be a numerical name (1.2e+fg) |
| eState = ssGetWord; |
| r.TokenType = KParseType::IDENTNAME; |
| } |
| else |
| eState = ssStopBack; |
| } |
| else if ( bMightBeWord ) |
| { // might be a numerical name (1.2+fg) |
| eState = ssGetWord; |
| r.TokenType = KParseType::IDENTNAME; |
| } |
| else |
| eState = ssStopBack; |
| } |
| else if ( bMightBeWord && (nMask & TOKEN_WORD) ) |
| { // might be a numerical name (1995.A1) |
| eState = ssGetWord; |
| r.TokenType = KParseType::IDENTNAME; |
| } |
| else |
| eState = ssStopBack; |
| } |
| break; |
| case ssGetWordFirstChar : |
| eState = ssGetWord; |
| // fall thru |
| case ssGetWord : |
| { |
| if ( nMask & TOKEN_WORD ) |
| ; // keep it going |
| else if ( nMask & TOKEN_NAME_SEP ) |
| { |
| if ( bQuote ) |
| { |
| if ( cLast == '\\' ) |
| { // escaped |
| aSymbol += OUString( pSym, pSrc - pSym - 2 ); |
| aSymbol += OUString( &c, 1); |
| } |
| else |
| { |
| eState = ssStop; |
| aSymbol += OUString( pSym, pSrc - pSym - 1 ); |
| } |
| pSym = pSrc; |
| } |
| else |
| eState = ssStopBack; |
| } |
| else if ( bQuote ) |
| ; // keep it going |
| else |
| eState = ssStopBack; |
| } |
| break; |
| case ssGetString : |
| { |
| if ( nMask & TOKEN_STRING_SEP ) |
| { |
| if ( cLast == '\\' ) |
| { // escaped |
| aSymbol += OUString( pSym, pSrc - pSym - 2 ); |
| aSymbol += OUString( &c, 1); |
| } |
| else if ( c == *pSrc && |
| !(nContTypes & KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING) ) |
| { // "" => literal " escaped |
| aSymbol += OUString( pSym, pSrc - pSym ); |
| pSrc++; |
| } |
| else |
| { |
| eState = ssStop; |
| aSymbol += OUString( pSym, pSrc - pSym - 1 ); |
| } |
| pSym = pSrc; |
| } |
| } |
| break; |
| case ssGetBool : |
| { |
| if ( (nMask & TOKEN_BOOL) ) |
| eState = ssStop; // maximum 2: <, >, <>, <=, >= |
| else |
| eState = ssStopBack; |
| } |
| break; |
| case ssStopBack : |
| case ssBounce : |
| case ssStop : |
| ; // nothing, no compiler warning |
| break; |
| } |
| if ( eState == ssRewindFromValue ) |
| { |
| r = ParseResult(); |
| pSym = pTextStart; |
| pSrc = pSym; |
| aSymbol = OUString(); |
| c = *pSrc; |
| cLast = 0; |
| nDecSeps = 0; |
| bQuote = false; |
| bMightBeWord = true; |
| bMightBeWordLast = true; |
| } |
| else |
| { |
| if ( !(r.TokenType & nTokenType) ) |
| { |
| if ( (r.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER)) |
| && (nTokenType & KParseType::IDENTNAME) && bMightBeWord ) |
| ; // keep a number that might be a word |
| else if ( r.LeadingWhiteSpace == (pSrc - pTextStart) ) |
| ; // keep ignored white space |
| else if ( !r.TokenType && eState == ssGetValue && (nMask & TOKEN_VALUE_SEP) ) |
| ; // keep uncertain value |
| else |
| eState = ssBounce; |
| } |
| if ( eState == ssBounce ) |
| { |
| r.TokenType = 0; |
| eState = ssStopBack; |
| } |
| if ( eState == ssStopBack ) |
| { // put back |
| pSrc--; |
| bMightBeWord = bMightBeWordLast; |
| eState = ssStop; |
| } |
| if ( eState != ssStop ) |
| { |
| if ( !r.StartFlags ) |
| r.StartFlags |= nParseTokensType; |
| else |
| r.ContFlags |= nParseTokensType; |
| } |
| bMightBeWordLast = bMightBeWord; |
| cLast = c; |
| c = *pSrc; |
| } |
| } |
| // r.CharLen is the length in characters (not code points) of the parsed |
| // token not including any leading white space, change this calculation if |
| // multi-code-point Unicode characters are to be supported. |
| r.CharLen = pSrc - pTextStart - r.LeadingWhiteSpace; |
| r.EndPos = nPos + (pSrc - pTextStart); |
| if ( r.TokenType & KParseType::ASC_NUMBER ) |
| { |
| r.Value = rtl_math_uStringToDouble( pTextStart + r.LeadingWhiteSpace, |
| pTextStart + r.EndPos, cDecimalSep, cGroupSep, NULL, NULL ); |
| if ( bMightBeWord ) |
| r.TokenType |= KParseType::IDENTNAME; |
| } |
| else if ( r.TokenType & KParseType::UNI_NUMBER ) |
| { |
| if ( !xNatNumSup.is() ) |
| { |
| #define NATIVENUMBERSUPPLIER_SERVICENAME "com.sun.star.i18n.NativeNumberSupplier" |
| if ( xMSF.is() ) |
| { |
| xNatNumSup = Reference< XNativeNumberSupplier > ( |
| xMSF->createInstance( OUString( |
| RTL_CONSTASCII_USTRINGPARAM( |
| NATIVENUMBERSUPPLIER_SERVICENAME ) ) ), |
| UNO_QUERY ); |
| } |
| if ( !xNatNumSup.is() ) |
| { |
| throw RuntimeException( OUString( |
| #ifdef DBG_UTIL |
| RTL_CONSTASCII_USTRINGPARAM( |
| "cclass_Unicode::parseText: can't instanciate " |
| NATIVENUMBERSUPPLIER_SERVICENAME ) |
| #endif |
| ), *this ); |
| } |
| #undef NATIVENUMBERSUPPLIER_SERVICENAME |
| } |
| OUString aTmp( pTextStart + r.LeadingWhiteSpace, r.EndPos - nPos + |
| r.LeadingWhiteSpace ); |
| // transliterate to ASCII |
| aTmp = xNatNumSup->getNativeNumberString( aTmp, aParserLocale, |
| NativeNumberMode::NATNUM0 ); |
| r.Value = ::rtl::math::stringToDouble( aTmp, cDecimalSep, cGroupSep, NULL, NULL ); |
| if ( bMightBeWord ) |
| r.TokenType |= KParseType::IDENTNAME; |
| } |
| else if ( r.TokenType & (KParseType::SINGLE_QUOTE_NAME | KParseType::DOUBLE_QUOTE_STRING) ) |
| { |
| if ( pSym < pSrc ) |
| { //! open quote |
| aSymbol += OUString( pSym, pSrc - pSym ); |
| r.TokenType |= KParseType::MISSING_QUOTE; |
| } |
| r.DequotedNameOrString = aSymbol; |
| } |
| } |
| |
| } } } } |