// AOO410/main/i18npool/source/characterclassification/cclass_unicode_parser.cxx - openoffice - Git at Google

 /**************************************************************
  *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  *
  *************************************************************/


 // MARKER(update_precomp.py): autogen include statement, do not remove
 #include "precompiled_i18npool.hxx"

 #include <cclass_unicode.hxx>
 #include <unicode/uchar.h>
 #include <rtl/math.hxx>
 #include <rtl/ustring.hxx>
 #include <com/sun/star/i18n/KParseTokens.hpp>
 #include <com/sun/star/i18n/KParseType.hpp>
 #include <com/sun/star/i18n/UnicodeType.hpp>
 #include <com/sun/star/i18n/XLocaleData.hpp>
 #include <com/sun/star/i18n/NativeNumberMode.hpp>

 #include <string.h>		// memcpy()

 using namespace ::com::sun::star::uno;
 using namespace ::com::sun::star::lang;
 using namespace ::rtl;

 namespace com { namespace sun { namespace star { namespace i18n {

 // Per-character parser flags (bit mask values) stored in the parser tables
 // and returned by getFlags() / getFlagsExtended(). As used by parseText():
 //   TOKEN_CHAR            character forms a token on its own (ONE_SINGLE_CHAR)
 //   TOKEN_CHAR_BOOL       character may start a boolean operator
 //   TOKEN_CHAR_WORD       character may start an identifier / name
 //   TOKEN_CHAR_VALUE      character may start a numeric value
 //   TOKEN_CHAR_STRING     character opens a double quoted string
 //   TOKEN_CHAR_DONTCARE   character is skipped as leading white space if
 //                         IGNORE_LEADING_WS is set, otherwise parsing bounces
 //   TOKEN_BOOL            character may complete a boolean operator
 //   TOKEN_WORD            character may continue an identifier / name
 //   TOKEN_VALUE           character may continue a numeric value
 //   TOKEN_VALUE_EXP       tested on the character following an 'E'/'e' in a value
 //   TOKEN_VALUE_SIGN      character is a sign inside a value
 //   TOKEN_VALUE_EXP_VALUE tested on the character following an exponent sign
 //   TOKEN_VALUE_DIGIT     character is a digit
 //   TOKEN_NAME_SEP        delimiter of a single quoted name
 //   TOKEN_STRING_SEP      delimiter that closes a double quoted string
 //   TOKEN_EXCLUDED        character makes the parser bounce
 // TOKEN_WORD_SEP and TOKEN_VALUE_SEP are only set in the default table in
 // this part of the file; presumably they mark word / value separators
 // (TODO confirm against the remainder of the parser).
 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_ILLEGAL		= 0x00000000;
 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR			= 0x00000001;
 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_BOOL	= 0x00000002;
 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_WORD	= 0x00000004;
 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_VALUE	= 0x00000008;
 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_STRING	= 0x00000010;
 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_DONTCARE= 0x00000020;
 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_BOOL			= 0x00000040;
 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD			= 0x00000080;
 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD_SEP		= 0x00000100;
 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE		= 0x00000200;
 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SEP	= 0x00000400;
 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP	= 0x00000800;
 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SIGN	= 0x00001000;
 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP_VALUE	= 0x00002000;
 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_DIGIT	= 0x00004000;
 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_NAME_SEP		= 0x20000000;
 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_STRING_SEP	= 0x40000000;
 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_EXCLUDED		= 0x80000000;

 // Flag combination shared by all digit characters: a digit may start a
 // value, continue a value, follow an exponent marker or an exponent sign,
 // and is marked as a digit.
 #define TOKEN_DIGIT_FLAGS (TOKEN_CHAR_VALUE | TOKEN_VALUE | TOKEN_VALUE_EXP | TOKEN_VALUE_EXP_VALUE | TOKEN_VALUE_DIGIT)

 // Default identifier/name specification is [A-Za-z_][A-Za-z0-9_]*

 const sal_uInt8 cclass_Unicode::nDefCnt = 128;
 const UPT_FLAG_TYPE cclass_Unicode::pDefaultParserTable[ nDefCnt ] =
 {
 // (...) == Calc formula compiler specific, commented out and modified

 	/* \0 */	TOKEN_EXCLUDED,
 				TOKEN_ILLEGAL,
 				TOKEN_ILLEGAL,
 				TOKEN_ILLEGAL,
 				TOKEN_ILLEGAL,
 				TOKEN_ILLEGAL,
 				TOKEN_ILLEGAL,
 				TOKEN_ILLEGAL,
 				TOKEN_ILLEGAL,
 	/*  9 \t */	TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,		// (TOKEN_ILLEGAL)
 				TOKEN_ILLEGAL,
 	/* 11 \v */	TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,		// (TOKEN_ILLEGAL)
 				TOKEN_ILLEGAL,
 				TOKEN_ILLEGAL,
 				TOKEN_ILLEGAL,
 				TOKEN_ILLEGAL,
 				TOKEN_ILLEGAL,
 				TOKEN_ILLEGAL,
 				TOKEN_ILLEGAL,
 				TOKEN_ILLEGAL,
 				TOKEN_ILLEGAL,
 				TOKEN_ILLEGAL,
 				TOKEN_ILLEGAL,
 				TOKEN_ILLEGAL,
 				TOKEN_ILLEGAL,
 				TOKEN_ILLEGAL,
 				TOKEN_ILLEGAL,
 				TOKEN_ILLEGAL,
 				TOKEN_ILLEGAL,
 				TOKEN_ILLEGAL,
 				TOKEN_ILLEGAL,
 				TOKEN_ILLEGAL,
 	/*  32   */	TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
 	/*  33 ! */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
 	/*  34 " */	TOKEN_CHAR_STRING | TOKEN_STRING_SEP,
 	/*  35 # */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_WORD_SEP)
 	/*  36 $ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_CHAR_WORD | TOKEN_WORD)
 	/*  37 % */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_VALUE)
 	/*  38 & */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
 	/*  39 ' */	TOKEN_NAME_SEP,
 	/*  40 ( */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
 	/*  41 ) */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
 	/*  42 * */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
 	/*  43 + */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN,
 	/*  44 , */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_CHAR_VALUE | TOKEN_VALUE)
 	/*  45 - */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN,
 	/*  46 . */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_WORD | TOKEN_CHAR_VALUE | TOKEN_VALUE)
 	/*  47 / */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
 	//for ( i = 48; i < 58; i++ )
 	/*  48 0 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
 	/*  49 1 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
 	/*  50 2 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
 	/*  51 3 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
 	/*  52 4 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
 	/*  53 5 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
 	/*  54 6 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
 	/*  55 7 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
 	/*  56 8 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
 	/*  57 9 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
 	/*  58 : */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_WORD)
 	/*  59 ; */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
 	/*  60 < */	TOKEN_CHAR_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
 	/*  61 = */	TOKEN_CHAR | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
 	/*  62 > */	TOKEN_CHAR_BOOL | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
 	/*  63 ? */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_CHAR_WORD | TOKEN_WORD)
 	/*  64 @ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
 	//for ( i = 65; i < 91; i++ )
 	/*  65 A */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/*  66 B */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/*  67 C */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/*  68 D */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/*  69 E */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/*  70 F */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/*  71 G */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/*  72 H */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/*  73 I */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/*  74 J */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/*  75 K */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/*  76 L */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/*  77 M */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/*  78 N */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/*  79 O */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/*  80 P */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/*  81 Q */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/*  82 R */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/*  83 S */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/*  84 T */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/*  85 U */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/*  86 V */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/*  87 W */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/*  88 X */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/*  89 Y */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/*  90 Z */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/*  91 [ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
 	/*  92 \ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
 	/*  93 ] */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
 	/*  94 ^ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
 	/*  95 _ */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/*  96 ` */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
 	//for ( i = 97; i < 123; i++ )
 	/*  97 a */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/*  98 b */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/*  99 c */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/* 100 d */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/* 101 e */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/* 102 f */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/* 103 g */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/* 104 h */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/* 105 i */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/* 106 j */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/* 107 k */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/* 108 l */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/* 109 m */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/* 110 n */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/* 111 o */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/* 112 p */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/* 113 q */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/* 114 r */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/* 115 s */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/* 116 t */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/* 117 u */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/* 118 v */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/* 119 w */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/* 120 x */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/* 121 y */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/* 122 z */	TOKEN_CHAR_WORD | TOKEN_WORD,
 	/* 123 { */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
 	/* 124 | */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
 	/* 125 } */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
 	/* 126 ~ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
 	/* 127   */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP	// (TOKEN_ILLEGAL // UNUSED)
 };


 // KParseTokens::ASC_... classification for the character codes
 // 0 .. nDefCnt-1, indexed by character code. getParseTokensType() returns
 // the entry directly for every character below nDefCnt; characters at or
 // above nDefCnt are classified through ICU instead.
 const sal_Int32 cclass_Unicode::pParseTokensType[ nDefCnt ] =
 {
 	/* \0 */	KParseTokens::ASC_OTHER,
 				KParseTokens::ASC_CONTROL,
 				KParseTokens::ASC_CONTROL,
 				KParseTokens::ASC_CONTROL,
 				KParseTokens::ASC_CONTROL,
 				KParseTokens::ASC_CONTROL,
 				KParseTokens::ASC_CONTROL,
 				KParseTokens::ASC_CONTROL,
 				KParseTokens::ASC_CONTROL,
 	/*  9 \t */	KParseTokens::ASC_CONTROL,
 				KParseTokens::ASC_CONTROL,
 	/* 11 \v */	KParseTokens::ASC_CONTROL,
 				KParseTokens::ASC_CONTROL,
 				KParseTokens::ASC_CONTROL,
 				KParseTokens::ASC_CONTROL,
 				KParseTokens::ASC_CONTROL,
 				KParseTokens::ASC_CONTROL,
 				KParseTokens::ASC_CONTROL,
 				KParseTokens::ASC_CONTROL,
 				KParseTokens::ASC_CONTROL,
 				KParseTokens::ASC_CONTROL,
 				KParseTokens::ASC_CONTROL,
 				KParseTokens::ASC_CONTROL,
 				KParseTokens::ASC_CONTROL,
 				KParseTokens::ASC_CONTROL,
 				KParseTokens::ASC_CONTROL,
 				KParseTokens::ASC_CONTROL,
 				KParseTokens::ASC_CONTROL,
 				KParseTokens::ASC_CONTROL,
 				KParseTokens::ASC_CONTROL,
 				KParseTokens::ASC_CONTROL,
 				KParseTokens::ASC_CONTROL,
 	/*  32   */	KParseTokens::ASC_OTHER,
 	/*  33 ! */	KParseTokens::ASC_OTHER,
 	/*  34 " */	KParseTokens::ASC_OTHER,
 	/*  35 # */	KParseTokens::ASC_OTHER,
 	/*  36 $ */	KParseTokens::ASC_DOLLAR,
 	/*  37 % */	KParseTokens::ASC_OTHER,
 	/*  38 & */	KParseTokens::ASC_OTHER,
 	/*  39 ' */	KParseTokens::ASC_OTHER,
 	/*  40 ( */	KParseTokens::ASC_OTHER,
 	/*  41 ) */	KParseTokens::ASC_OTHER,
 	/*  42 * */	KParseTokens::ASC_OTHER,
 	/*  43 + */	KParseTokens::ASC_OTHER,
 	/*  44 , */	KParseTokens::ASC_OTHER,
 	/*  45 - */	KParseTokens::ASC_OTHER,
 	/*  46 . */	KParseTokens::ASC_DOT,
 	/*  47 / */	KParseTokens::ASC_OTHER,
 	//for ( i = 48; i < 58; i++ )
 	/*  48 0 */	KParseTokens::ASC_DIGIT,
 	/*  49 1 */	KParseTokens::ASC_DIGIT,
 	/*  50 2 */	KParseTokens::ASC_DIGIT,
 	/*  51 3 */	KParseTokens::ASC_DIGIT,
 	/*  52 4 */	KParseTokens::ASC_DIGIT,
 	/*  53 5 */	KParseTokens::ASC_DIGIT,
 	/*  54 6 */	KParseTokens::ASC_DIGIT,
 	/*  55 7 */	KParseTokens::ASC_DIGIT,
 	/*  56 8 */	KParseTokens::ASC_DIGIT,
 	/*  57 9 */	KParseTokens::ASC_DIGIT,
 	/*  58 : */	KParseTokens::ASC_COLON,
 	/*  59 ; */	KParseTokens::ASC_OTHER,
 	/*  60 < */	KParseTokens::ASC_OTHER,
 	/*  61 = */	KParseTokens::ASC_OTHER,
 	/*  62 > */	KParseTokens::ASC_OTHER,
 	/*  63 ? */	KParseTokens::ASC_OTHER,
 	/*  64 @ */	KParseTokens::ASC_OTHER,
 	//for ( i = 65; i < 91; i++ )
 	/*  65 A */	KParseTokens::ASC_UPALPHA,
 	/*  66 B */	KParseTokens::ASC_UPALPHA,
 	/*  67 C */	KParseTokens::ASC_UPALPHA,
 	/*  68 D */	KParseTokens::ASC_UPALPHA,
 	/*  69 E */	KParseTokens::ASC_UPALPHA,
 	/*  70 F */	KParseTokens::ASC_UPALPHA,
 	/*  71 G */	KParseTokens::ASC_UPALPHA,
 	/*  72 H */	KParseTokens::ASC_UPALPHA,
 	/*  73 I */	KParseTokens::ASC_UPALPHA,
 	/*  74 J */	KParseTokens::ASC_UPALPHA,
 	/*  75 K */	KParseTokens::ASC_UPALPHA,
 	/*  76 L */	KParseTokens::ASC_UPALPHA,
 	/*  77 M */	KParseTokens::ASC_UPALPHA,
 	/*  78 N */	KParseTokens::ASC_UPALPHA,
 	/*  79 O */	KParseTokens::ASC_UPALPHA,
 	/*  80 P */	KParseTokens::ASC_UPALPHA,
 	/*  81 Q */	KParseTokens::ASC_UPALPHA,
 	/*  82 R */	KParseTokens::ASC_UPALPHA,
 	/*  83 S */	KParseTokens::ASC_UPALPHA,
 	/*  84 T */	KParseTokens::ASC_UPALPHA,
 	/*  85 U */	KParseTokens::ASC_UPALPHA,
 	/*  86 V */	KParseTokens::ASC_UPALPHA,
 	/*  87 W */	KParseTokens::ASC_UPALPHA,
 	/*  88 X */	KParseTokens::ASC_UPALPHA,
 	/*  89 Y */	KParseTokens::ASC_UPALPHA,
 	/*  90 Z */	KParseTokens::ASC_UPALPHA,
 	/*  91 [ */	KParseTokens::ASC_OTHER,
 	/*  92 \ */	KParseTokens::ASC_OTHER,
 	/*  93 ] */	KParseTokens::ASC_OTHER,
 	/*  94 ^ */	KParseTokens::ASC_OTHER,
 	/*  95 _ */	KParseTokens::ASC_UNDERSCORE,
 	/*  96 ` */	KParseTokens::ASC_OTHER,
 	//for ( i = 97; i < 123; i++ )
 	/*  97 a */	KParseTokens::ASC_LOALPHA,
 	/*  98 b */	KParseTokens::ASC_LOALPHA,
 	/*  99 c */	KParseTokens::ASC_LOALPHA,
 	/* 100 d */	KParseTokens::ASC_LOALPHA,
 	/* 101 e */	KParseTokens::ASC_LOALPHA,
 	/* 102 f */	KParseTokens::ASC_LOALPHA,
 	/* 103 g */	KParseTokens::ASC_LOALPHA,
 	/* 104 h */	KParseTokens::ASC_LOALPHA,
 	/* 105 i */	KParseTokens::ASC_LOALPHA,
 	/* 106 j */	KParseTokens::ASC_LOALPHA,
 	/* 107 k */	KParseTokens::ASC_LOALPHA,
 	/* 108 l */	KParseTokens::ASC_LOALPHA,
 	/* 109 m */	KParseTokens::ASC_LOALPHA,
 	/* 110 n */	KParseTokens::ASC_LOALPHA,
 	/* 111 o */	KParseTokens::ASC_LOALPHA,
 	/* 112 p */	KParseTokens::ASC_LOALPHA,
 	/* 113 q */	KParseTokens::ASC_LOALPHA,
 	/* 114 r */	KParseTokens::ASC_LOALPHA,
 	/* 115 s */	KParseTokens::ASC_LOALPHA,
 	/* 116 t */	KParseTokens::ASC_LOALPHA,
 	/* 117 u */	KParseTokens::ASC_LOALPHA,
 	/* 118 v */	KParseTokens::ASC_LOALPHA,
 	/* 119 w */	KParseTokens::ASC_LOALPHA,
 	/* 120 x */	KParseTokens::ASC_LOALPHA,
 	/* 121 y */	KParseTokens::ASC_LOALPHA,
 	/* 122 z */	KParseTokens::ASC_LOALPHA,
 	/* 123 { */	KParseTokens::ASC_OTHER,
 	/* 124 | */	KParseTokens::ASC_OTHER,
 	/* 125 } */	KParseTokens::ASC_OTHER,
 	/* 126 ~ */	KParseTokens::ASC_OTHER,
 	/* 127   */	KParseTokens::ASC_OTHER
 };


 // static
 /** Searches a NUL terminated UTF-16 string for a character.

     @param pStr  string to search, may be NULL
     @param c     character to look for; the terminating NUL itself is
                  never matched
     @return pointer to the first occurrence of c inside pStr, or NULL if
             pStr is NULL or c does not occur
  */
 const sal_Unicode* cclass_Unicode::StrChr( const sal_Unicode* pStr, sal_Unicode c )
 {
 	if ( pStr )
 	{
 		for ( const sal_Unicode* pCur = pStr; *pCur; ++pCur )
 		{
 			if ( *pCur == c )
 				return pCur;
 		}
 	}
 	return NULL;
 }


 /** Maps the character at aStr[nPos] to its KParseTokens classification.

     Characters below nDefCnt are looked up in pParseTokensType; all other
     characters are classified by their ICU general category.

     @param aStr  text being parsed
     @param nPos  index of the character to classify; 0 denotes the leading
                  character of aStr
     @return one of the KParseTokens::ASC_... or KParseTokens::UNI_... values,
             KParseTokens::UNI_OTHER if no specific category matches
  */
 sal_Int32 cclass_Unicode::getParseTokensType( const sal_Unicode* aStr, sal_Int32 nPos )
 {
 	sal_Unicode c = aStr[nPos];
 	if ( c < nDefCnt )
 		return pParseTokensType[ sal_uInt8(c) ];
 	else
 	{

 		//! all KParseTokens::UNI_... must be matched
         switch ( u_charType( (sal_uInt32) c ) )
 		{
 			case U_UPPERCASE_LETTER :
 				return KParseTokens::UNI_UPALPHA;
 			case U_LOWERCASE_LETTER :
 				return KParseTokens::UNI_LOALPHA;
 			case U_TITLECASE_LETTER :
 				return KParseTokens::UNI_TITLE_ALPHA;
 			case U_MODIFIER_LETTER :
 				return KParseTokens::UNI_MODIFIER_LETTER;
 			case U_NON_SPACING_MARK :
 			case U_COMBINING_SPACING_MARK :
 				// A combining mark can not be a leading character; this
 				// mirrors the handling in getFlagsExtended(). Only the
 				// marks are position dependent, a real Other_Letter is
 				// reported as such at any position.
 				if (nPos == 0) break;
 				// fall through, treat it as Other_Letter.
 			case U_OTHER_LETTER :
 				return KParseTokens::UNI_OTHER_LETTER;
 			case U_DECIMAL_DIGIT_NUMBER :
 				return KParseTokens::UNI_DIGIT;
 			case U_LETTER_NUMBER :
 				return KParseTokens::UNI_LETTER_NUMBER;
 			case U_OTHER_NUMBER :
 				return KParseTokens::UNI_OTHER_NUMBER;
 		}

 		return KParseTokens::UNI_OTHER;
 	}
 }

 /** Remembers rLocale as the parser locale and makes sure the locale data
     service reference is available.

     The LocaleData service is instantiated through xMSF only if
     xLocaleData is not set yet and a service factory is present.

     @param rLocale  locale the parser is to be set up for
     @return true if rLocale differs from the previously stored parser
             locale in language, country or variant
  */
 sal_Bool cclass_Unicode::setupInternational( const Locale& rLocale )
 {
 	const bool bSameLocale =
 		aParserLocale.Language == rLocale.Language &&
 		aParserLocale.Country == rLocale.Country &&
 		aParserLocale.Variant == rLocale.Variant;
 	if ( !bSameLocale )
 	{
 		aParserLocale.Language = rLocale.Language;
 		aParserLocale.Country = rLocale.Country;
 		aParserLocale.Variant = rLocale.Variant;
 	}

 	if ( !xLocaleData.is() && xMSF.is() )
 	{
 		const OUString aServiceName(
 			RTL_CONSTASCII_USTRINGPARAM( "com.sun.star.i18n.LocaleData" ) );
 		Reference< XInterface > xInstance = xMSF->createInstance( aServiceName );
 		if ( xInstance.is() )
 		{
 			Any aQueried = xInstance->queryInterface(
 				getCppuType( (const Reference< XLocaleData >*)0 ) );
 			aQueried >>= xLocaleData;
 		}
 	}

 	sal_Bool bChanged = !bSameLocale;
 	return bChanged;
 }


 void cclass_Unicode::setupParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
             const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
             const OUString& userDefinedCharactersCont )
 {
 	bool bIntlEqual = (rLocale.Language == aParserLocale.Language &&
 		rLocale.Country == aParserLocale.Country &&
 		rLocale.Variant == aParserLocale.Variant);
 	if ( !pTable || !bIntlEqual ||
 			startCharTokenType != nStartTypes ||
 			contCharTokenType != nContTypes ||
 			userDefinedCharactersStart != aStartChars ||
 			userDefinedCharactersCont != aContChars )
 		initParserTable( rLocale, startCharTokenType, userDefinedCharactersStart,
 			contCharTokenType, userDefinedCharactersCont );
 }


 /** (Re)builds the parser tables for the given settings.

     Steps:
     - store the locale and obtain locale data via setupInternational(),
     - reset pTable to a copy of pDefaultParserTable,
     - mark the locale's group and decimal separators as value characters,
     - switch word start / continuation flags of ASCII character classes on
       or off according to the KParseTokens bits in the two type masks,
     - merge in the user defined start and continuation characters.

     @param rLocale                     locale to take the separators from
     @param startCharTokenType          KParseTokens mask for start characters
     @param userDefinedCharactersStart  additional characters allowed at the
                                        start of a word
     @param contCharTokenType           KParseTokens mask for continuation
                                        characters
     @param userDefinedCharactersCont   additional characters allowed inside
                                        a word
  */
 void cclass_Unicode::initParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
             const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
             const OUString& userDefinedCharactersCont )
 {
 	// (Re)Init
 	setupInternational( rLocale );
 	// Memory of pTable is reused.
 	if ( !pTable )
 		pTable = new UPT_FLAG_TYPE[nDefCnt];
 	memcpy( pTable, pDefaultParserTable, sizeof(UPT_FLAG_TYPE) * nDefCnt );
 	// Start and cont tables only need reallocation if different length.
     if ( pStart && userDefinedCharactersStart.getLength() != aStartChars.getLength() )
 	{
 		delete [] pStart;
 		pStart = NULL;
 	}
     if ( pCont && userDefinedCharactersCont.getLength() != aContChars.getLength() )
 	{
 		delete [] pCont;
 		pCont = NULL;
 	}
 	nStartTypes = startCharTokenType;
 	nContTypes = contCharTokenType;
 	aStartChars = userDefinedCharactersStart;
 	aContChars = userDefinedCharactersCont;

 	// specials
 	if( xLocaleData.is() )
 	{
 		LocaleDataItem aItem =
 			xLocaleData->getLocaleItem( aParserLocale );
 //!TODO: theoretically separators may be a string, adjustment would have to be
 //! done here and in parsing and in ::rtl::math::stringToDouble()
 		cGroupSep = aItem.thousandSeparator.getStr()[0];
         cDecimalSep = aItem.decimalSeparator.getStr()[0];
 	}

 	// Separators outside the table range are handled in getFlagsExtended().
 	if ( cGroupSep < nDefCnt )
 		pTable[cGroupSep] |= TOKEN_VALUE;
 	if ( cDecimalSep < nDefCnt )
 		pTable[cDecimalSep] |= TOKEN_CHAR_VALUE | TOKEN_VALUE;

 	// Modify characters according to KParseTokens definitions.
 	{
 		using namespace KParseTokens;
 		sal_uInt8 i;

 		if ( !(nStartTypes & ASC_UPALPHA) )
 			for ( i = 65; i < 91; i++ )
 				pTable[i] &= ~TOKEN_CHAR_WORD;	// not allowed as start character
 		if ( !(nContTypes & ASC_UPALPHA) )
 			for ( i = 65; i < 91; i++ )
 				pTable[i] &= ~TOKEN_WORD;		// not allowed as cont character

 		if ( !(nStartTypes & ASC_LOALPHA) )
 			for ( i = 97; i < 123; i++ )
 				pTable[i] &= ~TOKEN_CHAR_WORD;	// not allowed as start character
 		if ( !(nContTypes & ASC_LOALPHA) )
 			for ( i = 97; i < 123; i++ )
 				pTable[i] &= ~TOKEN_WORD;		// not allowed as cont character

 		if ( nStartTypes & ASC_DIGIT )
 			for ( i = 48; i < 58; i++ )
 				pTable[i] |= TOKEN_CHAR_WORD;	// allowed as start character
 		if ( !(nContTypes & ASC_DIGIT) )
 			for ( i = 48; i < 58; i++ )
 				pTable[i] &= ~TOKEN_WORD;		// not allowed as cont character

 		if ( !(nStartTypes & ASC_UNDERSCORE) )
 			pTable[95] &= ~TOKEN_CHAR_WORD;		// not allowed as start character
 		if ( !(nContTypes & ASC_UNDERSCORE) )
 			pTable[95] &= ~TOKEN_WORD;			// not allowed as cont character

 		if ( nStartTypes & ASC_DOLLAR )
 			pTable[36] |= TOKEN_CHAR_WORD;		// allowed as start character
 		if ( nContTypes & ASC_DOLLAR )
 			pTable[36] |= TOKEN_WORD;			// allowed as cont character

 		if ( nStartTypes & ASC_DOT )
 			pTable[46] |= TOKEN_CHAR_WORD;		// allowed as start character
 		if ( nContTypes & ASC_DOT )
 			pTable[46] |= TOKEN_WORD;			// allowed as cont character

 		if ( nStartTypes & ASC_COLON )
 			pTable[58] |= TOKEN_CHAR_WORD;		// allowed as start character
 		if ( nContTypes & ASC_COLON )
 			pTable[58] |= TOKEN_WORD;			// allowed as cont character

 		if ( nStartTypes & ASC_CONTROL )
 			for ( i = 1; i < 32; i++ )
 				pTable[i] |= TOKEN_CHAR_WORD;	// allowed as start character
 		if ( nContTypes & ASC_CONTROL )
 			for ( i = 1; i < 32; i++ )
 				pTable[i] |= TOKEN_WORD;		// allowed as cont character

 		if ( nStartTypes & ASC_ANY_BUT_CONTROL )
 			for ( i = 32; i < nDefCnt; i++ )
 				pTable[i] |= TOKEN_CHAR_WORD;	// allowed as start character
 		if ( nContTypes & ASC_ANY_BUT_CONTROL )
 			for ( i = 32; i < nDefCnt; i++ )
 				pTable[i] |= TOKEN_WORD;		// allowed as cont character

 	}

 	// Merge in (positively override with) user defined characters.
 	// pStart / pCont hold one flag entry per user defined character, in the
 	// order of aStartChars / aContChars; characters inside the table range
 	// are additionally flagged in pTable.
 	// StartChars
     sal_Int32 nLen = aStartChars.getLength();
 	if ( nLen )
 	{
 		if ( !pStart )
 			pStart = new UPT_FLAG_TYPE[ nLen ];
         const sal_Unicode* p = aStartChars.getStr();
         for ( sal_Int32 j=0; j<nLen; j++, p++ )
 		{
 			pStart[j] = TOKEN_CHAR_WORD;
             if ( *p < nDefCnt )
                 pTable[*p] |= TOKEN_CHAR_WORD;
 		}
 	}
 	// ContChars
     nLen = aContChars.getLength();
 	if ( nLen )
 	{
 		if ( !pCont )
 			pCont = new UPT_FLAG_TYPE[ nLen ];
         const sal_Unicode* p = aContChars.getStr();
 		// p has to advance together with j, otherwise only the first
 		// continuation character would ever be merged into pTable.
         for ( sal_Int32 j=0; j<nLen; j++, p++ )
 		{
 			pCont[j] = TOKEN_WORD;
             if ( *p < nDefCnt )
                 pTable[*p] |= TOKEN_WORD;
 		}
 	}
 }


 void cclass_Unicode::destroyParserTable()
 {
 	if ( pCont )
 		delete [] pCont;
 	if ( pStart )
 		delete [] pStart;
 	if ( pTable )
 		delete [] pTable;
 }


 /** Returns the parser flags of the character at aStr[nPos] for the current
     parser state.

     The base flags come from pTable for characters below nDefCnt and from
     getFlagsExtended() otherwise. If the state expects a word start (or a
     word continuation) and the base flags do not permit it, the user
     defined start (or continuation) characters are consulted; when they
     grant the missing flag, TOKEN_EXCLUDED is removed from the result.
  */
 UPT_FLAG_TYPE cclass_Unicode::getFlags( const sal_Unicode* aStr, sal_Int32 nPos )
 {
 	const sal_Unicode c = aStr[nPos];
 	UPT_FLAG_TYPE nMask;
 	if ( c < nDefCnt )
 		nMask = pTable[ sal_uInt8(c) ];
 	else
 		nMask = getFlagsExtended( aStr, nPos );

 	const bool bExpectStart =
 		eState == ssGetChar ||
 		eState == ssRewindFromValue ||
 		eState == ssIgnoreLeadingInRewind ||
 		eState == ssGetWordFirstChar;
 	const bool bExpectCont =
 		eState == ssGetValue ||
 		eState == ssGetWord;

 	if ( bExpectStart )
 	{
 		if ( (nMask & TOKEN_CHAR_WORD) == 0 )
 		{
 			nMask |= getStartCharsFlags( c );
 			if ( (nMask & TOKEN_CHAR_WORD) != 0 )
 				nMask &= ~TOKEN_EXCLUDED;
 		}
 	}
 	else if ( bExpectCont )
 	{
 		if ( (nMask & TOKEN_WORD) == 0 )
 		{
 			nMask |= getContCharsFlags( c );
 			if ( (nMask & TOKEN_WORD) != 0 )
 				nMask &= ~TOKEN_EXCLUDED;
 		}
 	}
 	// all other states use the base flags unchanged
 	return nMask;
 }


 /** Computes the parser flags of the character at aStr[nPos] without using
     pTable; getFlags() calls this for characters at or above nDefCnt.

     The locale's group and decimal separators are recognized first. Any
     other character is classified by its ICU general category and is
     granted the word flag (TOKEN_CHAR_WORD in a start state, TOKEN_WORD
     otherwise) only if the matching KParseTokens::UNI_... bit is set in
     the type mask that applies to the current state. Numeric categories
     always carry TOKEN_DIGIT_FLAGS in addition.
  */
 UPT_FLAG_TYPE cclass_Unicode::getFlagsExtended( const sal_Unicode* aStr, sal_Int32 nPos )
 {
 	const sal_Unicode c = aStr[nPos];
 	if ( c == cGroupSep )
 		return TOKEN_VALUE;
 	if ( c == cDecimalSep )
 		return TOKEN_CHAR_VALUE | TOKEN_VALUE;

 	using namespace i18n;
 	const bool bStart = (eState == ssGetChar || eState == ssGetWordFirstChar ||
 			eState == ssRewindFromValue || eState == ssIgnoreLeadingInRewind);
 	const sal_Int32 nTypes = (bStart ? nStartTypes : nContTypes);
 	// flag granted if the character's category is enabled in nTypes
 	const UPT_FLAG_TYPE nWordFlag = (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD);

 	sal_Int32 nRequiredType = 0;				// KParseTokens bit that enables the category
 	UPT_FLAG_TYPE nAlways = TOKEN_ILLEGAL;		// flags set regardless of nTypes

 	//! all KParseTokens::UNI_... must be matched
 	switch ( u_charType( (sal_uInt32) c ) )
 	{
 		case U_UPPERCASE_LETTER :
 			nRequiredType = KParseTokens::UNI_UPALPHA;
 		break;
 		case U_LOWERCASE_LETTER :
 			nRequiredType = KParseTokens::UNI_LOALPHA;
 		break;
 		case U_TITLECASE_LETTER :
 			nRequiredType = KParseTokens::UNI_TITLE_ALPHA;
 		break;
 		case U_MODIFIER_LETTER :
 			nRequiredType = KParseTokens::UNI_MODIFIER_LETTER;
 		break;
 		case U_NON_SPACING_MARK :
 		case U_COMBINING_SPACING_MARK :
 			// Neither kind of combining mark may lead a word.
 			if ( bStart )
 				return TOKEN_ILLEGAL;
 			// fall through, inside a word it counts as Other_Letter.
 		case U_OTHER_LETTER :
 			nRequiredType = KParseTokens::UNI_OTHER_LETTER;
 		break;
 		case U_DECIMAL_DIGIT_NUMBER :
 			nRequiredType = KParseTokens::UNI_DIGIT;
 			nAlways = TOKEN_DIGIT_FLAGS;
 		break;
 		case U_LETTER_NUMBER :
 			nRequiredType = KParseTokens::UNI_LETTER_NUMBER;
 			nAlways = TOKEN_DIGIT_FLAGS;
 		break;
 		case U_OTHER_NUMBER :
 			nRequiredType = KParseTokens::UNI_OTHER_NUMBER;
 			nAlways = TOKEN_DIGIT_FLAGS;
 		break;
 		case U_SPACE_SEPARATOR :
 			if ( nTypes & KParseTokens::IGNORE_LEADING_WS )
 				return TOKEN_CHAR_DONTCARE;
 			if ( bStart )
 				return TOKEN_CHAR_WORD;
 			return TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP;
 		default :
 			return TOKEN_ILLEGAL;
 	}

 	return ((nTypes & nRequiredType) ? nWordFlag : TOKEN_ILLEGAL) | nAlways;
 }


 /** Looks up c among the user defined start characters.

     @return the pStart entry belonging to the first occurrence of c in
             aStartChars, or TOKEN_ILLEGAL if there is no start character
             table or c is not contained
  */
 UPT_FLAG_TYPE cclass_Unicode::getStartCharsFlags( sal_Unicode c )
 {
 	if ( !pStart )
 		return TOKEN_ILLEGAL;

 	const sal_Unicode* const pChars = aStartChars.getStr();
 	const sal_Unicode* const pHit = StrChr( pChars, c );
 	if ( !pHit )
 		return TOKEN_ILLEGAL;

 	// pStart is indexed in parallel to aStartChars
 	return pStart[ pHit - pChars ];
 }


 /** Looks up c among the user defined continuation characters.

     @return the pCont entry belonging to the first occurrence of c in
             aContChars, or TOKEN_ILLEGAL if there is no continuation
             character table or c is not contained
  */
 UPT_FLAG_TYPE cclass_Unicode::getContCharsFlags( sal_Unicode c )
 {
 	if ( !pCont )
 		return TOKEN_ILLEGAL;

 	const sal_Unicode* const pChars = aContChars.getStr();
 	const sal_Unicode* const pHit = StrChr( pChars, c );
 	if ( !pHit )
 		return TOKEN_ILLEGAL;

 	// pCont is indexed in parallel to aContChars
 	return pCont[ pHit - pChars ];
 }


 void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32 nPos, sal_Int32 nTokenType )
 {
 	using namespace i18n;
 	const sal_Unicode* const pTextStart = rText.getStr() + nPos;
 	eState = ssGetChar;

     //! All the variables below (plus ParseResult) have to be resetted on ssRewindFromValue!
 	const sal_Unicode* pSym = pTextStart;
 	const sal_Unicode* pSrc = pSym;
 	OUString aSymbol;
 	sal_Unicode c = *pSrc;
 	sal_Unicode cLast = 0;
     int nDecSeps = 0;
 	bool bQuote = false;
 	bool bMightBeWord = true;
 	bool bMightBeWordLast = true;
     //! All the variables above (plus ParseResult) have to be resetted on ssRewindFromValue!

 	while ( (c != 0) && (eState != ssStop) )
 	{
 		UPT_FLAG_TYPE nMask = getFlags( pTextStart, pSrc - pTextStart );
 		if ( nMask & TOKEN_EXCLUDED )
 			eState = ssBounce;
 		if ( bMightBeWord )
 		{	// only relevant for ssGetValue fall back
 			if ( eState == ssGetChar || eState == ssRewindFromValue ||
                     eState == ssIgnoreLeadingInRewind )
 				bMightBeWord = ((nMask & TOKEN_CHAR_WORD) != 0);
 			else
 				bMightBeWord = ((nMask & TOKEN_WORD) != 0);
 		}
 		sal_Int32 nParseTokensType = getParseTokensType( pTextStart, pSrc - pTextStart );
 		pSrc++;
 		switch (eState)
 		{
 			case ssGetChar :
             case ssRewindFromValue :
             case ssIgnoreLeadingInRewind :
 			{
                 if ( (nMask & TOKEN_CHAR_VALUE) && eState != ssRewindFromValue
                         && eState != ssIgnoreLeadingInRewind )
 				{	//! must be first, may fall back to ssGetWord via bMightBeWord
 					eState = ssGetValue;
 					if ( nMask & TOKEN_VALUE_DIGIT )
                     {
                         if ( 128 <= c )
                             r.TokenType = KParseType::UNI_NUMBER;
                         else
                             r.TokenType = KParseType::ASC_NUMBER;
                     }
                     else if ( c == cDecimalSep )
                     {
                         if ( *pSrc )
                             ++nDecSeps;
                         else
                             eState = ssRewindFromValue;
                             // retry for ONE_SINGLE_CHAR or others
                     }
 				}
 				else if ( nMask & TOKEN_CHAR_WORD )
 				{
 					eState = ssGetWord;
 					r.TokenType = KParseType::IDENTNAME;
 				}
 				else if ( nMask & TOKEN_NAME_SEP )
 				{
 					eState = ssGetWordFirstChar;
 					bQuote = true;
 					pSym++;
 					nParseTokensType = 0;	// will be taken of first real character
 					r.TokenType = KParseType::SINGLE_QUOTE_NAME;
 				}
 				else if ( nMask & TOKEN_CHAR_STRING )
 				{
 					eState = ssGetString;
 					pSym++;
 					nParseTokensType = 0;	// will be taken of first real character
 					r.TokenType = KParseType::DOUBLE_QUOTE_STRING;
 				}
 				else if ( nMask & TOKEN_CHAR_DONTCARE )
 				{
 					if ( nStartTypes & KParseTokens::IGNORE_LEADING_WS )
 					{
                         if (eState == ssRewindFromValue)
                             eState = ssIgnoreLeadingInRewind;
 						r.LeadingWhiteSpace++;
 						pSym++;
 						nParseTokensType = 0;	// wait until real character
 						bMightBeWord = true;
 					}
 					else
 						eState = ssBounce;
 				}
 				else if ( nMask & TOKEN_CHAR_BOOL )
 				{
 					eState = ssGetBool;
 					r.TokenType = KParseType::BOOLEAN;
 				}
 				else if ( nMask & TOKEN_CHAR )
 				{	//! must be last
 					eState = ssStop;
 					r.TokenType = KParseType::ONE_SINGLE_CHAR;
 				}
 				else
 					eState = ssBounce;		// not known
 			}
 			break;
 			case ssGetValue :
 			{
                 if ( nMask & TOKEN_VALUE_DIGIT )
                 {
                     if ( 128 <= c )
                         r.TokenType = KParseType::UNI_NUMBER;
                     else if ( r.TokenType != KParseType::UNI_NUMBER )
                         r.TokenType = KParseType::ASC_NUMBER;
                 }
                 if ( nMask & TOKEN_VALUE )
                 {
                     if ( c == cDecimalSep && ++nDecSeps > 1 )
                     {
                         if ( pSrc - pTextStart == 2 )
                             eState = ssRewindFromValue;
                             // consecutive separators
                         else
                             eState = ssStopBack;
                     }
                     // else keep it going
                 }
 				else if ( c == 'E' || c == 'e' )
 				{
 					UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart );
 					if ( nNext & TOKEN_VALUE_EXP )
 						;	// keep it going
 					else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) )
 					{	// might be a numerical name (1.2efg)
 						eState = ssGetWord;
 						r.TokenType = KParseType::IDENTNAME;
 					}
 					else
 						eState = ssStopBack;
 				}
 				else if ( nMask & TOKEN_VALUE_SIGN )
 				{
 					if ( (cLast == 'E') || (cLast == 'e') )
 					{
 						UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart );
 						if ( nNext & TOKEN_VALUE_EXP_VALUE )
 							;	// keep it going
 						else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) )
 						{	// might be a numerical name (1.2e+fg)
 							eState = ssGetWord;
 							r.TokenType = KParseType::IDENTNAME;
 						}
 						else
 							eState = ssStopBack;
 					}
 					else if ( bMightBeWord )
 					{	// might be a numerical name (1.2+fg)
 						eState = ssGetWord;
 						r.TokenType = KParseType::IDENTNAME;
 					}
 					else
 						eState = ssStopBack;
 				}
 				else if ( bMightBeWord && (nMask & TOKEN_WORD) )
 				{	// might be a numerical name (1995.A1)
 					eState = ssGetWord;
 					r.TokenType = KParseType::IDENTNAME;
 				}
 				else
 					eState = ssStopBack;
 			}
 			break;
 			case ssGetWordFirstChar :
 				eState = ssGetWord;
 				// fall thru
 			case ssGetWord :
 			{
 				if ( nMask & TOKEN_WORD )
 					;	// keep it going
 				else if ( nMask & TOKEN_NAME_SEP )
 				{
 					if ( bQuote )
 					{
 						if ( cLast == '\\' )
 						{	// escaped
 							aSymbol += OUString( pSym, pSrc - pSym - 2 );
 							aSymbol += OUString( &c, 1);
 						}
 						else
 						{
 							eState = ssStop;
 							aSymbol += OUString( pSym, pSrc - pSym - 1 );
 						}
 						pSym = pSrc;
 					}
 					else
 						eState = ssStopBack;
 				}
 				else if ( bQuote )
 					;	// keep it going
 				else
 					eState = ssStopBack;
 			}
 			break;
 			case ssGetString :
 			{
 				if ( nMask & TOKEN_STRING_SEP )
 				{
 					if ( cLast == '\\' )
 					{	// escaped
 						aSymbol += OUString( pSym, pSrc - pSym - 2 );
 						aSymbol += OUString( &c, 1);
 					}
                     else if ( c == *pSrc &&
                             !(nContTypes & KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING) )
 					{	// "" => literal " escaped
 						aSymbol += OUString( pSym, pSrc - pSym );
 						pSrc++;
 					}
 					else
 					{
 						eState = ssStop;
 						aSymbol += OUString( pSym, pSrc - pSym - 1 );
 					}
 					pSym = pSrc;
 				}
 			}
 			break;
 			case ssGetBool :
 			{
 				if ( (nMask & TOKEN_BOOL) )
 					eState = ssStop;	// maximum 2: <, >, <>, <=, >=
 				else
 					eState = ssStopBack;
 			}
 			break;
             case ssStopBack :
             case ssBounce :
             case ssStop :
                 ;   // nothing, no compiler warning
             break;
 		}
         if ( eState == ssRewindFromValue )
         {
             r = ParseResult();
             pSym = pTextStart;
             pSrc = pSym;
             aSymbol = OUString();
             c = *pSrc;
             cLast = 0;
             nDecSeps = 0;
             bQuote = false;
             bMightBeWord = true;
             bMightBeWordLast = true;
         }
         else
         {
             if ( !(r.TokenType & nTokenType) )
             {
                 if ( (r.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER))
                         && (nTokenType & KParseType::IDENTNAME) && bMightBeWord )
                     ;	// keep a number that might be a word
                 else if ( r.LeadingWhiteSpace == (pSrc - pTextStart) )
                     ;	// keep ignored white space
                 else if ( !r.TokenType && eState == ssGetValue && (nMask & TOKEN_VALUE_SEP) )
                     ;   // keep uncertain value
                 else
                     eState = ssBounce;
             }
             if ( eState == ssBounce )
             {
                 r.TokenType = 0;
                 eState = ssStopBack;
             }
             if ( eState == ssStopBack )
             {	// put back
                 pSrc--;
                 bMightBeWord = bMightBeWordLast;
                 eState = ssStop;
             }
             if ( eState != ssStop )
             {
                 if ( !r.StartFlags )
                     r.StartFlags |= nParseTokensType;
                 else
                     r.ContFlags |= nParseTokensType;
             }
             bMightBeWordLast = bMightBeWord;
             cLast = c;
             c = *pSrc;
         }
 	}
 	// r.CharLen is the length in characters (not code points) of the parsed
 	// token not including any leading white space, change this calculation if
 	// multi-code-point Unicode characters are to be supported.
 	r.CharLen = pSrc - pTextStart - r.LeadingWhiteSpace;
 	r.EndPos = nPos + (pSrc - pTextStart);
 	if ( r.TokenType & KParseType::ASC_NUMBER )
 	{
         r.Value = rtl_math_uStringToDouble( pTextStart + r.LeadingWhiteSpace,
                 pTextStart + r.EndPos, cDecimalSep, cGroupSep, NULL, NULL );
 		if ( bMightBeWord )
 			r.TokenType |= KParseType::IDENTNAME;
 	}
 	else if ( r.TokenType & KParseType::UNI_NUMBER )
 	{
         if ( !xNatNumSup.is() )
         {
 #define NATIVENUMBERSUPPLIER_SERVICENAME "com.sun.star.i18n.NativeNumberSupplier"
             if ( xMSF.is() )
             {
                 xNatNumSup = Reference< XNativeNumberSupplier > (
                         xMSF->createInstance( OUString(
                                 RTL_CONSTASCII_USTRINGPARAM(
                                     NATIVENUMBERSUPPLIER_SERVICENAME ) ) ),
                         UNO_QUERY );
             }
             if ( !xNatNumSup.is() )
             {
                 throw RuntimeException( OUString(
 #ifdef DBG_UTIL
                     RTL_CONSTASCII_USTRINGPARAM(
                         "cclass_Unicode::parseText: can't instanciate "
                         NATIVENUMBERSUPPLIER_SERVICENAME )
 #endif
                     ), *this );
             }
 #undef NATIVENUMBERSUPPLIER_SERVICENAME
         }
         OUString aTmp( pTextStart + r.LeadingWhiteSpace, r.EndPos - nPos +
                 r.LeadingWhiteSpace );
         // transliterate to ASCII
         aTmp = xNatNumSup->getNativeNumberString( aTmp, aParserLocale,
                 NativeNumberMode::NATNUM0 );
         r.Value = ::rtl::math::stringToDouble( aTmp, cDecimalSep, cGroupSep, NULL, NULL );
 		if ( bMightBeWord )
 			r.TokenType |= KParseType::IDENTNAME;
 	}
 	else if ( r.TokenType & (KParseType::SINGLE_QUOTE_NAME | KParseType::DOUBLE_QUOTE_STRING) )
 	{
 		if ( pSym < pSrc )
 		{	//! open quote
 			aSymbol += OUString( pSym, pSrc - pSym );
 			r.TokenType |= KParseType::MISSING_QUOTE;
 		}
 		r.DequotedNameOrString = aSymbol;
 	}
 }

 } } } }