AOO410/main/svtools/source/svrtf/parrtf.cxx - openoffice - Git at Google

 /**************************************************************
  *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  *
  *************************************************************/


 // MARKER(update_precomp.py): autogen include statement, do not remove
 #include "precompiled_svtools.hxx"

 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil -*- */

 #include <stdio.h>		                // for EOF
 #include <rtl/tencinfo.h>
 #include <tools/stream.hxx>
 #include <tools/debug.hxx>
 #include <svtools/rtftoken.h>
 #include <svtools/rtfkeywd.hxx>
 #include <svtools/parrtf.hxx>

 // Upper bound on the number of characters ScanText() gathers per text token.
 const int MAX_STRING_LEN = 1024;
 // Chunk size used while collecting the letters of a control word.
 const int MAX_TOKEN_LEN = 128;

 // ASCII-only character classification used by the tokenizer. The argument is
 // parenthesized so that compound expressions are classified as a whole; it is
 // still evaluated more than once, so it must be free of side effects.
 #define RTF_ISDIGIT( c ) ( (c) >= '0' && (c) <= '9' )
 #define RTF_ISALPHA( c ) ( ((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') )

 // Creates a parser that reads from rIn. The Unicode code set and the source
 // encoding both start out as Windows-1252 (the ANSI default), and the number
 // of fallback characters skipped after a \u keyword starts at 1.
 SvRTFParser::SvRTFParser( SvStream& rIn, sal_uInt8 nStackSize )
 	: SvParser( rIn, nStackSize )
 	, eUNICodeSet( RTL_TEXTENCODING_MS_1252 )
 	, nUCharOverread( 1 )
 {
 	SetSrcEncoding( RTL_TEXTENCODING_MS_1252 );		// ANSI code set by default
 	bRTF_InTextRead = false;
 }

 // The destructor body is intentionally empty.
 SvRTFParser::~SvRTFParser() {}


 int SvRTFParser::_GetNextToken()
 {
 	int nRet = 0;
 	do {
 		int bNextCh = true;
 		switch( nNextCh )
 		{
 		case '\\':
 			{
 				// Steuerzeichen
 				switch( nNextCh = GetNextChar() )
 				{
 				case '{':
 				case '}':
 				case '\\':
 				case '+':		// habe ich in einem RTF-File gefunden
 				case '~':		// nonbreaking space
 				case '-':		// optional hyphen
 				case '_':		// nonbreaking hyphen
 				case '\'':		// HexValue
 					nNextCh = '\\';
 					rInput.SeekRel( -1 );
 					ScanText();
 					nRet = RTF_TEXTTOKEN;
 					bNextCh = 0 == nNextCh;
 					break;

 				case '*':		// ignoreflag
 					nRet = RTF_IGNOREFLAG;
 					break;
 				case ':':	 	// subentry in an index entry
 					nRet = RTF_SUBENTRYINDEX;
 					break;
 				case '|':		// formula-charakter
 					nRet = RTF_FORMULA;
 					break;

 				case 0x0a:
 				case 0x0d:
 					nRet = RTF_PAR;
 					break;

 				default:
 					if( RTF_ISALPHA( nNextCh ) )
 					{
 						aToken = '\\';
 						{
 							String aStrBuffer;
 							sal_Unicode* pStr = aStrBuffer.AllocBuffer(
 															MAX_TOKEN_LEN );
 							xub_StrLen nStrLen = 0;
 							do {
 								*(pStr + nStrLen++) = nNextCh;
 								if( MAX_TOKEN_LEN == nStrLen )
 								{
 									aToken += aStrBuffer;
 									aToken.GetBufferAccess();  // make unique string!
 									nStrLen = 0;
 								}
 								nNextCh = GetNextChar();
 							} while( RTF_ISALPHA( nNextCh ) );
 							if( nStrLen )
 							{
 								aStrBuffer.ReleaseBufferAccess( nStrLen );
 								aToken += aStrBuffer;
 							}
 						}

 						// Minus fuer numerischen Parameter
 						int bNegValue = false;
 						if( '-' == nNextCh )
 						{
 							bNegValue = true;
 							nNextCh = GetNextChar();
 						}

 						// evt. Numerischer Parameter
 						if( RTF_ISDIGIT( nNextCh ) )
 						{
 							nTokenValue = 0;
 							do {
 								nTokenValue *= 10;
 								nTokenValue += nNextCh - '0';
 								nNextCh = GetNextChar();
 							} while( RTF_ISDIGIT( nNextCh ) );
 							if( bNegValue )
 								nTokenValue = -nTokenValue;
 							bTokenHasValue=true;
 						}
 						else if( bNegValue )		// das Minus wieder zurueck
 						{
 							nNextCh = '-';
 							rInput.SeekRel( -1 );
 						}
 						if( ' ' == nNextCh )		// Blank gehoert zum Token!
 							nNextCh = GetNextChar();

 						// suche das Token in der Tabelle:
 						if( 0 == (nRet = GetRTFToken( aToken )) )
 							// Unknown Control
 							nRet = RTF_UNKNOWNCONTROL;

 						// bug 76812 - unicode token handled as normal text
 						bNextCh = false;
 						switch( nRet )
 						{
 						case RTF_UC:
 							if( 0 <= nTokenValue )
 							{
 								nUCharOverread = (sal_uInt8)nTokenValue;
 #if 1
                                 //cmc: other ifdef breaks #i3584
 								aParserStates.top().
 									nUCharOverread = nUCharOverread;
 #else
 								if( !nUCharOverread )
 									nUCharOverread = aParserStates.top().nUCharOverread;
 								else
 									aParserStates.top().
 										nUCharOverread = nUCharOverread;
 #endif
 							}
 							aToken.Erase(); // #i47831# erase token to prevent the token from beeing treated as text
 							// read next token
 							nRet = 0;
 							break;

 						case RTF_UPR:
 							if (!_inSkipGroup) {
 							// UPR - overread the group with the ansi
 							//       informations
 							while( '{' != _GetNextToken() )
 								;
 							SkipGroup();
 							_GetNextToken();  // overread the last bracket
 							nRet = 0;
 							}
 							break;

 						case RTF_U:
 							if( !bRTF_InTextRead )
 							{
 								nRet = RTF_TEXTTOKEN;
 								aToken = (sal_Unicode)nTokenValue;

 								// overread the next n "RTF" characters. This
 								// can be also \{, \}, \'88
 								for( sal_uInt8 m = 0; m < nUCharOverread; ++m )
 								{
 									sal_Unicode cAnsi = nNextCh;
 									while( 0xD == cAnsi )
 										cAnsi = GetNextChar();
 									while( 0xA == cAnsi )
 										cAnsi = GetNextChar();

 									if( '\\' == cAnsi &&
 										'\'' == ( cAnsi = GetNextChar() ))
 										// HexValue ueberlesen
 										cAnsi = GetHexValue();
 									nNextCh = GetNextChar();
 								}
 								ScanText();
 								bNextCh = 0 == nNextCh;
 							}
 							break;
 						}
 					}
 					else if( SVPAR_PENDING != eState )
 					{
 						// Bug 34631 - "\ " ueberlesen - Blank als Zeichen
 						// eState = SVPAR_ERROR;
 						bNextCh = false;
 					}
 					break;
 				}
 			}
 			break;

 		case sal_Unicode(EOF):
 			eState = SVPAR_ACCEPTED;
 			nRet = nNextCh;
 			break;

 		case '{':
 			{
 				if( 0 <= nOpenBrakets )
 				{
 					RtfParserState_Impl aState( nUCharOverread, GetSrcEncoding() );
                     aParserStates.push( aState );
 				}
 				++nOpenBrakets;
                 DBG_ASSERT(
                     static_cast<size_t>(nOpenBrakets) == aParserStates.size(),
                     "ParserStateStack unequal to bracket count" );
 				nRet = nNextCh;
 			}
 			break;

 		case '}':
 			--nOpenBrakets;
 			if( 0 <= nOpenBrakets )
 			{
                 aParserStates.pop();
 				if( !aParserStates.empty() )
 				{
 					const RtfParserState_Impl& rRPS =
 							aParserStates.top();
 					nUCharOverread = rRPS.nUCharOverread;
 					SetSrcEncoding( rRPS.eCodeSet );
 				}
 				else
 				{
 					nUCharOverread = 1;
 					SetSrcEncoding( GetCodeSet() );
 				}
 			}
             DBG_ASSERT(
                 static_cast<size_t>(nOpenBrakets) == aParserStates.size(),
                 "ParserStateStack unequal to bracket count" );
 			nRet = nNextCh;
 			break;

 		case 0x0d:
 		case 0x0a:
 			break;

 		default:
 			// es folgt normaler Text
 			ScanText();
 			nRet = RTF_TEXTTOKEN;
 			bNextCh = 0 == nNextCh;
 			break;
 		}

 		if( bNextCh )
 			nNextCh = GetNextChar();

 	} while( !nRet && SVPAR_WORKING == eState );
 	return nRet;
 }


 /**
  * Reads the next two characters from the input and interprets them as a
  * two-digit hexadecimal number (the payload of a \'hh escape).
  *
  * A character that is not a hex digit contributes 0 at its position.
  * On return nNextCh holds the second character that was read.
  *
  * @return the decoded value
  */
 sal_Unicode SvRTFParser::GetHexValue()
 {
 	// No 'register' here: the specifier is deprecated since C++11 and
 	// ill-formed as of C++17.
 	sal_Unicode nHexVal = 0;

 	for( int n = 0; n < 2; ++n )
 	{
 		nHexVal *= 16;
 		nNextCh = GetNextChar();
 		if( nNextCh >= '0' && nNextCh <= '9' )
 			nHexVal += (nNextCh - '0');
 		else if( nNextCh >= 'a' && nNextCh <= 'f' )
 			nHexVal += (nNextCh - 'a' + 10);
 		else if( nNextCh >= 'A' && nNextCh <= 'F' )
 			nHexVal += (nNextCh - 'A' + 10);
 	}
 	return nHexVal;
 }

 /**
  * Collects a run of text beginning at nNextCh and appends it to aToken.
  *
  * Decodes \'hh bytes with the current source encoding, the escaped
  * characters backslash, '{', '}' and '+', the special characters \~ \- \_
  * and \uN Unicode keywords. Bare CR/LF characters are dropped. Scanning
  * stops at an unescaped brace, at any other control word, at cBreak, at
  * end of input, or once MAX_STRING_LEN characters have been gathered.
  * nNextCh is left at the character where scanning stopped.
  *
  * @param cBreak  character that ends the scan when met in plain text
  */
 void SvRTFParser::ScanText( const sal_Unicode cBreak )
 {
 	String aStrBuffer;
 	int bWeiter = true;
 	while( bWeiter && IsParserWorking() && aStrBuffer.Len() < MAX_STRING_LEN)
 	{
 		int bNextCh = true;
 		switch( nNextCh )
 		{
 		case '\\':
 			{
 				switch (nNextCh = GetNextChar())
 				{
 				case '\'':
 					{
 						// Gather a whole run of \'hh bytes, including raw
 						// bytes between them, and convert it in one go so
 						// that multi-byte sequences stay together
 						// (#i35653 patch from cmc).
 						ByteString aByteString;
 						bool bEndOfInput = false;
 						while (1)
 						{
 							aByteString.Append((char)GetHexValue());

 							bool bBreak = false;
 							sal_Char nSlash = '\\';
 							while (!bBreak)
 							{
 								wchar_t cNext=GetNextChar();
 								if (sal_Unicode(EOF) == cNext)
 								{
 									// The input ended inside the run. The
 									// end marker is > 0xFF, so without this
 									// check it was appended over and over.
 									bEndOfInput = true;
 									break;
 								}
 								if (cNext>0xFF) // fix for #i43933# and #i35653#
 								{
 									if (aByteString.Len())
 										aStrBuffer.Append(String(aByteString, GetSrcEncoding()));
 									aStrBuffer.Append((sal_Unicode)cNext);

 									aByteString.Erase();
 									continue;
 								}
 								nSlash = (sal_Char)cNext;
 								while (nSlash == 0xD || nSlash == 0xA)
 									nSlash = (sal_Char)GetNextChar();

 								switch (nSlash)
 								{
 									case '{':
 									case '}':
 									case '\\':
 										bBreak = true;
 										break;
 									default:
 										aByteString.Append(nSlash);
 										break;
 								}
 							}

 							if (bEndOfInput)
 							{
 								// hand the end marker to the enclosing
 								// loop, which treats it like any other
 								// end of input
 								nNextCh = sal_Unicode(EOF);
 								break;
 							}

 							nNextCh = GetNextChar();

 							if (nSlash != '\\' || nNextCh != '\'')
 							{
 								// not another \'hh: un-read one character
 								// and continue at the delimiter
 								rInput.SeekRel(-1);
 								nNextCh = nSlash;
 								break;
 							}
 						}

 						bNextCh = false;

 						if (aByteString.Len())
 							aStrBuffer.Append(String(aByteString, GetSrcEncoding()));
 					}
 					break;
 				case '\\':
 				case '}':
 				case '{':
 				case '+':		// found in an RTF file in the wild
 					aStrBuffer.Append(nNextCh);
 					break;
 				case '~':		// nonbreaking space
 					aStrBuffer.Append(static_cast< sal_Unicode >(0xA0));
 					break;
 				case '-':		// optional hyphen
 					aStrBuffer.Append(static_cast< sal_Unicode >(0xAD));
 					break;
 				case '_':		// nonbreaking hyphen
 					aStrBuffer.Append(static_cast< sal_Unicode >(0x2011));
 					break;

 				case 'u':
 					// possibly a Unicode character keyword
 					{
 						// peek at the character after the 'u', then rewind
 						// to the 'u' itself
 						nNextCh = GetNextChar();
 						rInput.SeekRel( -2 );

 						if( '-' == nNextCh || RTF_ISDIGIT( nNextCh ) )
 						{
 							bRTF_InTextRead = true;

 							// _GetNextToken() overwrites aToken
 							String sSave( aToken );
 							nNextCh = '\\';
 							#ifdef DBG_UTIL
 							int nToken =
 							#endif
 								_GetNextToken();
 							DBG_ASSERT( RTF_U == nToken, "doch kein UNI-Code Zeichen" );
 							// don't convert symbol chars
 							aStrBuffer.Append(
 								static_cast< sal_Unicode >(nTokenValue));

 							// skip the next n "RTF" characters. These
 							// can also be \{, \}, \'88
 							for( sal_uInt8 m = 0; m < nUCharOverread; ++m )
 							{
 								sal_Unicode cAnsi = nNextCh;
 								while( 0xD == cAnsi )
 									cAnsi = GetNextChar();
 								while( 0xA == cAnsi )
 									cAnsi = GetNextChar();

 								if( '\\' == cAnsi &&
 									'\'' == ( cAnsi = GetNextChar() ))
 									// skip the hex value
 									cAnsi = GetHexValue();
 								nNextCh = GetNextChar();
 							}
 							bNextCh = false;
 							aToken = sSave;
 							bRTF_InTextRead = false;
 						}
 						else
 						{
 							nNextCh = '\\';
 							bWeiter = false;		// stop, hand over the string
 						}
 					}
 					break;

 				default:
 					// some other control word: un-read it and stop
 					rInput.SeekRel( -1 );
 					nNextCh = '\\';
 					bWeiter = false;		// stop, hand over the string
 					break;
 				}
 			}
 			break;

 		case sal_Unicode(EOF):
 				eState = SVPAR_ERROR;
 				// fall through
 		case '{':
 		case '}':
 			bWeiter = false;
 			break;

 		case 0x0a:
 		case 0x0d:
 			break;

 		default:
 			if( nNextCh == cBreak || aStrBuffer.Len() >= MAX_STRING_LEN)
 				bWeiter = false;
 			else
 			{
 				do {
 					// every other character goes into the text
 					aStrBuffer.Append(nNextCh);

 					if (sal_Unicode(EOF) == (nNextCh = GetNextChar()))
 					{
 						if (aStrBuffer.Len())
 							aToken += aStrBuffer;
 						return;
 					}
 				} while
 				(
 					(RTF_ISALPHA(nNextCh) || RTF_ISDIGIT(nNextCh)) &&
 					(aStrBuffer.Len() < MAX_STRING_LEN)
 				);
 				bNextCh = false;
 			}
 		}

 		if( bWeiter && bNextCh )
 			nNextCh = GetNextChar();
 	}

 	if (aStrBuffer.Len())
 		aToken += aStrBuffer;
 }


 // Non-zero while SkipGroup() is running. _GetNextToken() checks it before
 // handling RTF_UPR, and SkipGroup() itself returns at once when it is set.
 short SvRTFParser::_inSkipGroup=0;

 /**
  * Skips the rest of the current group, including any nested groups.
  *
  * The group is read token by token (#i16185#) so that a \bin keyword is
  * recognised and its binary payload is stepped over instead of being
  * scanned for braces. Returns when the brace closing the group is the
  * current character. If the loop ends for any other reason and the
  * current character is not '}', eState becomes SVPAR_ERROR unless it is
  * SVPAR_PENDING.
  */
 void SvRTFParser::SkipGroup()
 {
 	if (_inSkipGroup>0)
 		return;
 	_inSkipGroup++;

 	short nBrackets=1;
 	do
 	{
 		switch (nNextCh)
 		{
 			case '{':
 				++nBrackets;
 				break;
 			case '}':
 				if (!--nBrackets) {
 					_inSkipGroup--;
 					return;
 				}
 				break;
 		}
 		int nToken = _GetNextToken();
 		if (nToken == RTF_BIN)
 		{
 			rInput.SeekRel(-1);
 			// Only ever move forward: a negative length would seek
 			// backwards and the same input would be scanned again.
 			if (nTokenValue > 0)
 				rInput.SeekRel(nTokenValue);
 			nNextCh = GetNextChar();
 		}
 		while (nNextCh==0xa || nNextCh==0xd)
 		{
 			nNextCh = GetNextChar();
 		}
 	} while (sal_Unicode(EOF) != nNextCh && IsParserWorking());

 	if( SVPAR_PENDING != eState && '}' != nNextCh )
 		eState = SVPAR_ERROR;
 	_inSkipGroup--;
 }

 // Handlers for unknown, bitmap and OLE data. Each one just skips the group.
 void SvRTFParser::ReadUnknownData()
 {
 	SkipGroup();
 }

 void SvRTFParser::ReadBitmapData()
 {
 	SkipGroup();
 }

 void SvRTFParser::ReadOLEData()
 {
 	SkipGroup();
 }


 /**
  * Starts parsing the stream at its current position.
  *
  * Resets the state, the bracket count and the encodings (Windows-1252),
  * checks that the input begins with '{' followed by the \rtf keyword and
  * then hands over to Continue(). A reference to the parser is added
  * before Continue() runs and released again unless parsing ended in
  * SVPAR_PENDING.
  *
  * @return the resulting parser state; SVPAR_ERROR if the header check fails
  */
 SvParserState SvRTFParser::CallParser()
 {
 	// Start from a defined value so that a failed read cannot leave an
 	// indeterminate character in nNextCh.
 	sal_Char cFirstCh = 0;
 	nNextChPos = rInput.Tell();
 	rInput >> cFirstCh;
 	nNextCh = cFirstCh;
 	eState = SVPAR_WORKING;
 	nOpenBrakets = 0;
 	SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_MS_1252 );
 	eUNICodeSet = RTL_TEXTENCODING_MS_1252; 	// ANSI code set by default

 	// the first two tokens have to be '{' and \rtf
 	if( '{' == GetNextToken() && RTF_RTF == GetNextToken() )
 	{
 		AddRef();
 		Continue( 0 );
 		if( SVPAR_PENDING != eState )
 			ReleaseRef();		// the parser is no longer needed
 	}
 	else
 		eState = SVPAR_ERROR;

 	return eState;
 }

 /**
  * Main parse loop: handles tokens until the parser is no longer working.
  *
  * Encoding keywords are applied here, groups that start with the ignore
  * flag followed by an unknown control word are skipped, and every other
  * token is passed to NextToken().
  *
  * @param nToken  token to start with; if 0, the first token is fetched
  *                with GetNextToken()
  */
 void SvRTFParser::Continue( int nToken )
 {
 //	DBG_ASSERT( SVPAR_CS_DONTKNOW == GetCharSet(),
 //				"Zeichensatz wurde geaendert." );

 	if( !nToken )
 		nToken = GetNextToken();

 	while( IsParserWorking() )
 	{
 		SaveState( nToken );
 		switch( nToken )
 		{
 		case '}':
 			// while groups are still open, '}' is passed on like any token
 			if( nOpenBrakets )
 				goto NEXTTOKEN;
 			eState = SVPAR_ACCEPTED;
 			break;

 		case '{':
 			// is this an unknown group?
 			{
 				if( RTF_IGNOREFLAG != GetNextToken() )
 					nToken = SkipToken( -1 );
 				else if( RTF_UNKNOWNCONTROL != GetNextToken() )
 					nToken = SkipToken( -2 );
 				else
 				{
 					// filter it out right away
 					ReadUnknownData();
 					nToken = GetNextToken();
 					if( '}' != nToken )
 						eState = SVPAR_ERROR;
 					break;		// on to the next token
 				}
 			}
 			goto NEXTTOKEN;

 		case RTF_UNKNOWNCONTROL:
 			break;		// skip unknown tokens
 		case RTF_NEXTTYPE:
 		case RTF_ANSITYPE:
             SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_MS_1252 );
             break;
 		case RTF_MACTYPE:
             SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_APPLE_ROMAN );
             break;
 		case RTF_PCTYPE:
             SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_IBM_437 );
             break;
 		case RTF_PCATYPE:
             SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_IBM_850 );
             break;
 		case RTF_ANSICPG:
             // the keyword's parameter is a Windows code page number
             eCodeSet = rtl_getTextEncodingFromWindowsCodePage(nTokenValue);
             SetSrcEncoding(eCodeSet);
 			break;
 		default:
 NEXTTOKEN:
 			NextToken( nToken );
 			break;
 		}
 		if( IsParserWorking() )
 			SaveState( 0 );			// everything up to here is processed,
 									// carry on with a new token
 		nToken = GetNextToken();
 	}
 	// input accepted while groups are still open counts as an error
 	if( SVPAR_ACCEPTED == eState && 0 < nOpenBrakets )
 		eState = SVPAR_ERROR;
 }

 // Makes eEnc the source encoding and stores it in the state on top of the
 // parser state stack, if there is one. RTL_TEXTENCODING_DONTKNOW stands for
 // the encoding returned by GetCodeSet().
 void SvRTFParser::SetEncoding( rtl_TextEncoding eEnc )
 {
 	const rtl_TextEncoding eEffective =
 		( RTL_TEXTENCODING_DONTKNOW == eEnc ) ? GetCodeSet() : eEnc;

 	if( !aParserStates.empty() )
 	{
 		aParserStates.top().eCodeSet = eEffective;
 	}
 	SetSrcEncoding( eEffective );
 }

 #ifdef USED
 // Plain forwarders to the base class, compiled only when USED is defined.
 void SvRTFParser::SaveState( int nToken ) { SvParser::SaveState( nToken ); }

 void SvRTFParser::RestoreState() { SvParser::RestoreState(); }
 #endif

 /* vi:set tabstop=4 shiftwidth=4 expandtab: */
	/**************************************************************
	*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*
	*************************************************************/



	// MARKER(update_precomp.py): autogen include statement, do not remove
	#include "precompiled_svtools.hxx"

	/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil -*- */

	#include <stdio.h> // for EOF
	#include <rtl/tencinfo.h>
	#include <tools/stream.hxx>
	#include <tools/debug.hxx>
	#include <svtools/rtftoken.h>
	#include <svtools/rtfkeywd.hxx>
	#include <svtools/parrtf.hxx>

	// Upper bound on the number of characters ScanText() gathers per text token.
	const int MAX_STRING_LEN = 1024;
	// Chunk size used while collecting the letters of a control word.
	const int MAX_TOKEN_LEN = 128;

	// ASCII-only character classification used by the tokenizer. The argument is
	// parenthesized so that compound expressions are classified as a whole; it is
	// still evaluated more than once, so it must be free of side effects.
	#define RTF_ISDIGIT( c ) ( (c) >= '0' && (c) <= '9' )
	#define RTF_ISALPHA( c ) ( ((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') )

	// Creates a parser that reads from rIn. The Unicode code set and the source
	// encoding both start out as Windows-1252 (the ANSI default), and the number
	// of fallback characters skipped after a \u keyword starts at 1.
	SvRTFParser::SvRTFParser( SvStream& rIn, sal_uInt8 nStackSize )
		: SvParser( rIn, nStackSize )
		, eUNICodeSet( RTL_TEXTENCODING_MS_1252 )
		, nUCharOverread( 1 )
	{
		SetSrcEncoding( RTL_TEXTENCODING_MS_1252 );		// ANSI code set by default
		bRTF_InTextRead = false;
	}

	// The destructor body is intentionally empty.
	SvRTFParser::~SvRTFParser() {}




	int SvRTFParser::_GetNextToken()
	{
	int nRet = 0;
	do {
	int bNextCh = true;
	switch( nNextCh )
	{
	case '\\':
	{
	// Steuerzeichen
	switch( nNextCh = GetNextChar() )
	{
	case '{':
	case '}':
	case '\\':
	case '+': // habe ich in einem RTF-File gefunden
	case '~': // nonbreaking space
	case '-': // optional hyphen
	case '_': // nonbreaking hyphen
	case '\'': // HexValue
	nNextCh = '\\';
	rInput.SeekRel( -1 );
	ScanText();
	nRet = RTF_TEXTTOKEN;
	bNextCh = 0 == nNextCh;
	break;

	case '*': // ignoreflag
	nRet = RTF_IGNOREFLAG;
	break;
	case ':': // subentry in an index entry
	nRet = RTF_SUBENTRYINDEX;
	break;
	case '\|': // formula-charakter
	nRet = RTF_FORMULA;
	break;

	case 0x0a:
	case 0x0d:
	nRet = RTF_PAR;
	break;

	default:
	if( RTF_ISALPHA( nNextCh ) )
	{
	aToken = '\\';
	{
	String aStrBuffer;
	sal_Unicode* pStr = aStrBuffer.AllocBuffer(
	MAX_TOKEN_LEN );
	xub_StrLen nStrLen = 0;
	do {
	*(pStr + nStrLen++) = nNextCh;
	if( MAX_TOKEN_LEN == nStrLen )
	{
	aToken += aStrBuffer;
	aToken.GetBufferAccess(); // make unique string!
	nStrLen = 0;
	}
	nNextCh = GetNextChar();
	} while( RTF_ISALPHA( nNextCh ) );
	if( nStrLen )
	{
	aStrBuffer.ReleaseBufferAccess( nStrLen );
	aToken += aStrBuffer;
	}
	}

	// Minus fuer numerischen Parameter
	int bNegValue = false;
	if( '-' == nNextCh )
	{
	bNegValue = true;
	nNextCh = GetNextChar();
	}

	// evt. Numerischer Parameter
	if( RTF_ISDIGIT( nNextCh ) )
	{
	nTokenValue = 0;
	do {
	nTokenValue *= 10;
	nTokenValue += nNextCh - '0';
	nNextCh = GetNextChar();
	} while( RTF_ISDIGIT( nNextCh ) );
	if( bNegValue )
	nTokenValue = -nTokenValue;
	bTokenHasValue=true;
	}
	else if( bNegValue ) // das Minus wieder zurueck
	{
	nNextCh = '-';
	rInput.SeekRel( -1 );
	}
	if( ' ' == nNextCh ) // Blank gehoert zum Token!
	nNextCh = GetNextChar();

	// suche das Token in der Tabelle:
	if( 0 == (nRet = GetRTFToken( aToken )) )
	// Unknown Control
	nRet = RTF_UNKNOWNCONTROL;

	// bug 76812 - unicode token handled as normal text
	bNextCh = false;
	switch( nRet )
	{
	case RTF_UC:
	if( 0 <= nTokenValue )
	{
	nUCharOverread = (sal_uInt8)nTokenValue;
	#if 1
	//cmc: other ifdef breaks #i3584
	aParserStates.top().
	nUCharOverread = nUCharOverread;
	#else
	if( !nUCharOverread )
	nUCharOverread = aParserStates.top().nUCharOverread;
	else
	aParserStates.top().
	nUCharOverread = nUCharOverread;
	#endif
	}
	aToken.Erase(); // #i47831# erase token to prevent the token from beeing treated as text
	// read next token
	nRet = 0;
	break;

	case RTF_UPR:
	if (!_inSkipGroup) {
	// UPR - overread the group with the ansi
	// informations
	while( '{' != _GetNextToken() )
	;
	SkipGroup();
	_GetNextToken(); // overread the last bracket
	nRet = 0;
	}
	break;

	case RTF_U:
	if( !bRTF_InTextRead )
	{
	nRet = RTF_TEXTTOKEN;
	aToken = (sal_Unicode)nTokenValue;

	// overread the next n "RTF" characters. This
	// can be also \{, \}, \'88
	for( sal_uInt8 m = 0; m < nUCharOverread; ++m )
	{
	sal_Unicode cAnsi = nNextCh;
	while( 0xD == cAnsi )
	cAnsi = GetNextChar();
	while( 0xA == cAnsi )
	cAnsi = GetNextChar();

	if( '\\' == cAnsi &&
	'\'' == ( cAnsi = GetNextChar() ))
	// HexValue ueberlesen
	cAnsi = GetHexValue();
	nNextCh = GetNextChar();
	}
	ScanText();
	bNextCh = 0 == nNextCh;
	}
	break;
	}
	}
	else if( SVPAR_PENDING != eState )
	{
	// Bug 34631 - "\ " ueberlesen - Blank als Zeichen
	// eState = SVPAR_ERROR;
	bNextCh = false;
	}
	break;
	}
	}
	break;

	case sal_Unicode(EOF):
	eState = SVPAR_ACCEPTED;
	nRet = nNextCh;
	break;

	case '{':
	{
	if( 0 <= nOpenBrakets )
	{
	RtfParserState_Impl aState( nUCharOverread, GetSrcEncoding() );
	aParserStates.push( aState );
	}
	++nOpenBrakets;
	DBG_ASSERT(
	static_cast<size_t>(nOpenBrakets) == aParserStates.size(),
	"ParserStateStack unequal to bracket count" );
	nRet = nNextCh;
	}
	break;

	case '}':
	--nOpenBrakets;
	if( 0 <= nOpenBrakets )
	{
	aParserStates.pop();
	if( !aParserStates.empty() )
	{
	const RtfParserState_Impl& rRPS =
	aParserStates.top();
	nUCharOverread = rRPS.nUCharOverread;
	SetSrcEncoding( rRPS.eCodeSet );
	}
	else
	{
	nUCharOverread = 1;
	SetSrcEncoding( GetCodeSet() );
	}
	}
	DBG_ASSERT(
	static_cast<size_t>(nOpenBrakets) == aParserStates.size(),
	"ParserStateStack unequal to bracket count" );
	nRet = nNextCh;
	break;

	case 0x0d:
	case 0x0a:
	break;

	default:
	// es folgt normaler Text
	ScanText();
	nRet = RTF_TEXTTOKEN;
	bNextCh = 0 == nNextCh;
	break;
	}

	if( bNextCh )
	nNextCh = GetNextChar();

	} while( !nRet && SVPAR_WORKING == eState );
	return nRet;
	}


	/**
	 * Reads the next two characters from the input and interprets them as a
	 * two-digit hexadecimal number (the payload of a \'hh escape).
	 *
	 * A character that is not a hex digit contributes 0 at its position.
	 * On return nNextCh holds the second character that was read.
	 *
	 * @return the decoded value
	 */
	sal_Unicode SvRTFParser::GetHexValue()
	{
		// No 'register' here: the specifier is deprecated since C++11 and
		// ill-formed as of C++17.
		sal_Unicode nHexVal = 0;

		for( int n = 0; n < 2; ++n )
		{
			nHexVal *= 16;
			nNextCh = GetNextChar();
			if( nNextCh >= '0' && nNextCh <= '9' )
				nHexVal += (nNextCh - '0');
			else if( nNextCh >= 'a' && nNextCh <= 'f' )
				nHexVal += (nNextCh - 'a' + 10);
			else if( nNextCh >= 'A' && nNextCh <= 'F' )
				nHexVal += (nNextCh - 'A' + 10);
		}
		return nHexVal;
	}

	/**
	 * Collects a run of text beginning at nNextCh and appends it to aToken.
	 *
	 * Decodes \'hh bytes with the current source encoding, the escaped
	 * characters backslash, '{', '}' and '+', the special characters \~ \- \_
	 * and \uN Unicode keywords. Bare CR/LF characters are dropped. Scanning
	 * stops at an unescaped brace, at any other control word, at cBreak, at
	 * end of input, or once MAX_STRING_LEN characters have been gathered.
	 * nNextCh is left at the character where scanning stopped.
	 *
	 * @param cBreak  character that ends the scan when met in plain text
	 */
	void SvRTFParser::ScanText( const sal_Unicode cBreak )
	{
		String aStrBuffer;
		int bWeiter = true;
		while( bWeiter && IsParserWorking() && aStrBuffer.Len() < MAX_STRING_LEN)
		{
			int bNextCh = true;
			switch( nNextCh )
			{
			case '\\':
				{
					switch (nNextCh = GetNextChar())
					{
					case '\'':
						{
							// Gather a whole run of \'hh bytes, including raw
							// bytes between them, and convert it in one go so
							// that multi-byte sequences stay together
							// (#i35653 patch from cmc).
							ByteString aByteString;
							bool bEndOfInput = false;
							while (1)
							{
								aByteString.Append((char)GetHexValue());

								bool bBreak = false;
								sal_Char nSlash = '\\';
								while (!bBreak)
								{
									wchar_t cNext=GetNextChar();
									if (sal_Unicode(EOF) == cNext)
									{
										// The input ended inside the run. The
										// end marker is > 0xFF, so without this
										// check it was appended over and over.
										bEndOfInput = true;
										break;
									}
									if (cNext>0xFF) // fix for #i43933# and #i35653#
									{
										if (aByteString.Len())
											aStrBuffer.Append(String(aByteString, GetSrcEncoding()));
										aStrBuffer.Append((sal_Unicode)cNext);

										aByteString.Erase();
										continue;
									}
									nSlash = (sal_Char)cNext;
									while (nSlash == 0xD || nSlash == 0xA)
										nSlash = (sal_Char)GetNextChar();

									switch (nSlash)
									{
										case '{':
										case '}':
										case '\\':
											bBreak = true;
											break;
										default:
											aByteString.Append(nSlash);
											break;
									}
								}

								if (bEndOfInput)
								{
									// hand the end marker to the enclosing
									// loop, which treats it like any other
									// end of input
									nNextCh = sal_Unicode(EOF);
									break;
								}

								nNextCh = GetNextChar();

								if (nSlash != '\\' || nNextCh != '\'')
								{
									// not another \'hh: un-read one character
									// and continue at the delimiter
									rInput.SeekRel(-1);
									nNextCh = nSlash;
									break;
								}
							}

							bNextCh = false;

							if (aByteString.Len())
								aStrBuffer.Append(String(aByteString, GetSrcEncoding()));
						}
						break;
					case '\\':
					case '}':
					case '{':
					case '+':		// found in an RTF file in the wild
						aStrBuffer.Append(nNextCh);
						break;
					case '~':		// nonbreaking space
						aStrBuffer.Append(static_cast< sal_Unicode >(0xA0));
						break;
					case '-':		// optional hyphen
						aStrBuffer.Append(static_cast< sal_Unicode >(0xAD));
						break;
					case '_':		// nonbreaking hyphen
						aStrBuffer.Append(static_cast< sal_Unicode >(0x2011));
						break;

					case 'u':
						// possibly a Unicode character keyword
						{
							// peek at the character after the 'u', then rewind
							// to the 'u' itself
							nNextCh = GetNextChar();
							rInput.SeekRel( -2 );

							if( '-' == nNextCh || RTF_ISDIGIT( nNextCh ) )
							{
								bRTF_InTextRead = true;

								// _GetNextToken() overwrites aToken
								String sSave( aToken );
								nNextCh = '\\';
								#ifdef DBG_UTIL
								int nToken =
								#endif
									_GetNextToken();
								DBG_ASSERT( RTF_U == nToken, "doch kein UNI-Code Zeichen" );
								// don't convert symbol chars
								aStrBuffer.Append(
									static_cast< sal_Unicode >(nTokenValue));

								// skip the next n "RTF" characters. These
								// can also be \{, \}, \'88
								for( sal_uInt8 m = 0; m < nUCharOverread; ++m )
								{
									sal_Unicode cAnsi = nNextCh;
									while( 0xD == cAnsi )
										cAnsi = GetNextChar();
									while( 0xA == cAnsi )
										cAnsi = GetNextChar();

									if( '\\' == cAnsi &&
										'\'' == ( cAnsi = GetNextChar() ))
										// skip the hex value
										cAnsi = GetHexValue();
									nNextCh = GetNextChar();
								}
								bNextCh = false;
								aToken = sSave;
								bRTF_InTextRead = false;
							}
							else
							{
								nNextCh = '\\';
								bWeiter = false;		// stop, hand over the string
							}
						}
						break;

					default:
						// some other control word: un-read it and stop
						rInput.SeekRel( -1 );
						nNextCh = '\\';
						bWeiter = false;		// stop, hand over the string
						break;
					}
				}
				break;

			case sal_Unicode(EOF):
					eState = SVPAR_ERROR;
					// fall through
			case '{':
			case '}':
				bWeiter = false;
				break;

			case 0x0a:
			case 0x0d:
				break;

			default:
				if( nNextCh == cBreak || aStrBuffer.Len() >= MAX_STRING_LEN)
					bWeiter = false;
				else
				{
					do {
						// every other character goes into the text
						aStrBuffer.Append(nNextCh);

						if (sal_Unicode(EOF) == (nNextCh = GetNextChar()))
						{
							if (aStrBuffer.Len())
								aToken += aStrBuffer;
							return;
						}
					} while
					(
						(RTF_ISALPHA(nNextCh) || RTF_ISDIGIT(nNextCh)) &&
						(aStrBuffer.Len() < MAX_STRING_LEN)
					);
					bNextCh = false;
				}
			}

			if( bWeiter && bNextCh )
				nNextCh = GetNextChar();
		}

		if (aStrBuffer.Len())
			aToken += aStrBuffer;
	}


	// Non-zero while SkipGroup() is running. _GetNextToken() checks it before
	// handling RTF_UPR, and SkipGroup() itself returns at once when it is set.
	short SvRTFParser::_inSkipGroup=0;

	/**
	 * Skips the rest of the current group, including any nested groups.
	 *
	 * The group is read token by token (#i16185#) so that a \bin keyword is
	 * recognised and its binary payload is stepped over instead of being
	 * scanned for braces. Returns when the brace closing the group is the
	 * current character. If the loop ends for any other reason and the
	 * current character is not '}', eState becomes SVPAR_ERROR unless it is
	 * SVPAR_PENDING.
	 */
	void SvRTFParser::SkipGroup()
	{
		if (_inSkipGroup>0)
			return;
		_inSkipGroup++;

		short nBrackets=1;
		do
		{
			switch (nNextCh)
			{
				case '{':
					++nBrackets;
					break;
				case '}':
					if (!--nBrackets) {
						_inSkipGroup--;
						return;
					}
					break;
			}
			int nToken = _GetNextToken();
			if (nToken == RTF_BIN)
			{
				rInput.SeekRel(-1);
				// Only ever move forward: a negative length would seek
				// backwards and the same input would be scanned again.
				if (nTokenValue > 0)
					rInput.SeekRel(nTokenValue);
				nNextCh = GetNextChar();
			}
			while (nNextCh==0xa || nNextCh==0xd)
			{
				nNextCh = GetNextChar();
			}
		} while (sal_Unicode(EOF) != nNextCh && IsParserWorking());

		if( SVPAR_PENDING != eState && '}' != nNextCh )
			eState = SVPAR_ERROR;
		_inSkipGroup--;
	}

	// Handlers for unknown, bitmap and OLE data. Each one just skips the group.
	void SvRTFParser::ReadUnknownData()
	{
		SkipGroup();
	}

	void SvRTFParser::ReadBitmapData()
	{
		SkipGroup();
	}

	void SvRTFParser::ReadOLEData()
	{
		SkipGroup();
	}


	/**
	 * Starts parsing the stream at its current position.
	 *
	 * Resets the state, the bracket count and the encodings (Windows-1252),
	 * checks that the input begins with '{' followed by the \rtf keyword and
	 * then hands over to Continue(). A reference to the parser is added
	 * before Continue() runs and released again unless parsing ended in
	 * SVPAR_PENDING.
	 *
	 * @return the resulting parser state; SVPAR_ERROR if the header check fails
	 */
	SvParserState SvRTFParser::CallParser()
	{
		// Start from a defined value so that a failed read cannot leave an
		// indeterminate character in nNextCh.
		sal_Char cFirstCh = 0;
		nNextChPos = rInput.Tell();
		rInput >> cFirstCh;
		nNextCh = cFirstCh;
		eState = SVPAR_WORKING;
		nOpenBrakets = 0;
		SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_MS_1252 );
		eUNICodeSet = RTL_TEXTENCODING_MS_1252; 	// ANSI code set by default

		// the first two tokens have to be '{' and \rtf
		if( '{' == GetNextToken() && RTF_RTF == GetNextToken() )
		{
			AddRef();
			Continue( 0 );
			if( SVPAR_PENDING != eState )
				ReleaseRef();		// the parser is no longer needed
		}
		else
			eState = SVPAR_ERROR;

		return eState;
	}

	/**
	 * Main parse loop: handles tokens until the parser is no longer working.
	 *
	 * Encoding keywords are applied here, groups that start with the ignore
	 * flag followed by an unknown control word are skipped, and every other
	 * token is passed to NextToken().
	 *
	 * @param nToken  token to start with; if 0, the first token is fetched
	 *                with GetNextToken()
	 */
	void SvRTFParser::Continue( int nToken )
	{
	// DBG_ASSERT( SVPAR_CS_DONTKNOW == GetCharSet(),
	// "Zeichensatz wurde geaendert." );

	if( !nToken )
	nToken = GetNextToken();

	while( IsParserWorking() )
	{
	SaveState( nToken );
	switch( nToken )
	{
	case '}':
	// while groups are still open, '}' is passed on like any token
	if( nOpenBrakets )
	goto NEXTTOKEN;
	eState = SVPAR_ACCEPTED;
	break;

	case '{':
	// is this an unknown group?
	{
	if( RTF_IGNOREFLAG != GetNextToken() )
	nToken = SkipToken( -1 );
	else if( RTF_UNKNOWNCONTROL != GetNextToken() )
	nToken = SkipToken( -2 );
	else
	{
	// filter it out right away
	ReadUnknownData();
	nToken = GetNextToken();
	if( '}' != nToken )
	eState = SVPAR_ERROR;
	break; // on to the next token
	}
	}
	goto NEXTTOKEN;

	case RTF_UNKNOWNCONTROL:
	break; // skip unknown tokens
	case RTF_NEXTTYPE:
	case RTF_ANSITYPE:
	SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_MS_1252 );
	break;
	case RTF_MACTYPE:
	SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_APPLE_ROMAN );
	break;
	case RTF_PCTYPE:
	SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_IBM_437 );
	break;
	case RTF_PCATYPE:
	SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_IBM_850 );
	break;
	case RTF_ANSICPG:
	// the keyword's parameter is a Windows code page number
	eCodeSet = rtl_getTextEncodingFromWindowsCodePage(nTokenValue);
	SetSrcEncoding(eCodeSet);
	break;
	default:
	NEXTTOKEN:
	NextToken( nToken );
	break;
	}
	if( IsParserWorking() )
	SaveState( 0 ); // everything up to here is processed,
	// carry on with a new token
	nToken = GetNextToken();
	}
	// input accepted while groups are still open counts as an error
	if( SVPAR_ACCEPTED == eState && 0 < nOpenBrakets )
	eState = SVPAR_ERROR;
	}

	// Makes eEnc the source encoding and stores it in the state on top of the
	// parser state stack, if there is one. RTL_TEXTENCODING_DONTKNOW stands for
	// the encoding returned by GetCodeSet().
	void SvRTFParser::SetEncoding( rtl_TextEncoding eEnc )
	{
		const rtl_TextEncoding eEffective =
			( RTL_TEXTENCODING_DONTKNOW == eEnc ) ? GetCodeSet() : eEnc;

		if( !aParserStates.empty() )
		{
			aParserStates.top().eCodeSet = eEffective;
		}
		SetSrcEncoding( eEffective );
	}

	#ifdef USED
	// Plain forwarders to the base class, compiled only when USED is defined.
	void SvRTFParser::SaveState( int nToken ) { SvParser::SaveState( nToken ); }

	void SvRTFParser::RestoreState() { SvParser::RestoreState(); }
	#endif

	/* vi:set tabstop=4 shiftwidth=4 expandtab: */