AOO410/main/svtools/source/svhtml/parhtml.cxx - openoffice - Git at Google

 /**************************************************************
  *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  *
  *************************************************************/


 // MARKER(update_precomp.py): autogen include statement, do not remove
 #include "precompiled_svtools.hxx"

 #include <ctype.h>
 #include <stdio.h>
 #include <tools/stream.hxx>
 #include <tools/debug.hxx>
 #include <tools/color.hxx>
 #include <rtl/ustrbuf.hxx>
 #include <rtl/strbuf.hxx>
 #ifndef _SVSTDARR_HXX
 #define _SVSTDARR_ULONGS
 #include <svl/svstdarr.hxx>
 #endif

 #include <tools/tenccvt.hxx>
 #include <tools/datetime.hxx>
 #include <svl/inettype.hxx>
 #include <comphelper/string.hxx>
 #include <com/sun/star/beans/PropertyAttribute.hpp>
 #include <com/sun/star/document/XDocumentProperties.hpp>

 #include <svtools/parhtml.hxx>
 #include <svtools/htmltokn.h>
 #include <svtools/htmlkywd.hxx>


 using namespace ::com::sun::star;


 const sal_Int32 MAX_LEN( 1024L );
 //static sal_Unicode sTmpBuffer[ MAX_LEN+1 ];
 const sal_Int32 MAX_MACRO_LEN( 1024 );

 const sal_Int32 MAX_ENTITY_LEN( 8L );

 /*  */

 // Tabellen zum Umwandeln von Options-Werten in Strings

 // <INPUT TYPE=xxx>
 static HTMLOptionEnum __READONLY_DATA aInputTypeOptEnums[] =
 {
 	{ OOO_STRING_SVTOOLS_HTML_IT_text,		HTML_IT_TEXT		},
 	{ OOO_STRING_SVTOOLS_HTML_IT_password,	HTML_IT_PASSWORD	},
 	{ OOO_STRING_SVTOOLS_HTML_IT_checkbox,	HTML_IT_CHECKBOX	},
 	{ OOO_STRING_SVTOOLS_HTML_IT_radio,   	HTML_IT_RADIO		},
 	{ OOO_STRING_SVTOOLS_HTML_IT_range,   	HTML_IT_RANGE		},
 	{ OOO_STRING_SVTOOLS_HTML_IT_scribble,	HTML_IT_SCRIBBLE	},
 	{ OOO_STRING_SVTOOLS_HTML_IT_file,    	HTML_IT_FILE		},
 	{ OOO_STRING_SVTOOLS_HTML_IT_hidden,  	HTML_IT_HIDDEN		},
 	{ OOO_STRING_SVTOOLS_HTML_IT_submit,  	HTML_IT_SUBMIT		},
 	{ OOO_STRING_SVTOOLS_HTML_IT_image,   	HTML_IT_IMAGE		},
 	{ OOO_STRING_SVTOOLS_HTML_IT_reset,   	HTML_IT_RESET		},
 	{ OOO_STRING_SVTOOLS_HTML_IT_button,   	HTML_IT_BUTTON		},
 	{ 0,					0					}
 };

 // <TABLE FRAME=xxx>
 static HTMLOptionEnum __READONLY_DATA aTableFrameOptEnums[] =
 {
 	{ OOO_STRING_SVTOOLS_HTML_TF_void,	HTML_TF_VOID	},
 	{ OOO_STRING_SVTOOLS_HTML_TF_above,	HTML_TF_ABOVE	},
 	{ OOO_STRING_SVTOOLS_HTML_TF_below,	HTML_TF_BELOW	},
 	{ OOO_STRING_SVTOOLS_HTML_TF_hsides,	HTML_TF_HSIDES	},
 	{ OOO_STRING_SVTOOLS_HTML_TF_lhs,		HTML_TF_LHS		},
 	{ OOO_STRING_SVTOOLS_HTML_TF_rhs,		HTML_TF_RHS		},
 	{ OOO_STRING_SVTOOLS_HTML_TF_vsides,	HTML_TF_VSIDES	},
 	{ OOO_STRING_SVTOOLS_HTML_TF_box,		HTML_TF_BOX		},
 	{ OOO_STRING_SVTOOLS_HTML_TF_border,	HTML_TF_BOX		},
 	{ 0,				0				}
 };

 // <TABLE RULES=xxx>
 static HTMLOptionEnum __READONLY_DATA aTableRulesOptEnums[] =
 {
 	{ OOO_STRING_SVTOOLS_HTML_TR_none,	HTML_TR_NONE	},
 	{ OOO_STRING_SVTOOLS_HTML_TR_groups,	HTML_TR_GROUPS	},
 	{ OOO_STRING_SVTOOLS_HTML_TR_rows,	HTML_TR_ROWS	},
 	{ OOO_STRING_SVTOOLS_HTML_TR_cols,	HTML_TR_COLS	},
 	{ OOO_STRING_SVTOOLS_HTML_TR_all,		HTML_TR_ALL		},
 	{ 0,				0				}
 };


 SV_IMPL_PTRARR(HTMLOptions,HTMLOptionPtr)

 /*  */

 sal_uInt16 HTMLOption::GetEnum( const HTMLOptionEnum *pOptEnums, sal_uInt16 nDflt ) const
 {
 	sal_uInt16 nValue = nDflt;

 	while( pOptEnums->pName )
 		if( aValue.EqualsIgnoreCaseAscii( pOptEnums->pName ) )
 			break;
 		else
 			pOptEnums++;

 	if( pOptEnums->pName )
 		nValue = pOptEnums->nValue;

 	return nValue;
 }

 sal_Bool HTMLOption::GetEnum( sal_uInt16 &rEnum, const HTMLOptionEnum *pOptEnums ) const
 {
 	while( pOptEnums->pName )
 	{
 		if( aValue.EqualsIgnoreCaseAscii( pOptEnums->pName ) )
 			break;
 		else
 			pOptEnums++;
 	}

 	const sal_Char *pName = pOptEnums->pName;
 	if( pName )
 		rEnum = pOptEnums->nValue;

 	return (pName != 0);
 }

 HTMLOption::HTMLOption( sal_uInt16 nTok, const String& rToken,
 						const String& rValue )
 	: aValue(rValue)
 	, aToken(rToken)
 	, nToken( nTok )
 {
 	DBG_ASSERT( nToken>=HTML_OPTION_START && nToken<HTML_OPTION_END,
 		"HTMLOption: unbekanntes Token" );
 }

 sal_uInt32 HTMLOption::GetNumber() const
 {
 	DBG_ASSERT( (nToken>=HTML_OPTION_NUMBER_START &&
 				 nToken<HTML_OPTION_NUMBER_END) ||
 				(nToken>=HTML_OPTION_CONTEXT_START &&
 				 nToken<HTML_OPTION_CONTEXT_END) ||
 				nToken==HTML_O_VALUE,
 		"GetNumber: Option ist nicht numerisch" );
 	String aTmp( aValue );
 	aTmp.EraseLeadingChars();
 	sal_Int32 nTmp = aTmp.ToInt32();
 	return nTmp >= 0 ? (sal_uInt32)nTmp : 0;
 }

 sal_Int32 HTMLOption::GetSNumber() const
 {
 	DBG_ASSERT( (nToken>=HTML_OPTION_NUMBER_START && nToken<HTML_OPTION_NUMBER_END) ||
 				(nToken>=HTML_OPTION_CONTEXT_START && nToken<HTML_OPTION_CONTEXT_END),
 		"GetSNumber: Option ist nicht numerisch" );
 	String aTmp( aValue );
 	aTmp.EraseLeadingChars();
 	return aTmp.ToInt32();
 }

 void HTMLOption::GetNumbers( SvULongs &rLongs, sal_Bool bSpaceDelim ) const
 {
 	if( rLongs.Count() )
 		rLongs.Remove( 0, rLongs.Count() );

 	if( bSpaceDelim )
 	{
 		// das ist ein sehr stark vereinfachter Scanner. Er sucht einfach
 		// alle Tiffern aus dem String
 		sal_Bool bInNum = sal_False;
 		sal_uLong nNum = 0;
 		for( xub_StrLen i=0; i<aValue.Len(); i++ )
 		{
 			register sal_Unicode c = aValue.GetChar( i );
 			if( c>='0' && c<='9' )
 			{
 				nNum *= 10;
 				nNum += (c - '0');
 				bInNum = sal_True;
 			}
 			else if( bInNum )
 			{
 				rLongs.Insert( nNum, rLongs.Count() );
 				bInNum = sal_False;
 				nNum = 0;
 			}
 		}
 		if( bInNum )
 		{
 			rLongs.Insert( nNum, rLongs.Count() );
 		}
 	}
 	else
 	{
 		// hier wird auf die korrekte Trennung der Zahlen durch ',' geachtet
 		// und auch mal eine 0 eingefuegt
 		xub_StrLen nPos = 0;
 		while( nPos < aValue.Len() )
 		{
 			register sal_Unicode c;
 			while( nPos < aValue.Len() &&
 				   ((c=aValue.GetChar(nPos)) == ' ' || c == '\t' ||
 				   c == '\n' || c== '\r' ) )
 				nPos++;

 			if( nPos==aValue.Len() )
 				rLongs.Insert( sal_uLong(0), rLongs.Count() );
 			else
 			{
 				xub_StrLen nEnd = aValue.Search( (sal_Unicode)',', nPos );
 				if( STRING_NOTFOUND==nEnd )
 				{
 					sal_Int32 nTmp = aValue.Copy(nPos).ToInt32();
 					rLongs.Insert( nTmp >= 0 ? (sal_uInt32)nTmp : 0,
 								   rLongs.Count() );
 					nPos = aValue.Len();
 				}
 				else
 				{
 					sal_Int32 nTmp =
 						aValue.Copy(nPos,nEnd-nPos).ToInt32();
 					rLongs.Insert( nTmp >= 0 ? (sal_uInt32)nTmp : 0,
 								   rLongs.Count() );
 					nPos = nEnd+1;
 				}
 			}
 		}
 	}
 }

 void HTMLOption::GetColor( Color& rColor ) const
 {
 	DBG_ASSERT( (nToken>=HTML_OPTION_COLOR_START && nToken<HTML_OPTION_COLOR_END) || nToken==HTML_O_SIZE,
 		"GetColor: Option spezifiziert keine Farbe" );

 	String aTmp( aValue );
 	aTmp.ToUpperAscii();
 	sal_uLong nColor = ULONG_MAX;
 	if( '#'!=aTmp.GetChar( 0 ) )
 		nColor = GetHTMLColor( aTmp );

 	if( ULONG_MAX == nColor )
 	{
 		nColor = 0;
 		xub_StrLen nPos = 0;
 		for( sal_uInt32 i=0; i<6; i++ )
 		{
 			// MIB 26.06.97: Wie auch immer Netscape Farbwerte ermittelt,
 			// maximal drei Zeichen, die kleiner als '0' sind werden
 			// ignoriert. Bug #40901# stimmt damit. Mal schauen, was sich
 			// irgendwelche HTML-Autoren noch so einfallen lassen...
 			register sal_Unicode c = nPos<aTmp.Len() ? aTmp.GetChar( nPos++ )
 													 : '0';
 			if( c < '0' )
 			{
 				c = nPos<aTmp.Len() ? aTmp.GetChar(nPos++) : '0';
 				if( c < '0' )
 					c = nPos<aTmp.Len() ? aTmp.GetChar(nPos++) : '0';
 			}
 			nColor *= 16;
 			if( c >= '0' && c <= '9' )
 				nColor += (c - 48);
 			else if( c >= 'A' && c <= 'F' )
 				nColor += (c - 55);
 		}
 	}

 	rColor.SetRed(   (sal_uInt8)((nColor & 0x00ff0000) >> 16) );
 	rColor.SetGreen( (sal_uInt8)((nColor & 0x0000ff00) >> 8));
 	rColor.SetBlue(  (sal_uInt8)(nColor & 0x000000ff) );
 }

 HTMLInputType HTMLOption::GetInputType() const
 {
 	DBG_ASSERT( nToken==HTML_O_TYPE, "GetInputType: Option nicht TYPE" );
 	return (HTMLInputType)GetEnum( aInputTypeOptEnums, HTML_IT_TEXT );
 }

 HTMLTableFrame HTMLOption::GetTableFrame() const
 {
 	DBG_ASSERT( nToken==HTML_O_FRAME, "GetTableFrame: Option nicht FRAME" );
 	return (HTMLTableFrame)GetEnum( aTableFrameOptEnums, HTML_TF_VOID );
 }

 HTMLTableRules HTMLOption::GetTableRules() const
 {
 	DBG_ASSERT( nToken==HTML_O_RULES, "GetTableRules: Option nicht RULES" );
 	return (HTMLTableRules)GetEnum( aTableRulesOptEnums, HTML_TR_NONE );
 }

 /*  */

 HTMLParser::HTMLParser( SvStream& rIn, int bReadNewDoc )
 	: SvParser( rIn )
 {
 	bNewDoc = bReadNewDoc;
 	bReadListing = bReadXMP = bReadPRE = bReadTextArea =
 		bReadScript = bReadStyle =
 		bEndTokenFound = bIsInBody = bReadNextChar =
 		bReadComment = sal_False;
 	bIsInHeader = sal_True;
 	pOptions = new HTMLOptions;

 	//#i76649, default to UTF-8 for HTML unless we know differently
 	SetSrcEncoding(RTL_TEXTENCODING_UTF8);
 }

 HTMLParser::~HTMLParser()
 {
 	if( pOptions && pOptions->Count() )
 		pOptions->DeleteAndDestroy( 0, pOptions->Count() );
 	delete pOptions;
 }

 SvParserState __EXPORT HTMLParser::CallParser()
 {
 	eState = SVPAR_WORKING;
 	nNextCh = GetNextChar();
 	SaveState( 0 );

 	nPre_LinePos = 0;
 	bPre_IgnoreNewPara = sal_False;

 	AddRef();
 	Continue( 0 );
 	if( SVPAR_PENDING != eState )
 		ReleaseRef();		// dann brauchen wir den Parser nicht mehr!

 	return eState;
 }

 void HTMLParser::Continue( int nToken )
 {
 	if( !nToken )
 		nToken = GetNextToken();

 	while( IsParserWorking() )
 	{
 		SaveState( nToken );
 		nToken = FilterToken( nToken );

 		if( nToken )
 			NextToken( nToken );

 		if( IsParserWorking() )
 			SaveState( 0 );			// bis hierhin abgearbeitet,
 									// weiter mit neuem Token!
 		nToken = GetNextToken();
 	}
 }

 int HTMLParser::FilterToken( int nToken )
 {
 	switch( nToken )
 	{
 	case sal_Unicode(EOF):
 		nToken = 0;
 		break;			// nicht verschicken

 	case HTML_HEAD_OFF:
 		bIsInBody = sal_True;
 	case HTML_HEAD_ON:
 		bIsInHeader = HTML_HEAD_ON == nToken;
 		break;

 	case HTML_BODY_ON:
 	case HTML_FRAMESET_ON:
 		bIsInHeader = sal_False;
 		bIsInBody = HTML_BODY_ON == nToken;
 		break;

 	case HTML_BODY_OFF:
 		bIsInBody = bReadPRE = bReadListing = bReadXMP = sal_False;
 		break;

 	case HTML_HTML_OFF:
 		nToken = 0;
 		bReadPRE = bReadListing = bReadXMP = sal_False;
 		break;		// HTML_ON wurde auch nicht verschickt !

 	case HTML_PREFORMTXT_ON:
 		StartPRE();
 		break;

 	case HTML_PREFORMTXT_OFF:
 		FinishPRE();
 		break;

 	case HTML_LISTING_ON:
 		StartListing();
 		break;

 	case HTML_LISTING_OFF:
 		FinishListing();
 		break;

 	case HTML_XMP_ON:
 		StartXMP();
 		break;

 	case HTML_XMP_OFF:
 		FinishXMP();
 		break;

 	default:
 		if( bReadPRE )
 			nToken = FilterPRE( nToken );
 		else if( bReadListing )
 			nToken = FilterListing( nToken );
 		else if( bReadXMP )
 			nToken = FilterXMP( nToken );

 		break;
 	}

 	return nToken;
 }

 #define HTML_ISDIGIT( c ) (c >= '0' && c <= '9')
 #define HTML_ISALPHA( c ) ( (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') )
 #define HTML_ISALNUM( c ) ( HTML_ISALPHA(c) || HTML_ISDIGIT(c) )
 #define HTML_ISSPACE( c ) ( ' ' == c || (c >= 0x09 && c <= 0x0d) )
 #define HTML_ISPRINTABLE( c ) ( c >= 32 && c != 127)
 // --> OD 2006-07-26 #138464#
 #define HTML_ISHEXDIGIT( c ) ( HTML_ISDIGIT(c) || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f') )
 // <--

 int HTMLParser::ScanText( const sal_Unicode cBreak )
 {
 	::rtl::OUStringBuffer sTmpBuffer( MAX_LEN );
 	int bWeiter = sal_True;
 	int bEqSignFound = sal_False;
 	sal_Unicode cQuote = 0U;

 	while( bWeiter && IsParserWorking() )
 	{
 		int bNextCh = sal_True;
 		switch( nNextCh )
 		{
 		case '&':
 			bEqSignFound = sal_False;
 			if( bReadXMP )
 				sTmpBuffer.append( (sal_Unicode)'&' );
 			else
 			{
 				sal_uLong nStreamPos = rInput.Tell();
 				sal_uLong nLinePos = GetLinePos();

 				sal_Unicode cChar = 0U;
 				if( '#' == (nNextCh = GetNextChar()) )
 				{
 					nNextCh = GetNextChar();
                     // --> OD 2006-07-26 #138464#
                     // consider hexadecimal digits
                     const sal_Bool bIsHex( 'x' == nNextCh );
                     const sal_Bool bIsDecOrHex( bIsHex || HTML_ISDIGIT(nNextCh) );
                     if ( bIsDecOrHex )
 					{
                         if ( bIsHex )
                         {
                             nNextCh = GetNextChar();
                             while ( HTML_ISHEXDIGIT(nNextCh) )
                             {
                                 cChar = cChar * 16U +
                                         ( nNextCh <= '9'
                                           ? sal_Unicode( nNextCh - '0' )
                                           : ( nNextCh <= 'F'
                                               ? sal_Unicode( nNextCh - 'A' + 10 )
                                               : sal_Unicode( nNextCh - 'a' + 10 ) ) );
                                 nNextCh = GetNextChar();
                             }
                         }
                         else
                         {
                             do
                             {
                                 cChar = cChar * 10U + sal_Unicode( nNextCh - '0');
                                 nNextCh = GetNextChar();
                             }
                             while( HTML_ISDIGIT(nNextCh) );
                         }

 						if( RTL_TEXTENCODING_DONTKNOW != eSrcEnc &&
 							RTL_TEXTENCODING_UCS2 != eSrcEnc &&
 							RTL_TEXTENCODING_UTF8 != eSrcEnc &&
 						 	cChar < 256 )
 						{
 						 	sal_Unicode cOrig = cChar;
 							cChar = ByteString::ConvertToUnicode(
 											(sal_Char)cChar, eSrcEnc );
 							if( 0U == cChar )
 							{
 								// #73398#: If the character could not be
 								// converted, because a conversion is not
 								// available, do no conversion at all.
 								cChar = cOrig;
 							}
 						}
 					}
                     // <--
 					else
 						nNextCh = 0U;
 				}
 				else if( HTML_ISALPHA( nNextCh ) )
 				{
 					::rtl::OUStringBuffer sEntityBuffer( MAX_ENTITY_LEN );
 					xub_StrLen nPos = 0L;
 					do
 					{
 						sEntityBuffer.append( nNextCh );
 						nPos++;
 						nNextCh = GetNextChar();
 					}
 					while( nPos < MAX_ENTITY_LEN && HTML_ISALNUM( nNextCh ) &&
 						   !rInput.IsEof() );

 					if( IsParserWorking() && !rInput.IsEof() )
 					{
 						String sEntity( sEntityBuffer.getStr(), nPos );
 						cChar = GetHTMLCharName( sEntity );

 						// nicht gefunden ( == 0 ), dann Klartext
 						// oder ein Zeichen das als Attribut eingefuegt
 						// wird
 						if( 0U == cChar && ';' != nNextCh )
 						{
 							DBG_ASSERT( rInput.Tell() - nStreamPos ==
 										(sal_uLong)(nPos+1L)*GetCharSize(),
 										"UTF-8 geht hier schief" );
 							for( xub_StrLen i=nPos-1L; i>1L; i-- )
 							{
 								nNextCh = sEntityBuffer[i];
 								sEntityBuffer.setLength( i );
 								sEntity.Assign( sEntityBuffer.getStr(), i );
  								cChar = GetHTMLCharName( sEntity );
 								if( cChar )
 								{
 									rInput.SeekRel( -(long)
 											((nPos-i)*GetCharSize()) );
 									nlLinePos -= sal_uInt32(nPos-i);
 									nPos = i;
 									ClearTxtConvContext();
 									break;
 								}
 							}
 						}

 						if( !cChar )		// unbekanntes Zeichen?
 						{
 							// dann im Stream zurueck, das '&' als Zeichen
 							// einfuegen und mit dem nachfolgenden Zeichen
 							// wieder aufsetzen
 							sTmpBuffer.append( (sal_Unicode)'&' );

 //							rInput.SeekRel( -(long)(++nPos*GetCharSize()) );
 //							nlLinePos -= nPos;
 							DBG_ASSERT( rInput.Tell()-nStreamPos ==
 										(sal_uLong)(nPos+1)*GetCharSize(),
 										"Falsche Stream-Position" );
 							DBG_ASSERT( nlLinePos-nLinePos ==
 										(sal_uLong)(nPos+1),
 										"Falsche Zeilen-Position" );
 							rInput.Seek( nStreamPos );
 							nlLinePos = nLinePos;
 							ClearTxtConvContext();
 							break;
 						}

 						// 1 == Non Breaking Space
 						// 2 == SoftHyphen

 						if( cChar < 3U )
 						{
 							if( '>' == cBreak )
 							{
 								// Wenn der Inhalt eines Tags gelesen wird,
 								// muessen wir ein Space bzw. - daraus machen
 								switch( cChar )
 								{
 								case 1U: cChar = ' '; break;
 								case 2U: cChar = '-'; break;
 								default:
 									DBG_ASSERT( cChar==1U,
 							"\0x00 sollte doch schon laengt abgefangen sein!" );
 									break;
 								}
 							}
 							else
 							{
 								// Wenn kein Tag gescannt wird, enstprechendes
 								// Token zurueckgeben
 								aToken +=
 									String( sTmpBuffer.makeStringAndClear() );
 								if( cChar )
 								{
 									if( aToken.Len() )
 									{
 										// mit dem Zeichen wieder aufsetzen
 										nNextCh = '&';
 //										rInput.SeekRel( -(long)(++nPos*GetCharSize()) );
 //										nlLinePos -= nPos;
 										DBG_ASSERT( rInput.Tell()-nStreamPos ==
 													(sal_uLong)(nPos+1)*GetCharSize(),
 													"Falsche Stream-Position" );
 										DBG_ASSERT( nlLinePos-nLinePos ==
 													(sal_uLong)(nPos+1),
 													"Falsche Zeilen-Position" );
 										rInput.Seek( nStreamPos );
 										nlLinePos = nLinePos;
 										ClearTxtConvContext();
 										return HTML_TEXTTOKEN;
 									}

 									// Hack: _GetNextChar soll nicht das
 									// naechste Zeichen lesen
 									if( ';' != nNextCh )
 										aToken += ' ';
 									if( 1U == cChar )
 										return HTML_NONBREAKSPACE;
 									if( 2U == cChar )
 										return HTML_SOFTHYPH;
 								}
 								aToken += (sal_Unicode)'&';
 								aToken +=
 									String(sEntityBuffer.makeStringAndClear());
 								break;
 							}
 						}
 					}
 					else
 						nNextCh = 0U;
 				}
 				// MIB 03/02/2000: &{...};-JavaScript-Macros are not
 				// supported any longer.
 				else if( IsParserWorking() )
 				{
 					sTmpBuffer.append( (sal_Unicode)'&' );
 					bNextCh = sal_False;
 					break;
 				}

 				bNextCh = (';' == nNextCh);
 				if( cBreak=='>' && (cChar=='\\' || cChar=='\'' ||
 									cChar=='\"' || cChar==' ') )
 				{
 					// ' und " mussen innerhalb von Tags mit einem
 					// gekennzeichnet werden, um sie von ' und " als Klammern
 					// um Optionen zu unterscheiden. Logischerweise muss
 					// deshalb auch ein \ gekeenzeichnet werden. Ausserdem
 					// schuetzen wir ein Space, weil es kein Trennzeichen
 					// zwischen Optionen ist.
 					sTmpBuffer.append( (sal_Unicode)'\\' );
 					if( MAX_LEN == sTmpBuffer.getLength() )
 						aToken += String(sTmpBuffer.makeStringAndClear());
 				}
 				if( IsParserWorking() )
 				{
 					if( cChar )
 						sTmpBuffer.append( cChar );
 				}
 				else if( SVPAR_PENDING==eState && '>'!=cBreak )
 				{
 					// Mit dem '&' Zeichen wieder aufsetzen, der Rest
 					// wird als Texttoken zurueckgegeben.
 					if( aToken.Len() || sTmpBuffer.getLength() )
 					{
 						// Der bisherige Text wird von _GetNextChar()
 						// zurueckgegeben und beim naechsten Aufruf wird
 						// ein neues Zeichen gelesen. Also muessen wir uns
 						// noch vor das & stellen.
 						nNextCh = 0U;
 						rInput.Seek( nStreamPos-(sal_uInt32)GetCharSize() );
 						nlLinePos = nLinePos-1;
 						ClearTxtConvContext();
 						bReadNextChar = sal_True;
 					}
 					bNextCh = sal_False;
 				}
 			}
 			break;
 		case '=':
 			if( '>'==cBreak && !cQuote )
 				bEqSignFound = sal_True;
 			sTmpBuffer.append( nNextCh );
 			break;

 		case '\\':
 			if( '>'==cBreak )
 			{
 				// Innerhalb von Tags kennzeichnen
 				sTmpBuffer.append( (sal_Unicode)'\\' );
 				if( MAX_LEN == sTmpBuffer.getLength() )
 					aToken += String(sTmpBuffer.makeStringAndClear());
 			}
 			sTmpBuffer.append( (sal_Unicode)'\\' );
 			break;

 		case '\"':
 		case '\'':
 			if( '>'==cBreak )
 			{
 				if( bEqSignFound )
 					cQuote = nNextCh;
 				else if( cQuote && (cQuote==nNextCh ) )
 					cQuote = 0U;
 			}
 			sTmpBuffer.append( nNextCh );
 			bEqSignFound = sal_False;
 			break;

 		case sal_Unicode(EOF):
 			if( rInput.IsEof() )
 			{
 // MIB 20.11.98: Das macht hier keinen Sinn, oder doch: Zumindest wird
 // abc&auml;<EOF> nicht angezeigt, also lassen wir das in Zukunft.
 //				if( '>' != cBreak )
 //					eState = SVPAR_ACCEPTED;
 				bWeiter = sal_False;
 			}
 			else
 			{
 				sTmpBuffer.append( nNextCh );
 			}
 			break;

 		case '<':
 			bEqSignFound = sal_False;
 			if( '>'==cBreak )
 				sTmpBuffer.append( nNextCh );
 			else
 				bWeiter = sal_False;		// Abbrechen, String zusammen
 			break;

 		case '\f':
 			if( '>' == cBreak )
 			{
 				// Beim Scannen von Optionen wie ein Space behandeln
 				sTmpBuffer.append( (sal_Unicode)' ' );
 			}
 			else
 			{
 				// sonst wird es ein eigenes Token
 				bWeiter = sal_False;
 			}
 			break;

 		case '\r':
 		case '\n':
 			if( '>'==cBreak )
 			{
 				// #26979# cr/lf in Tag wird in _GetNextToken() behandeln
 				sTmpBuffer.append( nNextCh );
 				break;
 			}
 			else if( bReadListing || bReadXMP || bReadPRE || bReadTextArea )
 			{
 				bWeiter = sal_False;
 				break;
 			}
 			// Bug 18984: CR-LF -> Blank
 			// 		Folge von CR/LF/BLANK/TAB nur in ein Blank wandeln
 			// kein break!!
 		case '\t':
 			if( '\t'==nNextCh && bReadPRE && '>'!=cBreak )
 			{
 				// In <PRE>: Tabs nach oben durchreichen
 				bWeiter = sal_False;
 				break;
 			}
 			// kein break
 		case '\x0b':
 			if( '\x0b'==nNextCh && (bReadPRE || bReadXMP ||bReadListing) &&
 				'>'!=cBreak )
 			{
 				break;
 			}
 			nNextCh = ' ';
 			// kein break;
 		case ' ':
 			sTmpBuffer.append( nNextCh );
 			if( '>'!=cBreak && (!bReadListing && !bReadXMP &&
 								!bReadPRE && !bReadTextArea) )
 			{
 				// alle Folgen von Blanks/Tabs/CR/LF zu einem Blank umwandeln
 				do {
 					if( sal_Unicode(EOF) == (nNextCh = GetNextChar()) &&
 						rInput.IsEof() )
 					{
 						if( aToken.Len() || sTmpBuffer.getLength() > 1L )
 						{
 							// ausser den Blanks wurde noch etwas geselen
 							aToken += String(sTmpBuffer.makeStringAndClear());
 							return HTML_TEXTTOKEN;
 						}
 						else
 							// nur Blanks gelesen: dann darf kein Text
 							// mehr zurueckgegeben werden und _GetNextToken
 							// muss auf EOF laufen
 							return 0;
 					}
 				} while ( ' ' == nNextCh || '\t' == nNextCh ||
 						  '\r' == nNextCh || '\n' == nNextCh ||
 						  '\x0b' == nNextCh );
 				bNextCh = sal_False;
 			}
 			break;

 		default:
 			bEqSignFound = sal_False;
 			if( (nNextCh==cBreak && !cQuote) ||
 				(sal_uLong(aToken.Len()) + MAX_LEN) > sal_uLong(STRING_MAXLEN & ~1 ))
 				bWeiter = sal_False;
 			else
 			{
 				do {
 					// alle anderen Zeichen kommen in den Text
 					sTmpBuffer.append( nNextCh );
 					if( MAX_LEN == sTmpBuffer.getLength() )
 					{
 						aToken += String(sTmpBuffer.makeStringAndClear());
 						if( (sal_uLong(aToken.Len()) + MAX_LEN) >
 								sal_uLong(STRING_MAXLEN & ~1 ) )
 						{
 							nNextCh = GetNextChar();
 							return HTML_TEXTTOKEN;
 						}
 					}
 					if( ( sal_Unicode(EOF) == (nNextCh = GetNextChar()) &&
 						  rInput.IsEof() ) ||
 						!IsParserWorking() )
 					{
 						if( sTmpBuffer.getLength() )
 							aToken += String(sTmpBuffer.makeStringAndClear());
 						return HTML_TEXTTOKEN;
 					}
 				} while( HTML_ISALPHA( nNextCh ) || HTML_ISDIGIT( nNextCh ) );
 				bNextCh = sal_False;
 			}
 		}

 		if( MAX_LEN == sTmpBuffer.getLength() )
 			aToken += String(sTmpBuffer.makeStringAndClear());

 		if( bWeiter && bNextCh )
 			nNextCh = GetNextChar();
 	}

 	if( sTmpBuffer.getLength() )
 		aToken += String(sTmpBuffer.makeStringAndClear());

 	return HTML_TEXTTOKEN;
 }

 int HTMLParser::_GetNextRawToken()
 {
 	::rtl::OUStringBuffer sTmpBuffer( MAX_LEN );

 	if( bEndTokenFound )
 	{
 		// beim letzten Aufruf haben wir das End-Token bereits gefunden,
 		// deshalb muessen wir es nicht noch einmal suchen
 		bReadScript = sal_False;
 		bReadStyle = sal_False;
 		aEndToken.Erase();
 		bEndTokenFound = sal_False;

 		return 0;
 	}

 	// per default geben wir HTML_RAWDATA zurueck
 	int bWeiter = sal_True;
 	int nToken = HTML_RAWDATA;
 	SaveState( 0 );
 	while( bWeiter && IsParserWorking() )
 	{
 		int bNextCh = sal_True;
 		switch( nNextCh )
 		{
 		case '<':
 			{
 				// Vielleicht haben wir das Ende erreicht

 				// das bisher gelesene erstmal retten
 				aToken += String(sTmpBuffer.makeStringAndClear());

 				// und die Position im Stream merken
 				sal_uLong nStreamPos = rInput.Tell();
 				sal_uLong nLineNr = GetLineNr();
 				sal_uLong nLinePos = GetLinePos();

 				// Start eines End-Token?
 				int bOffState = sal_False;
 				if( '/' == (nNextCh = GetNextChar()) )
 				{
 					bOffState = sal_True;
 					nNextCh = GetNextChar();
 				}
 				else if( '!' == nNextCh )
 				{
 					sTmpBuffer.append( nNextCh );
 					nNextCh = GetNextChar();
 				}

 				// jetzt die Buchstaben danach lesen
 				while( (HTML_ISALPHA(nNextCh) || '-'==nNextCh) &&
 					   IsParserWorking() && sTmpBuffer.getLength() < MAX_LEN )
 				{
 					sTmpBuffer.append( nNextCh );
 					nNextCh = GetNextChar();
 				}

 				String aTok( sTmpBuffer.getStr(),
 							 sal::static_int_cast< xub_StrLen >(
                                  sTmpBuffer.getLength()) );
 				aTok.ToUpperAscii();
 				sal_Bool bDone = sal_False;
 				if( bReadScript || aEndToken.Len() )
 				{
 					if( !bReadComment )
 					{
 						if( aTok.CompareToAscii( OOO_STRING_SVTOOLS_HTML_comment, 3 )
 								== COMPARE_EQUAL )
 						{
 							bReadComment = sal_True;
 						}
 						else
 						{
 							// ein Script muss mit "</SCRIPT>" aufhoehren, wobei
 							// wir es mit dem ">" aus sicherheitsgruenden
 							// erstmal nicht so genau nehmen
 							bDone = bOffState && // '>'==nNextCh &&
 							COMPARE_EQUAL == ( bReadScript
 								? aTok.CompareToAscii(OOO_STRING_SVTOOLS_HTML_script)
 								: aTok.CompareTo(aEndToken) );
 						}
 					}
 					if( bReadComment && '>'==nNextCh && aTok.Len() >= 2 &&
 						aTok.Copy( aTok.Len()-2 ).EqualsAscii( "--" ) )
 					{
 						// hier ist ein Kommentar der Art <!-----> zuende
 						bReadComment = sal_False;
 					}
 				}
 				else
 				{
 					// ein Style-Sheet kann mit </STYLE>, </HEAD> oder
 					// <BODY> aughoehren
 					if( bOffState )
 						bDone = aTok.CompareToAscii(OOO_STRING_SVTOOLS_HTML_style)
 									== COMPARE_EQUAL ||
 								aTok.CompareToAscii(OOO_STRING_SVTOOLS_HTML_head)
 									== COMPARE_EQUAL;
 					else
 						bDone =
 							aTok.CompareToAscii(OOO_STRING_SVTOOLS_HTML_body) == COMPARE_EQUAL;
 				}

 				if( bDone )
 				{
 					// das war's, jetzt muessen wir gegebenenfalls den
 					// bisher gelesenen String zurueckgeben und dnach normal
 					// weitermachen

 					bWeiter = sal_False;

 					// nToken==0 heisst, dass _GetNextToken gleich weiterliest
 					if( !aToken.Len() && (bReadStyle || bReadScript) )
 					{
 						// wir koennen sofort die Umgebung beeden und
 						// das End-Token parsen
 						bReadScript = sal_False;
 						bReadStyle = sal_False;
 						aEndToken.Erase();
 						nToken = 0;
 					}
 					else
 					{
 						// wir muessen bReadScript/bReadStyle noch am
 						// Leben lassen und koennen erst beim naechsten
 						// mal das End-Token Parsen
 						bEndTokenFound = sal_True;
 					}

 					// jetzt fahren wir im Stream auf das '<' zurueck
 					rInput.Seek( nStreamPos );
 					SetLineNr( nLineNr );
 					SetLinePos( nLinePos );
 					ClearTxtConvContext();
 					nNextCh = '<';

 					// den String wollen wir nicht an das Token haengen
 					sTmpBuffer.setLength( 0L );
 				}
 				else
 				{
 					// "</" merken, alles andere steht noch im buffer
 					aToken += (sal_Unicode)'<';
 					if( bOffState )
 						aToken += (sal_Unicode)'/';

 					bNextCh = sal_False;
 				}
 			}
 			break;
 		case '-':
 			sTmpBuffer.append( nNextCh );
 			if( bReadComment )
 			{
 				sal_Bool bTwoMinus = sal_False;
 				nNextCh = GetNextChar();
 				while( '-' == nNextCh && IsParserWorking() )
 				{
 					bTwoMinus = sal_True;

 					if( MAX_LEN == sTmpBuffer.getLength() )
 						aToken += String(sTmpBuffer.makeStringAndClear());
 					sTmpBuffer.append( nNextCh );
 					nNextCh = GetNextChar();
 				}

 				if( '>' == nNextCh && IsParserWorking() && bTwoMinus )
 					bReadComment = sal_False;

 				bNextCh = sal_False;
 			}
 			break;

 		case '\r':
 			// \r\n? beendet das aktuelle Text-Token (auch wenn es leer ist)
 			nNextCh = GetNextChar();
 			if( nNextCh=='\n' )
 				nNextCh = GetNextChar();
 			bWeiter = sal_False;
 			break;
 		case '\n':
 			// \n beendet das aktuelle Text-Token (auch wenn es leer ist)
 			nNextCh = GetNextChar();
 			bWeiter = sal_False;
 			break;
 		case sal_Unicode(EOF):
 			// eof beendet das aktuelle Text-Token und tut so, als ob
 			// ein End-Token gelesen wurde
 			if( rInput.IsEof() )
 			{
 				bWeiter = sal_False;
 				if( aToken.Len() || sTmpBuffer.getLength() )
 				{
 					bEndTokenFound = sal_True;
 				}
 				else
 				{
 					bReadScript = sal_False;
 					bReadStyle = sal_False;
 					aEndToken.Erase();
 					nToken = 0;
 				}
 				break;
 			}
 			// kein break
 		default:
 			// alle anderen Zeichen landen im Buffer
 			sTmpBuffer.append( nNextCh );
 			break;
 		}

 		if( (!bWeiter && sTmpBuffer.getLength() > 0L) ||
 			MAX_LEN == sTmpBuffer.getLength() )
 			aToken += String(sTmpBuffer.makeStringAndClear());

 		if( bWeiter && bNextCh )
 			nNextCh = GetNextChar();
 	}

 	if( IsParserWorking() )
 		SaveState( 0 );
 	else
 		nToken = 0;

 	return nToken;
 }

 // scanne das naechste Token,
 int __EXPORT HTMLParser::_GetNextToken()
 {
 	int nRet = 0;
 	sSaveToken.Erase();

 	// die Optionen loeschen
 	if( pOptions->Count() )
 		pOptions->DeleteAndDestroy( 0, pOptions->Count() );

 	if( !IsParserWorking() )		// wenn schon Fehler, dann nicht weiter!
 		return 0;

 	sal_Bool bReadNextCharSave = bReadNextChar;
 	if( bReadNextChar )
 	{
 		DBG_ASSERT( !bEndTokenFound,
 					"</SCRIPT> gelesen und trotzdem noch ein Zeichen lesen?" );
 		nNextCh = GetNextChar();
 		if( !IsParserWorking() )		// wenn schon Fehler, dann nicht weiter!
 			return 0;
 		bReadNextChar = sal_False;
 	}

 	if( bReadScript || bReadStyle || aEndToken.Len() )
 	{
 		nRet = _GetNextRawToken();
 		if( nRet || !IsParserWorking() )
 			return nRet;
 	}

 	do {
 		int bNextCh = sal_True;
 		switch( nNextCh )
 		{
 		case '<':
 			{
 				sal_uLong nStreamPos = rInput.Tell();
 				sal_uLong nLineNr = GetLineNr();
 				sal_uLong nLinePos = GetLinePos();

 				int bOffState = sal_False;
 				if( '/' == (nNextCh = GetNextChar()) )
 				{
 					bOffState = sal_True;
 					nNextCh = GetNextChar();
 				}
 				if( HTML_ISALPHA( nNextCh ) || '!'==nNextCh ) // fix #26984#
 				{
 					::rtl::OUStringBuffer sTmpBuffer;
 					do {
 						sTmpBuffer.append( nNextCh );
 						if( MAX_LEN == sTmpBuffer.getLength() )
 							aToken += String(sTmpBuffer.makeStringAndClear());
 						nNextCh = GetNextChar();
 					} while( '>' != nNextCh && !HTML_ISSPACE( nNextCh ) &&
 							 IsParserWorking() && !rInput.IsEof() );

 					if( sTmpBuffer.getLength() )
 						aToken += String(sTmpBuffer.makeStringAndClear());

 					// Blanks ueberlesen
 					while( HTML_ISSPACE( nNextCh ) && IsParserWorking() )
 						nNextCh = GetNextChar();

 					if( !IsParserWorking() )
 					{
 						if( SVPAR_PENDING == eState )
 							bReadNextChar = bReadNextCharSave;
 						break;
 					}

 					// suche das Token in der Tabelle:
 					sSaveToken = aToken;
 					aToken.ToUpperAscii();
 					if( 0 == (nRet = GetHTMLToken( aToken )) )
 						// Unknown Control
 						nRet = HTML_UNKNOWNCONTROL_ON;

 					// Wenn es ein Token zum ausschalten ist ...
 					if( bOffState )
 					{
 						 if( HTML_TOKEN_ONOFF & nRet )
 						 {
 							// und es ein Off-Token gibt, das daraus machen
 							++nRet;
 						 }
 						 else if( HTML_LINEBREAK!=nRet )
 						 {
 							// und es kein Off-Token gibt, ein unbekanntes
 							// Token daraus machen (ausser </BR>, das wird
 							// wie <BR> behandelt
 							nRet = HTML_UNKNOWNCONTROL_OFF;
 						 }
 					}

 					if( nRet == HTML_COMMENT )
 					{
 						// fix: sSaveToken wegen Gross-/Kleinschreibung
 						// als Anfang des Kommentars benutzen und ein
 						// Space anhaengen.
 						aToken = sSaveToken;
 						if( '>'!=nNextCh )
 							aToken += (sal_Unicode)' ';
 						sal_uLong nCStreamPos = 0;
 						sal_uLong nCLineNr = 0;
 						sal_uLong nCLinePos = 0;
 						xub_StrLen nCStrLen = 0;

 						sal_Bool bDone = sal_False;
 						// bis zum schliessenden --> lesen. wenn keins gefunden
 						// wurde beim der ersten > wieder aufsetzen
 						while( !bDone && !rInput.IsEof() && IsParserWorking() )
 						{
 							if( '>'==nNextCh )
 							{
 								if( !nCStreamPos )
 								{
 									nCStreamPos = rInput.Tell();
 									nCStrLen = aToken.Len();
 									nCLineNr = GetLineNr();
 									nCLinePos = GetLinePos();
 								}
 								bDone = aToken.Len() >= 2 &&
 										aToken.Copy(aToken.Len()-2,2).
 														EqualsAscii( "--" );
 								if( !bDone )
 								aToken += nNextCh;
 							}
 							else
 								aToken += nNextCh;
 							if( !bDone )
 								nNextCh = GetNextChar();
 						}
 						if( !bDone && IsParserWorking() && nCStreamPos )
 						{
 							rInput.Seek( nCStreamPos );
 							SetLineNr( nCLineNr );
 							SetLinePos( nCLinePos );
 							ClearTxtConvContext();
 							aToken.Erase( nCStrLen );
 							nNextCh = '>';
 						}
 					}
 					else
 					{
 						// den TokenString koennen wir jetzt verwerfen
 						aToken.Erase();
 					}

 					// dann lesen wir mal alles bis zur schliessenden '>'
 					if( '>' != nNextCh && IsParserWorking() )
 					{
 						ScanText( '>' );
 						if( sal_Unicode(EOF) == nNextCh && rInput.IsEof() )
 						{
 							// zurueck hinter die < gehen  und dort neu
 							// aufsetzen, das < als Text zurueckgeben
 							rInput.Seek( nStreamPos );
 							SetLineNr( nLineNr );
 							SetLinePos( nLinePos );
 							ClearTxtConvContext();

 							aToken = '<';
 							nRet = HTML_TEXTTOKEN;
 							nNextCh = GetNextChar();
 							bNextCh = sal_False;
 							break;
 						}
 					}
 					if( SVPAR_PENDING == eState )
 						bReadNextChar = bReadNextCharSave;
 				}
 				else
 				{
 					if( bOffState )
 					{
 						// einfach alles wegschmeissen
 						ScanText( '>' );
 						if( sal_Unicode(EOF) == nNextCh && rInput.IsEof() )
 						{
 							// zurueck hinter die < gehen  und dort neu
 							// aufsetzen, das < als Text zurueckgeben
 							rInput.Seek( nStreamPos );
 							SetLineNr( nLineNr );
 							SetLinePos( nLinePos );
 							ClearTxtConvContext();

 							aToken = '<';
 							nRet = HTML_TEXTTOKEN;
 							nNextCh = GetNextChar();
 							bNextCh = sal_False;
 							break;
 						}
 						if( SVPAR_PENDING == eState )
 							bReadNextChar = bReadNextCharSave;
 						aToken.Erase();
 					}
 					else if( '%' == nNextCh )
 					{
 						nRet = HTML_UNKNOWNCONTROL_ON;

 						sal_uLong nCStreamPos = rInput.Tell();
 						sal_uLong nCLineNr = GetLineNr(), nCLinePos = GetLinePos();

 						sal_Bool bDone = sal_False;
 						// bis zum schliessenden %> lesen. wenn keins gefunden
 						// wurde beim der ersten > wieder aufsetzen
 						while( !bDone && !rInput.IsEof() && IsParserWorking() )
 						{
 							bDone = '>'==nNextCh && aToken.Len() >= 1 &&
 									'%' == aToken.GetChar( aToken.Len()-1 );
 							if( !bDone )
 							{
 								aToken += nNextCh;
 								nNextCh = GetNextChar();
 							}
 						}
 						if( !bDone && IsParserWorking() )
 						{
 							rInput.Seek( nCStreamPos );
 							SetLineNr( nCLineNr );
 							SetLinePos( nCLinePos );
 							ClearTxtConvContext();
 							aToken.AssignAscii( "<%", 2 );
 							nRet = HTML_TEXTTOKEN;
 							break;
 						}
 						if( IsParserWorking() )
 						{
 							sSaveToken = aToken;
 							aToken.Erase();
 						}
 					}
 					else
 					{
 						aToken = '<';
 						nRet = HTML_TEXTTOKEN;
 						bNextCh = sal_False;
 						break;
 					}
 				}

 				if( IsParserWorking() )
 				{
 					bNextCh = '>' == nNextCh;
 					switch( nRet )
 					{
 					case HTML_TEXTAREA_ON:
 						bReadTextArea = sal_True;
 						break;
 					case HTML_TEXTAREA_OFF:
 						bReadTextArea = sal_False;
 						break;
 					case HTML_SCRIPT_ON:
 						if( !bReadTextArea )
 							bReadScript = sal_True;
 						break;
 					case HTML_SCRIPT_OFF:
 						if( !bReadTextArea )
 						{
 							bReadScript = sal_False;
 							// JavaScript kann den Stream veraendern
 							// also muss das letzte Zeichen nochmals
 							// gelesen werden
 							bReadNextChar = sal_True;
 							bNextCh = sal_False;
 						}
 						break;

 					case HTML_STYLE_ON:
 						bReadStyle = sal_True;
 						break;
 					case HTML_STYLE_OFF:
 						bReadStyle = sal_False;
 						break;
 					}

 				}
 			}
 			break;

 		case sal_Unicode(EOF):
 			if( rInput.IsEof() )
 			{
 				eState = SVPAR_ACCEPTED;
 				nRet = nNextCh;
 			}
 			else
 			{
 				// normalen Text lesen
 				goto scan_text;
 			}
 			break;

 		case '\f':
 			// Form-Feeds werden jetzt extra nach oben gereicht
 			nRet = HTML_LINEFEEDCHAR; // !!! eigentlich FORMFEEDCHAR
 			break;

 		case '\n':
 		case '\r':
 			if( bReadListing || bReadXMP || bReadPRE || bReadTextArea )
 			{
 				sal_Unicode c = GetNextChar();
 				if( ( '\n' != nNextCh || '\r' != c ) &&
 					( '\r' != nNextCh || '\n' != c ) )
 				{
 					bNextCh = sal_False;
 					nNextCh = c;
 				}
 				nRet = HTML_NEWPARA;
 				break;
 			}
 			// kein break !
 		case '\t':
 			if( bReadPRE )
 			{
 				nRet = HTML_TABCHAR;
 				break;
 			}
 			// kein break !
 		case ' ':
 			// kein break !
 		default:

 scan_text:
 			// es folgt "normaler" Text
 			nRet = ScanText();
 			bNextCh = 0 == aToken.Len();

 			// der Text sollte noch verarbeitet werden
 			if( !bNextCh && eState == SVPAR_PENDING )
 			{
 				eState = SVPAR_WORKING;
 				bReadNextChar = sal_True;
 			}

 			break;
 		}

 		if( bNextCh && SVPAR_WORKING == eState )
 		{
 			nNextCh = GetNextChar();
 			if( SVPAR_PENDING == eState && nRet && HTML_TEXTTOKEN != nRet )
 			{
 				bReadNextChar = sal_True;
 				eState = SVPAR_WORKING;
 			}
 		}

 	} while( !nRet && SVPAR_WORKING == eState );

 	if( SVPAR_PENDING == eState )
 		nRet = -1;		// irgendwas ungueltiges

 	return nRet;
 }

 void HTMLParser::UnescapeToken()
 {
 	xub_StrLen nPos=0;

 	sal_Bool bEscape = sal_False;
 	while( nPos < aToken.Len() )
 	{
 		sal_Bool bOldEscape = bEscape;
 		bEscape = sal_False;
 		if( '\\'==aToken.GetChar(nPos) && !bOldEscape )
 		{
 			aToken.Erase( nPos, 1 );
 			bEscape = sal_True;
 		}
 		else
 		{
 			nPos++;
 		}
 	}
 }

 // hole die Optionen
 const HTMLOptions *HTMLParser::GetOptions( sal_uInt16 *pNoConvertToken ) const
 {
 	// wenn die Option fuer das aktuelle Token schon einmal
 	// geholt wurden, geben wir sie noch einmal zurueck
 	if( pOptions->Count() )
 		return pOptions;

 	xub_StrLen nPos = 0;
 	while( nPos < aToken.Len() )
 	{
 		// ein Zeichen ? Dann faengt hier eine Option an
 		if( HTML_ISALPHA( aToken.GetChar(nPos) ) )
 		{
 			int nToken;
 			String aValue;
 			xub_StrLen nStt = nPos;
 			sal_Unicode cChar = 0;

 			// Eigentlich sind hier nur ganz bestimmte Zeichen erlaubt.
 			// Netscape achtet aber nur auf "=" und Leerzeichen (siehe
 			// Mozilla: PA_FetchRequestedNameValues in
 			// lipparse/pa_mdl.c
 //			while( nPos < aToken.Len() &&
 //					( '-'==(c=aToken[nPos]) || isalnum(c) || '.'==c || '_'==c) )
 			while( nPos < aToken.Len() && '=' != (cChar=aToken.GetChar(nPos)) &&
 				   HTML_ISPRINTABLE(cChar) && !HTML_ISSPACE(cChar) )
 				nPos++;

 			String sName( aToken.Copy( nStt, nPos-nStt ) );

 //JP 23.03.97: die PlugIns wollen die TokenName im "Original" haben
 //				also nur fuers Suchen in UpperCase wandeln
 			String sNameUpperCase( sName );
 			sNameUpperCase.ToUpperAscii();

 			nToken = GetHTMLOption( sNameUpperCase ); // der Name ist fertig
 			DBG_ASSERTWARNING( nToken!=HTML_O_UNKNOWN,
 						"GetOption: unbekannte HTML-Option" );
 			sal_Bool bStripCRLF = (nToken < HTML_OPTION_SCRIPT_START ||
 							   nToken >= HTML_OPTION_SCRIPT_END) &&
 							  (!pNoConvertToken || nToken != *pNoConvertToken);

 			while( nPos < aToken.Len() &&
 				   ( !HTML_ISPRINTABLE( (cChar=aToken.GetChar(nPos)) ) ||
 					 HTML_ISSPACE(cChar) ) )
 				nPos++;

 			// hat die Option auch einen Wert?
 			if( nPos!=aToken.Len() && '='==cChar )
 			{
 				nPos++;

 				while( nPos < aToken.Len() &&
 						( !HTML_ISPRINTABLE( (cChar=aToken.GetChar(nPos)) ) ||
 						  ' '==cChar || '\t'==cChar || '\r'==cChar || '\n'==cChar ) )
 					nPos++;

 				if( nPos != aToken.Len() )
 				{
 					xub_StrLen nLen = 0;
 					nStt = nPos;
 					if( ('"'==cChar) || ('\'')==cChar )
 					{
 						sal_Unicode cEnd = cChar;
 						nPos++; nStt++;
 						sal_Bool bDone = sal_False;
 						sal_Bool bEscape = sal_False;
 						while( nPos < aToken.Len() && !bDone )
 						{
 							sal_Bool bOldEscape = bEscape;
 							bEscape = sal_False;
 							cChar = aToken.GetChar(nPos);
 							switch( cChar )
 							{
 							case '\r':
 							case '\n':
 								if( bStripCRLF )
 									((String &)aToken).Erase( nPos, 1 );
 								else
 									nPos++, nLen++;
 								break;
 							case '\\':
 								if( bOldEscape )
 								{
 									nPos++, nLen++;
 								}
 								else
 								{
 									((String &)aToken).Erase( nPos, 1 );
 									bEscape = sal_True;
 								}
 								break;
 							case '"':
 							case '\'':
 								bDone = !bOldEscape && cChar==cEnd;
 								if( !bDone )
 									nPos++, nLen++;
 								break;
 							default:
 								nPos++, nLen++;
 								break;
 							}
 						}
 						if( nPos!=aToken.Len() )
 							nPos++;
 					}
 					else
 					{
 						// hier sind wir etwas laxer als der
 						// Standard und erlauben alles druckbare
 						sal_Bool bEscape = sal_False;
 						sal_Bool bDone = sal_False;
 						while( nPos < aToken.Len() && !bDone )
 						{
 							sal_Bool bOldEscape = bEscape;
 							bEscape = sal_False;
 							sal_Unicode c = aToken.GetChar(nPos);
 							switch( c )
 							{
 							case ' ':
 								bDone = !bOldEscape;
 								if( !bDone )
 									nPos++, nLen++;
 								break;

 							case '\t':
 							case '\r':
 							case '\n':
 								bDone = sal_True;
 								break;

 							case '\\':
 								if( bOldEscape )
 								{
 									nPos++, nLen++;
 								}
 								else
 								{
 									((String &)aToken).Erase( nPos, 1 );
 									bEscape = sal_True;
 								}
 								break;

 							default:
 								if( HTML_ISPRINTABLE( c ) )
 									nPos++, nLen++;
 								else
 									bDone = sal_True;
 								break;
 							}
 						}
 					}

 					if( nLen )
 						aValue = aToken.Copy( nStt, nLen );
 				}
 			}

 			// Wir kennen das Token und koennen es Speichern
 			HTMLOption *pOption =
 				new HTMLOption(
                     sal::static_int_cast< sal_uInt16 >(nToken), sName, aValue );

 			pOptions->Insert( pOption, pOptions->Count() );

 		}
 		else
 			// white space un unerwartete Zeichen ignorieren wie
 			nPos++;
 	}

 	return pOptions;
 }

 int HTMLParser::FilterPRE( int nToken )
 {
 	switch( nToken )
 	{
 #ifdef HTML_BEHAVIOUR
 	// diese werden laut Definition zu LFs
 	case HTML_PARABREAK_ON:
 	case HTML_LINEBREAK:
 		nToken = HTML_NEWPARA;
 #else
 	// in Netscape zeigen sie aber nur in nicht-leeren Absaetzen Wirkung
 	case HTML_PARABREAK_ON:
 		nToken = HTML_LINEBREAK;
 	case HTML_LINEBREAK:
 #endif
 	case HTML_NEWPARA:
 		nPre_LinePos = 0;
 		if( bPre_IgnoreNewPara )
 			nToken = 0;
 		break;

 	case HTML_TABCHAR:
 		{
 			xub_StrLen nSpaces = sal::static_int_cast< xub_StrLen >(
                 8 - (nPre_LinePos % 8));
 			DBG_ASSERT( !aToken.Len(), "Wieso ist das Token nicht leer?" );
 			aToken.Expand( nSpaces, ' ' );
 			nPre_LinePos += nSpaces;
 			nToken = HTML_TEXTTOKEN;
 		}
 		break;
 	// diese bleiben erhalten
 	case HTML_TEXTTOKEN:
 		nPre_LinePos += aToken.Len();
 		break;

 	case HTML_SELECT_ON:
 	case HTML_SELECT_OFF:
 	case HTML_BODY_ON:
 	case HTML_FORM_ON:
 	case HTML_FORM_OFF:
 	case HTML_INPUT:
 	case HTML_OPTION:
 	case HTML_TEXTAREA_ON:
 	case HTML_TEXTAREA_OFF:

 	case HTML_IMAGE:
 	case HTML_APPLET_ON:
 	case HTML_APPLET_OFF:
 	case HTML_PARAM:
 	case HTML_EMBED:

 	case HTML_HEAD1_ON:
 	case HTML_HEAD1_OFF:
 	case HTML_HEAD2_ON:
 	case HTML_HEAD2_OFF:
 	case HTML_HEAD3_ON:
 	case HTML_HEAD3_OFF:
 	case HTML_HEAD4_ON:
 	case HTML_HEAD4_OFF:
 	case HTML_HEAD5_ON:
 	case HTML_HEAD5_OFF:
 	case HTML_HEAD6_ON:
 	case HTML_HEAD6_OFF:
 	case HTML_BLOCKQUOTE_ON:
 	case HTML_BLOCKQUOTE_OFF:
 	case HTML_ADDRESS_ON:
 	case HTML_ADDRESS_OFF:
 	case HTML_HORZRULE:

 	case HTML_CENTER_ON:
 	case HTML_CENTER_OFF:
 	case HTML_DIVISION_ON:
 	case HTML_DIVISION_OFF:

 	case HTML_SCRIPT_ON:
 	case HTML_SCRIPT_OFF:
 	case HTML_RAWDATA:

 	case HTML_TABLE_ON:
 	case HTML_TABLE_OFF:
 	case HTML_CAPTION_ON:
 	case HTML_CAPTION_OFF:
 	case HTML_COLGROUP_ON:
 	case HTML_COLGROUP_OFF:
 	case HTML_COL_ON:
 	case HTML_COL_OFF:
 	case HTML_THEAD_ON:
 	case HTML_THEAD_OFF:
 	case HTML_TFOOT_ON:
 	case HTML_TFOOT_OFF:
 	case HTML_TBODY_ON:
 	case HTML_TBODY_OFF:
 	case HTML_TABLEROW_ON:
 	case HTML_TABLEROW_OFF:
 	case HTML_TABLEDATA_ON:
 	case HTML_TABLEDATA_OFF:
 	case HTML_TABLEHEADER_ON:
 	case HTML_TABLEHEADER_OFF:

 	case HTML_ANCHOR_ON:
 	case HTML_ANCHOR_OFF:
 	case HTML_BOLD_ON:
 	case HTML_BOLD_OFF:
 	case HTML_ITALIC_ON:
 	case HTML_ITALIC_OFF:
 	case HTML_STRIKE_ON:
 	case HTML_STRIKE_OFF:
 	case HTML_STRIKETHROUGH_ON:
 	case HTML_STRIKETHROUGH_OFF:
 	case HTML_UNDERLINE_ON:
 	case HTML_UNDERLINE_OFF:
 	case HTML_BASEFONT_ON:
 	case HTML_BASEFONT_OFF:
 	case HTML_FONT_ON:
 	case HTML_FONT_OFF:
 	case HTML_BLINK_ON:
 	case HTML_BLINK_OFF:
 	case HTML_SPAN_ON:
 	case HTML_SPAN_OFF:
 	case HTML_SUBSCRIPT_ON:
 	case HTML_SUBSCRIPT_OFF:
 	case HTML_SUPERSCRIPT_ON:
 	case HTML_SUPERSCRIPT_OFF:
 	case HTML_BIGPRINT_ON:
 	case HTML_BIGPRINT_OFF:
 	case HTML_SMALLPRINT_OFF:
 	case HTML_SMALLPRINT_ON:

 	case HTML_EMPHASIS_ON:
 	case HTML_EMPHASIS_OFF:
 	case HTML_CITIATION_ON:
 	case HTML_CITIATION_OFF:
 	case HTML_STRONG_ON:
 	case HTML_STRONG_OFF:
 	case HTML_CODE_ON:
 	case HTML_CODE_OFF:
 	case HTML_SAMPLE_ON:
 	case HTML_SAMPLE_OFF:
 	case HTML_KEYBOARD_ON:
 	case HTML_KEYBOARD_OFF:
 	case HTML_VARIABLE_ON:
 	case HTML_VARIABLE_OFF:
 	case HTML_DEFINSTANCE_ON:
 	case HTML_DEFINSTANCE_OFF:
 	case HTML_SHORTQUOTE_ON:
 	case HTML_SHORTQUOTE_OFF:
 	case HTML_LANGUAGE_ON:
 	case HTML_LANGUAGE_OFF:
 	case HTML_AUTHOR_ON:
 	case HTML_AUTHOR_OFF:
 	case HTML_PERSON_ON:
 	case HTML_PERSON_OFF:
 	case HTML_ACRONYM_ON:
 	case HTML_ACRONYM_OFF:
 	case HTML_ABBREVIATION_ON:
 	case HTML_ABBREVIATION_OFF:
 	case HTML_INSERTEDTEXT_ON:
 	case HTML_INSERTEDTEXT_OFF:
 	case HTML_DELETEDTEXT_ON:
 	case HTML_DELETEDTEXT_OFF:
 	case HTML_TELETYPE_ON:
 	case HTML_TELETYPE_OFF:

 		break;

 	// der Rest wird als unbekanntes Token behandelt
 	default:
 		if( nToken )
 		{
 			nToken =
 				( ((HTML_TOKEN_ONOFF & nToken) && (1 & nToken))
 					? HTML_UNKNOWNCONTROL_OFF
 					: HTML_UNKNOWNCONTROL_ON );
 		}
 		break;
 	}

 	bPre_IgnoreNewPara = sal_False;

 	return nToken;
 }

 int HTMLParser::FilterXMP( int nToken )
 {
 	switch( nToken )
 	{
 	case HTML_NEWPARA:
 		if( bPre_IgnoreNewPara )
 			nToken = 0;
 	case HTML_TEXTTOKEN:
 	case HTML_NONBREAKSPACE:
 	case HTML_SOFTHYPH:
 		break;				// bleiben erhalten

 	default:
 		if( nToken )
 		{
 			if( (HTML_TOKEN_ONOFF & nToken) && (1 & nToken) )
 			{
 				sSaveToken.Insert( '<', 0 );
 				sSaveToken.Insert( '/', 1 );
 			}
 			else
 				sSaveToken.Insert( '<', 0 );
 			if( aToken.Len() )
 			{
 				UnescapeToken();
 				sSaveToken += (sal_Unicode)' ';
 				aToken.Insert( sSaveToken, 0 );
 			}
 			else
 				aToken = sSaveToken;
 			aToken += (sal_Unicode)'>';
 			nToken = HTML_TEXTTOKEN;
 		}
 		break;
 	}

 	bPre_IgnoreNewPara = sal_False;

 	return nToken;
 }

 int HTMLParser::FilterListing( int nToken )
 {
 	switch( nToken )
 	{
 	case HTML_NEWPARA:
 		if( bPre_IgnoreNewPara )
 			nToken = 0;
 	case HTML_TEXTTOKEN:
 	case HTML_NONBREAKSPACE:
 	case HTML_SOFTHYPH:
 		break;		// bleiben erhalten

 	default:
 		if( nToken )
 		{
 			nToken =
 				( ((HTML_TOKEN_ONOFF & nToken) && (1 & nToken))
 					? HTML_UNKNOWNCONTROL_OFF
 					: HTML_UNKNOWNCONTROL_ON );
 		}
 		break;
 	}

 	bPre_IgnoreNewPara = sal_False;

 	return nToken;
 }

 FASTBOOL HTMLParser::IsHTMLFormat( const sal_Char* pHeader,
 								   sal_Bool bSwitchToUCS2,
 								   rtl_TextEncoding eEnc )
 {
 	// Einer der folgenden regulaeren Ausdrucke muss sich auf den String
 	// anwenden lassen, damit das Dok ein HTML-Dokument ist.
 	//
 	// ^[^<]*<[^ \t]*[> \t]
 	//        -------
 	// ^<!
 	//
 	// wobei der unterstrichene Teilausdruck einem HTML-Token
 	// ensprechen muss

 	ByteString sCmp;
 	sal_Bool bUCS2B = sal_False;
 	if( bSwitchToUCS2 )
 	{
 		if( 0xfeU == (sal_uChar)pHeader[0] &&
 			0xffU == (sal_uChar)pHeader[1] )
 		{
 			eEnc = RTL_TEXTENCODING_UCS2;
 			bUCS2B = sal_True;
 		}
 		else if( 0xffU == (sal_uChar)pHeader[0] &&
 				 0xfeU == (sal_uChar)pHeader[1] )
 		{
 			eEnc = RTL_TEXTENCODING_UCS2;
 		}
 	}
 	if
        (
         RTL_TEXTENCODING_UCS2 == eEnc &&
         (
          (0xfe == (sal_uChar)pHeader[0] && 0xff == (sal_uChar)pHeader[1]) ||
          (0xff == (sal_uChar)pHeader[0] && 0xfe == (sal_uChar)pHeader[1])
         )
        )
 	{
 		if( 0xfe == (sal_uChar)pHeader[0] )
 			bUCS2B = sal_True;

 		xub_StrLen nLen;
 		for( nLen = 2;
 			 pHeader[nLen] != 0 || pHeader[nLen+1] != 0;
 			 nLen+=2 )
 			;

 		::rtl::OStringBuffer sTmp( (nLen - 2)/2 );
 		for( xub_StrLen nPos = 2; nPos < nLen; nPos += 2 )
 		{
 			sal_Unicode cUC;
 			if( bUCS2B )
 				cUC = (sal_Unicode(pHeader[nPos]) << 8) | pHeader[nPos+1];
 			else
 				cUC = (sal_Unicode(pHeader[nPos+1]) << 8) | pHeader[nPos];
 			if( 0U == cUC )
 				break;

 			sTmp.append( cUC < 256U ? (sal_Char)cUC : '.' );
 		}
 		sCmp = ByteString( sTmp.makeStringAndClear() );
 	}
 	else
 	{
 		sCmp = (sal_Char *)pHeader;
 	}

 	sCmp.ToUpperAscii();

 	// Ein HTML-Dokument muss in der ersten Zeile ein '<' besitzen
 	xub_StrLen nStart = sCmp.Search( '<' );
 	if( STRING_NOTFOUND  == nStart )
 		return sal_False;
 	nStart++;

 	// danach duerfen beliebige andere Zeichen bis zu einem blank oder
 	// '>' kommen
 	sal_Char c;
 	xub_StrLen nPos;
 	for( nPos = nStart; nPos<sCmp.Len(); nPos++ )
 	{
 		if( '>'==(c=sCmp.GetChar(nPos)) || HTML_ISSPACE(c) )
 			break;
 	}

 	// wenn das Dokeument hinter dem < aufhoert ist es wohl kein HTML
 	if( nPos==nStart )
 		return sal_False;

 	// die Zeichenkette nach dem '<' muss ausserdem ein bekanntes
 	// HTML Token sein. Damit die Ausgabe eines DOS-dir-Befehls nicht
 	// als HTML interpretiert wird, wird ein <DIR> jedoch nicht als HTML
 	// interpretiert.
 	String sTest( sCmp.Copy( nStart, nPos-nStart ), RTL_TEXTENCODING_ASCII_US );
 	int nTok = GetHTMLToken( sTest );
 	if( 0 != nTok && HTML_DIRLIST_ON != nTok )
 		return sal_True;

 	// oder es handelt sich um ein "<!" ganz am Anfang der Datei (fix #27092#)
 	if( nStart == 1 && '!' == sCmp.GetChar( 1 ) )
 		return sal_True;

 	// oder wir finden irgendwo ein <HTML> in den ersten 80 Zeichen
 	nStart = sCmp.Search( OOO_STRING_SVTOOLS_HTML_html );
 	if( nStart!=STRING_NOTFOUND &&
 		nStart>0 && '<'==sCmp.GetChar(nStart-1) &&
 		nStart+4 < sCmp.Len() && '>'==sCmp.GetChar(nStart+4) )
 		return sal_True;

 	// sonst ist es wohl doch eher kein HTML-Dokument
 	return sal_False;
 }

 sal_Bool HTMLParser::InternalImgToPrivateURL( String& rURL )
 {
 	if( rURL.Len() < 19 || 'i' != rURL.GetChar(0) ||
 		rURL.CompareToAscii( OOO_STRING_SVTOOLS_HTML_internal_gopher, 9 ) != COMPARE_EQUAL )
 		return sal_False;

 	sal_Bool bFound = sal_False;

 	if( rURL.CompareToAscii( OOO_STRING_SVTOOLS_HTML_internal_gopher,16) == COMPARE_EQUAL )
 	{
 		String aName( rURL.Copy(16) );
 		switch( aName.GetChar(0) )
 		{
 		case 'b':
 			bFound = aName.EqualsAscii( OOO_STRING_SVTOOLS_HTML_INT_GOPHER_binary );
 			break;
 		case 'i':
 			bFound = aName.EqualsAscii( OOO_STRING_SVTOOLS_HTML_INT_GOPHER_image ) ||
 					 aName.EqualsAscii( OOO_STRING_SVTOOLS_HTML_INT_GOPHER_index );
 			break;
 		case 'm':
 			bFound = aName.EqualsAscii( OOO_STRING_SVTOOLS_HTML_INT_GOPHER_menu ) ||
 					 aName.EqualsAscii( OOO_STRING_SVTOOLS_HTML_INT_GOPHER_movie );
 			break;
 		case 's':
 			bFound = aName.EqualsAscii( OOO_STRING_SVTOOLS_HTML_INT_GOPHER_sound );
 			break;
 		case 't':
 			bFound = aName.EqualsAscii( OOO_STRING_SVTOOLS_HTML_INT_GOPHER_telnet ) ||
 					 aName.EqualsAscii( OOO_STRING_SVTOOLS_HTML_INT_GOPHER_text );
 			break;
 		case 'u':
 			bFound = aName.EqualsAscii( OOO_STRING_SVTOOLS_HTML_INT_GOPHER_unknown );
 			break;
 		}
 	}
 	else if( rURL.CompareToAscii( OOO_STRING_SVTOOLS_HTML_internal_icon,14) == COMPARE_EQUAL )
 	{
 		String aName( rURL.Copy(14) );
 		switch( aName.GetChar(0) )
 		{
 		case 'b':
 			bFound = aName.EqualsAscii( OOO_STRING_SVTOOLS_HTML_INT_ICON_baddata );
 			break;
 		case 'd':
 			bFound = aName.EqualsAscii( OOO_STRING_SVTOOLS_HTML_INT_ICON_delayed );
 			break;
 		case 'e':
 			bFound = aName.EqualsAscii( OOO_STRING_SVTOOLS_HTML_INT_ICON_embed );
 			break;
 		case 'i':
 			bFound = aName.EqualsAscii( OOO_STRING_SVTOOLS_HTML_INT_ICON_insecure );
 			break;
 		case 'n':
 			bFound = aName.EqualsAscii( OOO_STRING_SVTOOLS_HTML_INT_ICON_notfound );
 			break;
 		}
 	}
 	if( bFound )
 	{
 		String sTmp ( rURL );
 		rURL.AssignAscii( OOO_STRING_SVTOOLS_HTML_private_image );
 		rURL.Append( sTmp );
 	}

 	return bFound;
 }

 #ifdef USED
 void HTMLParser::SaveState( int nToken )
 {
 	SvParser::SaveState( nToken );
 }

 void HTMLParser::RestoreState()
 {
 	SvParser::RestoreState();
 }
 #endif


 enum eHtmlMetas {
     HTML_META_NONE = 0,
     HTML_META_AUTHOR,
     HTML_META_DESCRIPTION,
     HTML_META_KEYWORDS,
     HTML_META_REFRESH,
     HTML_META_CLASSIFICATION,
     HTML_META_CREATED,
     HTML_META_CHANGEDBY,
     HTML_META_CHANGED,
     HTML_META_GENERATOR,
     HTML_META_SDFOOTNOTE,
     HTML_META_SDENDNOTE,
     HTML_META_CONTENT_TYPE
 };

 // <META NAME=xxx>
 static HTMLOptionEnum __READONLY_DATA aHTMLMetaNameTable[] =
 {
     { OOO_STRING_SVTOOLS_HTML_META_author,        HTML_META_AUTHOR        },
     { OOO_STRING_SVTOOLS_HTML_META_changed,       HTML_META_CHANGED       },
     { OOO_STRING_SVTOOLS_HTML_META_changedby,     HTML_META_CHANGEDBY     },
     { OOO_STRING_SVTOOLS_HTML_META_classification,HTML_META_CLASSIFICATION},
     { OOO_STRING_SVTOOLS_HTML_META_content_type,  HTML_META_CONTENT_TYPE  },
     { OOO_STRING_SVTOOLS_HTML_META_created,       HTML_META_CREATED       },
     { OOO_STRING_SVTOOLS_HTML_META_description,   HTML_META_DESCRIPTION   },
     { OOO_STRING_SVTOOLS_HTML_META_keywords,      HTML_META_KEYWORDS      },
     { OOO_STRING_SVTOOLS_HTML_META_generator,     HTML_META_GENERATOR     },
     { OOO_STRING_SVTOOLS_HTML_META_refresh,       HTML_META_REFRESH       },
     { OOO_STRING_SVTOOLS_HTML_META_sdendnote,     HTML_META_SDENDNOTE     },
     { OOO_STRING_SVTOOLS_HTML_META_sdfootnote,    HTML_META_SDFOOTNOTE    },
     { 0,                                          0                       }
 };


 void HTMLParser::AddMetaUserDefined( ::rtl::OUString const & )
 {
 }

 bool HTMLParser::ParseMetaOptionsImpl(
         const uno::Reference<document::XDocumentProperties> & i_xDocProps,
         SvKeyValueIterator *i_pHTTPHeader,
         const HTMLOptions *i_pOptions,
         rtl_TextEncoding& o_rEnc )
 {
     String aName, aContent;
     sal_uInt16 nAction = HTML_META_NONE;
     bool bHTTPEquiv = false, bChanged = false;

     for ( sal_uInt16 i = i_pOptions->Count(); i; )
     {
         const HTMLOption *pOption = (*i_pOptions)[ --i ];
         switch ( pOption->GetToken() )
         {
             case HTML_O_NAME:
                 aName = pOption->GetString();
                 if ( HTML_META_NONE==nAction )
                 {
                     pOption->GetEnum( nAction, aHTMLMetaNameTable );
                 }
                 break;
             case HTML_O_HTTPEQUIV:
                 aName = pOption->GetString();
                 pOption->GetEnum( nAction, aHTMLMetaNameTable );
                 bHTTPEquiv = true;
                 break;
             case HTML_O_CONTENT:
                 aContent = pOption->GetString();
                 break;
         }
     }

     if ( bHTTPEquiv || HTML_META_DESCRIPTION != nAction )
     {
         // if it is not a Description, remove CRs and LFs from CONTENT
         aContent.EraseAllChars( _CR );
         aContent.EraseAllChars( _LF );
     }
     else
     {
         // convert line endings for Description
         aContent.ConvertLineEnd();
     }


     if ( bHTTPEquiv && i_pHTTPHeader )
     {
         // #57232#: Netscape seems to just ignore a closing ", so we do too
         if ( aContent.Len() && '"' == aContent.GetChar( aContent.Len()-1 ) )
         {
             aContent.Erase( aContent.Len() - 1 );
         }
         SvKeyValue aKeyValue( aName, aContent );
         i_pHTTPHeader->Append( aKeyValue );
     }

     switch ( nAction )
     {
         case HTML_META_AUTHOR:
             if (i_xDocProps.is()) {
                 i_xDocProps->setAuthor( aContent );
                 bChanged = true;
             }
             break;
         case HTML_META_DESCRIPTION:
             if (i_xDocProps.is()) {
                 i_xDocProps->setDescription( aContent );
                 bChanged = true;
             }
             break;
         case HTML_META_KEYWORDS:
             if (i_xDocProps.is()) {
                 i_xDocProps->setKeywords(
                     ::comphelper::string::convertCommaSeparated(aContent));
                 bChanged = true;
             }
             break;
         case HTML_META_CLASSIFICATION:
             if (i_xDocProps.is()) {
                 i_xDocProps->setSubject( aContent );
                 bChanged = true;
             }
             break;

         case HTML_META_CHANGEDBY:
             if (i_xDocProps.is()) {
                 i_xDocProps->setModifiedBy( aContent );
             }
             break;

         case HTML_META_CREATED:
         case HTML_META_CHANGED:
             if ( i_xDocProps.is() && aContent.Len() &&
                  aContent.GetTokenCount() == 2 )
             {
                 Date aDate( (sal_uLong)aContent.GetToken(0).ToInt32() );
                 Time aTime( (sal_uLong)aContent.GetToken(1).ToInt32() );
                 DateTime aDateTime( aDate, aTime );
                 ::util::DateTime uDT(aDateTime.Get100Sec(),
                     aDateTime.GetSec(), aDateTime.GetMin(),
                     aDateTime.GetHour(), aDateTime.GetDay(),
                     aDateTime.GetMonth(), aDateTime.GetYear());
                 if ( HTML_META_CREATED==nAction )
                     i_xDocProps->setCreationDate( uDT );
                 else
                     i_xDocProps->setModificationDate( uDT );
                 bChanged = true;
             }
             break;

         case HTML_META_REFRESH:
             DBG_ASSERT( !bHTTPEquiv || i_pHTTPHeader,
         "Reload-URL aufgrund unterlassener MUSS-Aenderung verlorengegangen" );
             break;

         case HTML_META_CONTENT_TYPE:
             if ( aContent.Len() )
             {
                 o_rEnc = GetEncodingByMIME( aContent );
             }
             break;

         case HTML_META_NONE:
             if ( !bHTTPEquiv )
             {
                 if (i_xDocProps.is())
                 {
                     uno::Reference<beans::XPropertyContainer> xUDProps
                         = i_xDocProps->getUserDefinedProperties();
                     try {
                         xUDProps->addProperty(aName,
                             beans::PropertyAttribute::REMOVEABLE,
                             uno::makeAny(::rtl::OUString(aContent)));
                         AddMetaUserDefined(aName);
                         bChanged = true;
                     } catch (uno::Exception &) {
                         // ignore
                     }
                 }
             }
             break;
         default:
             break;
     }

     return bChanged;
 }

 bool HTMLParser::ParseMetaOptions(
         const uno::Reference<document::XDocumentProperties> & i_xDocProps,
         SvKeyValueIterator *i_pHeader )
 {
     sal_uInt16 nContentOption = HTML_O_CONTENT;
     rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;

     bool bRet = ParseMetaOptionsImpl( i_xDocProps, i_pHeader,
 				      GetOptions(&nContentOption),
 				      eEnc );

     // If the encoding is set by a META tag, it may only overwrite the
     // current encoding if both, the current and the new encoding, are 1-sal_uInt8
     // encodings. Everything else cannot lead to reasonable results.
     if (RTL_TEXTENCODING_DONTKNOW != eEnc &&
         rtl_isOctetTextEncoding( eEnc ) &&
         rtl_isOctetTextEncoding( GetSrcEncoding() ) )
     {
         eEnc = GetExtendedCompatibilityTextEncoding( eEnc ); // #89973#
         SetSrcEncoding( eEnc );
     }

     return bRet;
 }

 rtl_TextEncoding HTMLParser::GetEncodingByMIME( const String& rMime )
 {
     ByteString sType;
     ByteString sSubType;
     INetContentTypeParameterList aParameters;
     ByteString sMime( rMime, RTL_TEXTENCODING_ASCII_US );
     if (INetContentTypes::parse(sMime, sType, sSubType, &aParameters))
     {
         const INetContentTypeParameter * pCharset
             = aParameters.find("charset");
         if (pCharset != 0)
         {
             ByteString sValue( pCharset->m_sValue, RTL_TEXTENCODING_ASCII_US );
             return GetExtendedCompatibilityTextEncoding(
                     rtl_getTextEncodingFromMimeCharset( sValue.GetBuffer() ) );
         }
     }
     return RTL_TEXTENCODING_DONTKNOW;
 }

 rtl_TextEncoding HTMLParser::GetEncodingByHttpHeader( SvKeyValueIterator *pHTTPHeader )
 {
     rtl_TextEncoding eRet = RTL_TEXTENCODING_DONTKNOW;
     if( pHTTPHeader )
 	{
         SvKeyValue aKV;
 		for( sal_Bool bCont = pHTTPHeader->GetFirst( aKV ); bCont;
 			 bCont = pHTTPHeader->GetNext( aKV ) )
 		{
 			if( aKV.GetKey().EqualsIgnoreCaseAscii( OOO_STRING_SVTOOLS_HTML_META_content_type ) )
 			{
 				if( aKV.GetValue().Len() )
 				{
                     eRet = HTMLParser::GetEncodingByMIME( aKV.GetValue() );
                 }
 			}
 		}
 	}
     return eRet;
 }

 sal_Bool HTMLParser::SetEncodingByHTTPHeader(
 								SvKeyValueIterator *pHTTPHeader )
 {
 	sal_Bool bRet = sal_False;
     rtl_TextEncoding eEnc = HTMLParser::GetEncodingByHttpHeader( pHTTPHeader );
     if(RTL_TEXTENCODING_DONTKNOW != eEnc)
 	{
         SetSrcEncoding( eEnc );
         bRet = sal_True;
     }
 	return bRet;
 }