blob: 39e73eac96dd0dd356a5200b2eef4436c40681fd [file] [log] [blame]
/**************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*************************************************************/
// MARKER(update_precomp.py): autogen include statement, do not remove
#include "precompiled_basic.hxx"
#include "sbcomp.hxx"
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#if defined UNX
#include <stdlib.h>
#else
#include <math.h> // atof()
#endif
#include <rtl/math.hxx>
#include <vcl/svapp.hxx>
#include <unotools/charclass.hxx>
#include <runtime.hxx>
/** Creates a scanner for the given source text.

    @param rBuf  complete source text to be scanned; copied into aBuf
    @param p     StarBASIC instance that receives error reports (may be NULL,
                 see GenError())

    All positions and counters start at zero, no line is loaded yet
    (pLine == NULL) and error reporting (bErrors) is switched on.
*/
SbiScanner::SbiScanner( const ::rtl::OUString& rBuf, StarBASIC* p )
    : aBuf( rBuf )
{
    // owner and current line
    pBasic = p;
    pLine  = NULL;

    // value and type of the last scanned symbol
    nVal      = 0;
    eScanType = SbxVARIANT;

    // positions inside the buffer and inside the current line
    nBufPos    = 0;
    nLine      = 0;
    nCol       = 0;
    nCol1      = 0;
    nCol2      = 0;
    nCurCol1   = 0;
    nSavedCol1 = 0;
    nColLock   = 0;

    // error bookkeeping
    nErrors = 0;

    // flags that start out cleared
    bError                  = sal_False;
    bAbort                  = sal_False;
    bSpaces                 = sal_False;
    bNumber                 = sal_False;
    bSymbol                 = sal_False;
    bUsedForHilite          = sal_False;
    bCompatible             = sal_False;
    bVBASupportOn           = sal_False;
    bPrevLineExtentsComment = sal_False;

    // flags that start out set
    bHash   = sal_True;
    bErrors = sal_True;
}
/** Destructor; the body is empty, nothing is released explicitly here. */
SbiScanner::~SbiScanner()
{
}
/** Increments the column lock counter.

    When the counter goes from zero to one, the current start column nCol1
    is remembered in nSavedCol1; GenError() reports that column while the
    lock is held.
*/
void SbiScanner::LockColumn()
{
    const bool bWasUnlocked = ( nColLock == 0 );
    ++nColLock;
    if( bWasUnlocked )
        nSavedCol1 = nCol1;
}
/** Decrements the column lock counter; does nothing if it is already zero. */
void SbiScanner::UnlockColumn()
{
    if( nColLock != 0 )
        --nColLock;
}
/** Reports a compiler error for the current scan position.

    @param code  error code to report

    If compiler errors are globally blocked (bBlockCompilerError), only the
    abort flag is set. Otherwise the error is forwarded to pBasic->CError()
    at most once per statement (guarded by bError), and the error counter is
    incremented whenever error reporting (bErrors) is enabled.
*/
void SbiScanner::GenError( SbError code )
{
    if( GetSbData()->bBlockCompilerError )
    {
        bAbort = sal_True;
        return;
    }

    if( bErrors && !bError )
    {
        // report only one error per statement
        bError = sal_True;

        sal_Bool bContinue = sal_True;
        if( pBasic )
        {
            // While the column is locked, the saved column is reported.
            sal_uInt16 nErrCol = nColLock ? nSavedCol1 : nCol1;

            // EXPECTED / UNEXPECTED style errors always refer to the last
            // token, so its start column is used regardless of the lock.
            const bool bRefersToLastToken =
                   code == SbERR_EXPECTED
                || code == SbERR_UNEXPECTED
                || code == SbERR_SYMBOL_EXPECTED
                || code == SbERR_LABEL_EXPECTED;
            if( bRefersToLastToken )
            {
                nErrCol = nCol1;
                if( nErrCol > nCol2 )
                    nCol2 = nErrCol;
            }
            bContinue = pBasic->CError( code, aError, nLine, nErrCol, nCol2 );
        }

        // Abort if the handler asked for it or if the error is fatal.
        const bool bFatal =
            ( code == SbERR_NO_MEMORY || code == SbERR_PROG_TOO_LARGE );
        bAbort |= ( !bContinue || bFatal );
    }

    if( bErrors )
        nErrors++;
}
// Returns sal_True if a colon follows immediately at the current position;
// the colon is consumed in that case.
// Used by SbiTokenizer::MayBeLabel() to recognize a label.
sal_Bool SbiScanner::DoesColonFollow()
{
    if( !pLine || *pLine != ':' )
        return sal_False;

    // skip the colon
    ++pLine;
    ++nCol;
    return sal_True;
}
// Checks for a legal type suffix character.
// The position of the character inside "%&!#@ $" is added to SbxINTEGER to
// form the resulting data type; the blank in the table is only a placeholder
// and never matches. Returns SbxVARIANT if c is not a suffix character.
static SbxDataType GetSuffixType( sal_Unicode c )
{
    static String aSuffixesStr = String::CreateFromAscii( "%&!#@ $" );

    if( c == 0 || c == ' ' )
        return SbxVARIANT;

    const sal_uInt32 nPos = aSuffixesStr.Search( c );
    if( nPos == STRING_NOTFOUND )
        return SbxVARIANT;

    return SbxDataType( static_cast< sal_uInt16 >( nPos ) + SbxINTEGER );
}
// Reads the next symbol into the variables aSym, nVal and eScanType.
// The return value is sal_False at EOF or on errors.
// Size (in characters) of the local buffer SbiScanner::NextSym() uses to
// collect the digits of a numeric literal.
#define BUF_SIZE 80

namespace {

/** Returns true if the passed character is a blank, a tab or a form feed. */
inline bool lclIsWhitespace( sal_Unicode cChar )
{
    switch( cChar )
    {
        case ' ':
        case '\t':
        case '\f':
            return true;
        default:
            return false;
    }
}

} // namespace
/** Scans the next symbol from the source buffer.

    On success the text of the symbol is stored in aSym, its numeric value
    (for number literals) in nVal and its type in eScanType; nCol1/nCol2
    receive the start and end column. The flags bSymbol, bNumber, bSpaces
    and bHash describe what was found.

    A new line is fetched from aBuf whenever pLine is NULL. At the end of a
    line, aSym is set to '\n' unless the line ends in '_' (line
    continuation), in which case scanning continues on the following line.
    Comments (' or REM) swallow the rest of the line and yield "REM".

    @return sal_False if the source buffer is exhausted, otherwise a
            non-zero value.
*/
sal_Bool SbiScanner::NextSym()
{
    // Remember the current position for the end-of-line case
    sal_uInt16 nOldLine = nLine;
    sal_uInt16 nOldCol1 = nCol1;
    sal_uInt16 nOldCol2 = nCol2;
    sal_Unicode buf[ BUF_SIZE ], *p = buf;
    bHash = sal_False;

    eScanType = SbxVARIANT;
    aSym.Erase();
    bSymbol =
    bNumber = bSpaces = sal_False;

    // Do we have to read a new line?
    if( !pLine )
    {
        sal_Int32 n = nBufPos;
        sal_Int32 nLen = aBuf.getLength();
        if( nBufPos >= nLen )
            return sal_False;
        const sal_Unicode* p2 = aBuf.getStr();
        p2 += n;
        // Search for the end of the line
        while( ( n < nLen ) && ( *p2 != '\n' ) && ( *p2 != '\r' ) )
            p2++, n++;
        // #163944# ignore trailing whitespace
        sal_Int32 nCopyEndPos = n;
        while( (nBufPos < nCopyEndPos) && lclIsWhitespace( aBuf[ nCopyEndPos - 1 ] ) )
            --nCopyEndPos;
        aLine = aBuf.copy( nBufPos, nCopyEndPos - nBufPos );
        // Skip the line break; "\r\n" counts as a single break
        if( n < nLen )
        {
            if( *p2 == '\r' && *( p2+1 ) == '\n' )
                n += 2;
            else
                n++;
        }
        nBufPos = n;
        pLine = aLine.getStr();
        nOldLine = ++nLine;
        nCol = nCol1 = nCol2 = nOldCol1 = nOldCol2 = 0;
        nColLock = 0;
    }

    // Skip leading whitespace
    while( lclIsWhitespace( *pLine ) )
        pLine++, nCol++, bSpaces = sal_True;

    nCol1 = nCol;

    // Nothing left on this line?
    if( !*pLine )
        goto eoln;

    // The previous line ended a comment with " _": this line is comment too
    if( bPrevLineExtentsComment )
        goto PrevLineCommentLbl;

    if( *pLine == '#' )
    {
        pLine++;
        nCol++;
        bHash = sal_True;
    }

    // Symbol? Then copy the characters.
    if( BasicSimpleCharClass::isAlpha( *pLine, bCompatible ) || *pLine == '_' )
    {
        // If nothing follows the '_', it is a line continuation!
        if( *pLine == '_' && !*(pLine+1) )
        {   pLine++;
            goto eoln;  }
        bSymbol = sal_True;
        short n = nCol;
        for ( ; (BasicSimpleCharClass::isAlphaNumeric( *pLine, bCompatible ) || ( *pLine == '_' ) ); pLine++ )
            nCol++;
        aSym = aLine.copy( n, nCol - n );

        // Special handling for "go to"
        if( bCompatible && *pLine && aSym.EqualsIgnoreCaseAscii( "go" ) )
        {
            const sal_Unicode* pTestLine = pLine;
            short nTestCol = nCol;
            while( lclIsWhitespace( *pTestLine ) )
            {
                pTestLine++;
                nTestCol++;
            }

            if( *pTestLine && *(pTestLine + 1) )
            {
                String aTestSym = aLine.copy( nTestCol, 2 );
                if( aTestSym.EqualsIgnoreCaseAscii( "to" ) )
                {
                    aSym = String::CreateFromAscii( "goto" );
                    pLine = pTestLine + 2;
                    nCol = nTestCol + 2;
                }
            }
        }

        // Replace a trailing '_' by a blank if the end of the line follows
        // (otherwise it would wrongly be taken as a line continuation)
        if( !bUsedForHilite && !*pLine && *(pLine-1) == '_' )
        {
            aSym.GetBufferAccess();     // #109693 force copy if necessary
            *((sal_Unicode*)(pLine-1)) = ' ';       // cast because of const
        }
        // Type suffix?
        // Do not test the exclamation mark if another symbol follows it
        else if( *pLine != '!' || !BasicSimpleCharClass::isAlpha( pLine[ 1 ], bCompatible ) )
        {
            SbxDataType t = GetSuffixType( *pLine );
            if( t != SbxVARIANT )
            {
                eScanType = t;
                pLine++;
                nCol++;
            }
        }
    }

    // Number? Then read and convert it.
    else if( BasicSimpleCharClass::isDigit( *pLine & 0xFF )
        || ( *pLine == '.' && BasicSimpleCharClass::isDigit( *(pLine+1) & 0xFF ) ) )
    {
        short exp = 0;
        short comma = 0;
        short ndig = 0;
        short ncdig = 0;
        eScanType = SbxDOUBLE;
        sal_Bool bBufOverflow = sal_False;
        while( strchr( "0123456789.DEde", *pLine ) && *pLine )
        {
            // Buffer full? -> keep scanning, but discard the characters.
            // The last slot of buf is reserved for the terminating zero.
            if( (p-buf) >= (BUF_SIZE-1) )
            {
                bBufOverflow = sal_True;
                pLine++, nCol++;
                continue;
            }
            // Decimal point or exponent?
            if( *pLine == '.' )
            {
                if( ++comma > 1 )
                {
                    pLine++; nCol++; continue;
                }
                else *p++ = *pLine++, nCol++;
            }
            else if( strchr( "DdEe", *pLine ) )
            {
                if (++exp > 1)
                {
                    pLine++; nCol++; continue;
                }
//              if( toupper( *pLine ) == 'D' )
//                  eScanType = SbxDOUBLE;
                *p++ = 'E'; pLine++; nCol++;
                // Sign after the exponent?
                if( *pLine == '+' )
                    pLine++, nCol++;
                else
                if( *pLine == '-' )
                {
                    // The 'E' above may have used the last free slot. Writing
                    // the sign unchecked would step over the capacity test at
                    // the top of the loop and let all following digits run
                    // past the end of buf.
                    if( (p-buf) < (BUF_SIZE-1) )
                        *p++ = *pLine;
                    else
                        bBufOverflow = sal_True;
                    pLine++, nCol++;
                }
            }
            else
            {
                *p++ = *pLine++, nCol++;
                if( comma && !exp ) ncdig++;
            }
            if (!exp) ndig++;
        }
        *p = 0;
        // NOTE(review): p points at the terminating zero here, so aSym looks
        // like it always becomes an empty string - confirm this is intended.
        aSym = p; bNumber = sal_True;
        // More than one decimal point or exponent?
        if( comma > 1 || exp > 1 )
        {   aError = '.';
            GenError( SbERR_BAD_CHAR_IN_NUMBER );   }

        // #57844 use the localized conversion function
        nVal = rtl_math_uStringToDouble( buf, buf+(p-buf), '.', ',', NULL, NULL );
        // OLD: nVal = atof( buf );

        ndig = ndig - comma;
        // Without decimal point and exponent the literal is an integer type
        // if the value fits
        if( !comma && !exp )
        {
            if( nVal >= SbxMININT && nVal <= SbxMAXINT )
                eScanType = SbxINTEGER;
            else
            if( nVal >= SbxMINLNG && nVal <= SbxMAXLNG )
                eScanType = SbxLONG;
        }
        if( bBufOverflow )
            GenError( SbERR_MATH_OVERFLOW );
        // too many digits for SINGLE?
//      if (ndig > 15 || ncdig > 6)
//          eScanType = SbxDOUBLE;
//      else
//      if( nVal > SbxMAXSNG || nVal < SbxMINSNG )
//          eScanType = SbxDOUBLE;

        // Type suffix?
        SbxDataType t = GetSuffixType( *pLine );
        if( t != SbxVARIANT )
        {
            eScanType = t;
            pLine++;
            nCol++;
        }
    }

    // Hex/octal number? Read and convert it:
    else if( *pLine == '&' )
    {
        pLine++; nCol++;
        sal_Unicode cmp1[] = { '0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F', 0 };
        sal_Unicode cmp2[] = { '0', '1', '2', '3', '4', '5', '6', '7', 0 };
        sal_Unicode *cmp = cmp1;
        //char *cmp = "0123456789ABCDEF";
        sal_Unicode base = 16;
        sal_Unicode ndig = 8;
        sal_Unicode xch  = *pLine++ & 0xFF; nCol++;
        switch( toupper( xch ) )
        {
            case 'O':
                cmp = cmp2; base = 8; ndig = 11; break;
                //cmp = "01234567"; base = 8; ndig = 11; break;
            case 'H':
                break;
            default :
                // Is treated as an operator.
                // NOTE(review): this returns the SYMBOL constant through a
                // sal_Bool result - presumably non-zero; confirm.
                pLine--; nCol--; nCol1 = nCol-1; aSym = '&'; return SYMBOL;
        }
        bNumber = sal_True;
        long l = 0;
        int i;
        sal_Bool bBufOverflow = sal_False;
        while( BasicSimpleCharClass::isAlphaNumeric( *pLine & 0xFF, bCompatible ) )
        {
            sal_Unicode ch = sal::static_int_cast< sal_Unicode >(
                toupper( *pLine & 0xFF ) );
            pLine++; nCol++;
            // Buffer full? -> keep scanning, but discard the characters
            if( (p-buf) == (BUF_SIZE-1) )
                bBufOverflow = sal_True;
            else if( String( cmp ).Search( ch ) != STRING_NOTFOUND )
            //else if( strchr( cmp, ch ) )
                *p++ = ch;
            else
            {
                aError = ch;
                GenError( SbERR_BAD_CHAR_IN_NUMBER );
            }
        }
        *p = 0;
        // Convert the collected digits; 'A'..'F' follow '9' after
        // subtracting 7
        for( p = buf; *p; p++ )
        {
            i = (*p & 0xFF) - '0';
            if( i > 9 ) i -= 7;
            l = ( l * base ) + i;
            if( !ndig-- )
            {
                GenError( SbERR_MATH_OVERFLOW ); break;
            }
        }
        // A trailing '&' is consumed as part of the literal
        if( *pLine == '&' ) pLine++, nCol++;
        nVal = (double) l;
        eScanType = ( l >= SbxMININT && l <= SbxMAXINT ) ? SbxINTEGER : SbxLONG;
        if( bBufOverflow )
            GenError( SbERR_MATH_OVERFLOW );
    }

    // Strings and [bracketed symbols]:
    else if( *pLine == '"' || *pLine == '[' )
    {
        sal_Unicode cSep = *pLine;
        if( cSep == '[' )
            bSymbol = sal_True, cSep = ']';
        short n = nCol+1;
        while( *pLine )
        {
            do pLine++, nCol++;
            while( *pLine && ( *pLine != cSep ) );
            if( *pLine == cSep )
            {
                pLine++; nCol++;
                // A doubled quote continues the string; ']' always ends it
                if( *pLine != cSep || cSep == ']' ) break;
            } else aError = cSep, GenError( SbERR_EXPECTED );
        }
        // With VBA interop enabled, do not strip the [] characters
        if ( cSep == ']' && bVBASupportOn )
            aSym = aLine.copy( n - 1, nCol - n + 1);
        else
            aSym = aLine.copy( n, nCol - n - 1 );
        // Remove doubled string delimiters
        String s( cSep );
        s += cSep;
        sal_uInt16 nIdx = 0;
        do
        {
            nIdx = aSym.Search( s, nIdx );
            if( nIdx == STRING_NOTFOUND )
                break;
            aSym.Erase( nIdx, 1 );
            nIdx++;
        }
        while( true );
        if( cSep != ']' )
            eScanType = ( cSep == '#' ) ? SbxDATE : SbxSTRING;
    }
    // Invalid characters:
    else if( ( *pLine & 0xFF ) >= 0x7F )
    {
        GenError( SbERR_SYNTAX ); pLine++; nCol++;
    }
    // Other groups (operators and punctuation):
    else
    {
        short n = 1;
        switch( *pLine++ )
        {
            case '<': if( *pLine == '>' || *pLine == '=' ) n = 2; break;
            case '>': if( *pLine == '=' ) n = 2; break;
            case ':': if( *pLine == '=' ) n = 2; break;
        }
        aSym = aLine.copy( nCol, n );
        pLine += n-1; nCol = nCol + n;
    }

    nCol2 = nCol-1;

PrevLineCommentLbl:
    // Comment?
    if( bPrevLineExtentsComment || (eScanType != SbxSTRING &&
        ( aSym.GetBuffer()[0] == '\'' || aSym.EqualsIgnoreCaseAscii( "REM" ) ) ) )
    {
        bPrevLineExtentsComment = sal_False;
        aSym = String::CreateFromAscii( "REM" );
        sal_uInt16 nLen = String( pLine ).Len();
        // In compatibility mode a comment line ending in " _" continues on
        // the next line. The last two characters of the line are only
        // inspected if the line really holds two characters; indexing
        // relative to pLine could otherwise read in front of the line buffer
        // (e.g. for a continued comment line consisting of a single '_').
        const sal_Unicode* pLineStart = aLine.getStr();
        const sal_Unicode* pLineEnd = pLine + nLen;
        if( bCompatible && ( pLineEnd - pLineStart ) >= 2
            && pLineEnd[ -1 ] == '_' && pLineEnd[ -2 ] == ' ' )
            bPrevLineExtentsComment = sal_True;
        nCol2 = nCol2 + nLen;
        pLine = NULL;
    }
    return sal_True;

    // Otherwise end of line: but check for '_' to see whether the line
    // is continued!
eoln:
    if( nCol && *--pLine == '_' )
    {
        pLine = NULL;
        bool bRes = NextSym();
        if( bVBASupportOn && aSym.GetBuffer()[0] == '.' )
        {
            // object _
            //    .Method
            // ^^^  <- spaces is legal in MSO VBA
            OSL_TRACE("*** resetting bSpaces***");
            bSpaces = sal_False;
        }
        return bRes;
    }
    else
    {
        pLine = NULL;
        nLine = nOldLine;
        nCol1 = nOldCol1;
        nCol2 = nOldCol2;
        aSym = '\n';
        nColLock = 0;
        return sal_True;
    }
}
// Definition of the static letter table member of BasicSimpleCharClass;
// it is default-constructed by LetterTable::LetterTable().
LetterTable BasicSimpleCharClass::aLetterTable;
/** Fills the 256-entry lookup table IsLetterTab.

    Only the accented letters in the range 0xC0 (CAPITAL LETTER A WITH GRAVE
    ACCENT) to 0xFF (SMALL LETTER Y WITH DIAERESIS) are marked as letters.
    The two code points 0xD7 and 0xF7 inside that range are not marked.
    Every entry below 0xC0 is false.
*/
LetterTable::LetterTable( void )
{
    for( int nCode = 0; nCode < 256; ++nCode )
    {
        const bool bInAccentedRange = ( nCode >= 0xC0 );
        const bool bExcluded        = ( nCode == 0xD7 ) || ( nCode == 0xF7 );
        IsLetterTab[nCode] = bInAccentedRange && !bExcluded;
    }
}
/** Asks a CharClass whether the given character is a letter.

    The CharClass instance is created on first use with the locale from the
    application settings and is kept in a function-local static pointer for
    all later calls.

    @param c  character to classify
    @return   result of CharClass::isLetter() for a one-character string
*/
bool LetterTable::isLetterUnicode( sal_Unicode c )
{
    static CharClass* pCharClass = NULL;
    if( !pCharClass )
        pCharClass = new CharClass( Application::GetSettings().GetLocale() );

    String aSingleChar( c );
    return pCharClass->isLetter( aSingleChar, 0 );
}