| /+ |
| Copyright (c) 1999-2006 by Digital Mars |
| All Rights Reserved |
| written by Walter Bright www.digitalmars.com |
| License for redistribution is by either the Artistic License in artistic.txt, or the GNU General Public License in gnu.txt. |
| See the included readme.txt for details. |
| D Language conversion by: J Duncan |
| +/ |
| |
| /** |
| D language lexer: turns source text into a stream of tokens. |
| */ |
| |
| module dparser.Lexer; |
| |
| import dparser.Root; |
| |
| import dparser.Tokens; |
| import dparser.Token; |
| import dparser.Keyword; |
| |
| import dparser.Types; |
| |
| import dparser.Module; |
| import dparser.Identifier; |
| import dparser.unialpha; |
| |
| import dparser.OutBuffer; |
| |
| //private import std.ctype; |
| //private import std.string; |
| //import dwf.core.debugapi; |
| |
| int errno = 0; /* module-level error-number variable; NOTE(review): presumably stands in for the C runtime errno used by the numeric conversions of the original C++ lexer - confirm */ |
| |
| //#if _WIN32 && __DMC__ |
| // from \dm\src\include\setlocal.h |
| //extern "C" char * __cdecl __locale_decpoint; |
| char *__locale_decpoint; /* declared here as an ordinary module variable, not as the extern binding shown in the disabled line above */ |
| //#endif |
| //const uint LS = 0x2028; // UTF line separator |
| //const uint PS = 0x2029; // UTF paragraph separator |
| |
| //extern int isUniAlpha(unsigned u); |
| //extern int HtmlNamedEntity(unsigned char *p, int length); |
| |
| /** |
| Lexer object |
| */ |
| |
| class Lexer |
| { |
| static Identifier[char[]] stringtable; /* interning table: identifier spelling to its Identifier object (filled by idPool and by scan) */ |
| static OutBuffer stringbuffer; /* scratch buffer shared by every Lexer instance; created on first construction and rewound with offset = 0 before each literal */ |
| static Token *freelist; /* NOTE(review): no live use is visible in this part of the file; the recycling code in nextToken is commented out */ |
| |
| Token token; // current token |
| Module mod; // current module |
| Loc loc; // current source location (linnum is advanced as line ends are consumed); used for error messages |
| ubyte *base; // pointer to start of buffer |
| ubyte *end; // past end of buffer |
| ubyte *p; // current character |
| int doDocComment; // collect doc comment information |
| int anyToken; // !=0 means seen at least one token |
| int commentToken; // !=0 means comments are TOKcomment's |
| |
| |
| // Set up a lexer over base[begoffset .. endoffset] for module mod. |
| // doDocComment != 0 asks scan to collect documentation comments; |
| // commentToken != 0 makes comments come back as TOKcomment tokens. |
| this(Module mod, ubyte *base, uint begoffset, uint endoffset, int doDocComment, int commentToken) |
| { |
|     // The scratch buffer is static: create it once, on first construction. |
|     if (stringbuffer is null) |
|         stringbuffer = new OutBuffer; |
| |
|     this.mod = mod; |
|     this.base = base; |
|     this.p = base + begoffset; |
|     this.end = base + endoffset; |
|     this.doDocComment = doDocComment; |
|     this.commentToken = commentToken; |
|     loc = Loc(mod, 1); |
| |
|     // Only a leading "#!" line needs special treatment; otherwise we are done. |
|     if (p[0] != '#' || p[1] != '!') |
|         return; |
| |
|     // Skip the whole "#!" line, including its line terminator. |
|     p += 2; |
|     for (;;) |
|     { |
|         ubyte ch = *p; |
| |
|         if (ch == '\n') |
|         { |
|             p++; |
|             break; |
|         } |
|         if (ch == '\r') |
|         { |
|             p++; |
|             if (*p == '\n')     // CR LF counts as one line end |
|                 p++; |
|             break; |
|         } |
|         if (ch == 0 || ch == 0x1A) |
|             break;              // end of input: p stays on the terminator |
| |
|         if (ch & 0x80) |
|         { |
|             uint u = decodeUTF(); |
|             if (u == PS || u == LS) |
|                 break;          // Unicode paragraph / line separator ends the line too |
|         } |
|         p++; |
|     } |
| |
|     // Whatever follows the "#!" line is reported as line 2. |
|     loc.linnum = 2; |
| } |
| |
| |
| |
| // Return the one Identifier registered for str in stringtable, |
| // creating and registering it (as TOK.TOKidentifier) on first use. |
| static Identifier idPool(in char[] str) |
| { |
|     Identifier* known = str in stringtable; |
|     if (known !is null) |
|         return *known; |
| |
|     Identifier fresh = new Identifier(str, TOK.TOKidentifier); |
|     stringtable[str] = fresh; |
|     return fresh; |
| } |
| |
| static void initKeywords() /* lexer start-up: runs the three initialisation calls below, in this order */ |
| { |
| // build character map (cmtable_init is defined outside this part of the file; presumably the table behind helpers such as isidchar - confirm) |
| cmtable_init(); |
| |
| // create keyword tokens & identifiers (delegates to the Keyword module) |
| dparser.Keyword.initKeywords(); |
| |
| // create standard lexer tokens (delegates to the Token module) |
| dparser.Token.createLexerTokens(); |
| } |
| |
| // Merge two documentation comments. An empty operand yields the other one |
| // unchanged; two non-empty comments are joined with a single newline. |
| static char[] combineComments(char[] c1, char[] c2) |
| { |
|     if (!c1.length) |
|         return c2; |
|     if (!c2.length) |
|         return c1; |
|     return c1 ~ "\n" ~ c2; |
| } |
| |
| /** |
|  * Decode the UTF-8 sequence whose lead byte is at p. |
|  * |
|  * Precondition: *p has its high bit set (callers test c & 0x80 first). |
|  * |
|  * On success p is advanced to the LAST byte of the sequence (callers do the |
|  * final p++ themselves) and the decoded code point is returned. |
|  * |
|  * On a malformed sequence an error is reported and U+FFFD is returned. |
|  * p is then left on the last byte that was accepted, so the caller's p++ |
|  * resumes at the first byte that did not fit. |
|  * |
|  * Continuation bytes are checked one at a time, so a 0 or 0x1A terminator |
|  * stops decoding and nothing beyond it is read. |
|  */ |
| uint decodeUTF() |
| { |
|     ubyte *s = p; |
|     ubyte lead = *s; |
| |
|     assert(lead & 0x80); |
|     if (!(lead & 0x80)) |
|         return lead; |
| |
|     uint trail;     // continuation bytes announced by the lead byte |
|     uint u;         // code point being assembled |
|     uint lowest;    // smallest code point that needs this many bytes |
| |
|     if ((lead & 0xE0) == 0xC0) |
|     { |
|         trail = 1; |
|         u = lead & 0x1F; |
|         lowest = 0x80; |
|     } |
|     else if ((lead & 0xF0) == 0xE0) |
|     { |
|         trail = 2; |
|         u = lead & 0x0F; |
|         lowest = 0x800; |
|     } |
|     else if ((lead & 0xF8) == 0xF0) |
|     { |
|         trail = 3; |
|         u = lead & 0x07; |
|         lowest = 0x10000; |
|     } |
|     else |
|     { |
|         // stray continuation byte (10xxxxxx) or a lead byte outside UTF-8 |
|         error("invalid UTF-8 lead byte 0x%02x", cast(ubyte)lead); |
|         return 0xFFFD; |
|     } |
| |
|     for (uint i = 1; i <= trail; i++) |
|     { |
|         ubyte b = s[i]; |
|         if ((b & 0xC0) != 0x80) |
|         { |
|             p = s + (i - 1);        // keep the bytes that were well formed |
|             error("truncated UTF-8 sequence"); |
|             return 0xFFFD; |
|         } |
|         u = (u << 6) | (b & 0x3F); |
|     } |
| |
|     p = s + trail; |
| |
|     // Reject overlong encodings, surrogates and values beyond U+10FFFF. |
|     if (u < lowest || u > 0x10FFFF || (u >= 0xD800 && u <= 0xDFFF)) |
|     { |
|         error("invalid UTF-8 sequence"); |
|         return 0xFFFD; |
|     } |
|     return u; |
| } |
| |
| // Report an error at the lexer's current location (loc) and count it. |
| // Nothing is printed when there is no module or when global.gag is set, |
| // but the error is counted all the same. |
| void error(...) |
| { |
|     if (mod is null || global.gag) |
|     { |
|         global.errors++; |
|         return; |
|     } |
| |
|     writefln(formatLoc(loc, _arguments, _argptr)); |
| |
|     // moderate blizzard of cascading messages |
|     if (global.errors >= global.max_errors) |
|         throw new Exception("too many errors"); |
| |
|     global.errors++; |
| } |
| |
| // Report an error at an explicit location and count it. |
| // The loc parameter deliberately shadows the lexer's own loc field. |
| // Printing is suppressed when there is no module or when global.gag is set; |
| // the error is counted either way. |
| void errorLoc(Loc loc, ...) |
| { |
|     if ((mod !is null) && !global.gag) |
|     { |
|         writefln(formatLoc(loc, _arguments, _argptr)); |
| |
|         // Same configurable limit as error(); this used to be a hard-coded 20. |
|         if (global.errors >= global.max_errors) // moderate blizzard of cascading messages |
|             throw new Exception("too many errors"); |
|     } |
| |
|     global.errors++; |
| } |
| |
| |
| // Advance to the next token and return its kind. |
| // Tokens queued by peek() are consumed before new input is scanned. |
| TOK nextToken() |
| { |
|     Token *queued = token.next; |
| |
|     if (queued is null) |
|         scan(&token); |
|     else |
|         token = *queued;    // whole-struct copy, including the link to further look-ahead |
| |
|     return token.value; |
| } |
| |
| // Return the token that follows ct without consuming it. |
| // The result is cached in ct.next, so repeated calls scan only once. |
| Token *peek(inout Token ct) |
| { |
|     if (ct.next) |
|         return ct.next; |
| |
|     Token *ahead = new Token; |
|     scan(ahead); |
|     ahead.next = null; |
|     ct.next = ahead; |
|     return ahead; |
| } |
| |
| // Scan the next token from the source buffer at p into *t. White space is skipped, comments are skipped unless commentToken is set, and loc.linnum is advanced as line ends are consumed. |
| |
| void scan(Token *t) |
| { |
| // debug writefln("scan token"); |
| uint lastLine = loc.linnum; /* line on which this scan started; compared with the first line of a comment when calling getDocComment */ |
| uint linnum; /* line on which the comment currently being skipped began */ |
| |
| t.blockComment = null; |
| t.lineComment = null; |
| while (true) |
| { |
| t.ptr = p; /* start of the candidate token; set again after every skipped item */ |
| // debug writefln( " p = %d, *p = ", cast(uint)p, cast(char)*p ); |
| switch (*p) |
| { |
| case 0: |
| case 0x1a: |
| t.value = TOK.TOKeof; // end of file; p is not advanced past the terminator |
| // debug writefln( " EOF" ); |
| return; |
| |
| case ' ': |
| case '\t': |
| case '\v': |
| case '\f': |
| p++; |
| // debug writefln( " whitespace" ); |
| continue; // skip white space |
| |
| case '\r': |
| // debug writefln( " cr" ); |
| p++; |
| if (*p != '\n') // if CR stands by itself |
| loc.linnum++; |
| continue; // skip white space |
| |
| case '\n': |
| // debug writefln( " nl" ); |
| p++; |
| loc.linnum++; |
| continue; // skip white space |
| |
| case '0': |
| case '1': |
| case '2': |
| case '3': |
| case '4': |
| case '5': |
| case '6': |
| case '7': |
| case '8': |
| case '9': |
| t.value = number(t); |
| return; |
| |
| /* |
| #if CSTRINGS |
| case '\'': |
| t.value = charConstant(t, 0); |
| return; |
| |
| case '"': |
| t.value = stringConstant(t,0); |
| return; |
| |
| case 'l': |
| case 'L': |
| if( p[1] == '\'') |
| { |
| p++; |
| t.value = charConstant(t, 1); |
| return; |
| } |
| else if( p[1] == '"') |
| { |
| p++; |
| t.value = stringConstant(t, 1); |
| return; |
| } |
| #else |
| */ |
| case '\'': |
| // debug writefln( " char" ); |
| t.value = charConstant(t, 0); |
| return; |
| /* r directly followed by a double quote starts a wysiwyg string; any other r begins an identifier */ |
| case 'r': |
| // debug writefln( " wysiwyg" ); |
| if (p[1] != '"') |
| goto case_ident; |
| p++; |
| /* falls through: p now addresses the opening delimiter, which is passed on as the terminator to look for */ |
| case '`': |
| t.value = wysiwygStringConstant(t, *p); |
| return; |
| /* x directly followed by a double quote starts a hex string; any other x begins an identifier */ |
| case 'x': |
| // debug writefln( " hex string" ); |
| if (p[1] != '"') |
| goto case_ident; |
| p++; |
| t.value = hexStringConstant(t); |
| return; |
| |
| |
| case '"': |
| // debug writefln( " string" ); |
| t.value = escapeStringConstant(t, 0); |
| // debug writefln( t.ustring ); |
| return; |
| |
| case '\\': // escaped string literal: a run of backslash escapes forms one string token |
| // debug writefln( " escaped string literal" ); |
| uint c; |
| stringbuffer.offset = 0; |
| do |
| { |
| p++; |
| c = escapeSequence(); |
| stringbuffer.write(c); |
| } while (*p == '\\'); |
| // t.len = stringbuffer.offset; |
| // stringbuffer.write(cast(byte)0); |
| t.ustring = stringbuffer.toString; /* NOTE(review): no .dup here, unlike escapeStringConstant; stringbuffer is static and rewound for later literals - confirm whether toString copies */ |
| // memcpy( t.ustring.ptr, stringbuffer.data, stringbuffer.offset ); |
| t.postfix = 0; |
| t.value = TOK.TOKstring; |
| return; |
| /* identifier start characters; r and x are absent from this list and arrive through goto case_ident */ |
| case 'l': |
| case 'L': |
| // #endif |
| |
| case 'a': |
| case 'b': |
| case 'c': |
| case 'd': |
| case 'e': |
| case 'f': |
| case 'g': |
| case 'h': |
| case 'i': |
| case 'j': |
| case 'k': |
| case 'm': |
| case 'n': |
| case 'o': |
| case 'p': |
| case 'q': /*case 'r':*/ |
| case 's': |
| case 't': |
| case 'u': |
| case 'v': |
| case 'w': /*case 'x':*/ |
| case 'y': |
| case 'z': |
| case 'A': |
| case 'B': |
| case 'C': |
| case 'D': |
| case 'E': |
| case 'F': |
| case 'G': |
| case 'H': |
| case 'I': |
| case 'J': |
| case 'K': |
| case 'M': |
| case 'N': |
| case 'O': |
| case 'P': |
| case 'Q': |
| case 'R': |
| case 'S': |
| case 'T': |
| case 'U': |
| case 'V': |
| case 'W': |
| case 'X': |
| case 'Y': |
| case 'Z': |
| case '_': |
| case_ident: |
| { |
| // debug writefln( " identifier" ); |
| ubyte c; |
| do |
| { |
| c = *++p; |
| } while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF()))); |
| /* copy the spelling t.ptr .. p and intern it: reuse the Identifier already in stringtable or register a new one */ |
| // sv = stringtable.update((char *)t.ptr, p - t.ptr); |
| char[] tmp; |
| tmp.length = p - t.ptr; |
| memcpy(tmp.ptr, t.ptr, p - t.ptr); |
| Identifier id; |
| Identifier *pid = tmp in stringtable; |
| if (pid) |
| { |
| id = *pid; |
| } |
| |
| if (id is null) |
| { |
| id = new Identifier(tmp, TOK.TOKidentifier); |
| stringtable[tmp] = id; |
| } |
| |
| t.ident = id; |
| t.value = cast(TOK)id.value; /* token kind comes from the Identifier; presumably keywords were registered with their own kind by initKeywords - confirm */ |
| anyToken = 1; |
| |
| // identifiers starting with an underscore may be special tokens the lexer replaces itself: Id.FILE, Id.LINE, Id.DATE, Id.TIME, Id.TIMESTAMP |
| if (*t.ptr == '_') |
| { |
| static char date[11 + 1]; |
| static char time[8 + 1]; |
| static char timestamp[24 + 1]; |
| |
| if (!date[0]) // lazy evaluation |
| { |
| //!! the initialisation below is disabled, so date, time and timestamp are never filled in |
| /+ |
| time_t t; |
| char *p; |
| .time(&t); |
| p = ctime(&t); |
| assert(p); |
| sprintf(date.ptr, "%.6s %.4s", p + 4, p + 20); |
| sprintf(time.ptr, "%.8s", p + 11); |
| sprintf(timestamp.ptr, "%.24s", p); |
| +/ |
| } |
| |
| if (mod && id is Id.FILE) |
| { |
| t.value = TOK.TOKstring; |
| if (loc.filename.length) |
| t.ustring = loc.filename; |
| else |
| t.ustring = mod.ident.toChars(); |
| goto Llen; |
| } |
| else if (mod && id == Id.LINE) |
| { |
| t.value = TOK.TOKint64v; |
| t.uns64value = loc.linnum; |
| } |
| else if (id == Id.DATE) |
| { |
| t.value = TOK.TOKstring; |
| //! t.ustring = date;   NOTE(review): with this line disabled t.ustring is not assigned on this path |
| goto Llen; |
| } |
| else if (id == Id.TIME) |
| { |
| t.value = TOK.TOKstring; |
| //! t.ustring = time;   NOTE(review): with this line disabled t.ustring is not assigned on this path |
| goto Llen; |
| } |
| else if (id == Id.TIMESTAMP) |
| { |
| t.value = TOK.TOKstring; |
| //! t.ustring = timestamp;   NOTE(review): with this line disabled t.ustring is not assigned on this path |
| Llen: |
| t.postfix = 0; |
| // t.len = strlen((char *)t.ustring); |
| } |
| } |
| //printf("t.value = %d\n",t.value); |
| return; |
| } |
| |
| // slash: divide, divide-assign, or the start of one of the three comment forms |
| case '/': |
| p++; |
| switch (*p) |
| { |
| case '=': |
| p++; |
| t.value = TOK.TOKdivass; |
| return; |
| |
| case '*': // '/*' |
| p++; |
| linnum = loc.linnum; |
| while (true) |
| { |
| while (true) |
| { |
| ubyte c = *p; |
| switch (c) |
| { |
| case '/': |
| break; |
| |
| case '\n': |
| loc.linnum++; |
| p++; |
| continue; |
| |
| case '\r': |
| p++; |
| if (*p != '\n') |
| loc.linnum++; |
| continue; |
| |
| case 0: |
| case 0x1A: |
| error("unterminated /* */ comment"); |
| p = end; |
| t.value = TOK.TOKeof; |
| return; |
| |
| default: |
| if (c & 0x80) |
| { |
| uint u = decodeUTF(); |
| if (u == PS || u == LS) |
| loc.linnum++; |
| } |
| p++; |
| continue; |
| } |
| break; |
| } |
| p++; |
| if (p[-2] == '*' && p - 3 != t.ptr) /* the slash just passed closes the comment only if a star precedes it and that star is not the one that opened the comment */ |
| break; |
| } |
| |
| if (commentToken) |
| { |
| t.value = TOK.TOKcomment; |
| return; |
| } |
| // if /** but not /**/ |
| else if (doDocComment && t.ptr[2] == '*' && p - 4 != t.ptr) |
| getDocComment(t, lastLine == linnum); //! ? |
| continue; |
| |
| case '/': // do // style comments |
| linnum = loc.linnum; |
| while (1) |
| { |
| ubyte c = *++p; |
| switch (c) |
| { |
| case '\n': |
| break; |
| |
| case '\r': |
| if (p[1] == '\n') |
| p++; |
| break; |
| |
| case 0: |
| case 0x1a: |
| if (commentToken) |
| { |
| p = end; |
| t.value = TOK.TOKcomment; |
| return; |
| } |
| if (doDocComment && t.ptr[2] == '/') |
| getDocComment(t, lastLine == linnum); |
| p = end; |
| t.value = TOK.TOKeof; |
| return; |
| |
| default: |
| if (c & 0x80) |
| { |
| uint u = decodeUTF(); |
| if (u == PS || u == LS) |
| break; |
| } |
| continue; |
| } |
| break; |
| } |
| |
| if (commentToken) |
| { |
| p++; |
| loc.linnum++; |
| t.value = TOK.TOKcomment; |
| return; |
| } |
| if (doDocComment && t.ptr[2] == '/') |
| getDocComment(t, lastLine == linnum); |
| |
| p++; |
| loc.linnum++; |
| continue; |
| /* nesting comment: nest counts the open slash-plus markers and the comment ends when it returns to zero */ |
| case '+': |
| { |
| int nest; |
| linnum = loc.linnum; |
| p++; |
| nest = 1; |
| while (1) |
| { |
| ubyte c = *p; |
| switch (c) |
| { |
| case '/': |
| p++; |
| if (*p == '+') |
| { |
| p++; |
| nest++; |
| } |
| continue; |
| |
| case '+': |
| p++; |
| if (*p == '/') |
| { |
| p++; |
| if (--nest == 0) |
| break; |
| } |
| continue; |
| |
| case '\r': |
| p++; |
| if (*p != '\n') |
| loc.linnum++; |
| continue; |
| |
| case '\n': |
| loc.linnum++; |
| p++; |
| continue; |
| |
| case 0: |
| case 0x1A: |
| error("unterminated /+ +/ comment"); |
| p = end; |
| t.value = TOK.TOKeof; |
| return; |
| |
| default: |
| if (c & 0x80) |
| { |
| uint u = decodeUTF(); |
| if (u == PS || u == LS) |
| loc.linnum++; |
| } |
| p++; |
| continue; |
| } |
| break; |
| } |
| if (commentToken) |
| { |
| t.value = TOK.TOKcomment; |
| return; |
| } |
| if (doDocComment && t.ptr[2] == '+' && p - 4 != t.ptr) |
| { |
| // if /++ but not /++/ |
| getDocComment(t, lastLine == linnum); |
| } |
| continue; |
| } |
| |
| default: |
| break; |
| } |
| t.value = TOK.TOKdiv; |
| return; |
| /* dot: start of a real literal when a digit follows, otherwise one of the dot, slice or dotdotdot tokens */ |
| case '.': |
| p++; |
| if (isdigit(*p)) |
| { |
| p--; |
| t.value = inreal(t); |
| } |
| else if (p[0] == '.') |
| { |
| if (p[1] == '.') |
| { |
| p += 2; |
| t.value = TOK.TOKdotdotdot; |
| } |
| else |
| { |
| p++; |
| t.value = TOK.TOKslice; |
| } |
| } |
| else |
| t.value = TOK.TOKdot; |
| return; |
| |
| case '&': |
| p++; |
| if (*p == '=') |
| { |
| p++; |
| t.value = TOK.TOKandass; |
| } |
| else if (*p == '&') |
| { |
| p++; |
| t.value = TOK.TOKandand; |
| } |
| else |
| t.value = TOK.TOKand; |
| return; |
| |
| // |, ||, |= |
| case '|': |
| p++; |
| if (*p == '=') |
| { |
| p++; |
| t.value = TOK.TOKorass; |
| } |
| else if (*p == '|') |
| { |
| p++; |
| t.value = TOK.TOKoror; |
| } |
| else |
| t.value = TOK.TOKor; |
| return; |
| |
| case '-': |
| p++; |
| if (*p == '=') |
| { |
| p++; |
| t.value = TOK.TOKminass; |
| } |
| else if (*p == '-') |
| { |
| p++; |
| t.value = TOK.TOKminusminus; |
| } |
| else |
| t.value = TOK.TOKmin; |
| return; |
| |
| // +, +=, ++ |
| case '+': |
| p++; |
| if (*p == '=') |
| { |
| p++; |
| t.value = TOK.TOKaddass; // += |
| } |
| else if (*p == '+') |
| { |
| p++; |
| t.value = TOK.TOKplusplus; // ++ |
| } |
| else |
| t.value = TOK.TOKadd; // + |
| return; |
| |
| // <, <=, <<=, <<, <>=, <> |
| case '<': |
| p++; |
| if (*p == '=') |
| { |
| p++; |
| t.value = TOK.TOKle; // <= |
| } |
| else if (*p == '<') |
| { |
| p++; |
| if (*p == '=') |
| { |
| p++; |
| t.value = TOK.TOKshlass; // <<= |
| } |
| else |
| t.value = TOK.TOKshl; // << |
| } |
| else if (*p == '>') |
| { |
| p++; |
| if (*p == '=') |
| { |
| p++; |
| t.value = TOK.TOKleg; // <>= |
| } |
| else |
| t.value = TOK.TOKlg; // <> |
| } |
| else |
| t.value = TOK.TOKlt; // < |
| return; |
| |
| // >, >>, >>>, >=, >>=, >>>= |
| case '>': |
| p++; |
| if (*p == '=') |
| { |
| p++; |
| t.value = TOK.TOKge; // >= |
| } |
| else if (*p == '>') |
| { |
| p++; |
| if (*p == '=') |
| { |
| p++; |
| t.value = TOK.TOKshrass; // >>= |
| } |
| else if (*p == '>') |
| { |
| p++; |
| if (*p == '=') |
| { |
| p++; |
| t.value = TOK.TOKushrass; // >>>= |
| } |
| else |
| t.value = TOK.TOKushr; // >>> |
| } |
| else |
| t.value = TOK.TOKshr; // >> |
| } |
| else |
| t.value = TOK.TOKgt; // > |
| return; |
| |
| case '!': |
| p++; |
| if (*p == '=') |
| { |
| p++; |
| if (*p == '=') |
| { |
| p++; |
| t.value = TOK.TOKnotidentity; // !== |
| } |
| else |
| t.value = TOK.TOKnotequal; // != |
| } |
| else if (*p == '<') |
| { |
| p++; |
| if (*p == '>') |
| { |
| p++; |
| if (*p == '=') |
| { |
| p++; |
| t.value = TOK.TOKunord; // !<>= |
| } |
| else |
| t.value = TOK.TOKue; // !<> |
| } |
| else if (*p == '=') |
| { |
| p++; |
| t.value = TOK.TOKug; // !<= |
| } |
| else |
| t.value = TOK.TOKuge; // !< |
| } |
| else if (*p == '>') |
| { |
| p++; |
| if (*p == '=') |
| { |
| p++; |
| t.value = TOK.TOKul; // !>= |
| } |
| else |
| t.value = TOK.TOKule; // !> |
| } |
| else |
| t.value = TOK.TOKnot; // ! |
| return; |
| |
| case '=': |
| p++; |
| if (*p == '=') |
| { |
| p++; |
| if (*p == '=') |
| { |
| p++; |
| t.value = TOK.TOKidentity; // === |
| } |
| else |
| t.value = TOK.TOKequal; // == |
| } |
| else |
| t.value = TOK.TOKassign; // = |
| return; |
| |
| case '~': |
| p++; |
| if (*p == '=') |
| { |
| p++; |
| t.value = TOK.TOKcatass; // ~= |
| } |
| else |
| t.value = TOK.TOKtilde; // ~ |
| return; |
| |
| // single-character tokens |
| case '(': p++; t.value = TOK.TOKlparen; return; |
| |
| case ')': p++; t.value = TOK.TOKrparen; return; |
| |
| case '[': p++; t.value = TOK.TOKlbracket; return; |
| |
| case ']': p++; t.value = TOK.TOKrbracket; return; |
| |
| case '{': p++; t.value = TOK.TOKlcurly; return; |
| |
| case '}': p++; t.value = TOK.TOKrcurly; return; |
| |
| case '?': p++; t.value = TOK.TOKquestion; return; |
| |
| case ',': p++; t.value = TOK.TOKcomma; return; |
| |
| case ';': p++; t.value = TOK.TOKsemicolon; return; |
| |
| case ':': p++; t.value = TOK.TOKcolon; return; |
| |
| case '$': p++; t.value = TOK.TOKdollar; return; |
| |
| // operators that may be followed by = to form the assigning variant |
| case '*': p++; if (*p == '=') |
| { |
| p++; t.value = TOK.TOKmulass; |
| } |
| else |
| t.value = TOK.TOKmul; |
| return; |
| |
| case '%': p++; if (*p == '=') |
| { |
| p++; t.value = TOK.TOKmodass; |
| } |
| else |
| t.value = TOK.TOKmod; |
| return; |
| |
| case '^': p++; if (*p == '=') |
| { |
| p++; t.value = TOK.TOKxorass; |
| } |
| else |
| t.value = TOK.TOKxor; |
| return; |
| |
| // removed 148 case '~': p++; if( *p == '=' ) { p++; t.value = TOK.TOKcatass; } else t.value = TOK.TOKtilde; return; |
| |
| /* a hash sign hands control to Pragma(), defined outside this part of the file; scanning then resumes */ |
| case '#': |
| p++; |
| Pragma(); |
| continue; |
| /* anything else: a non-ASCII byte may start a Unicode identifier or be a line separator; all other characters are reported and skipped */ |
| default: |
| { |
| debug writefln(" default char"); |
| ubyte c = *p; |
| if (c & 0x80) |
| { |
| uint u = decodeUTF(); |
| // Check for start of unicode identifier |
| if (isUniAlpha(u)) |
| goto case_ident; |
| |
| if (u == PS || u == LS) |
| { |
| loc.linnum++; |
| p++; |
| continue; |
| } |
| } |
| if (isprint(c)) |
| error("unsupported char '%s'", cast(char)c); |
| else |
| error("unsupported char 0x%02x", cast(ubyte)c); |
| p++; |
| continue; |
| } |
| } |
| } |
| } |
| |
| |
| |
| /** |
|  * Parse one escape sequence. |
|  * |
|  * On entry p addresses the character that follows the backslash. On return |
|  * p addresses the first character after the sequence, except for an |
|  * undefined escape, where p is left on the offending character. |
|  * |
|  * Returns: the value of the escape. At end of input a literal backslash is |
|  * returned. A named entity (ampersand form) currently yields the ampersand |
|  * itself because the entity lookup is not available in this port. |
|  */ |
| uint escapeSequence() |
| { |
|     uint c; |
|     int n; |
|     int ndigits; |
| |
|     c = *p; |
|     switch (c) |
|     { |
|         case '\'': |
|         case '"': |
|         case '?': |
|         case '\\': |
|         Lconsume: |
|             p++; |
|             break; |
| |
|         case 'a': c = 7; goto Lconsume; |
| |
|         case 'b': c = 8; goto Lconsume; |
| |
|         case 'f': c = 12; goto Lconsume; |
| |
|         case 'n': c = 10; goto Lconsume; |
| |
|         case 'r': c = 13; goto Lconsume; |
| |
|         case 't': c = 9; goto Lconsume; |
| |
|         case 'v': c = 11; goto Lconsume; |
| |
|         case 'u': |
|             ndigits = 4; |
|             goto Lhex; |
| |
|         case 'U': |
|             ndigits = 8; |
|             goto Lhex; |
| |
|         case 'x': |
|             ndigits = 2; |
|         Lhex: |
|             p++; |
|             c = *p; |
|             if (ishex(c)) |
|             { |
|                 uint v; |
|                 n = 0; |
|                 v = 0; |
|                 while (1) |
|                 { |
|                     if (isdigit(c)) |
|                         c -= '0'; |
|                     else if (islower(c)) |
|                         c -= 'a' - 10; |
|                     else |
|                         c -= 'A' - 10; |
|                     v = v * 16 + c; |
|                     c = *++p; |
|                     if (++n == ndigits) |
|                         break; |
|                     if (!ishex(c)) |
|                     { |
|                         error("escape hex sequence has %d hex digits instead of %d", n, ndigits); |
|                         break; |
|                     } |
|                 } |
|                 //! if( ndigits != 2 && !utf_isValidDchar(v)) |
|                 //! error("invalid UTF character \\U%08x", v); |
|                 c = v; |
|             } |
|             else if (c < 0x80 && isprint(cast(ubyte)c)) |
|                 error("undefined escape hex sequence \\%s\n", cast(char)c);   // show the character, not its code |
|             else |
|                 error("undefined escape hex sequence \\%s\n", c); |
|             break; |
| |
|         case '&': // named character entity |
|             for (ubyte *idstart = ++p; 1; p++) |
|             { |
|                 switch (*p) |
|                 { |
|                     case ';': |
|                         //!!! entity lookup is disabled: HtmlNamedEntity is not available here, |
|                         //    so c keeps the ampersand read on entry. |
|                         /+ |
|                         c = HtmlNamedEntity(idstart, p - idstart); |
|                         if( c == ~0 ) |
|                         { |
|                         error("unnamed character entity &%.*s;", p - idstart, idstart); |
|                         c = ' '; |
|                         } |
|                         +/ |
|                         // Consume the terminating semicolon: it belongs to the escape and |
|                         // must not be scanned as a separate token afterwards. |
|                         p++; |
|                         break; |
| |
|                     default: |
|                         if (isalpha(*p) || (p != idstart + 1 && isdigit(*p))) |
|                             continue; |
|                         error("unterminated named entity"); |
|                         break; |
|                 } |
|                 break; |
|             } |
|             break; |
| |
|         case 0: |
|         case 0x1a: // end of file |
|             c = '\\'; |
|             break; |
| |
|         default: |
|             if (isoctal(c)) |
|             { |
|                 // Accumulate in a uint so that values above 0377 can be detected |
|                 // instead of wrapping silently. |
|                 uint v = 0; |
|                 n = 0; |
|                 do |
|                 { |
|                     v = v * 8 + (c - '0'); |
|                     c = *++p; |
|                 } while (++n < 3 && isoctal(c)); |
|                 if (v > 0xFF) |
|                     error("escape octal sequence is larger than \\377"); |
|                 c = v & 0xFF;   // same value the former byte-sized accumulator produced |
|             } |
|             else if (c < 0x80 && isprint(cast(ubyte)c)) |
|                 error("undefined escape sequence \\%s\n", cast(char)c);   // show the character, not its code |
|             else |
|                 error("undefined escape sequence \\%s\n", c); |
|             break; |
|     } |
|     return c; |
| } |
| |
| /************************************** |
|  * Lex a wysiwyg string: no escape processing, everything up to the closing |
|  * delimiter is taken literally. |
|  * |
|  * On entry p addresses the opening delimiter and tc is that delimiter |
|  * (a double quote or a backquote). Line ends are stored as a single \n. |
|  * |
|  * Returns TOK.TOKstring with t.ustring and t.postfix set. An unterminated |
|  * string is reported and yields an empty string. |
|  */ |
| |
| TOK wysiwygStringConstant(Token *t, int tc) |
| { |
|     uint c; |
|     Loc start = loc; |
| |
|     p++; |
|     stringbuffer.offset = 0; |
|     while (1) |
|     { |
|         c = *p++; |
|         switch (c) |
|         { |
|             case '\n': |
|                 loc.linnum++; |
|                 break; |
| |
|             case '\r': |
|                 if (*p == '\n') |
|                     continue;   // ignore |
|                 c = '\n';       // treat EndOfLine as \n character |
|                 loc.linnum++; |
|                 break; |
| |
|             case 0: |
|             case 0x1a: |
|                 // Step back onto the terminator (as escapeStringConstant does) so the |
|                 // next scan sees end of file instead of reading past the buffer. |
|                 p--; |
|                 error("unterminated string constant starting at %s", start.toChars()); |
|                 t.ustring = ""; |
|                 t.postfix = 0; |
|                 return TOK.TOKstring; |
| |
|             case '"': |
|             case '`': |
|                 if (c == tc) |
|                 { |
|                     // Copy the text out of the shared static buffer: it is rewound and |
|                     // reused by the next literal. No NUL is appended, matching |
|                     // escapeStringConstant; the slice length is the string length. |
|                     t.ustring = stringbuffer.toString().dup; |
|                     stringPostfix(t); |
|                     return TOK.TOKstring; |
|                 } |
|                 break; |
| |
|             default: |
|                 if (c & 0x80) |
|                 { |
|                     p--; |
|                     ubyte *seq = p;         // first byte of the UTF-8 sequence |
|                     uint u = decodeUTF(); |
|                     p++; |
|                     if (u == PS || u == LS) |
|                         loc.linnum++; |
|                     // The source is already UTF-8: copy the bytes through unchanged |
|                     // rather than writing the decoded value as a uint. |
|                     for (; seq < p; seq++) |
|                         stringbuffer.write(cast(char)*seq); |
|                     continue; |
|                 } |
|                 break; |
|         } |
|         // One source character is one byte of string data; use the char overload |
|         // like escapeStringConstant does. |
|         stringbuffer.write(cast(char)c); |
|     } |
| } |
| |
| /************************************** |
|  * Lex hex strings: |
|  * x"0A ae 34FE BD" |
|  * |
|  * On entry p addresses the opening double quote. Each pair of hex digits |
|  * produces one byte of string data; white space and line ends between |
|  * digits are ignored. |
|  * |
|  * Returns TOK.TOKstring with t.ustring and t.postfix set. An odd digit count |
|  * and non-hex characters are reported; an unterminated string is reported |
|  * and yields an empty string. |
|  */ |
| |
| TOK hexStringConstant(Token *t) |
| { |
|     uint c; |
|     Loc start = loc; |
|     uint n = 0;     // number of digits seen so far |
|     uint v;         // byte being assembled from two digits |
| |
|     p++; |
|     stringbuffer.offset = 0; |
|     while (1) |
|     { |
|         c = *p++; |
|         switch (c) |
|         { |
|             case ' ': |
|             case '\t': |
|             case '\v': |
|             case '\f': |
|                 continue;   // skip white space |
| |
|             case '\r': |
|                 if (*p == '\n') |
|                     continue;   // ignore |
| |
|             // Treat isolated '\r' as if it were a '\n' |
|             case '\n': |
|                 loc.linnum++; |
|                 continue; |
| |
|             case 0: |
|             case 0x1a: |
|                 // Step back onto the terminator (as escapeStringConstant does) so the |
|                 // next scan sees end of file instead of reading past the buffer. |
|                 p--; |
|                 error("unterminated string constant starting at %s", start.toChars()); |
|                 t.ustring = ""; |
|                 t.postfix = 0; |
|                 return TOK.TOKstring; |
| |
|             case '"': |
|                 if (n & 1) |
|                 { |
|                     error("odd number (%d) of hex characters in hex string", n); |
|                     stringbuffer.write(cast(char)v); |
|                 } |
|                 // Copy the text out of the shared static buffer: it is rewound and |
|                 // reused by the next literal. |
|                 t.ustring = stringbuffer.toString().dup; |
|                 stringPostfix(t); |
|                 return TOK.TOKstring; |
| |
|             default: |
|                 if (c >= '0' && c <= '9') |
|                     c -= '0'; |
|                 else if (c >= 'a' && c <= 'f') |
|                     c -= 'a' - 10; |
|                 else if (c >= 'A' && c <= 'F') |
|                     c -= 'A' - 10; |
|                 else if (c & 0x80) |
|                 { |
|                     p--; |
|                     uint u = decodeUTF(); |
|                     p++; |
|                     if (u == PS || u == LS) |
|                         loc.linnum++; |
|                     else |
|                         error("non-hex character \\u%x", u); |
|                 } |
|                 else if (isprint(cast(ubyte)c)) |
|                     error("non-hex character '%s'", cast(char)c);   // show the character, not its code |
|                 else |
|                     error("non-hex character '%s'", c); |
|                 if (n & 1) |
|                 { |
|                     v = (v << 4) | c; |
|                     // Two digits make exactly one byte: use the char overload, not the uint one. |
|                     stringbuffer.write(cast(char)v); |
|                 } |
|                 else |
|                     v = c; |
|                 n++; |
|                 break; |
|         } |
|     } |
| } |
| |
| /************************************** |
|  * Lex a double-quoted string with backslash escapes. |
|  * |
|  * On entry p addresses the opening double quote. Line ends are stored as a |
|  * single \n. The wide parameter is not used. |
|  * |
|  * Returns TOK.TOKstring with t.ustring and t.postfix set. An unterminated |
|  * string is reported and yields an empty string, with p left on the |
|  * end-of-input character. |
|  */ |
| |
| TOK escapeStringConstant(Token *t, int wide) |
| { |
|     Loc start = loc; |
|     uint ch; |
| |
|     p++; |
|     stringbuffer.offset = 0; |
|     for (;;) |
|     { |
|         ch = *p++; |
| |
|         if (ch == '\\') |
|         { |
|             // Decide before parsing: escapeSequence moves p. |
|             ubyte selector = *p; |
|             ch = escapeSequence(); |
|             if (selector == 'u' || selector == 'U' || selector == '&') |
|             { |
|                 stringbuffer.write(ch);     // written through the uint overload |
|                 continue; |
|             } |
|         } |
|         else if (ch == '\n') |
|         { |
|             loc.linnum++; |
|         } |
|         else if (ch == '\r') |
|         { |
|             if (*p == '\n') |
|                 continue;                   // the \n that follows is handled next |
|             ch = '\n';                      // a lone CR counts as a line end |
|             loc.linnum++; |
|         } |
|         else if (ch == '"') |
|         { |
|             t.ustring = stringbuffer.toString().dup; |
|             stringPostfix(t); |
|             return TOK.TOKstring; |
|         } |
|         else if (ch == 0 || ch == 0x1a) |
|         { |
|             p--; |
|             error("unterminated string constant starting at %s", start.toChars()); |
|             t.ustring = ""; |
|             t.postfix = 0; |
|             return TOK.TOKstring; |
|         } |
|         else if (ch & 0x80) |
|         { |
|             p--; |
|             ch = decodeUTF(); |
|             if (ch == LS || ch == PS) |
|             { |
|                 ch = '\n'; |
|                 loc.linnum++; |
|             } |
|             p++; |
|         } |
| |
|         stringbuffer.write(cast(char)ch); |
|     } |
| } |
| |
| //************************************** |
| // Lex a character literal. On entry p addresses the opening single quote. |
| // The value is stored in t.uns64value and the token kind is returned: |
| // TOKcharv by default, TOKwcharv or TOKdcharv for \u, \U, named-entity |
| // escapes and non-ASCII characters. The wide parameter is not used. |
| // A missing closing quote is reported as an unterminated character constant. |
| TOK charConstant(Token *t, int wide) |
| { |
|     uint c; |
|     TOK tk = TOK.TOKcharv; |
| |
|     //printf("Lexer.charConstant\n"); |
|     p++; |
|     c = *p++; |
|     switch (c) |
|     { |
|         case '\\': |
|             switch (*p) |
|             { |
|                 case 'u': |
|                     t.uns64value = escapeSequence(); |
|                     tk = TOK.TOKwcharv; |
|                     break; |
| |
|                 case 'U': |
|                 case '&': |
|                     t.uns64value = escapeSequence(); |
|                     tk = TOK.TOKdcharv; |
|                     break; |
| |
|                 default: |
|                     t.uns64value = escapeSequence(); |
|                     break; |
|             } |
|             break; |
| |
|         case 0: |
|         case 0x1a: |
|             // Step back onto the terminator so the next scan sees end of file |
|             // instead of reading past the buffer. |
|             p--; |
|             error("unterminated character constant"); |
|             return tk; |
| |
|         case '\n': |
|         L1: |
|             loc.linnum++; |
|             // falls through to the error report |
| |
|         case '\r': |
|         case '\'': |
|             error("unterminated character constant"); |
|             return tk; |
| |
|         default: |
|             if (c & 0x80) |
|             { |
|                 p--; |
|                 c = decodeUTF(); |
|                 p++; |
|                 if (c == LS || c == PS) |
|                     goto L1; |
|                 if (c < 0xd800 || (c >= 0xe000 && c < 0xfffe)) |
|                     tk = TOK.TOKwcharv; |
|                 else |
|                     tk = TOK.TOKdcharv; |
|             } |
|             t.uns64value = c; |
|             break; |
|     } |
| |
|     if (*p != '\'') |
|     { |
|         error("unterminated character constant"); |
|         return tk; |
|     } |
|     p++; |
|     return tk; |
| } |
| |
| // Record the optional type suffix of a string literal in t.postfix. |
| // A suffix character of c, w or d is stored and consumed; anything else |
| // leaves p untouched and stores 0. |
| void stringPostfix(Token *t) |
| { |
|     if (*p == 'c' || *p == 'w' || *p == 'd') |
|     { |
|         t.postfix = *p; |
|         p++; |
|     } |
|     else |
|     { |
|         t.postfix = 0; |
|     } |
| } |
| |
| /*************************************** |
| * Read \u or \U unicode sequence |
| * Input: |
| * u 'u' or 'U' |
| */ |
| /* |
| uint Wchar(uint u) |
| { |
| uint value; |
| uint n; |
| ubyte c; |
| uint nchars; |
| |
| nchars = (u == 'U') ? 8 : 4; |
| value = 0; |
| for (n = 0; 1; n++) |
| { |
| ++p; |
| if( n == nchars) |
| break; |
| c = *p; |
| if( !ishex(c)) |
| { |
| error("\\%s sequence must be followed by %d hex characters", u, nchars); |
| break; |
| } |
| if( isdigit(c)) |
| c -= '0'; |
| else if( islower(c)) |
| c -= 'a' - 10; |
| else |
| c -= 'A' - 10; |
| value <<= 4; |
| value |= c; |
| } |
| return value; |
| } |
| */ |
| |
| /************************************** |
| * Read in a number. |
| * If it's an integer, store it in tok.TKutok.Vlong. |
| * integers can be decimal, octal or hex |
| * Handle the suffixes U, UL, LU, L, etc. |
| * If it's double, store it in tok.TKutok.Vdouble. |
| * Returns: |
| * TKnum |
| * TKdouble,... |
| */ |
| |
| TOK number(Token *t) |
| { |
| //debug writefln("Lexer.number()"); |
| // We use a state machine to collect numbers |
| enum STATE |
| { |
| STATE_initial, |
| STATE_0, |
| STATE_decimal, |
| STATE_octal, |
| STATE_octale, |
| STATE_hex, |
| STATE_binary, |
| STATE_hex0, |
| STATE_binary0, |
| STATE_hexh, |
| STATE_error |
| } |
| |
| enum FLAGS |
| { |
| FLAGS_decimal = 1, // decimal |
| FLAGS_unsigned = 2, // u or U suffix |
| FLAGS_long = 4, // l or L suffix |
| } |
| FLAGS flags = FLAGS.FLAGS_decimal; |
| |
| int i; |
| TOK result; |
| int base; |
| |
| stringbuffer.offset = 0; |
| // stringbuffer.data = null; |
| STATE state = STATE.STATE_initial; |
| ubyte *start = p; |
| |
| TOK _isreal() |
| { |
| p = start; |
| return inreal(t); |
| } |
| |
| while (true) |
| { |
| char c = cast(char)*p; |
| switch (state) |
| { |
| case STATE.STATE_initial: // opening state |
| if (c == '0') |
| state = STATE.STATE_0; |
| else |
| state = STATE.STATE_decimal; |
| break; |
| |
| case STATE.STATE_0: |
| flags = cast(FLAGS)(flags & ~FLAGS.FLAGS_decimal); |
| switch (c) |
| { |
| // #if ZEROH |
| // case 'H': // 0h |
| // case 'h': |
| // goto hexh; |
| // #endif |
| case 'X': |
| case 'x': |
| state = STATE.STATE_hex0; |
| break; |
| |
| case '.': |
| if (p[1] == '.') // .. is a separate token |
| goto done; |
| |
| case 'i': |
| case 'f': |
| case 'F': |
| goto _Real; |
| |
| // #if ZEROH |
| // case 'E': |
| // case 'e': |
| // goto case_hex; |
| // #endif |
| case 'B': |
| case 'b': |
| state = STATE.STATE_binary0; |
| break; |
| |
| case '0': |
| case '1': |
| case '2': |
| case '3': |
| case '4': |
| case '5': |
| case '6': |
| case '7': |
| state = STATE.STATE_octal; |
| break; |
| |
| // #if ZEROH |
| // case '8': case '9': case 'A': |
| // case 'C': case 'D': case 'F': |
| // case 'a': case 'c': case 'd': case 'f': |
| // case_hex: |
| // state = STATE.STATE_hexh; |
| // break; |
| // #endif |
| case '_': |
| state = STATE.STATE_octal; |
| p++; |
| continue; |
| |
| default: |
| goto done; |
| } |
| break; |
| |
| case STATE.STATE_decimal: // reading decimal number |
| |
| // if its not a digit - decimal complete or not a decimal |
| if (!isdigit(c)) |
| { |
| // debug writefln( "\tnon-digit( %s )", c ); |
| // #if ZEROH |
| // if( ishex(c) || c == 'H' || c == 'h' ) |
| // goto hexh; |
| // #endif |
| //! wtf ? |
| // ignore embedded _ |
| if (c == '_') |
| { |
| p++; |
| continue; |
| } |
| |
| // check decimal point - make real |
| if (c == '.' && p[1] != '.') |
| goto _Real; |
| |
| // check for mantra - make real |
| if (c == 'i' || c == 'f' || c == 'F' || c == 'e' || c == 'E') |
| { |
| _Real: // It's a real number. Back up and rescan as a real |
| p = start; |
| return inreal(t); |
| } |
| |
| goto done; |
| } |
| break; |
| |
| case STATE.STATE_hex0: // reading hex number |
| case STATE.STATE_hex: |
| if (!ishex(c)) |
| { |
| if (c == '_') // ignore embedded _ |
| { |
| p++; |
| continue; |
| } |
| if (c == '.' && p[1] != '.') |
| goto _Real; |
| if (c == 'P' || c == 'p' || c == 'i') |
| goto _Real; |
| if (state == STATE.STATE_hex0) |
| error("Hex digit expected, not '%s'", c); |
| goto done; |
| } |
| state = STATE.STATE_hex; |
| break; |
| |
| // #if ZEROH |
| // hexh: |
| // state = STATE.STATE_hexh; |
| // |
| // case STATE.STATE_hexh: // parse numbers like 0FFh |
| // if( !ishex(c)) |
| // { |
| // if( c == 'H' || c == 'h') |
| // { |
| // p++; |
| // base = 16; |
| // goto done; |
| // } |
| // else |
| // { |
| // // Check for something like 1E3 or 0E24 |
| // if( memchr(stringbuffer.data.ptr, 'E', stringbuffer.offset) || memchr( stringbuffer.data.ptr, 'e', stringbuffer.offset)) |
| // goto _Real; |
| // error("Hex digit expected, not '%s'", c); |
| // goto done; |
| // } |
| // } |
| // break; |
| // #endif |
| |
| case STATE.STATE_octal: // reading octal number |
| case STATE.STATE_octale: // reading octal number with non-octal digits |
| if (!isoctal(c)) |
| { |
| // #if ZEROH |
| // if( ishex(c) || c == 'H' || c == 'h' ) |
| // goto hexh; |
| // #endif |
| if (c == '_') // ignore embedded _ |
| { |
| p++; |
| continue; |
| } |
| if (c == '.' && p[1] != '.') |
| goto _Real; |
| if (c == 'i') |
| goto _Real; |
| if (isdigit(c)) |
| state = STATE.STATE_octale; |
| else |
| goto done; |
| } |
| break; |
| |
| case STATE.STATE_binary0: // starting binary number |
| case STATE.STATE_binary: // reading binary number |
| if (c != '0' && c != '1') |
| { |
| // #if ZEROH |
| // if( ishex(c) || c == 'H' || c == 'h' ) |
| // goto hexh; |
| // #endif |
| if (c == '_') // ignore embedded _ |
| { |
| p++; |
| continue; |
| } |
| if (state == STATE.STATE_binary0) |
| { |
| error("binary digit expected"); |
| state = STATE.STATE_error; |
| break; |
| } |
| else |
| goto done; |
| } |
| state = STATE.STATE_binary; |
| break; |
| |
| case STATE.STATE_error: // for error recovery |
| if (!isdigit(c)) // scan until non-digit |
| goto done; |
| break; |
| |
| default: |
| assert(0); |
| } |
| stringbuffer.write(cast(char)c); |
| p++; |
| } |
| done: |
| stringbuffer.write(cast(char)0); // terminate string |
| |
| // debug writefln( "\tdigit complete( %s )", stringbuffer.toString ); |
| |
| if (state == STATE.STATE_octale) |
| error("Octal digit expected"); |
| |
| uinteger_t n; // unsigned >=64 bit integer type |
| |
| if (stringbuffer.offset == 2 && (state == STATE.STATE_decimal || state == STATE.STATE_0)) |
| n = stringbuffer.data[0] - '0'; |
| else |
| { |
| // Convert string to integer |
| char *p = cast(char *)stringbuffer.data.ptr; |
| int r = 10; |
| int d; |
| if (*p == '0') |
| { |
| if (p[1] == 'x' || p[1] == 'X') |
| { |
| // "0x#" |
| p += 2; |
| r = 16; |
| } |
| else if (p[1] == 'b' || p[1] == 'B') |
| { |
| // "0b#" - binary |
| p += 2; |
| r = 2; |
| } |
| else if (isdigit(p[1])) |
| { |
| p += 1; |
| r = 8; |
| } |
| } |
| |
| n = 0; |
| |
| while (true) |
| { |
| if (*p >= '0' && *p <= '9') |
| d = *p - '0'; |
| else if (*p >= 'a' && *p <= 'z') |
| d = *p - 'a' + 10; |
| else if (*p >= 'A' && *p <= 'Z') |
| d = *p - 'A' + 10; |
| else |
| break; |
| |
| if (d >= r) |
| break; |
| |
| if (n * r + d < n) |
| { |
| error("integer overflow"); |
| break; |
| } |
| |
| n = n * r + d; |
| p++; |
| } |
| |
| // if n needs more than 64 bits |
| if (n.sizeof > 8 && n > 0xffffffffffffffffL) |
| error("integer overflow"); |
| } |
| |
| // Parse trailing 'u', 'U', 'l' or 'L' in any combination |
| while (true) |
| { |
| ubyte f; |
| switch (*p) |
| { |
| case 'U': |
| case 'u': |
| f = FLAGS.FLAGS_unsigned; |
| goto L1; |
| |
| case 'L': |
| case 'l': |
| f = FLAGS.FLAGS_long; |
| L1: |
| p++; |
| if (flags & f) |
| error("unrecognized token"); |
| flags = cast(FLAGS)(flags | f); |
| continue; |
| |
| default: |
| break; |
| } |
| break; |
| } |
| |
| switch (flags) |
| { |
| case 0: |
| /* Octal or Hexadecimal constant. |
| * First that fits: int, uint, long, ulong |
| */ |
| if (n & 0x8000000000000000L) |
| result = TOK.TOKuns64v; |
| else if (n & 0xffffffff00000000L) |
| result = TOK.TOKint64v; |
| else if (n & 0x80000000) |
| result = TOK.TOKuns32v; |
| else |
| result = TOK.TOKint32v; |
| break; |
| |
| case FLAGS.FLAGS_decimal: |
| /* First that fits: int, long, long long |
| */ |
| if (n & 0x8000000000000000L) |
| { |
| error("signed integer overflow"); |
| result = TOK.TOKuns64v; |
| } |
| else if (n & 0xffffffff80000000L) |
| result = TOK.TOKint64v; |
| else |
| result = TOK.TOKint32v; |
| break; |
| |
| case FLAGS.FLAGS_unsigned: |
| case FLAGS.FLAGS_decimal | FLAGS.FLAGS_unsigned: |
| /* First that fits: uint, ulong |
| */ |
| if (n & 0xffffffff00000000L) |
| result = TOK.TOKuns64v; |
| else |
| result = TOK.TOKuns32v; |
| break; |
| |
| case FLAGS.FLAGS_decimal | FLAGS.FLAGS_long: |
| if (n & 0x8000000000000000L) |
| { |
| error("signed integer overflow"); |
| result = TOK.TOKuns64v; |
| } |
| else |
| result = TOK.TOKint64v; |
| break; |
| |
| case FLAGS.FLAGS_long: |
| if (n & 0x8000000000000000L) |
| result = TOK.TOKuns64v; |
| else |
| result = TOK.TOKint64v; |
| break; |
| |
| case FLAGS.FLAGS_unsigned | FLAGS.FLAGS_long: |
| case FLAGS.FLAGS_decimal | FLAGS.FLAGS_unsigned | FLAGS.FLAGS_long: |
| result = TOK.TOKuns64v; |
| break; |
| |
| default: |
| debug writefln("%x", flags); |
| assert(0); |
| } |
| t.uns64value = n; |
| return result; |
| } |
| |
| /************************************** |
| * Read in characters, converting them to real. |
| * |
| * Starting at p, copies the characters of a floating point literal into |
| * stringbuffer (an '_' met while scanning digits is dropped), converts the |
| * text with strtold() and stores the value in t.float80value. |
| * A leading 0x/0X makes it a hexadecimal literal, which must carry a |
| * 'p'/'P' exponent. |
| * Suffixes: 'f'/'F' gives TOKfloat32v, 'l'/'L' gives TOKfloat80v, none gives |
| * TOKfloat64v; a following 'i'/'I' selects the matching TOKimaginary*v. |
| * Params: |
| * t = token whose float80value receives the converted number |
| * Returns: |
| * the token kind chosen from the suffixes |
| * Bugs: |
| * Exponent overflow not detected. |
| * Too much requested precision is not detected. |
| */ |
| |
| TOK inreal(Token *t) |
| { |
|     int dblstate; // state of the scanner below |
|     uint c; |
|     char hex; // is this a hexadecimal-floating-constant? |
|     TOK result; |
| |
|     //printf("Lexer.inreal()\n"); |
|     stringbuffer.offset = 0; |
|     dblstate = 0; |
|     hex = 0; |
| Lnext: |
|     while (1) |
|     { |
|         // Get next char from input |
|         c = *p++; |
|         //printf("dblstate = %d, c = '%s'\n", dblstate, c); |
| |
|         // A state that cannot use c bumps dblstate and 'continue's, so the |
|         // very same character is examined again by the following state. |
|         while (1) |
|         { |
|             switch (dblstate) |
|             { |
|                 case 0: // opening state |
|                     if (c == '0') |
|                         dblstate = 9; |
|                     else if (c == '.') |
|                         dblstate = 3; |
|                     else |
|                         dblstate = 1; |
|                     break; |
| |
|                 case 9: // just saw a leading '0': is it 0x / 0X ? |
|                     dblstate = 1; |
|                     if (c == 'X' || c == 'x') |
|                     { |
|                         hex++; |
|                         break; |
|                     } |
|                     // not a hex prefix: fall through and treat c as a digit |
| |
|                 case 1: // digits to left of . |
|                 case 3: // digits to right of . |
|                 case 7: // continuing exponent digits |
|                     if (!isdigit(c) && !(hex && isxdigit(c))) |
|                     { |
|                         if (c == '_') |
|                             goto Lnext; // ignore embedded '_' (it is not stored) |
|                         dblstate++; // 1 -> 2, 3 -> 4, 7 -> 8 |
|                         continue; // re-examine c in the new state |
|                     } |
|                     break; |
| |
|                 case 2: // no more digits to left of . |
|                     if (c == '.') |
|                     { |
|                         dblstate++; |
|                         break; |
|                     } |
|                     // no '.': fall through and look for an exponent |
| |
|                 case 4: // no more digits to right of . |
|                     if ((c == 'E' || c == 'e') || hex && (c == 'P' || c == 'p')) |
|                     { |
|                         dblstate = 5; |
|                         hex = 0; // exponent is always decimal |
|                         break; |
|                     } |
|                     if (hex) |
|                         error("binary-exponent-part required"); |
|                     goto done; |
| |
|                 case 5: // looking immediately to right of E |
|                     dblstate++; |
|                     if (c == '-' || c == '+') |
|                         break; |
|                     // no sign: fall through, c must be the first exponent digit |
| |
|                 case 6: // 1st exponent digit expected |
|                     if (!isdigit(c)) |
|                         error("exponent expected"); |
|                     dblstate++; |
|                     break; |
| |
|                 case 8: // past end of exponent digits |
|                     goto done; |
|             } |
|             break; |
|         } |
| |
|         // c is a uint: force a one-byte write, exactly as the integer |
|         // scanner does, instead of relying on how OutBuffer treats a uint. |
|         stringbuffer.write(cast(char)c); |
|     } |
| done: |
|     p--; // un-read the character that ended the literal |
| |
|     stringbuffer.write(cast(char)0); // terminate string for the C conversion routines |
| |
|     // #if _WIN32 && __DMC__ |
|     char *save = __locale_decpoint; |
|     __locale_decpoint = "."; |
|     // #endif |
|     t.float80value = strtold(cast(char *)stringbuffer.data.ptr, null); |
| |
|     // errno is cleared after strtold(), so only the narrower conversions |
|     // below take part in the range check at the end. |
|     // NOTE(review): errno appears to be the module-level variable declared at |
|     // the top of this file, which the C routines would not update - confirm |
|     // whether the ERANGE test below can ever fire. |
|     errno = 0; |
|     switch (*p) |
|     { |
|         case 'F': |
|         case 'f': |
|             strtof(cast(char *)stringbuffer.data.ptr, null); // result unused: range check only |
|             result = TOK.TOKfloat32v; |
|             p++; |
|             break; |
| |
|         default: |
|             strtod(cast(char *)stringbuffer.data.ptr, null); // result unused: range check only |
|             result = TOK.TOKfloat64v; |
|             break; |
| |
|         case 'L': |
|         case 'l': |
|             result = TOK.TOKfloat80v; |
|             p++; |
|             break; |
|     } |
|     if (*p == 'i' || *p == 'I') |
|     { |
|         // imaginary suffix: switch to the imaginary token of the same width |
|         p++; |
|         switch (result) |
|         { |
|             case TOK.TOKfloat32v: |
|                 result = TOK.TOKimaginary32v; |
|                 break; |
| |
|             case TOK.TOKfloat64v: |
|                 result = TOK.TOKimaginary64v; |
|                 break; |
| |
|             case TOK.TOKfloat80v: |
|                 result = TOK.TOKimaginary80v; |
|                 break; |
|         } |
|     } |
|     // #if _WIN32 && __DMC__ |
|     __locale_decpoint = save; |
|     // #endif |
|     if (errno == ERANGE) |
|         error("number is not representable"); |
|     return result; |
| } |
| |
| |
| |
| |
| /********************************************* |
| * Do pragma. |
| * Currently, the only pragma supported is: |
| * #line linnum [filespec] |
| * |
| * Scans the identifier `line` and an integer token, then reads the rest of |
| * the line by hand: white space is skipped, a double-quoted filespec is |
| * collected, and `__FILE__` is consumed (its filespec assignment is disabled |
| * in this port). At the end of the line this.loc.linnum is set to |
| * linnum - 1 and, if a non-empty filespec was read, this.loc.filename is set |
| * to it. Anything else is reported through errorLoc() at the location that |
| * was current when Pragma() was entered. |
| */ |
| |
| void Pragma() |
| { |
|     Token tok; |
|     int linnum; |
| |
|     char[] filespec; |
|     Loc loc = this.loc; // location used for the error message |
| |
|     scan(&tok); |
| |
|     if (tok.value != TOK.TOKidentifier || tok.ident != Id.line) |
|         goto Lerr; |
| |
|     scan(&tok); |
|     if (tok.value == TOK.TOKint32v || tok.value == TOK.TOKint64v) |
|         linnum = tok.uns64value - 1; |
|     else |
|         goto Lerr; |
| |
|     while (1) |
|     { |
|         switch (*p) |
|         { |
|             case 0: |
|             case 0x1a: |
|             case '\n': |
|             Lnewline: |
|                 // End of the directive: commit the new position. |
|                 // p is left on the terminating character. |
|                 this.loc.linnum = linnum; |
|                 if (filespec.length) |
|                     this.loc.filename = filespec; |
|                 return; |
| |
|             case '\r': |
|                 p++; |
|                 if (*p != '\n') |
|                 { |
|                     // lone CR ends the line: back up so p stays on it |
|                     p--; |
|                     goto Lnewline; |
|                 } |
|                 continue; // CR LF: let the '\n' case finish the job |
| |
|             case ' ': |
|             case '\t': |
|             case '\v': |
|             case '\f': |
|                 p++; |
|                 continue; // skip white space |
| |
|             case '_': |
|                 if (mod && memcmp(p, cast(char *)"__FILE__", 8) == 0) |
|                 { |
|                     p += 8; |
|                     //! filespec = mem.strdup(loc.filename ? loc.filename : mod.ident.toChars()); |
|                     continue; |
|                 } |
|                 // Any other '_' cannot be part of a #line directive. Without |
|                 // this jump p would never advance and the loop would spin forever. |
|                 goto Lerr; |
| |
|             case '"': |
|                 if (filespec) |
|                     goto Lerr; // a filespec was already given |
|                 stringbuffer.offset = 0; |
|                 p++; |
|                 while (1) |
|                 { |
|                     uint c; |
|                     c = *p; |
|                     switch (c) |
|                     { |
|                         case '\n': |
|                         case '\r': |
|                         case 0: |
|                         case 0x1a: |
|                             goto Lerr; // unterminated filespec |
| |
|                         case '"': |
|                             stringbuffer.write(cast(char)0); |
|                             // filespec = mem.strdup((char *)stringbuffer.data); |
|                             filespec = stringbuffer.toString.dup; |
|                             // Keep the terminator in memory but not in the |
|                             // string itself, so the name does not end in '\0'. |
|                             if (filespec.length && filespec[filespec.length - 1] == 0) |
|                                 filespec = filespec[0 .. filespec.length - 1]; |
|                             p++; |
|                             break; |
| |
|                         default: |
|                             if (c & 0x80) |
|                             { |
|                                 uint u = decodeUTF(); |
|                                 if (u == PS || u == LS) |
|                                     goto Lerr; // Unicode line break inside the filespec |
|                             } |
|                             // c is a uint: force a one-byte write |
|                             stringbuffer.write(cast(char)c); |
|                             p++; |
|                             continue; |
|                     } |
|                     break; |
|                 } |
|                 continue; |
| |
|             default: |
|                 if (*p & 0x80) |
|                 { |
|                     uint u = decodeUTF(); |
|                     if (u == PS || u == LS) |
|                         goto Lnewline; // Unicode line break ends the directive |
|                 } |
|                 goto Lerr; |
|         } |
|     } |
| |
| Lerr: |
|     errorLoc(loc, "#line integer [\"filespec\"]\\n expected"); |
| } |
| |
| |
| |
| /*************************************************** |
| * Parse doc comment embedded between t.ptr and p. |
| * Remove trailing blanks and tabs from lines. |
| * Replace all newlines with \n. |
| * Remove leading comment character from each line. |
| * Decide if it's a lineComment or a blockComment. |
| * Append to previous one for this token. |
| * |
| * Params: |
| * t = token whose ptr marks the start of the comment; its lineComment |
| * or blockComment field receives the cleaned text |
| * lineComment = nonzero if the comment may be a line comment; it is |
| * stored in t.lineComment only when anyToken is also set, |
| * otherwise in t.blockComment |
| */ |
| |
| void getDocComment(Token *t, uint lineComment) |
| { |
|     auto OutBuffer buf = new OutBuffer; |
|     ubyte ct = t.ptr[2]; // comment character: '*', '+' or '/' |
|     ubyte *q = t.ptr + 3; // start of comment text |
|     int linestart = 0; // 1 while only blanks have been seen since the last newline |
| |
|     ubyte *qend = p; |
| |
|     // Block comments end with a two character closer that is not text |
|     if (ct == '*' || ct == '+') |
|         qend -= 2; |
| |
|     // Scan over initial row of ****'s or ++++'s or ////'s |
|     for (; q < qend; q++) |
|     { |
|         if (*q != ct) |
|             break; |
|     } |
| |
|     // Remove trailing row of ****'s or ++++'s |
|     if (ct != '/') |
|     { |
|         for (; q < qend; qend--) |
|         { |
|             if (qend[-1] != ct) |
|                 break; |
|         } |
|     } |
| |
|     for (; q < qend; q++) |
|     { |
|         ubyte c = *q; |
| |
|         switch (c) |
|         { |
|             case '*': |
|             case '+': |
|                 if (linestart && c == ct) |
|                 { |
|                     // First comment character on this line: drop it |
|                     linestart = 0; |
|                     // Trim preceding whitespace up to preceding \n |
|                     while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t')) |
|                         buf.offset--; |
|                     continue; |
|                 } |
|                 break; |
| |
|             case ' ': |
|             case '\t': |
|                 break; // blanks are copied and leave linestart alone |
| |
|             case '\r': |
|                 if (q[1] == '\n') |
|                     continue; // skip the \r |
|                 goto Lnewline; |
| |
|             default: |
|                 if (c == 226) |
|                 { |
|                     // If LS or PS |
|                     if (q[1] == 128 && |
|                         (q[2] == 168 || q[2] == 169)) |
|                     { |
|                         q += 2; |
|                         goto Lnewline; |
|                     } |
|                 } |
|                 linestart = 0; |
|                 break; |
| |
|             Lnewline: |
|                 c = '\n'; // replace all newlines with \n |
|                 // fall through |
| |
|             case '\n': |
|                 linestart = 1; |
| |
|                 // Trim trailing whitespace |
|                 while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t')) |
|                     buf.offset--; |
| |
|                 break; |
|         } |
|         buf.write(c); |
|     } |
| |
|     // Always end with a newline. Write '\n' itself rather than calling |
|     // writenl(), so the text keeps the "all newlines are \n" guarantee even |
|     // if writenl() emits a platform specific line ending. |
|     if (!buf.offset || buf.data[buf.offset - 1] != '\n') |
|         buf.write(cast(ubyte)'\n'); |
| |
|     //buf.write(cast(char)0); |
| |
|     // It's a line comment if the start of the doc comment comes |
|     // after other non-whitespace on the same line. |
|     char[] dc = (lineComment && anyToken) ? t.lineComment : t.blockComment; |
| |
|     // Combine with previous doc comment, if any |
|     if (dc.length) |
|         dc = combineComments(dc, buf.toString().dup); |
|     else |
|         dc = buf.toString().dup; |
| |
|     // writefln( dc ); |
| |
|     // Store the result back into the field it was read from |
|     if (lineComment && anyToken) |
|         t.lineComment = dc; |
|     else |
|         t.blockComment = dc; |
| } |
| } |
| |
| // Character classification table, indexed by character code. |
| // Each entry is a combination of the CM* bits declared below. |
| static ubyte[256] cmtable; |
| |
| const int CMoctal = 1 << 0; // set for '0'..'7' |
| const int CMhex = 1 << 1; // set for hexadecimal digits |
| const int CMidchar = 1 << 2; // set for alphanumerics and '_' |
| |
| // Nonzero when the CMoctal bit is set for c in cmtable |
| ubyte isoctal(ubyte c) |
| { |
|     ubyte bits = cmtable[c]; |
|     return bits & CMoctal; |
| } |
| // Nonzero when the CMhex bit is set for c in cmtable |
| ubyte ishex(ubyte c) |
| { |
|     ubyte bits = cmtable[c]; |
|     return bits & CMhex; |
| } |
| // Nonzero when the CMidchar bit is set for c in cmtable |
| ubyte isidchar(ubyte c) |
| { |
|     ubyte bits = cmtable[c]; |
|     return bits & CMidchar; |
| } |
| |
| // Fill cmtable: for every character code, OR in the CM* bits that apply to it. |
| static void cmtable_init() |
| { |
|     uint code = 0; |
|     while (code < cmtable.length) |
|     { |
|         ubyte bits = 0; |
| |
|         // octal digits |
|         if (code >= '0' && code <= '7') |
|             bits |= CMoctal; |
| |
|         // decimal digits plus a-f / A-F |
|         if (isdigit(code) || (code >= 'a' && code <= 'f') || (code >= 'A' && code <= 'F')) |
|             bits |= CMhex; |
| |
|         // characters allowed inside an identifier |
|         if (isalnum(code) || code == '_') |
|             bits |= CMidchar; |
| |
|         cmtable[code] |= bits; |
|         code++; |
|     } |
| } |
| |
| |
| /+ |
| struct StringValue |
| { |
| union |
| { |
| int intvalue; |
| void *ptrvalue; |
| dchar *string; |
| } |
| |
| char[] lstring; |
| } |
| #define CASE_BASIC_TYPES |
| case TOKwchar: case TOKdchar: |
| case TOKbit: case TOKbool: case TOKchar: |
| case TOKint8: case TOKuns8: |
| case TOKint16: case TOKuns16: |
| case TOKint32: case TOKuns32: |
| case TOKint64: case TOKuns64: |
| case TOKfloat32: case TOKfloat64: case TOKfloat80: |
| case TOKimaginary32: case TOKimaginary64: case TOKimaginary80: |
| case TOKcomplex32: case TOKcomplex64: case TOKcomplex80: |
| case TOKvoid: |
| |
| #define CASE_BASIC_TYPES_X(t) \ |
| case TOKvoid: t = Type::tvoid; goto LabelX; \ |
| case TOKint8: t = Type::tint8; goto LabelX; \ |
| case TOKuns8: t = Type::tuns8; goto LabelX; \ |
| case TOKint16: t = Type::tint16; goto LabelX; \ |
| case TOKuns16: t = Type::tuns16; goto LabelX; \ |
| case TOKint32: t = Type::tint32; goto LabelX; \ |
| case TOKuns32: t = Type::tuns32; goto LabelX; \ |
| case TOKint64: t = Type::tint64; goto LabelX; \ |
| case TOKuns64: t = Type::tuns64; goto LabelX; \ |
| case TOKfloat32: t = Type::tfloat32; goto LabelX; \ |
| case TOKfloat64: t = Type::tfloat64; goto LabelX; \ |
| case TOKfloat80: t = Type::tfloat80; goto LabelX; \ |
| case TOKimaginary32: t = Type::timaginary32; goto LabelX; \ |
| case TOKimaginary64: t = Type::timaginary64; goto LabelX; \ |
| case TOKimaginary80: t = Type::timaginary80; goto LabelX; \ |
| case TOKcomplex32: t = Type::tcomplex32; goto LabelX; \ |
| case TOKcomplex64: t = Type::tcomplex64; goto LabelX; \ |
| case TOKcomplex80: t = Type::tcomplex80; goto LabelX; \ |
| case TOKbit: t = Type::tbit; goto LabelX; \ |
| case TOKchar: t = Type::tchar; goto LabelX; \ |
| case TOKwchar: t = Type::twchar; goto LabelX; \ |
| case TOKdchar: t = Type::tdchar; goto LabelX; \ |
| LabelX |
| +/ |