| /* |
| * Lexical analysis and token construction. |
| */ |
| |
| "use strict"; |
| |
| var _ = require("underscore"); |
| var events = require("events"); |
| var reg = require("./reg.js"); |
| var state = require("./state.js").state; |
| |
| // Some of these token types are from JavaScript Parser API |
| // while others are specific to JSHint parser. |
| // JS Parser API: https://developer.mozilla.org/en-US/docs/SpiderMonkey/Parser_API |
| |
// Numeric tags placed in the `type` field of every raw token returned by
// the Lexer's scan* methods.
var Token = {
  Identifier: 1,      // produced by scanIdentifier
  Punctuator: 2,      // produced by scanPunctuator
  NumericLiteral: 3,  // produced by scanNumericLiteral
  StringLiteral: 4,   // produced by scanStringLiteral
  Comment: 5,         // produced by scanComments
  Keyword: 6,         // produced by scanKeyword
  NullLiteral: 7,     // `null`, produced by scanIdentifier
  BooleanLiteral: 8,  // `true` / `false`, produced by scanIdentifier
  RegExp: 9           // produced by scanRegExp
};
| |
| // This is auto generated from the unicode tables. |
| // The tables are at: |
| // http://www.fileformat.info/info/unicode/category/Lu/list.htm |
| // http://www.fileformat.info/info/unicode/category/Ll/list.htm |
| // http://www.fileformat.info/info/unicode/category/Lt/list.htm |
| // http://www.fileformat.info/info/unicode/category/Lm/list.htm |
| // http://www.fileformat.info/info/unicode/category/Lo/list.htm |
| // http://www.fileformat.info/info/unicode/category/Nl/list.htm |
| |
// Flat, ascending list of inclusive [first, last] code point pairs; a code
// point is a letter when it falls inside any pair (see isUnicodeLetter in
// scanIdentifier).
//
// The Unicode data files describe the big ideograph/syllable blocks with a
// "First" and a "Last" entry only. Those blocks are stored here as real
// ranges (e.g. 13312-19893) rather than as two single-code-point pairs, so
// that every character inside them is recognized:
//   13312-19893   CJK Ideograph Extension A
//   19968-40907   CJK Unified Ideographs
//   44032-55203   Hangul Syllables
//   131072-173782 CJK Ideograph Extension B
//   173824-177972 CJK Ideograph Extension C
//   177984-178205 CJK Ideograph Extension D
var unicodeLetterTable = [
  170, 170, 181, 181, 186, 186, 192, 214,
  216, 246, 248, 705, 710, 721, 736, 740, 748, 748, 750, 750,
  880, 884, 886, 887, 890, 893, 902, 902, 904, 906, 908, 908,
  910, 929, 931, 1013, 1015, 1153, 1162, 1319, 1329, 1366,
  1369, 1369, 1377, 1415, 1488, 1514, 1520, 1522, 1568, 1610,
  1646, 1647, 1649, 1747, 1749, 1749, 1765, 1766, 1774, 1775,
  1786, 1788, 1791, 1791, 1808, 1808, 1810, 1839, 1869, 1957,
  1969, 1969, 1994, 2026, 2036, 2037, 2042, 2042, 2048, 2069,
  2074, 2074, 2084, 2084, 2088, 2088, 2112, 2136, 2308, 2361,
  2365, 2365, 2384, 2384, 2392, 2401, 2417, 2423, 2425, 2431,
  2437, 2444, 2447, 2448, 2451, 2472, 2474, 2480, 2482, 2482,
  2486, 2489, 2493, 2493, 2510, 2510, 2524, 2525, 2527, 2529,
  2544, 2545, 2565, 2570, 2575, 2576, 2579, 2600, 2602, 2608,
  2610, 2611, 2613, 2614, 2616, 2617, 2649, 2652, 2654, 2654,
  2674, 2676, 2693, 2701, 2703, 2705, 2707, 2728, 2730, 2736,
  2738, 2739, 2741, 2745, 2749, 2749, 2768, 2768, 2784, 2785,
  2821, 2828, 2831, 2832, 2835, 2856, 2858, 2864, 2866, 2867,
  2869, 2873, 2877, 2877, 2908, 2909, 2911, 2913, 2929, 2929,
  2947, 2947, 2949, 2954, 2958, 2960, 2962, 2965, 2969, 2970,
  2972, 2972, 2974, 2975, 2979, 2980, 2984, 2986, 2990, 3001,
  3024, 3024, 3077, 3084, 3086, 3088, 3090, 3112, 3114, 3123,
  3125, 3129, 3133, 3133, 3160, 3161, 3168, 3169, 3205, 3212,
  3214, 3216, 3218, 3240, 3242, 3251, 3253, 3257, 3261, 3261,
  3294, 3294, 3296, 3297, 3313, 3314, 3333, 3340, 3342, 3344,
  3346, 3386, 3389, 3389, 3406, 3406, 3424, 3425, 3450, 3455,
  3461, 3478, 3482, 3505, 3507, 3515, 3517, 3517, 3520, 3526,
  3585, 3632, 3634, 3635, 3648, 3654, 3713, 3714, 3716, 3716,
  3719, 3720, 3722, 3722, 3725, 3725, 3732, 3735, 3737, 3743,
  3745, 3747, 3749, 3749, 3751, 3751, 3754, 3755, 3757, 3760,
  3762, 3763, 3773, 3773, 3776, 3780, 3782, 3782, 3804, 3805,
  3840, 3840, 3904, 3911, 3913, 3948, 3976, 3980, 4096, 4138,
  4159, 4159, 4176, 4181, 4186, 4189, 4193, 4193, 4197, 4198,
  4206, 4208, 4213, 4225, 4238, 4238, 4256, 4293, 4304, 4346,
  4348, 4348, 4352, 4680, 4682, 4685, 4688, 4694, 4696, 4696,
  4698, 4701, 4704, 4744, 4746, 4749, 4752, 4784, 4786, 4789,
  4792, 4798, 4800, 4800, 4802, 4805, 4808, 4822, 4824, 4880,
  4882, 4885, 4888, 4954, 4992, 5007, 5024, 5108, 5121, 5740,
  5743, 5759, 5761, 5786, 5792, 5866, 5870, 5872, 5888, 5900,
  5902, 5905, 5920, 5937, 5952, 5969, 5984, 5996, 5998, 6000,
  6016, 6067, 6103, 6103, 6108, 6108, 6176, 6263, 6272, 6312,
  6314, 6314, 6320, 6389, 6400, 6428, 6480, 6509, 6512, 6516,
  6528, 6571, 6593, 6599, 6656, 6678, 6688, 6740, 6823, 6823,
  6917, 6963, 6981, 6987, 7043, 7072, 7086, 7087, 7104, 7141,
  7168, 7203, 7245, 7247, 7258, 7293, 7401, 7404, 7406, 7409,
  7424, 7615, 7680, 7957, 7960, 7965, 7968, 8005, 8008, 8013,
  8016, 8023, 8025, 8025, 8027, 8027, 8029, 8029, 8031, 8061,
  8064, 8116, 8118, 8124, 8126, 8126, 8130, 8132, 8134, 8140,
  8144, 8147, 8150, 8155, 8160, 8172, 8178, 8180, 8182, 8188,
  8305, 8305, 8319, 8319, 8336, 8348, 8450, 8450, 8455, 8455,
  8458, 8467, 8469, 8469, 8473, 8477, 8484, 8484, 8486, 8486,
  8488, 8488, 8490, 8493, 8495, 8505, 8508, 8511, 8517, 8521,
  8526, 8526, 8544, 8584, 11264, 11310, 11312, 11358,
  11360, 11492, 11499, 11502, 11520, 11557, 11568, 11621,
  11631, 11631, 11648, 11670, 11680, 11686, 11688, 11694,
  11696, 11702, 11704, 11710, 11712, 11718, 11720, 11726,
  11728, 11734, 11736, 11742, 11823, 11823, 12293, 12295,
  12321, 12329, 12337, 12341, 12344, 12348, 12353, 12438,
  12445, 12447, 12449, 12538, 12540, 12543, 12549, 12589,
  12593, 12686, 12704, 12730, 12784, 12799, 13312, 19893,
  19968, 40907, 40960, 42124,
  42192, 42237, 42240, 42508, 42512, 42527, 42538, 42539,
  42560, 42606, 42623, 42647, 42656, 42735, 42775, 42783,
  42786, 42888, 42891, 42894, 42896, 42897, 42912, 42921,
  43002, 43009, 43011, 43013, 43015, 43018, 43020, 43042,
  43072, 43123, 43138, 43187, 43250, 43255, 43259, 43259,
  43274, 43301, 43312, 43334, 43360, 43388, 43396, 43442,
  43471, 43471, 43520, 43560, 43584, 43586, 43588, 43595,
  43616, 43638, 43642, 43642, 43648, 43695, 43697, 43697,
  43701, 43702, 43705, 43709, 43712, 43712, 43714, 43714,
  43739, 43741, 43777, 43782, 43785, 43790, 43793, 43798,
  43808, 43814, 43816, 43822, 43968, 44002, 44032, 55203,
  55216, 55238, 55243, 55291, 63744, 64045,
  64048, 64109, 64112, 64217, 64256, 64262, 64275, 64279,
  64285, 64285, 64287, 64296, 64298, 64310, 64312, 64316,
  64318, 64318, 64320, 64321, 64323, 64324, 64326, 64433,
  64467, 64829, 64848, 64911, 64914, 64967, 65008, 65019,
  65136, 65140, 65142, 65276, 65313, 65338, 65345, 65370,
  65382, 65470, 65474, 65479, 65482, 65487, 65490, 65495,
  65498, 65500, 65536, 65547, 65549, 65574, 65576, 65594,
  65596, 65597, 65599, 65613, 65616, 65629, 65664, 65786,
  65856, 65908, 66176, 66204, 66208, 66256, 66304, 66334,
  66352, 66378, 66432, 66461, 66464, 66499, 66504, 66511,
  66513, 66517, 66560, 66717, 67584, 67589, 67592, 67592,
  67594, 67637, 67639, 67640, 67644, 67644, 67647, 67669,
  67840, 67861, 67872, 67897, 68096, 68096, 68112, 68115,
  68117, 68119, 68121, 68147, 68192, 68220, 68352, 68405,
  68416, 68437, 68448, 68466, 68608, 68680, 69635, 69687,
  69763, 69807, 73728, 74606, 74752, 74850, 77824, 78894,
  92160, 92728, 110592, 110593, 119808, 119892, 119894, 119964,
  119966, 119967, 119970, 119970, 119973, 119974, 119977, 119980,
  119982, 119993, 119995, 119995, 119997, 120003, 120005, 120069,
  120071, 120074, 120077, 120084, 120086, 120092, 120094, 120121,
  120123, 120126, 120128, 120132, 120134, 120134, 120138, 120144,
  120146, 120485, 120488, 120512, 120514, 120538, 120540, 120570,
  120572, 120596, 120598, 120628, 120630, 120654, 120656, 120686,
  120688, 120712, 120714, 120744, 120746, 120770, 120772, 120779,
  131072, 173782, 173824, 177972,
  177984, 178205, 194560, 195101
];
| |
// ASCII lookup tables indexed by character code (0-127). Each entry is a
// boolean telling whether that character may start an identifier
// (identifierStartTable) or appear after the first character
// (identifierPartTable).
var identifierStartTable = [];
var identifierPartTable = [];

for (var i = 0; i < 128; i += 1) {
  identifierStartTable[i] =
    i === 36 ||               // $
    i === 95 ||               // _
    (i >= 65 && i <= 90) ||   // A-Z
    (i >= 97 && i <= 122);    // a-z

  // Everything that can start an identifier, plus the digits 0-9.
  identifierPartTable[i] = identifierStartTable[i] || (i >= 48 && i <= 57);
}
| |
| /* |
| * Lexer for JSHint. |
| * |
| * This object does a char-by-char scan of the provided source code |
| * and produces a sequence of tokens. |
| * |
| * var lex = new Lexer("var i = 0;"); |
| * lex.start(); |
| * lex.token(); // returns the next token |
| * |
| * You have to use the token() method to move the lexer forward |
| * but you don't have to use its return value to get tokens. In addition |
| * to token() method returning the next token, the Lexer object also |
| * emits events. |
| * |
| * lex.on("Identifier", function (data) { |
| * if (data.name.indexOf("_") >= 0) { |
| * // Produce a warning. |
| * } |
| * }); |
| * |
| * Note that the token() method returns tokens in a JSLint-compatible |
| * format while the event emitter uses a slightly modified version of |
| * Mozilla's JavaScript Parser API. Eventually, we will move away from |
| * JSLint format. |
| */ |
function Lexer(source) {
  // `source` is either the program text or an array of lines. Text is
  // normalized to "\n" line endings and split; an array is used as is
  // (and is therefore modified in place by the shebang handling below).
  var lines = typeof source === "string" ?
    source
      .replace(/\r\n/g, "\n")
      .replace(/\r/g, "\n")
      .split("\n") :
    source;

  // A leading shebang line (#!), as used by Node scripts, is blanked out
  // so that it never reaches the scanner.
  var firstLine = lines[0];
  if (firstLine && firstLine.substr(0, 2) === "#!") {
    lines[0] = "";
  }

  this.emitter = new events.EventEmitter();
  this.source = source;
  this.lines = lines;
  this.prereg = true;

  // Scanner position. `input` holds the not-yet-consumed remainder of the
  // current line.
  this.line = 0;
  this.char = 1;
  this.from = 1;
  this.input = "";

  // Append one space to state.tab per configured indentation level.
  var added = 0;
  while (added < state.option.indent) {
    state.tab += " ";
    added += 1;
  }
}
| |
| Lexer.prototype = { |
| _lines: [], |
| |
| get lines() { |
| this._lines = state.lines; |
| return this._lines; |
| }, |
| |
| set lines(val) { |
| this._lines = val; |
| state.lines = this._lines; |
| }, |
| |
| /* |
| * Return the character i positions ahead without actually moving the |
| * char pointer. |
| */ |
| peek: function (i) { |
| return this.input.charAt(i || 0); |
| }, |
| |
| /* |
| * Move the char pointer forward i times. |
| */ |
| skip: function (i) { |
| i = i || 1; |
| this.char += i; |
| this.input = this.input.slice(i); |
| }, |
| |
| /* |
| * Subscribe to a token event. The API for this method is similar to |
| * Underscore.js, i.e. you can subscribe to multiple events with |
| * one call: |
| * |
| * lex.on("Identifier Number", function (data) { |
| * // ... |
| * }); |
| */ |
| on: function (names, listener) { |
| names.split(" ").forEach(function (name) { |
| this.emitter.on(name, listener); |
| }.bind(this)); |
| }, |
| |
| /* |
| * Trigger a token event. All arguments will be passed to each |
| * listener. |
| */ |
| trigger: function () { |
| this.emitter.emit.apply(this.emitter, Array.prototype.slice.call(arguments)); |
| }, |
| |
| /* |
| * Extract a punctuator out of the next sequence of characters |
| * or return 'null' if it's not possible. |
| * |
| * This method's implementation was heavily influenced by the |
| * scanPunctuator function in the Esprima parser's source code. |
| */ |
| scanPunctuator: function () { |
| var ch1 = this.peek(); |
| var ch2, ch3, ch4; |
| |
| switch (ch1) { |
| // Most common single-character punctuators |
| case ".": |
| if ((/^[0-9]$/).test(this.peek(1))) { |
| return null; |
| } |
| |
| /* falls through */ |
| case "(": |
| case ")": |
| case ";": |
| case ",": |
| case "{": |
| case "}": |
| case "[": |
| case "]": |
| case ":": |
| case "~": |
| case "?": |
| return { |
| type: Token.Punctuator, |
| value: ch1 |
| }; |
| |
| // A pound sign (for Node shebangs) |
| case "#": |
| return { |
| type: Token.Punctuator, |
| value: ch1 |
| }; |
| |
| // We're at the end of input |
| case "": |
| return null; |
| } |
| |
| // Peek more characters |
| |
| ch2 = this.peek(1); |
| ch3 = this.peek(2); |
| ch4 = this.peek(3); |
| |
| // 4-character punctuator: >>>= |
| |
| if (ch1 === ">" && ch2 === ">" && ch3 === ">" && ch4 === "=") { |
| return { |
| type: Token.Punctuator, |
| value: ">>>=" |
| }; |
| } |
| |
| // 3-character punctuators: === !== >>> <<= >>= |
| |
| if (ch1 === "=" && ch2 === "=" && ch3 === "=") { |
| return { |
| type: Token.Punctuator, |
| value: "===" |
| }; |
| } |
| |
| if (ch1 === "!" && ch2 === "=" && ch3 === "=") { |
| return { |
| type: Token.Punctuator, |
| value: "!==" |
| }; |
| } |
| |
| if (ch1 === ">" && ch2 === ">" && ch3 === ">") { |
| return { |
| type: Token.Punctuator, |
| value: ">>>" |
| }; |
| } |
| |
| if (ch1 === "<" && ch2 === "<" && ch3 === "=") { |
| return { |
| type: Token.Punctuator, |
| value: "<<=" |
| }; |
| } |
| |
| if (ch1 === ">" && ch2 === ">" && ch3 === "=") { |
| return { |
| type: Token.Punctuator, |
| value: "<<=" |
| }; |
| } |
| |
| // 2-character punctuators: <= >= == != ++ -- << >> && || |
| // += -= *= %= &= |= ^= (but not /=, see below) |
| if (ch1 === ch2 && ("+-<>&|".indexOf(ch1) >= 0)) { |
| return { |
| type: Token.Punctuator, |
| value: ch1 + ch2 |
| }; |
| } |
| |
| if ("<>=!+-*%&|^".indexOf(ch1) >= 0) { |
| if (ch2 === "=") { |
| return { |
| type: Token.Punctuator, |
| value: ch1 + ch2 |
| }; |
| } |
| |
| return { |
| type: Token.Punctuator, |
| value: ch1 |
| }; |
| } |
| |
| // Special case: /=. We need to make sure that this is an |
| // operator and not a regular expression. |
| |
| if (ch1 === "/") { |
| if (ch2 === "=" && /\/=(?!(\S*\/[gim]?))/.test(this.input)) { |
| // /= is not a part of a regular expression, return it as a |
| // punctuator. |
| return { |
| type: Token.Punctuator, |
| value: "/=" |
| }; |
| } |
| |
| return { |
| type: Token.Punctuator, |
| value: "/" |
| }; |
| } |
| |
| return null; |
| }, |
| |
| /* |
| * Extract a comment out of the next sequence of characters and/or |
| * lines or return 'null' if it's not possible. Since comments can |
| * span across multiple lines this method has to move the char |
| * pointer. |
| * |
| * In addition to normal JavaScript comments (// and /*) this method |
| * also recognizes JSHint- and JSLint-specific comments such as |
| * /*jshint, /*jslint, /*globals and so on. |
| */ |
| scanComments: function () { |
| var ch1 = this.peek(); |
| var ch2 = this.peek(1); |
| var rest = this.input.substr(2); |
| var startLine = this.line; |
| var startChar = this.char; |
| |
| // Create a comment token object and make sure it |
| // has all the data JSHint needs to work with special |
| // comments. |
| |
| function commentToken(label, body, opt) { |
| var special = ["jshint", "jslint", "members", "member", "globals", "global", "exported"]; |
| var isSpecial = false; |
| var value = label + body; |
| var commentType = "plain"; |
| opt = opt || {}; |
| |
| if (opt.isMultiline) { |
| value += "*/"; |
| } |
| |
| special.forEach(function (str) { |
| if (isSpecial) { |
| return; |
| } |
| |
| // Don't recognize any special comments other than jshint for single-line |
| // comments. This introduced many problems with legit comments. |
| if (label === "//" && str !== "jshint") { |
| return; |
| } |
| |
| if (body.substr(0, str.length) === str) { |
| isSpecial = true; |
| label = label + str; |
| body = body.substr(str.length); |
| } |
| |
| if (!isSpecial && body.charAt(0) === " " && body.substr(1, str.length) === str) { |
| isSpecial = true; |
| label = label + " " + str; |
| body = body.substr(str.length + 1); |
| } |
| |
| if (!isSpecial) { |
| return; |
| } |
| |
| switch (str) { |
| case "member": |
| commentType = "members"; |
| break; |
| case "global": |
| commentType = "globals"; |
| break; |
| default: |
| commentType = str; |
| } |
| }); |
| |
| return { |
| type: Token.Comment, |
| commentType: commentType, |
| value: value, |
| body: body, |
| isSpecial: isSpecial, |
| isMultiline: opt.isMultiline || false, |
| isMalformed: opt.isMalformed || false |
| }; |
| } |
| |
| // End of unbegun comment. Raise an error and skip that input. |
| if (ch1 === "*" && ch2 === "/") { |
| this.trigger("error", { |
| code: "E018", |
| line: startLine, |
| character: startChar |
| }); |
| |
| this.skip(2); |
| return null; |
| } |
| |
| // Comments must start either with // or /* |
| if (ch1 !== "/" || (ch2 !== "*" && ch2 !== "/")) { |
| return null; |
| } |
| |
| // One-line comment |
| if (ch2 === "/") { |
| this.skip(this.input.length); // Skip to the EOL. |
| return commentToken("//", rest); |
| } |
| |
| var body = ""; |
| |
| /* Multi-line comment */ |
| if (ch2 === "*") { |
| this.skip(2); |
| |
| while (this.peek() !== "*" || this.peek(1) !== "/") { |
| if (this.peek() === "") { // End of Line |
| body += "\n"; |
| |
| // If we hit EOF and our comment is still unclosed, |
| // trigger an error and end the comment implicitly. |
| if (!this.nextLine()) { |
| this.trigger("error", { |
| code: "E017", |
| line: startLine, |
| character: startChar |
| }); |
| |
| return commentToken("/*", body, { |
| isMultiline: true, |
| isMalformed: true |
| }); |
| } |
| } else { |
| body += this.peek(); |
| this.skip(); |
| } |
| } |
| |
| this.skip(2); |
| return commentToken("/*", body, { isMultiline: true }); |
| } |
| }, |
| |
| /* |
| * Extract a keyword out of the next sequence of characters or |
| * return 'null' if it's not possible. |
| */ |
| scanKeyword: function () { |
| var result = /^[a-zA-Z_$][a-zA-Z0-9_$]*/.exec(this.input); |
| var keywords = [ |
| "if", "in", "do", "var", "for", "new", |
| "try", "let", "this", "else", "case", |
| "void", "with", "enum", "while", "break", |
| "catch", "throw", "const", "yield", "class", |
| "super", "return", "typeof", "delete", |
| "switch", "export", "import", "default", |
| "finally", "extends", "function", "continue", |
| "debugger", "instanceof" |
| ]; |
| |
| if (result && keywords.indexOf(result[0]) >= 0) { |
| return { |
| type: Token.Keyword, |
| value: result[0] |
| }; |
| } |
| |
| return null; |
| }, |
| |
| /* |
| * Extract a JavaScript identifier out of the next sequence of |
| * characters or return 'null' if it's not possible. In addition |
| * to Identifier, this method can also produce BooleanLiteral |
| * (true/false) and NullLiteral (null). |
| */ |
| scanIdentifier: function () { |
| var id = ""; |
| var index = 0; |
| var type, char; |
| |
| // Detects any character in the Unicode categories "Uppercase |
| // letter (Lu)", "Lowercase letter (Ll)", "Titlecase letter |
| // (Lt)", "Modifier letter (Lm)", "Other letter (Lo)", or |
| // "Letter number (Nl)". |
| // |
| // Both approach and unicodeLetterTable were borrowed from |
| // Google's Traceur. |
| |
| function isUnicodeLetter(code) { |
| for (var i = 0; i < unicodeLetterTable.length;) { |
| if (code < unicodeLetterTable[i++]) { |
| return false; |
| } |
| |
| if (code <= unicodeLetterTable[i++]) { |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| function isHexDigit(str) { |
| return (/^[0-9a-fA-F]$/).test(str); |
| } |
| |
| var readUnicodeEscapeSequence = function () { |
| /*jshint validthis:true */ |
| index += 1; |
| |
| if (this.peek(index) !== "u") { |
| return null; |
| } |
| |
| var ch1 = this.peek(index + 1); |
| var ch2 = this.peek(index + 2); |
| var ch3 = this.peek(index + 3); |
| var ch4 = this.peek(index + 4); |
| var code; |
| |
| if (isHexDigit(ch1) && isHexDigit(ch2) && isHexDigit(ch3) && isHexDigit(ch4)) { |
| code = parseInt(ch1 + ch2 + ch3 + ch4, 16); |
| |
| if (isUnicodeLetter(code)) { |
| index += 5; |
| return "\\u" + ch1 + ch2 + ch3 + ch4; |
| } |
| |
| return null; |
| } |
| |
| return null; |
| }.bind(this); |
| |
| var getIdentifierStart = function () { |
| /*jshint validthis:true */ |
| var chr = this.peek(index); |
| var code = chr.charCodeAt(0); |
| |
| if (code === 92) { |
| return readUnicodeEscapeSequence(); |
| } |
| |
| if (code < 128) { |
| if (identifierStartTable[code]) { |
| index += 1; |
| return chr; |
| } |
| |
| return null; |
| } |
| |
| if (isUnicodeLetter(code)) { |
| index += 1; |
| return chr; |
| } |
| |
| return null; |
| }.bind(this); |
| |
| var getIdentifierPart = function () { |
| /*jshint validthis:true */ |
| var chr = this.peek(index); |
| var code = chr.charCodeAt(0); |
| |
| if (code === 92) { |
| return readUnicodeEscapeSequence(); |
| } |
| |
| if (code < 128) { |
| if (identifierPartTable[code]) { |
| index += 1; |
| return chr; |
| } |
| |
| return null; |
| } |
| |
| if (isUnicodeLetter(code)) { |
| index += 1; |
| return chr; |
| } |
| |
| return null; |
| }.bind(this); |
| |
| char = getIdentifierStart(); |
| if (char === null) { |
| return null; |
| } |
| |
| id = char; |
| for (;;) { |
| char = getIdentifierPart(); |
| |
| if (char === null) { |
| break; |
| } |
| |
| id += char; |
| } |
| |
| switch (id) { |
| case "true": |
| case "false": |
| type = Token.BooleanLiteral; |
| break; |
| case "null": |
| type = Token.NullLiteral; |
| break; |
| default: |
| type = Token.Identifier; |
| } |
| |
| return { |
| type: type, |
| value: id |
| }; |
| }, |
| |
| /* |
| * Extract a numeric literal out of the next sequence of |
| * characters or return 'null' if it's not possible. This method |
| * supports all numeric literals described in section 7.8.3 |
| * of the EcmaScript 5 specification. |
| * |
| * This method's implementation was heavily influenced by the |
| * scanNumericLiteral function in the Esprima parser's source code. |
| */ |
| scanNumericLiteral: function () { |
| var index = 0; |
| var value = ""; |
| var length = this.input.length; |
| var char = this.peek(index); |
| var bad; |
| |
| function isDecimalDigit(str) { |
| return (/^[0-9]$/).test(str); |
| } |
| |
| function isOctalDigit(str) { |
| return (/^[0-7]$/).test(str); |
| } |
| |
| function isHexDigit(str) { |
| return (/^[0-9a-fA-F]$/).test(str); |
| } |
| |
| function isIdentifierStart(ch) { |
| return (ch === "$") || (ch === "_") || (ch === "\\") || |
| (ch >= "a" && ch <= "z") || (ch >= "A" && ch <= "Z"); |
| } |
| |
| // Numbers must start either with a decimal digit or a point. |
| |
| if (char !== "." && !isDecimalDigit(char)) { |
| return null; |
| } |
| |
| if (char !== ".") { |
| value = this.peek(index); |
| index += 1; |
| char = this.peek(index); |
| |
| if (value === "0") { |
| // Base-16 numbers. |
| if (char === "x" || char === "X") { |
| index += 1; |
| value += char; |
| |
| while (index < length) { |
| char = this.peek(index); |
| if (!isHexDigit(char)) { |
| break; |
| } |
| value += char; |
| index += 1; |
| } |
| |
| if (value.length <= 2) { // 0x |
| return { |
| type: Token.NumericLiteral, |
| value: value, |
| isMalformed: true |
| }; |
| } |
| |
| if (index < length) { |
| char = this.peek(index); |
| if (isIdentifierStart(char)) { |
| return null; |
| } |
| } |
| |
| return { |
| type: Token.NumericLiteral, |
| value: value, |
| base: 16, |
| isMalformed: false |
| }; |
| } |
| |
| // Base-8 numbers. |
| if (isOctalDigit(char)) { |
| index += 1; |
| value += char; |
| bad = false; |
| |
| while (index < length) { |
| char = this.peek(index); |
| |
| // Numbers like '019' (note the 9) are not valid octals |
| // but we still parse them and mark as malformed. |
| |
| if (isDecimalDigit(char)) { |
| bad = true; |
| } else if (!isOctalDigit(char)) { |
| break; |
| } |
| value += char; |
| index += 1; |
| } |
| |
| if (index < length) { |
| char = this.peek(index); |
| if (isIdentifierStart(char)) { |
| return null; |
| } |
| } |
| |
| return { |
| type: Token.NumericLiteral, |
| value: value, |
| base: 8, |
| isMalformed: false |
| }; |
| } |
| |
| // Decimal numbers that start with '0' such as '09' are illegal |
| // but we still parse them and return as malformed. |
| |
| if (isDecimalDigit(char)) { |
| index += 1; |
| value += char; |
| } |
| } |
| |
| while (index < length) { |
| char = this.peek(index); |
| if (!isDecimalDigit(char)) { |
| break; |
| } |
| value += char; |
| index += 1; |
| } |
| } |
| |
| // Decimal digits. |
| |
| if (char === ".") { |
| value += char; |
| index += 1; |
| |
| while (index < length) { |
| char = this.peek(index); |
| if (!isDecimalDigit(char)) { |
| break; |
| } |
| value += char; |
| index += 1; |
| } |
| } |
| |
| // Exponent part. |
| |
| if (char === "e" || char === "E") { |
| value += char; |
| index += 1; |
| char = this.peek(index); |
| |
| if (char === "+" || char === "-") { |
| value += this.peek(index); |
| index += 1; |
| } |
| |
| char = this.peek(index); |
| if (isDecimalDigit(char)) { |
| value += char; |
| index += 1; |
| |
| while (index < length) { |
| char = this.peek(index); |
| if (!isDecimalDigit(char)) { |
| break; |
| } |
| value += char; |
| index += 1; |
| } |
| } else { |
| return null; |
| } |
| } |
| |
| if (index < length) { |
| char = this.peek(index); |
| if (isIdentifierStart(char)) { |
| return null; |
| } |
| } |
| |
| return { |
| type: Token.NumericLiteral, |
| value: value, |
| base: 10, |
| isMalformed: !isFinite(value) |
| }; |
| }, |
| |
| /* |
| * Extract a string out of the next sequence of characters and/or |
| * lines or return 'null' if it's not possible. Since strings can |
| * span across multiple lines this method has to move the char |
| * pointer. |
| * |
| * This method recognizes pseudo-multiline JavaScript strings: |
| * |
| * var str = "hello\ |
| * world"; |
| */ |
| scanStringLiteral: function () { |
| var quote = this.peek(); |
| |
| // String must start with a quote. |
| if (quote !== "\"" && quote !== "'") { |
| return null; |
| } |
| |
| // In JSON strings must always use double quotes. |
| if (state.jsonMode && quote !== "\"") { |
| this.trigger("warning", { |
| code: "W108", |
| line: this.line, |
| character: this.char // +1? |
| }); |
| } |
| |
| var value = ""; |
| var startLine = this.line; |
| var startChar = this.char; |
| var allowNewLine = false; |
| |
| this.skip(); |
| |
| while (this.peek() !== quote) { |
| while (this.peek() === "") { // End Of Line |
| |
| // If an EOL is not preceded by a backslash, show a warning |
| // and proceed like it was a legit multi-line string where |
| // author simply forgot to escape the newline symbol. |
| // |
| // Another approach is to implicitly close a string on EOL |
| // but it generates too many false positives. |
| |
| if (!allowNewLine) { |
| this.trigger("warning", { |
| code: "W112", |
| line: this.line, |
| character: this.char |
| }); |
| } else { |
| allowNewLine = false; |
| |
| // Otherwise show a warning if multistr option was not set. |
| // For JSON, show warning no matter what. |
| |
| if (!state.option.multistr) { |
| this.trigger("warning", { |
| code: "W043", |
| line: this.line, |
| character: this.char |
| }); |
| } else if (state.jsonMode) { |
| this.trigger("warning", { |
| code: "W042", |
| line: this.line, |
| character: this.char |
| }); |
| } |
| } |
| |
| // If we get an EOF inside of an unclosed string, show an |
| // error and implicitly close it at the EOF point. |
| |
| if (!this.nextLine()) { |
| this.trigger("error", { |
| code: "E029", |
| line: startLine, |
| character: startChar |
| }); |
| |
| return { |
| type: Token.StringLiteral, |
| value: value, |
| isUnclosed: true, |
| quote: quote |
| }; |
| } |
| } |
| |
| allowNewLine = false; |
| var char = this.peek(); |
| var jump = 1; // A length of a jump, after we're done |
| // parsing this character. |
| |
| if (char < " ") { |
| // Warn about a control character in a string. |
| this.trigger("warning", { |
| code: "W113", |
| line: this.line, |
| character: this.char, |
| data: [ "<non-printable>" ] |
| }); |
| } |
| |
| // Special treatment for some escaped characters. |
| |
| if (char === "\\") { |
| this.skip(); |
| char = this.peek(); |
| |
| switch (char) { |
| case "'": |
| if (state.jsonMode) { |
| this.trigger("warning", { |
| code: "W114", |
| line: this.line, |
| character: this.char, |
| data: [ "\\'" ] |
| }); |
| } |
| break; |
| case "b": |
| char = "\b"; |
| break; |
| case "f": |
| char = "\f"; |
| break; |
| case "n": |
| char = "\n"; |
| break; |
| case "r": |
| char = "\r"; |
| break; |
| case "t": |
| char = "\t"; |
| break; |
| case "0": |
| char = "\0"; |
| |
| // Octal literals fail in strict mode. |
| // Check if the number is between 00 and 07. |
| var n = parseInt(this.peek(1), 10); |
| if (n >= 0 && n <= 7 && state.directive["use strict"]) { |
| this.trigger("warning", { |
| code: "W115", |
| line: this.line, |
| character: this.char |
| }); |
| } |
| break; |
| case "u": |
| char = String.fromCharCode(parseInt(this.input.substr(1, 4), 16)); |
| jump = 5; |
| break; |
| case "v": |
| if (state.jsonMode) { |
| this.trigger("warning", { |
| code: "W114", |
| line: this.line, |
| character: this.char, |
| data: [ "\\v" ] |
| }); |
| } |
| |
| char = "\v"; |
| break; |
| case "x": |
| var x = parseInt(this.input.substr(1, 2), 16); |
| |
| if (state.jsonMode) { |
| this.trigger("warning", { |
| code: "W114", |
| line: this.line, |
| character: this.char, |
| data: [ "\\x-" ] |
| }); |
| } |
| |
| char = String.fromCharCode(x); |
| jump = 3; |
| break; |
| case "\\": |
| case "\"": |
| case "/": |
| break; |
| case "": |
| allowNewLine = true; |
| char = ""; |
| break; |
| case "!": |
| if (value.slice(value.length - 2) === "<") { |
| break; |
| } |
| |
| /*falls through */ |
| default: |
| // Weird escaping. |
| this.trigger("warning", { |
| code: "W044", |
| line: this.line, |
| character: this.char |
| }); |
| } |
| } |
| |
| value += char; |
| this.skip(jump); |
| } |
| |
| this.skip(); |
| return { |
| type: Token.StringLiteral, |
| value: value, |
| isUnclosed: false, |
| quote: quote |
| }; |
| }, |
| |
| /* |
| * Extract a regular expression out of the next sequence of |
| * characters and/or lines or return 'null' if it's not possible. |
| * |
| * This method is platform dependent: it accepts almost any |
| * regular expression values but then tries to compile and run |
| * them using system's RegExp object. This means that there are |
| * rare edge cases where one JavaScript engine complains about |
| * your regular expression while others don't. |
| */ |
| scanRegExp: function () { |
| var index = 0; |
| var length = this.input.length; |
| var char = this.peek(); |
| var value = char; |
| var body = ""; |
| var flags = []; |
| var malformed = false; |
| var isCharSet = false; |
| var terminated; |
| |
| var scanUnexpectedChars = function () { |
| // Unexpected control character |
| if (char < " ") { |
| malformed = true; |
| this.trigger("warning", { |
| code: "W048", |
| line: this.line, |
| character: this.char |
| }); |
| } |
| |
| // Unexpected escaped character |
| if (char === "<") { |
| malformed = true; |
| this.trigger("warning", { |
| code: "W049", |
| line: this.line, |
| character: this.char, |
| data: [ char ] |
| }); |
| } |
| }.bind(this); |
| |
| // Regular expressions must start with '/' |
| if (!this.prereg || char !== "/") { |
| return null; |
| } |
| |
| index += 1; |
| terminated = false; |
| |
| // Try to get everything in between slashes. A couple of |
| // cases aside (see scanUnexpectedChars) we don't really |
| // care whether the resulting expression is valid or not. |
| // We will check that later using the RegExp object. |
| |
| while (index < length) { |
| char = this.peek(index); |
| value += char; |
| body += char; |
| |
| if (isCharSet) { |
| if (char === "]") { |
| if (this.peek(index - 1) !== "\\" || this.peek(index - 2) === "\\") { |
| isCharSet = false; |
| } |
| } |
| |
| if (char === "\\") { |
| index += 1; |
| char = this.peek(index); |
| body += char; |
| value += char; |
| |
| scanUnexpectedChars(); |
| } |
| |
| index += 1; |
| continue; |
| } |
| |
| if (char === "\\") { |
| index += 1; |
| char = this.peek(index); |
| body += char; |
| value += char; |
| |
| scanUnexpectedChars(); |
| |
| if (char === "/") { |
| index += 1; |
| continue; |
| } |
| |
| if (char === "[") { |
| index += 1; |
| continue; |
| } |
| } |
| |
| if (char === "[") { |
| isCharSet = true; |
| index += 1; |
| continue; |
| } |
| |
| if (char === "/") { |
| body = body.substr(0, body.length - 1); |
| terminated = true; |
| index += 1; |
| break; |
| } |
| |
| index += 1; |
| } |
| |
| // A regular expression that was never closed is an |
| // error from which we cannot recover. |
| |
| if (!terminated) { |
| this.trigger("error", { |
| code: "E015", |
| line: this.line, |
| character: this.from |
| }); |
| |
| return void this.trigger("fatal", { |
| line: this.line, |
| from: this.from |
| }); |
| } |
| |
| // Parse flags (if any). |
| |
| while (index < length) { |
| char = this.peek(index); |
| if (!/[gim]/.test(char)) { |
| break; |
| } |
| flags.push(char); |
| value += char; |
| index += 1; |
| } |
| |
| // Check regular expression for correctness. |
| |
| try { |
| new RegExp(body, flags.join("")); |
| } catch (err) { |
| malformed = true; |
| this.trigger("error", { |
| code: "E016", |
| line: this.line, |
| character: this.char, |
| data: [ err.message ] // Platform dependent! |
| }); |
| } |
| |
| return { |
| type: Token.RegExp, |
| value: value, |
| flags: flags, |
| isMalformed: malformed |
| }; |
| }, |
| |
| /* |
| * Scan for any occurrence of mixed tabs and spaces. If smarttabs option |
| * is on, ignore tabs followed by spaces. |
| * |
| * Tabs followed by one space followed by a block comment are allowed. |
| */ |
| scanMixedSpacesAndTabs: function () { |
| var at, match; |
| |
| if (state.option.smarttabs) { |
| // Negative look-behind for "//" |
| match = this.input.match(/(\/\/)? \t/); |
| at = match && !match[1] ? 0 : -1; |
| } else { |
| at = this.input.search(/ \t|\t [^\*]/); |
| } |
| |
| return at; |
| }, |
| |
| /* |
| * Scan for characters that get silently deleted by one or more browsers. |
| */ |
| scanUnsafeChars: function () { |
| return this.input.search(reg.unsafeChars); |
| }, |
| |
| /* |
| * Produce the next raw token or return 'null' if no tokens can be matched. |
| * This method skips over all space characters. |
| */ |
| next: function () { |
| this.from = this.char; |
| |
| // Move to the next non-space character. |
| var start; |
| if (/\s/.test(this.peek())) { |
| start = this.char; |
| |
| while (/\s/.test(this.peek())) { |
| this.from += 1; |
| this.skip(); |
| } |
| |
| if (this.peek() === "") { // EOL |
| if (state.option.trailing) { |
| this.trigger("warning", { code: "W102", line: this.line, character: start }); |
| } |
| } |
| } |
| |
| // Methods that work with multi-line structures and move the |
| // character pointer. |
| |
| var match = this.scanComments() || |
| this.scanStringLiteral(); |
| |
| if (match) { |
| return match; |
| } |
| |
| // Methods that don't move the character pointer. |
| |
| match = |
| this.scanRegExp() || |
| this.scanPunctuator() || |
| this.scanKeyword() || |
| this.scanIdentifier() || |
| this.scanNumericLiteral(); |
| |
| if (match) { |
| this.skip(match.value.length); |
| return match; |
| } |
| |
| // No token could be matched, give up. |
| |
| return null; |
| }, |
| |
| /* |
| * Switch to the next line and reset all char pointers. Once |
| * switched, this method also checks for mixed spaces and tabs |
| * and other minor warnings. |
| */ |
| nextLine: function () { |
| var char; |
| |
| if (this.line >= this.lines.length) { |
| return false; |
| } |
| |
| this.input = this.lines[this.line]; |
| this.line += 1; |
| this.char = 1; |
| this.from = 1; |
| |
| char = this.scanMixedSpacesAndTabs(); |
| if (char >= 0) { |
| this.trigger("warning", { code: "W099", line: this.line, character: char + 1 }); |
| } |
| |
| this.input = this.input.replace(/\t/g, state.tab); |
| char = this.scanUnsafeChars(); |
| |
| if (char >= 0) { |
| this.trigger("warning", { code: "W100", line: this.line, character: char }); |
| } |
| |
| // If there is a limit on line length, warn when lines get too |
| // long. |
| |
| if (state.option.maxlen && state.option.maxlen < this.input.length) { |
| this.trigger("warning", { code: "W101", line: this.line, character: this.input.length }); |
| } |
| |
| return true; |
| }, |
| |
| /* |
| * This is simply a synonym for nextLine() method with a friendlier |
| * public name. |
| */ |
| start: function () { |
| this.nextLine(); |
| }, |
| |
| /* |
| * Produce the next token. This function is called by advance() to get |
| * the next token. It returns a token in a JSLint-compatible format. |
| */ |
| token: function () { |
| var token; |
| |
| function isReserved(token, isProperty) { |
| if (!token.reserved) { |
| return false; |
| } |
| |
| if (token.meta && token.meta.isFutureReservedWord) { |
| // ES3 FutureReservedWord in an ES5 environment. |
| if (state.option.es5 && !token.meta.es5) { |
| return false; |
| } |
| |
| // Some ES5 FutureReservedWord identifiers are active only |
| // within a strict mode environment. |
| if (token.meta.strictOnly) { |
| if (!state.option.strict && !state.directive["use strict"]) { |
| return false; |
| } |
| } |
| |
| if (isProperty) { |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| // Produce a token object. |
| var create = function (type, value, isProperty) { |
| /*jshint validthis:true */ |
| var obj; |
| |
| if (type !== "(endline)" && type !== "(end)") { |
| this.prereg = false; |
| } |
| |
| if (type === "(punctuator)") { |
| switch (value) { |
| case ".": |
| case ")": |
| case "~": |
| case "#": |
| case "]": |
| this.prereg = false; |
| break; |
| default: |
| this.prereg = true; |
| } |
| |
| obj = Object.create(state.syntax[value] || state.syntax["(error)"]); |
| } |
| |
| if (type === "(identifier)") { |
| if (value === "return" || value === "case" || value === "typeof") { |
| this.prereg = true; |
| } |
| |
| if (_.has(state.syntax, value)) { |
| obj = Object.create(state.syntax[value] || state.syntax["(error)"]); |
| |
| // If this can't be a reserved keyword, reset the object. |
| if (!isReserved(obj, isProperty && type === "(identifier)")) { |
| obj = null; |
| } |
| } |
| } |
| |
| if (!obj) { |
| obj = Object.create(state.syntax[type]); |
| } |
| |
| obj.identifier = (type === "(identifier)"); |
| obj.type = obj.type || type; |
| obj.value = value; |
| obj.line = this.line; |
| obj.character = this.char; |
| obj.from = this.from; |
| |
| if (isProperty && obj.identifier) { |
| obj.isProperty = isProperty; |
| } |
| |
| return obj; |
| }.bind(this); |
| |
| for (;;) { |
| if (!this.input.length) { |
| return create(this.nextLine() ? "(endline)" : "(end)", ""); |
| } |
| |
| token = this.next(); |
| |
| if (!token) { |
| if (this.input.length) { |
| // Unexpected character. |
| this.trigger("error", { |
| code: "E024", |
| line: this.line, |
| character: this.char, |
| data: [ this.peek() ] |
| }); |
| |
| this.input = ""; |
| } |
| |
| continue; |
| } |
| |
| switch (token.type) { |
| case Token.StringLiteral: |
| this.trigger("String", { |
| line: this.line, |
| char: this.char, |
| from: this.from, |
| value: token.value, |
| quote: token.quote |
| }); |
| |
| return create("(string)", token.value); |
| case Token.Identifier: |
| this.trigger("Identifier", { |
| line: this.line, |
| char: this.char, |
| from: this.form, |
| name: token.value, |
| isProperty: state.tokens.curr.id === "." |
| }); |
| |
| /* falls through */ |
| case Token.Keyword: |
| case Token.NullLiteral: |
| case Token.BooleanLiteral: |
| return create("(identifier)", token.value, state.tokens.curr.id === "."); |
| |
| case Token.NumericLiteral: |
| if (token.isMalformed) { |
| this.trigger("warning", { |
| code: "W045", |
| line: this.line, |
| character: this.char, |
| data: [ token.value ] |
| }); |
| } |
| |
| if (state.jsonMode && token.base === 16) { |
| this.trigger("warning", { |
| code: "W114", |
| line: this.line, |
| character: this.char, |
| data: [ "0x-" ] |
| }); |
| } |
| |
| if (state.directive["use strict"] && token.base === 8) { |
| this.trigger("warning", { |
| code: "W115", |
| line: this.line, |
| character: this.char |
| }); |
| } |
| |
| this.trigger("Number", { |
| line: this.line, |
| char: this.char, |
| from: this.from, |
| value: token.value, |
| base: token.base, |
| isMalformed: token.malformed |
| }); |
| |
| return create("(number)", token.value); |
| |
| case Token.RegExp: |
| return create("(regexp)", token.value); |
| |
| case Token.Comment: |
| state.tokens.curr.comment = true; |
| |
| if (token.isSpecial) { |
| return { |
| value: token.value, |
| body: token.body, |
| type: token.commentType, |
| isSpecial: token.isSpecial, |
| line: this.line, |
| character: this.char, |
| from: this.from |
| }; |
| } |
| |
| break; |
| |
| case "": |
| break; |
| |
| default: |
| return create("(punctuator)", token.value); |
| } |
| } |
| } |
| }; |
| |
| exports.Lexer = Lexer; |