| /* |
| * Lexical analysis and token construction. |
| */ |
| |
| "use strict"; |
| |
| var _ = require("lodash"); |
| var events = require("events"); |
| var reg = require("./reg.js"); |
| var state = require("./state.js").state; |
| |
| var unicodeData = require("../data/ascii-identifier-data.js"); |
| var asciiIdentifierStartTable = unicodeData.asciiIdentifierStartTable; |
| var asciiIdentifierPartTable = unicodeData.asciiIdentifierPartTable; |
| var nonAsciiIdentifierStartTable = require("../data/non-ascii-identifier-start.js"); |
| var nonAsciiIdentifierPartTable = require("../data/non-ascii-identifier-part-only.js"); |
| |
| // Some of these token types are from JavaScript Parser API |
| // while others are specific to JSHint parser. |
| // JS Parser API: https://developer.mozilla.org/en-US/docs/SpiderMonkey/Parser_API |
| |
| var Token = { |
| Identifier: 1, |
| Punctuator: 2, |
| NumericLiteral: 3, |
| StringLiteral: 4, |
| Comment: 5, |
| Keyword: 6, |
| NullLiteral: 7, |
| BooleanLiteral: 8, |
| RegExp: 9, |
| TemplateHead: 10, |
| TemplateMiddle: 11, |
| TemplateTail: 12, |
| NoSubstTemplate: 13 |
| }; |
| |
| var Context = { |
| Block: 1, |
| Template: 2 |
| }; |
| |
| // Object that handles postponed lexing verifications that checks the parsed |
| // environment state. |
| |
| function asyncTrigger() { |
| var _checks = []; |
| |
| return { |
| push: function(fn) { |
| _checks.push(fn); |
| }, |
| |
| check: function() { |
| for (var check = 0; check < _checks.length; ++check) { |
| _checks[check](); |
| } |
| |
| _checks.splice(0, _checks.length); |
| } |
| }; |
| } |
| |
| /* |
| * Lexer for JSHint. |
| * |
| * This object does a char-by-char scan of the provided source code |
| * and produces a sequence of tokens. |
| * |
| * var lex = new Lexer("var i = 0;"); |
| * lex.start(); |
| * lex.token(); // returns the next token |
| * |
| * You have to use the token() method to move the lexer forward |
| * but you don't have to use its return value to get tokens. In addition |
| * to token() method returning the next token, the Lexer object also |
| * emits events. |
| * |
| * lex.on("Identifier", function(data) { |
| * if (data.name.indexOf("_") >= 0) { |
| * // Produce a warning. |
| * } |
| * }); |
| * |
| * Note that the token() method returns tokens in a JSLint-compatible |
| * format while the event emitter uses a slightly modified version of |
| * Mozilla's JavaScript Parser API. Eventually, we will move away from |
| * JSLint format. |
| */ |
| function Lexer(source) { |
| var lines = source; |
| |
| if (typeof lines === "string") { |
| lines = lines |
| .replace(/\r\n/g, "\n") |
| .replace(/\r/g, "\n") |
| .split("\n"); |
| } |
| |
| // If the first line is a shebang (#!), make it a blank and move on. |
| // Shebangs are used by Node scripts. |
| |
| if (lines[0] && lines[0].substr(0, 2) === "#!") { |
| if (lines[0].indexOf("node") !== -1) { |
| state.option.node = true; |
| } |
| lines[0] = ""; |
| } |
| |
| this.emitter = new events.EventEmitter(); |
| this.source = source; |
| this.setLines(lines); |
| this.prereg = true; |
| |
| this.line = 0; |
| this.char = 1; |
| this.from = 1; |
| this.input = ""; |
| this.inComment = false; |
| this.context = []; |
| this.templateStarts = []; |
| |
| for (var i = 0; i < state.option.indent; i += 1) { |
| state.tab += " "; |
| } |
| } |
| |
| Lexer.prototype = { |
| _lines: [], |
| |
| inContext: function(ctxType) { |
| return this.context.length > 0 && this.context[this.context.length - 1].type === ctxType; |
| }, |
| |
| pushContext: function(ctxType) { |
| this.context.push({ type: ctxType }); |
| }, |
| |
| popContext: function() { |
| return this.context.pop(); |
| }, |
| |
| isContext: function(context) { |
| return this.context.length > 0 && this.context[this.context.length - 1] === context; |
| }, |
| |
| currentContext: function() { |
| return this.context.length > 0 && this.context[this.context.length - 1]; |
| }, |
| |
| getLines: function() { |
| this._lines = state.lines; |
| return this._lines; |
| }, |
| |
| setLines: function(val) { |
| this._lines = val; |
| state.lines = this._lines; |
| }, |
| |
| /* |
| * Return the next i character without actually moving the |
| * char pointer. |
| */ |
| peek: function(i) { |
| return this.input.charAt(i || 0); |
| }, |
| |
| /* |
| * Move the char pointer forward i times. |
| */ |
| skip: function(i) { |
| i = i || 1; |
| this.char += i; |
| this.input = this.input.slice(i); |
| }, |
| |
| /* |
| * Subscribe to a token event. The API for this method is similar |
| * Underscore.js i.e. you can subscribe to multiple events with |
| * one call: |
| * |
| * lex.on("Identifier Number", function(data) { |
| * // ... |
| * }); |
| */ |
| on: function(names, listener) { |
| names.split(" ").forEach(function(name) { |
| this.emitter.on(name, listener); |
| }.bind(this)); |
| }, |
| |
| /* |
| * Trigger a token event. All arguments will be passed to each |
| * listener. |
| */ |
| trigger: function() { |
| this.emitter.emit.apply(this.emitter, Array.prototype.slice.call(arguments)); |
| }, |
| |
| /* |
| * Postpone a token event. the checking condition is set as |
| * last parameter, and the trigger function is called in a |
| * stored callback. To be later called using the check() function |
| * by the parser. This avoids parser's peek() to give the lexer |
| * a false context. |
| */ |
| triggerAsync: function(type, args, checks, fn) { |
| checks.push(function() { |
| if (fn()) { |
| this.trigger(type, args); |
| } |
| }.bind(this)); |
| }, |
| |
| /* |
| * Extract a punctuator out of the next sequence of characters |
| * or return 'null' if its not possible. |
| * |
| * This method's implementation was heavily influenced by the |
| * scanPunctuator function in the Esprima parser's source code. |
| */ |
| scanPunctuator: function() { |
| var ch1 = this.peek(); |
| var ch2, ch3, ch4; |
| |
| switch (ch1) { |
| // Most common single-character punctuators |
| case ".": |
| if ((/^[0-9]$/).test(this.peek(1))) { |
| return null; |
| } |
| if (this.peek(1) === "." && this.peek(2) === ".") { |
| return { |
| type: Token.Punctuator, |
| value: "..." |
| }; |
| } |
| /* falls through */ |
| case "(": |
| case ")": |
| case ";": |
| case ",": |
| case "[": |
| case "]": |
| case ":": |
| case "~": |
| case "?": |
| return { |
| type: Token.Punctuator, |
| value: ch1 |
| }; |
| |
| // A block/object opener |
| case "{": |
| this.pushContext(Context.Block); |
| return { |
| type: Token.Punctuator, |
| value: ch1 |
| }; |
| |
| // A block/object closer |
| case "}": |
| if (this.inContext(Context.Block)) { |
| this.popContext(); |
| } |
| return { |
| type: Token.Punctuator, |
| value: ch1 |
| }; |
| |
| // A pound sign (for Node shebangs) |
| case "#": |
| return { |
| type: Token.Punctuator, |
| value: ch1 |
| }; |
| |
| // We're at the end of input |
| case "": |
| return null; |
| } |
| |
| // Peek more characters |
| |
| ch2 = this.peek(1); |
| ch3 = this.peek(2); |
| ch4 = this.peek(3); |
| |
| // 4-character punctuator: >>>= |
| |
| if (ch1 === ">" && ch2 === ">" && ch3 === ">" && ch4 === "=") { |
| return { |
| type: Token.Punctuator, |
| value: ">>>=" |
| }; |
| } |
| |
| // 3-character punctuators: === !== >>> <<= >>= |
| |
| if (ch1 === "=" && ch2 === "=" && ch3 === "=") { |
| return { |
| type: Token.Punctuator, |
| value: "===" |
| }; |
| } |
| |
| if (ch1 === "!" && ch2 === "=" && ch3 === "=") { |
| return { |
| type: Token.Punctuator, |
| value: "!==" |
| }; |
| } |
| |
| if (ch1 === ">" && ch2 === ">" && ch3 === ">") { |
| return { |
| type: Token.Punctuator, |
| value: ">>>" |
| }; |
| } |
| |
| if (ch1 === "<" && ch2 === "<" && ch3 === "=") { |
| return { |
| type: Token.Punctuator, |
| value: "<<=" |
| }; |
| } |
| |
| if (ch1 === ">" && ch2 === ">" && ch3 === "=") { |
| return { |
| type: Token.Punctuator, |
| value: ">>=" |
| }; |
| } |
| |
| // Fat arrow punctuator |
| if (ch1 === "=" && ch2 === ">") { |
| return { |
| type: Token.Punctuator, |
| value: ch1 + ch2 |
| }; |
| } |
| |
| // 2-character punctuators: <= >= == != ++ -- << >> && || |
| // += -= *= %= &= |= ^= (but not /=, see below) |
| if (ch1 === ch2 && ("+-<>&|".indexOf(ch1) >= 0)) { |
| return { |
| type: Token.Punctuator, |
| value: ch1 + ch2 |
| }; |
| } |
| |
| if ("<>=!+-*%&|^".indexOf(ch1) >= 0) { |
| if (ch2 === "=") { |
| return { |
| type: Token.Punctuator, |
| value: ch1 + ch2 |
| }; |
| } |
| |
| return { |
| type: Token.Punctuator, |
| value: ch1 |
| }; |
| } |
| |
| // Special case: /=. |
| |
| if (ch1 === "/") { |
| if (ch2 === "=") { |
| return { |
| type: Token.Punctuator, |
| value: "/=" |
| }; |
| } |
| |
| return { |
| type: Token.Punctuator, |
| value: "/" |
| }; |
| } |
| |
| return null; |
| }, |
| |
| /* |
| * Extract a comment out of the next sequence of characters and/or |
| * lines or return 'null' if its not possible. Since comments can |
| * span across multiple lines this method has to move the char |
| * pointer. |
| * |
| * In addition to normal JavaScript comments (// and /*) this method |
| * also recognizes JSHint- and JSLint-specific comments such as |
| * /*jshint, /*jslint, /*globals and so on. |
| */ |
| scanComments: function() { |
| var ch1 = this.peek(); |
| var ch2 = this.peek(1); |
| var rest = this.input.substr(2); |
| var startLine = this.line; |
| var startChar = this.char; |
| |
| // Create a comment token object and make sure it |
| // has all the data JSHint needs to work with special |
| // comments. |
| |
| function commentToken(label, body, opt) { |
| var special = ["jshint", "jslint", "members", "member", "globals", "global", "exported"]; |
| var isSpecial = false; |
| var value = label + body; |
| var commentType = "plain"; |
| opt = opt || {}; |
| |
| if (opt.isMultiline) { |
| value += "*/"; |
| } |
| |
| body = body.replace(/\n/g, " "); |
| |
| special.forEach(function(str) { |
| if (isSpecial) { |
| return; |
| } |
| |
| // Don't recognize any special comments other than jshint for single-line |
| // comments. This introduced many problems with legit comments. |
| if (label === "//" && str !== "jshint") { |
| return; |
| } |
| |
| if (body.charAt(str.length) === " " && body.substr(0, str.length) === str) { |
| isSpecial = true; |
| label = label + str; |
| body = body.substr(str.length); |
| } |
| |
| if (!isSpecial && body.charAt(0) === " " && body.charAt(str.length + 1) === " " && |
| body.substr(1, str.length) === str) { |
| isSpecial = true; |
| label = label + " " + str; |
| body = body.substr(str.length + 1); |
| } |
| |
| if (!isSpecial) { |
| return; |
| } |
| |
| switch (str) { |
| case "member": |
| commentType = "members"; |
| break; |
| case "global": |
| commentType = "globals"; |
| break; |
| default: |
| commentType = str; |
| } |
| }); |
| |
| return { |
| type: Token.Comment, |
| commentType: commentType, |
| value: value, |
| body: body, |
| isSpecial: isSpecial, |
| isMultiline: opt.isMultiline || false, |
| isMalformed: opt.isMalformed || false |
| }; |
| } |
| |
| // End of unbegun comment. Raise an error and skip that input. |
| if (ch1 === "*" && ch2 === "/") { |
| this.trigger("error", { |
| code: "E018", |
| line: startLine, |
| character: startChar |
| }); |
| |
| this.skip(2); |
| return null; |
| } |
| |
| // Comments must start either with // or /* |
| if (ch1 !== "/" || (ch2 !== "*" && ch2 !== "/")) { |
| return null; |
| } |
| |
| // One-line comment |
| if (ch2 === "/") { |
| this.skip(this.input.length); // Skip to the EOL. |
| return commentToken("//", rest); |
| } |
| |
| var body = ""; |
| |
| /* Multi-line comment */ |
| if (ch2 === "*") { |
| this.inComment = true; |
| this.skip(2); |
| |
| while (this.peek() !== "*" || this.peek(1) !== "/") { |
| if (this.peek() === "") { // End of Line |
| body += "\n"; |
| |
| // If we hit EOF and our comment is still unclosed, |
| // trigger an error and end the comment implicitly. |
| if (!this.nextLine()) { |
| this.trigger("error", { |
| code: "E017", |
| line: startLine, |
| character: startChar |
| }); |
| |
| this.inComment = false; |
| return commentToken("/*", body, { |
| isMultiline: true, |
| isMalformed: true |
| }); |
| } |
| } else { |
| body += this.peek(); |
| this.skip(); |
| } |
| } |
| |
| this.skip(2); |
| this.inComment = false; |
| return commentToken("/*", body, { isMultiline: true }); |
| } |
| }, |
| |
| /* |
| * Extract a keyword out of the next sequence of characters or |
| * return 'null' if its not possible. |
| */ |
| scanKeyword: function() { |
| var result = /^[a-zA-Z_$][a-zA-Z0-9_$]*/.exec(this.input); |
| var keywords = [ |
| "if", "in", "do", "var", "for", "new", |
| "try", "let", "this", "else", "case", |
| "void", "with", "enum", "while", "break", |
| "catch", "throw", "const", "yield", "class", |
| "super", "return", "typeof", "delete", |
| "switch", "export", "import", "default", |
| "finally", "extends", "function", "continue", |
| "debugger", "instanceof" |
| ]; |
| |
| if (result && keywords.indexOf(result[0]) >= 0) { |
| return { |
| type: Token.Keyword, |
| value: result[0] |
| }; |
| } |
| |
| return null; |
| }, |
| |
| /* |
| * Extract a JavaScript identifier out of the next sequence of |
| * characters or return 'null' if its not possible. In addition, |
| * to Identifier this method can also produce BooleanLiteral |
| * (true/false) and NullLiteral (null). |
| */ |
| scanIdentifier: function() { |
| var id = ""; |
| var index = 0; |
| var type, char; |
| |
| function isNonAsciiIdentifierStart(code) { |
| return nonAsciiIdentifierStartTable.indexOf(code) > -1; |
| } |
| |
| function isNonAsciiIdentifierPart(code) { |
| return isNonAsciiIdentifierStart(code) || nonAsciiIdentifierPartTable.indexOf(code) > -1; |
| } |
| |
| function isHexDigit(str) { |
| return (/^[0-9a-fA-F]$/).test(str); |
| } |
| |
| var readUnicodeEscapeSequence = function() { |
| /*jshint validthis:true */ |
| index += 1; |
| |
| if (this.peek(index) !== "u") { |
| return null; |
| } |
| |
| var ch1 = this.peek(index + 1); |
| var ch2 = this.peek(index + 2); |
| var ch3 = this.peek(index + 3); |
| var ch4 = this.peek(index + 4); |
| var code; |
| |
| if (isHexDigit(ch1) && isHexDigit(ch2) && isHexDigit(ch3) && isHexDigit(ch4)) { |
| code = parseInt(ch1 + ch2 + ch3 + ch4, 16); |
| |
| if (asciiIdentifierPartTable[code] || isNonAsciiIdentifierPart(code)) { |
| index += 5; |
| return "\\u" + ch1 + ch2 + ch3 + ch4; |
| } |
| |
| return null; |
| } |
| |
| return null; |
| }.bind(this); |
| |
| var getIdentifierStart = function() { |
| /*jshint validthis:true */ |
| var chr = this.peek(index); |
| var code = chr.charCodeAt(0); |
| |
| if (code === 92) { |
| return readUnicodeEscapeSequence(); |
| } |
| |
| if (code < 128) { |
| if (asciiIdentifierStartTable[code]) { |
| index += 1; |
| return chr; |
| } |
| |
| return null; |
| } |
| |
| if (isNonAsciiIdentifierStart(code)) { |
| index += 1; |
| return chr; |
| } |
| |
| return null; |
| }.bind(this); |
| |
| var getIdentifierPart = function() { |
| /*jshint validthis:true */ |
| var chr = this.peek(index); |
| var code = chr.charCodeAt(0); |
| |
| if (code === 92) { |
| return readUnicodeEscapeSequence(); |
| } |
| |
| if (code < 128) { |
| if (asciiIdentifierPartTable[code]) { |
| index += 1; |
| return chr; |
| } |
| |
| return null; |
| } |
| |
| if (isNonAsciiIdentifierPart(code)) { |
| index += 1; |
| return chr; |
| } |
| |
| return null; |
| }.bind(this); |
| |
| function removeEscapeSequences(id) { |
| return id.replace(/\\u([0-9a-fA-F]{4})/g, function(m0, codepoint) { |
| return String.fromCharCode(parseInt(codepoint, 16)); |
| }); |
| } |
| |
| char = getIdentifierStart(); |
| if (char === null) { |
| return null; |
| } |
| |
| id = char; |
| for (;;) { |
| char = getIdentifierPart(); |
| |
| if (char === null) { |
| break; |
| } |
| |
| id += char; |
| } |
| |
| switch (id) { |
| case "true": |
| case "false": |
| type = Token.BooleanLiteral; |
| break; |
| case "null": |
| type = Token.NullLiteral; |
| break; |
| default: |
| type = Token.Identifier; |
| } |
| |
| return { |
| type: type, |
| value: removeEscapeSequences(id), |
| text: id, |
| tokenLength: id.length |
| }; |
| }, |
| |
| /* |
| * Extract a numeric literal out of the next sequence of |
| * characters or return 'null' if its not possible. This method |
| * supports all numeric literals described in section 7.8.3 |
| * of the EcmaScript 5 specification. |
| * |
| * This method's implementation was heavily influenced by the |
| * scanNumericLiteral function in the Esprima parser's source code. |
| */ |
| scanNumericLiteral: function() { |
| var index = 0; |
| var value = ""; |
| var length = this.input.length; |
| var char = this.peek(index); |
| var bad; |
| var isAllowedDigit = isDecimalDigit; |
| var base = 10; |
| var isLegacy = false; |
| |
| function isDecimalDigit(str) { |
| return (/^[0-9]$/).test(str); |
| } |
| |
| function isOctalDigit(str) { |
| return (/^[0-7]$/).test(str); |
| } |
| |
| function isBinaryDigit(str) { |
| return (/^[01]$/).test(str); |
| } |
| |
| function isHexDigit(str) { |
| return (/^[0-9a-fA-F]$/).test(str); |
| } |
| |
| function isIdentifierStart(ch) { |
| return (ch === "$") || (ch === "_") || (ch === "\\") || |
| (ch >= "a" && ch <= "z") || (ch >= "A" && ch <= "Z"); |
| } |
| |
| // Numbers must start either with a decimal digit or a point. |
| |
| if (char !== "." && !isDecimalDigit(char)) { |
| return null; |
| } |
| |
| if (char !== ".") { |
| value = this.peek(index); |
| index += 1; |
| char = this.peek(index); |
| |
| if (value === "0") { |
| // Base-16 numbers. |
| if (char === "x" || char === "X") { |
| isAllowedDigit = isHexDigit; |
| base = 16; |
| |
| index += 1; |
| value += char; |
| } |
| |
| // Base-8 numbers. |
| if (char === "o" || char === "O") { |
| isAllowedDigit = isOctalDigit; |
| base = 8; |
| |
| if (!state.option.esnext) { |
| this.trigger("warning", { |
| code: "W119", |
| line: this.line, |
| character: this.char, |
| data: [ "Octal integer literal" ] |
| }); |
| } |
| |
| index += 1; |
| value += char; |
| } |
| |
| // Base-2 numbers. |
| if (char === "b" || char === "B") { |
| isAllowedDigit = isBinaryDigit; |
| base = 2; |
| |
| if (!state.option.esnext) { |
| this.trigger("warning", { |
| code: "W119", |
| line: this.line, |
| character: this.char, |
| data: [ "Binary integer literal" ] |
| }); |
| } |
| |
| index += 1; |
| value += char; |
| } |
| |
| // Legacy base-8 numbers. |
| if (isOctalDigit(char)) { |
| isAllowedDigit = isOctalDigit; |
| base = 8; |
| isLegacy = true; |
| bad = false; |
| |
| index += 1; |
| value += char; |
| } |
| |
| // Decimal numbers that start with '0' such as '09' are illegal |
| // but we still parse them and return as malformed. |
| |
| if (!isOctalDigit(char) && isDecimalDigit(char)) { |
| index += 1; |
| value += char; |
| } |
| } |
| |
| while (index < length) { |
| char = this.peek(index); |
| |
| if (isLegacy && isDecimalDigit(char)) { |
| // Numbers like '019' (note the 9) are not valid octals |
| // but we still parse them and mark as malformed. |
| bad = true; |
| } else if (!isAllowedDigit(char)) { |
| break; |
| } |
| value += char; |
| index += 1; |
| } |
| |
| if (isAllowedDigit !== isDecimalDigit) { |
| if (!isLegacy && value.length <= 2) { // 0x |
| return { |
| type: Token.NumericLiteral, |
| value: value, |
| isMalformed: true |
| }; |
| } |
| |
| if (index < length) { |
| char = this.peek(index); |
| if (isIdentifierStart(char)) { |
| return null; |
| } |
| } |
| |
| return { |
| type: Token.NumericLiteral, |
| value: value, |
| base: base, |
| isLegacy: isLegacy, |
| isMalformed: false |
| }; |
| } |
| } |
| |
| // Decimal digits. |
| |
| if (char === ".") { |
| value += char; |
| index += 1; |
| |
| while (index < length) { |
| char = this.peek(index); |
| if (!isDecimalDigit(char)) { |
| break; |
| } |
| value += char; |
| index += 1; |
| } |
| } |
| |
| // Exponent part. |
| |
| if (char === "e" || char === "E") { |
| value += char; |
| index += 1; |
| char = this.peek(index); |
| |
| if (char === "+" || char === "-") { |
| value += this.peek(index); |
| index += 1; |
| } |
| |
| char = this.peek(index); |
| if (isDecimalDigit(char)) { |
| value += char; |
| index += 1; |
| |
| while (index < length) { |
| char = this.peek(index); |
| if (!isDecimalDigit(char)) { |
| break; |
| } |
| value += char; |
| index += 1; |
| } |
| } else { |
| return null; |
| } |
| } |
| |
| if (index < length) { |
| char = this.peek(index); |
| if (isIdentifierStart(char)) { |
| return null; |
| } |
| } |
| |
| return { |
| type: Token.NumericLiteral, |
| value: value, |
| base: base, |
| isMalformed: !isFinite(value) |
| }; |
| }, |
| |
| |
| // Assumes previously parsed character was \ (=== '\\') and was not skipped. |
| scanEscapeSequence: function(checks) { |
| var allowNewLine = false; |
| var jump = 1; |
| this.skip(); |
| var char = this.peek(); |
| |
| switch (char) { |
| case "'": |
| this.triggerAsync("warning", { |
| code: "W114", |
| line: this.line, |
| character: this.char, |
| data: [ "\\'" ] |
| }, checks, function() {return state.jsonMode; }); |
| break; |
| case "b": |
| char = "\\b"; |
| break; |
| case "f": |
| char = "\\f"; |
| break; |
| case "n": |
| char = "\\n"; |
| break; |
| case "r": |
| char = "\\r"; |
| break; |
| case "t": |
| char = "\\t"; |
| break; |
| case "0": |
| char = "\\0"; |
| |
| // Octal literals fail in strict mode. |
| // Check if the number is between 00 and 07. |
| var n = parseInt(this.peek(1), 10); |
| this.triggerAsync("warning", { |
| code: "W115", |
| line: this.line, |
| character: this.char |
| }, checks, |
| function() { return n >= 0 && n <= 7 && state.isStrict(); }); |
| break; |
| case "u": |
| var hexCode = this.input.substr(1, 4); |
| var code = parseInt(hexCode, 16); |
| if (isNaN(code)) { |
| this.trigger("warning", { |
| code: "W052", |
| line: this.line, |
| character: this.char, |
| data: [ "u" + hexCode ] |
| }); |
| } |
| char = String.fromCharCode(code); |
| jump = 5; |
| break; |
| case "v": |
| this.triggerAsync("warning", { |
| code: "W114", |
| line: this.line, |
| character: this.char, |
| data: [ "\\v" ] |
| }, checks, function() { return state.jsonMode; }); |
| |
| char = "\v"; |
| break; |
| case "x": |
| var x = parseInt(this.input.substr(1, 2), 16); |
| |
| this.triggerAsync("warning", { |
| code: "W114", |
| line: this.line, |
| character: this.char, |
| data: [ "\\x-" ] |
| }, checks, function() { return state.jsonMode; }); |
| |
| char = String.fromCharCode(x); |
| jump = 3; |
| break; |
| case "\\": |
| char = "\\\\"; |
| break; |
| case "\"": |
| char = "\\\""; |
| break; |
| case "/": |
| break; |
| case "": |
| allowNewLine = true; |
| char = ""; |
| break; |
| } |
| |
| return { char: char, jump: jump, allowNewLine: allowNewLine }; |
| }, |
| |
| /* |
| * Extract a template literal out of the next sequence of characters |
| * and/or lines or return 'null' if its not possible. Since template |
| * literals can span across multiple lines, this method has to move |
| * the char pointer. |
| */ |
| scanTemplateLiteral: function(checks) { |
| var tokenType; |
| var value = ""; |
| var ch; |
| var startLine = this.line; |
| var startChar = this.char; |
| var depth = this.templateStarts.length; |
| |
| if (!state.option.esnext) { |
| // Only lex template strings in ESNext mode. |
| return null; |
| } else if (this.peek() === "`") { |
| // Template must start with a backtick. |
| tokenType = Token.TemplateHead; |
| this.templateStarts.push({ line: this.line, char: this.char }); |
| depth = this.templateStarts.length; |
| this.skip(1); |
| this.pushContext(Context.Template); |
| } else if (this.inContext(Context.Template) && this.peek() === "}") { |
| // If we're in a template context, and we have a '}', lex a TemplateMiddle. |
| tokenType = Token.TemplateMiddle; |
| } else { |
| // Go lex something else. |
| return null; |
| } |
| |
| while (this.peek() !== "`") { |
| while ((ch = this.peek()) === "") { |
| value += "\n"; |
| if (!this.nextLine()) { |
| // Unclosed template literal --- point to the starting "`" |
| var startPos = this.templateStarts.pop(); |
| this.trigger("error", { |
| code: "E052", |
| line: startPos.line, |
| character: startPos.char |
| }); |
| return { |
| type: tokenType, |
| value: value, |
| startLine: startLine, |
| startChar: startChar, |
| isUnclosed: true, |
| depth: depth, |
| context: this.popContext() |
| }; |
| } |
| } |
| |
| if (ch === '$' && this.peek(1) === '{') { |
| value += '${'; |
| this.skip(2); |
| return { |
| type: tokenType, |
| value: value, |
| startLine: startLine, |
| startChar: startChar, |
| isUnclosed: false, |
| depth: depth, |
| context: this.currentContext() |
| }; |
| } else if (ch === '\\') { |
| var escape = this.scanEscapeSequence(checks); |
| value += escape.char; |
| this.skip(escape.jump); |
| } else if (ch !== '`') { |
| // Otherwise, append the value and continue. |
| value += ch; |
| this.skip(1); |
| } |
| } |
| |
| // Final value is either NoSubstTemplate or TemplateTail |
| tokenType = tokenType === Token.TemplateHead ? Token.NoSubstTemplate : Token.TemplateTail; |
| this.skip(1); |
| this.templateStarts.pop(); |
| |
| return { |
| type: tokenType, |
| value: value, |
| startLine: startLine, |
| startChar: startChar, |
| isUnclosed: false, |
| depth: depth, |
| context: this.popContext() |
| }; |
| }, |
| |
| /* |
| * Extract a string out of the next sequence of characters and/or |
| * lines or return 'null' if its not possible. Since strings can |
| * span across multiple lines this method has to move the char |
| * pointer. |
| * |
| * This method recognizes pseudo-multiline JavaScript strings: |
| * |
| * var str = "hello\ |
| * world"; |
| */ |
| scanStringLiteral: function(checks) { |
| /*jshint loopfunc:true */ |
| var quote = this.peek(); |
| |
| // String must start with a quote. |
| if (quote !== "\"" && quote !== "'") { |
| return null; |
| } |
| |
| // In JSON strings must always use double quotes. |
| this.triggerAsync("warning", { |
| code: "W108", |
| line: this.line, |
| character: this.char // +1? |
| }, checks, function() { return state.jsonMode && quote !== "\""; }); |
| |
| var value = ""; |
| var startLine = this.line; |
| var startChar = this.char; |
| var allowNewLine = false; |
| |
| this.skip(); |
| |
| while (this.peek() !== quote) { |
| if (this.peek() === "") { // End Of Line |
| |
| // If an EOL is not preceded by a backslash, show a warning |
| // and proceed like it was a legit multi-line string where |
| // author simply forgot to escape the newline symbol. |
| // |
| // Another approach is to implicitly close a string on EOL |
| // but it generates too many false positives. |
| |
| if (!allowNewLine) { |
| this.trigger("warning", { |
| code: "W112", |
| line: this.line, |
| character: this.char |
| }); |
| } else { |
| allowNewLine = false; |
| |
| // Otherwise show a warning if multistr option was not set. |
| // For JSON, show warning no matter what. |
| |
| this.triggerAsync("warning", { |
| code: "W043", |
| line: this.line, |
| character: this.char |
| }, checks, function() { return !state.option.multistr; }); |
| |
| this.triggerAsync("warning", { |
| code: "W042", |
| line: this.line, |
| character: this.char |
| }, checks, function() { return state.jsonMode && state.option.multistr; }); |
| } |
| |
| // If we get an EOF inside of an unclosed string, show an |
| // error and implicitly close it at the EOF point. |
| |
| if (!this.nextLine()) { |
| this.trigger("error", { |
| code: "E029", |
| line: startLine, |
| character: startChar |
| }); |
| |
| return { |
| type: Token.StringLiteral, |
| value: value, |
| startLine: startLine, |
| startChar: startChar, |
| isUnclosed: true, |
| quote: quote |
| }; |
| } |
| |
| } else { // Any character other than End Of Line |
| |
| allowNewLine = false; |
| var char = this.peek(); |
| var jump = 1; // A length of a jump, after we're done |
| // parsing this character. |
| |
| if (char < " ") { |
| // Warn about a control character in a string. |
| this.trigger("warning", { |
| code: "W113", |
| line: this.line, |
| character: this.char, |
| data: [ "<non-printable>" ] |
| }); |
| } |
| |
| // Special treatment for some escaped characters. |
| if (char === "\\") { |
| var parsed = this.scanEscapeSequence(checks); |
| char = parsed.char; |
| jump = parsed.jump; |
| allowNewLine = parsed.allowNewLine; |
| } |
| |
| value += char; |
| this.skip(jump); |
| } |
| } |
| |
| this.skip(); |
| return { |
| type: Token.StringLiteral, |
| value: value, |
| startLine: startLine, |
| startChar: startChar, |
| isUnclosed: false, |
| quote: quote |
| }; |
| }, |
| |
| /* |
| * Extract a regular expression out of the next sequence of |
| * characters and/or lines or return 'null' if its not possible. |
| * |
| * This method is platform dependent: it accepts almost any |
| * regular expression values but then tries to compile and run |
| * them using system's RegExp object. This means that there are |
| * rare edge cases where one JavaScript engine complains about |
| * your regular expression while others don't. |
| */ |
| scanRegExp: function() { |
| var index = 0; |
| var length = this.input.length; |
| var char = this.peek(); |
| var value = char; |
| var body = ""; |
| var flags = []; |
| var malformed = false; |
| var isCharSet = false; |
| var terminated; |
| |
| var scanUnexpectedChars = function() { |
| // Unexpected control character |
| if (char < " ") { |
| malformed = true; |
| this.trigger("warning", { |
| code: "W048", |
| line: this.line, |
| character: this.char |
| }); |
| } |
| |
| // Unexpected escaped character |
| if (char === "<") { |
| malformed = true; |
| this.trigger("warning", { |
| code: "W049", |
| line: this.line, |
| character: this.char, |
| data: [ char ] |
| }); |
| } |
| }.bind(this); |
| |
| // Regular expressions must start with '/' |
| if (!this.prereg || char !== "/") { |
| return null; |
| } |
| |
| index += 1; |
| terminated = false; |
| |
| // Try to get everything in between slashes. A couple of |
| // cases aside (see scanUnexpectedChars) we don't really |
| // care whether the resulting expression is valid or not. |
| // We will check that later using the RegExp object. |
| |
| while (index < length) { |
| char = this.peek(index); |
| value += char; |
| body += char; |
| |
| if (isCharSet) { |
| if (char === "]") { |
| if (this.peek(index - 1) !== "\\" || this.peek(index - 2) === "\\") { |
| isCharSet = false; |
| } |
| } |
| |
| if (char === "\\") { |
| index += 1; |
| char = this.peek(index); |
| body += char; |
| value += char; |
| |
| scanUnexpectedChars(); |
| } |
| |
| index += 1; |
| continue; |
| } |
| |
| if (char === "\\") { |
| index += 1; |
| char = this.peek(index); |
| body += char; |
| value += char; |
| |
| scanUnexpectedChars(); |
| |
| if (char === "/") { |
| index += 1; |
| continue; |
| } |
| |
| if (char === "[") { |
| index += 1; |
| continue; |
| } |
| } |
| |
| if (char === "[") { |
| isCharSet = true; |
| index += 1; |
| continue; |
| } |
| |
| if (char === "/") { |
| body = body.substr(0, body.length - 1); |
| terminated = true; |
| index += 1; |
| break; |
| } |
| |
| index += 1; |
| } |
| |
| // A regular expression that was never closed is an |
| // error from which we cannot recover. |
| |
| if (!terminated) { |
| this.trigger("error", { |
| code: "E015", |
| line: this.line, |
| character: this.from |
| }); |
| |
| return void this.trigger("fatal", { |
| line: this.line, |
| from: this.from |
| }); |
| } |
| |
| // Parse flags (if any). |
| |
| while (index < length) { |
| char = this.peek(index); |
| if (!/[gim]/.test(char)) { |
| break; |
| } |
| flags.push(char); |
| value += char; |
| index += 1; |
| } |
| |
| // Check regular expression for correctness. |
| |
| try { |
| new RegExp(body, flags.join("")); |
| } catch (err) { |
| malformed = true; |
| this.trigger("error", { |
| code: "E016", |
| line: this.line, |
| character: this.char, |
| data: [ err.message ] // Platform dependent! |
| }); |
| } |
| |
| return { |
| type: Token.RegExp, |
| value: value, |
| flags: flags, |
| isMalformed: malformed |
| }; |
| }, |
| |
| /* |
| * Scan for any occurrence of non-breaking spaces. Non-breaking spaces |
| * can be mistakenly typed on OS X with option-space. Non UTF-8 web |
| * pages with non-breaking pages produce syntax errors. |
| */ |
| scanNonBreakingSpaces: function() { |
| return state.option.nonbsp ? |
| this.input.search(/(\u00A0)/) : -1; |
| }, |
| |
| /* |
| * Scan for characters that get silently deleted by one or more browsers. |
| */ |
| scanUnsafeChars: function() { |
| return this.input.search(reg.unsafeChars); |
| }, |
| |
| /* |
| * Produce the next raw token or return 'null' if no tokens can be matched. |
| * This method skips over all space characters. |
| */ |
| next: function(checks) { |
| this.from = this.char; |
| |
| // Move to the next non-space character. |
| var start; |
| if (/\s/.test(this.peek())) { |
| start = this.char; |
| |
| while (/\s/.test(this.peek())) { |
| this.from += 1; |
| this.skip(); |
| } |
| } |
| |
| // Methods that work with multi-line structures and move the |
| // character pointer. |
| |
| var match = this.scanComments() || |
| this.scanStringLiteral(checks) || |
| this.scanTemplateLiteral(checks); |
| |
| if (match) { |
| return match; |
| } |
| |
| // Methods that don't move the character pointer. |
| |
| match = |
| this.scanRegExp() || |
| this.scanPunctuator() || |
| this.scanKeyword() || |
| this.scanIdentifier() || |
| this.scanNumericLiteral(); |
| |
| if (match) { |
| this.skip(match.tokenLength || match.value.length); |
| return match; |
| } |
| |
| // No token could be matched, give up. |
| |
| return null; |
| }, |
| |
| /* |
| * Switch to the next line and reset all char pointers. Once |
| * switched, this method also checks for other minor warnings. |
| */ |
| nextLine: function() { |
| var char; |
| |
| if (this.line >= this.getLines().length) { |
| return false; |
| } |
| |
| this.input = this.getLines()[this.line]; |
| this.line += 1; |
| this.char = 1; |
| this.from = 1; |
| |
| var inputTrimmed = this.input.trim(); |
| |
| var startsWith = function() { |
| return _.some(arguments, function(prefix) { |
| return inputTrimmed.indexOf(prefix) === 0; |
| }); |
| }; |
| |
| var endsWith = function() { |
| return _.some(arguments, function(suffix) { |
| return inputTrimmed.indexOf(suffix, inputTrimmed.length - suffix.length) !== -1; |
| }); |
| }; |
| |
| // If we are ignoring linter errors, replace the input with empty string |
| // if it doesn't already at least start or end a multi-line comment |
| if (state.ignoreLinterErrors === true) { |
| if (!startsWith("/*", "//") && !(this.inComment && endsWith("*/"))) { |
| this.input = ""; |
| } |
| } |
| |
| char = this.scanNonBreakingSpaces(); |
| if (char >= 0) { |
| this.trigger("warning", { code: "W125", line: this.line, character: char + 1 }); |
| } |
| |
| this.input = this.input.replace(/\t/g, state.tab); |
| char = this.scanUnsafeChars(); |
| |
| if (char >= 0) { |
| this.trigger("warning", { code: "W100", line: this.line, character: char }); |
| } |
| |
| // If there is a limit on line length, warn when lines get too |
| // long. |
| |
| if (state.option.maxlen && state.option.maxlen < this.input.length) { |
| var inComment = this.inComment || |
| startsWith.call(inputTrimmed, "//") || |
| startsWith.call(inputTrimmed, "/*"); |
| |
| var shouldTriggerError = !inComment || !reg.maxlenException.test(inputTrimmed); |
| |
| if (shouldTriggerError) { |
| this.trigger("warning", { code: "W101", line: this.line, character: this.input.length }); |
| } |
| } |
| |
| return true; |
| }, |
| |
| /* |
| * This is simply a synonym for nextLine() method with a friendlier |
| * public name. |
| */ |
| start: function() { |
| this.nextLine(); |
| }, |
| |
| /* |
| * Produce the next token. This function is called by advance() to get |
| * the next token. It returns a token in a JSLint-compatible format. |
| */ |
| token: function() { |
| /*jshint loopfunc:true */ |
| var checks = asyncTrigger(); |
| var token; |
| |
| |
| function isReserved(token, isProperty) { |
| if (!token.reserved) { |
| return false; |
| } |
| var meta = token.meta; |
| |
| if (meta && meta.isFutureReservedWord && state.inES5()) { |
| // ES3 FutureReservedWord in an ES5 environment. |
| if (!meta.es5) { |
| return false; |
| } |
| |
| // Some ES5 FutureReservedWord identifiers are active only |
| // within a strict mode environment. |
| if (meta.strictOnly) { |
| if (!state.option.strict && !state.isStrict()) { |
| return false; |
| } |
| } |
| |
| if (isProperty) { |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| // Produce a token object. |
| var create = function(type, value, isProperty, token) { |
| /*jshint validthis:true */ |
| var obj; |
| |
| if (type !== "(endline)" && type !== "(end)") { |
| this.prereg = false; |
| } |
| |
| if (type === "(punctuator)") { |
| switch (value) { |
| case ".": |
| case ")": |
| case "~": |
| case "#": |
| case "]": |
| case "++": |
| case "--": |
| this.prereg = false; |
| break; |
| default: |
| this.prereg = true; |
| } |
| |
| obj = Object.create(state.syntax[value] || state.syntax["(error)"]); |
| } |
| |
| if (type === "(identifier)") { |
| if (value === "return" || value === "case" || value === "typeof") { |
| this.prereg = true; |
| } |
| |
| if (_.has(state.syntax, value)) { |
| obj = Object.create(state.syntax[value] || state.syntax["(error)"]); |
| |
| // If this can't be a reserved keyword, reset the object. |
| if (!isReserved(obj, isProperty && type === "(identifier)")) { |
| obj = null; |
| } |
| } |
| } |
| |
| if (!obj) { |
| obj = Object.create(state.syntax[type]); |
| } |
| |
| obj.identifier = (type === "(identifier)"); |
| obj.type = obj.type || type; |
| obj.value = value; |
| obj.line = this.line; |
| obj.character = this.char; |
| obj.from = this.from; |
| if (obj.identifier && token) obj.raw_text = token.text || token.value; |
| if (token && token.startLine && token.startLine !== this.line) { |
| obj.startLine = token.startLine; |
| } |
| if (token && token.context) { |
| // Context of current token |
| obj.context = token.context; |
| } |
| if (token && token.depth) { |
| // Nested template depth |
| obj.depth = token.depth; |
| } |
| if (token && token.isUnclosed) { |
| // Mark token as unclosed string / template literal |
| obj.isUnclosed = token.isUnclosed; |
| } |
| |
| if (isProperty && obj.identifier) { |
| obj.isProperty = isProperty; |
| } |
| |
| obj.check = checks.check; |
| |
| return obj; |
| }.bind(this); |
| |
| for (;;) { |
| if (!this.input.length) { |
| if (this.nextLine()) { |
| return create("(endline)", ""); |
| } |
| |
| if (this.exhausted) { |
| return null; |
| } |
| |
| this.exhausted = true; |
| return create("(end)", ""); |
| } |
| |
| token = this.next(checks); |
| |
| if (!token) { |
| if (this.input.length) { |
| // Unexpected character. |
| this.trigger("error", { |
| code: "E024", |
| line: this.line, |
| character: this.char, |
| data: [ this.peek() ] |
| }); |
| |
| this.input = ""; |
| } |
| |
| continue; |
| } |
| |
| switch (token.type) { |
| case Token.StringLiteral: |
| this.triggerAsync("String", { |
| line: this.line, |
| char: this.char, |
| from: this.from, |
| startLine: token.startLine, |
| startChar: token.startChar, |
| value: token.value, |
| quote: token.quote |
| }, checks, function() { return true; }); |
| |
| return create("(string)", token.value, null, token); |
| |
| case Token.TemplateHead: |
| this.trigger("TemplateHead", { |
| line: this.line, |
| char: this.char, |
| from: this.from, |
| startLine: token.startLine, |
| startChar: token.startChar, |
| value: token.value |
| }); |
| return create("(template)", token.value, null, token); |
| |
| case Token.TemplateMiddle: |
| this.trigger("TemplateMiddle", { |
| line: this.line, |
| char: this.char, |
| from: this.from, |
| startLine: token.startLine, |
| startChar: token.startChar, |
| value: token.value |
| }); |
| return create("(template middle)", token.value, null, token); |
| |
| case Token.TemplateTail: |
| this.trigger("TemplateTail", { |
| line: this.line, |
| char: this.char, |
| from: this.from, |
| startLine: token.startLine, |
| startChar: token.startChar, |
| value: token.value |
| }); |
| return create("(template tail)", token.value, null, token); |
| |
| case Token.NoSubstTemplate: |
| this.trigger("NoSubstTemplate", { |
| line: this.line, |
| char: this.char, |
| from: this.from, |
| startLine: token.startLine, |
| startChar: token.startChar, |
| value: token.value |
| }); |
| return create("(no subst template)", token.value, null, token); |
| |
| case Token.Identifier: |
| this.trigger("Identifier", { |
| line: this.line, |
| char: this.char, |
| from: this.form, |
| name: token.value, |
| raw_name: token.text, |
| isProperty: state.tokens.curr.id === "." |
| }); |
| |
| /* falls through */ |
| case Token.Keyword: |
| case Token.NullLiteral: |
| case Token.BooleanLiteral: |
| return create("(identifier)", token.value, state.tokens.curr.id === ".", token); |
| |
| case Token.NumericLiteral: |
| if (token.isMalformed) { |
| this.trigger("warning", { |
| code: "W045", |
| line: this.line, |
| character: this.char, |
| data: [ token.value ] |
| }); |
| } |
| |
| this.triggerAsync("warning", { |
| code: "W114", |
| line: this.line, |
| character: this.char, |
| data: [ "0x-" ] |
| }, checks, function() { return token.base === 16 && state.jsonMode; }); |
| |
| this.triggerAsync("warning", { |
| code: "W115", |
| line: this.line, |
| character: this.char |
| }, checks, function() { |
| return state.isStrict() && token.base === 8 && token.isLegacy; |
| }); |
| |
| this.trigger("Number", { |
| line: this.line, |
| char: this.char, |
| from: this.from, |
| value: token.value, |
| base: token.base, |
| isMalformed: token.malformed |
| }); |
| |
| return create("(number)", token.value); |
| |
| case Token.RegExp: |
| return create("(regexp)", token.value); |
| |
| case Token.Comment: |
| state.tokens.curr.comment = true; |
| |
| if (token.isSpecial) { |
| return { |
| id: '(comment)', |
| value: token.value, |
| body: token.body, |
| type: token.commentType, |
| isSpecial: token.isSpecial, |
| line: this.line, |
| character: this.char, |
| from: this.from |
| }; |
| } |
| |
| break; |
| |
| case "": |
| break; |
| |
| default: |
| return create("(punctuator)", token.value); |
| } |
| } |
| } |
| }; |
| |
| exports.Lexer = Lexer; |
| exports.Context = Context; |