| 'use strict'; |
| |
| var CssSyntaxError = require('./error'); |
| |
| var constants = require('./const'); |
| var TYPE = constants.TYPE; |
| var NAME = constants.NAME; |
| var SYMBOL_TYPE = constants.SYMBOL_TYPE; |
| |
| var utils = require('./utils'); |
| var firstCharOffset = utils.firstCharOffset; |
| var cmpStr = utils.cmpStr; |
| var isNumber = utils.isNumber; |
| var findWhiteSpaceStart = utils.findWhiteSpaceStart; |
| var findWhiteSpaceEnd = utils.findWhiteSpaceEnd; |
| var findCommentEnd = utils.findCommentEnd; |
| var findStringEnd = utils.findStringEnd; |
| var findNumberEnd = utils.findNumberEnd; |
| var findIdentifierEnd = utils.findIdentifierEnd; |
| var findUrlRawEnd = utils.findUrlRawEnd; |
| |
// type reported when there is no token to inspect (lookups past the end, EOF)
var NULL = 0;

// multi-character token types
var WHITESPACE = TYPE.WhiteSpace;
var IDENTIFIER = TYPE.Identifier;
var NUMBER = TYPE.Number;
var STRING = TYPE.String;
var COMMENT = TYPE.Comment;
var PUNCTUATOR = TYPE.Punctuator;
var CDO = TYPE.CDO;
var CDC = TYPE.CDC;
var ATKEYWORD = TYPE.AtKeyword;
var FUNCTION = TYPE.Function;
var URL = TYPE.Url;
var RAW = TYPE.Raw;

// line break char codes: line feed, form feed, carriage return
var N = 0x0A;
var F = 0x0C;
var R = 0x0D;

// single-character types; the layout pass compares them with char codes directly
var STAR = TYPE.Asterisk;
var SLASH = TYPE.Solidus;
var FULLSTOP = TYPE.FullStop;
var PLUSSIGN = TYPE.PlusSign;
var HYPHENMINUS = TYPE.HyphenMinus;
var GREATERTHANSIGN = TYPE.GreaterThanSign;
var LESSTHANSIGN = TYPE.LessThanSign;
var EXCLAMATIONMARK = TYPE.ExclamationMark;
var COMMERCIALAT = TYPE.CommercialAt;
var QUOTATIONMARK = TYPE.QuotationMark;
var APOSTROPHE = TYPE.Apostrophe;
var LEFTPARENTHESIS = TYPE.LeftParenthesis;
var RIGHTPARENTHESIS = TYPE.RightParenthesis;
var LEFTCURLYBRACKET = TYPE.LeftCurlyBracket;
var RIGHTCURLYBRACKET = TYPE.RightCurlyBracket;
var LEFTSQUAREBRACKET = TYPE.LeftSquareBracket;
var RIGHTSQUAREBRACKET = TYPE.RightSquareBracket;

// smallest size of the lines/columns buffers
var MIN_BUFFER_SIZE = 16384;
// a token is packed into one integer: type in the bits from TYPE_SHIFT up,
// end offset in the bits below it
var TYPE_SHIFT = 24;
var OFFSET_MASK = (1 << TYPE_SHIFT) - 1;
// plain Array stands in when typed arrays are not available
var SafeUint32Array = typeof Uint32Array === 'undefined' ? Array : Uint32Array;
| |
// Builds the offset -> line and offset -> column tables of a tokenizer.
//
// tokenizer - object providing `lines`, `columns` (existing buffers or null),
//             `startLine` and `startColumn`; receives the filled buffers and
//             gets `linesAnsColumnsComputed` set to true
// source    - the text to index
//
// After the call `lines[i]` / `columns[i]` hold the position of the character
// at offset i, for every i from 0 to source.length inclusive (the last entry
// is the position right after the final character).
// Existing buffers are reused when they have room for source.length + 1 entries.
function computeLinesAndColumns(tokenizer, source) {
    var sourceLength = source.length;
    var start = firstCharOffset(source);
    var lines = tokenizer.lines;
    var line = tokenizer.startLine;
    var columns = tokenizer.columns;
    var column = tokenizer.startColumn;
    var code;
    var i;

    if (lines === null || lines.length < sourceLength + 1) {
        lines = new SafeUint32Array(Math.max(sourceLength + 1024, MIN_BUFFER_SIZE));
        columns = new SafeUint32Array(lines.length);
    }

    // Offsets skipped by firstCharOffset() take the start position. Without this
    // a reused buffer would keep whatever a previous source left in these entries.
    for (i = 0; i < start; i++) {
        lines[i] = line;
        columns[i] = column;
    }

    for (i = start; i < sourceLength; i++) {
        code = source.charCodeAt(i);

        lines[i] = line;
        columns[i] = column++;

        if (code === N || code === R || code === F) {
            // \r\n is a single line break: the \n stays on the line of its \r
            if (code === R && i + 1 < sourceLength && source.charCodeAt(i + 1) === N) {
                i++;
                lines[i] = line;
                columns[i] = column;
            }

            line++;
            column = 1;
        }
    }

    // position right after the last character
    lines[i] = line;
    columns[i] = column;

    tokenizer.linesAnsColumnsComputed = true;
    tokenizer.lines = lines;
    tokenizer.columns = columns;
}
| |
// Splits `source` into tokens, starting at `startPos`, and stores the result
// on `tokenizer`:
//   offsetAndType[i] - (type << TYPE_SHIFT) | end offset of token i;
//                      entry [tokenCount] holds the final offset with no type
//   balance[i]       - for a matched bracket: index of its pair;
//                      for a token inside a closed pair: index of the closing token;
//                      otherwise source.length
//   tokenCount       - number of tokens
// Existing buffers are reused when they have room for source.length + 1 entries.
function tokenLayout(tokenizer, source, startPos) {
    var end = source.length;
    var packed = tokenizer.offsetAndType;
    var pairs = tokenizer.balance;
    var count = 0;         // tokens emitted so far
    var lastType = 0;      // type of the most recently emitted token
    var pos = startPos;    // scan position
    var mark = 0;          // start of the latest identifier (later: end of ws after `url(`)
    var closeCode = 0;     // char code that closes the innermost open bracket
    var openEntry = 0;     // (closeCode << TYPE_SHIFT) | index of the innermost open bracket
    var openIdx = 0;
    var code;
    var type;
    var head;

    if (packed === null || packed.length < end + 1) {
        packed = new SafeUint32Array(end + 1024);
        pairs = new SafeUint32Array(end + 1024);
    }

    while (pos < end) {
        code = source.charCodeAt(pos);
        type = code < 0x80 ? SYMBOL_TYPE[code] : IDENTIFIER;

        // unbalanced until proven otherwise
        pairs[count] = end;

        switch (type) {
            case WHITESPACE:
                pos = findWhiteSpaceEnd(source, pos + 1);
                break;

            case PUNCTUATOR:
                if (code === closeCode) {
                    // closing bracket: link it with its opener and pop the stack
                    openIdx = openEntry & OFFSET_MASK;
                    openEntry = pairs[openIdx];
                    closeCode = openEntry >> TYPE_SHIFT;
                    pairs[count] = openIdx;
                    pairs[openIdx++] = count;
                    // tokens in between that are still unlinked point at the closer
                    while (openIdx < count) {
                        if (pairs[openIdx] === end) {
                            pairs[openIdx] = count;
                        }
                        openIdx++;
                    }
                } else if (code === LEFTSQUAREBRACKET ||
                           code === LEFTCURLYBRACKET ||
                           code === LEFTPARENTHESIS) {
                    // opening bracket: keep the previous stack entry in its balance slot
                    pairs[count] = openEntry;
                    closeCode = code === LEFTSQUAREBRACKET
                        ? RIGHTSQUAREBRACKET
                        : code === LEFTCURLYBRACKET
                            ? RIGHTCURLYBRACKET
                            : RIGHTPARENTHESIS;
                    openEntry = (closeCode << TYPE_SHIFT) | count;
                }

                // `/*` : the comment takes over the slot of the preceding solidus
                if (code === STAR && lastType === SLASH) {
                    type = COMMENT;
                    pos = findCommentEnd(source, pos + 1);
                    count--;
                    break;
                }

                // `-.123` and `+.123` : the number takes over the slot of the sign
                if (code === FULLSTOP &&
                    (lastType === PLUSSIGN || lastType === HYPHENMINUS) &&
                    pos + 1 < end &&
                    isNumber(source.charCodeAt(pos + 1))) {
                    type = NUMBER;
                    pos = findNumberEnd(source, pos + 2, false);
                    count--;
                    break;
                }

                // `<!--`
                if (code === EXCLAMATIONMARK &&
                    lastType === LESSTHANSIGN &&
                    pos + 2 < end &&
                    source.charCodeAt(pos + 1) === HYPHENMINUS &&
                    source.charCodeAt(pos + 2) === HYPHENMINUS) {
                    type = CDO;
                    pos += 3;
                    count--;
                    break;
                }

                // `-->`
                if (code === HYPHENMINUS &&
                    lastType === HYPHENMINUS &&
                    pos + 1 < end &&
                    source.charCodeAt(pos + 1) === GREATERTHANSIGN) {
                    type = CDC;
                    pos += 2;
                    count--;
                    break;
                }

                // `ident(` : the identifier and the parenthesis become one token
                if (code === LEFTPARENTHESIS && lastType === IDENTIFIER) {
                    pos += 1;
                    count--;
                    // the bracket bookkeeping done above moves one slot back
                    pairs[count] = pairs[count + 1];
                    openEntry--;

                    // anything but a 4 char long `url(` (case insensitive) is a function
                    if (pos - mark !== 4 || !cmpStr(source, mark, pos, 'url(')) {
                        type = FUNCTION;
                        break;
                    }

                    mark = findWhiteSpaceEnd(source, pos);
                    head = source.charCodeAt(mark);

                    // url( followed by a parenthesis or a quote is a plain url token
                    if (head === LEFTPARENTHESIS ||
                        head === RIGHTPARENTHESIS ||
                        head === QUOTATIONMARK ||
                        head === APOSTROPHE) {
                        type = URL;
                        break;
                    }

                    // otherwise the content is raw: emit `url(`, optional whitespace, raw
                    packed[count++] = (URL << TYPE_SHIFT) | pos;
                    pairs[count] = end;

                    if (mark !== pos) {
                        packed[count++] = (WHITESPACE << TYPE_SHIFT) | mark;
                        pairs[count] = end;
                    }

                    type = RAW;
                    pos = findUrlRawEnd(source, mark);
                    break;
                }

                // an ordinary punctuator: its char code is its type
                type = code;
                pos += 1;
                break;

            case NUMBER:
                pos = findNumberEnd(source, pos + 1, lastType !== FULLSTOP);

                // a leading dot, dash or plus is absorbed into the number
                if (lastType === FULLSTOP ||
                    lastType === HYPHENMINUS ||
                    lastType === PLUSSIGN) {
                    count--;
                }

                break;

            case STRING:
                pos = findStringEnd(source, pos + 1, code);
                break;

            default:
                mark = pos;
                pos = findIdentifierEnd(source, pos);

                // a leading dash is absorbed into the identifier
                if (lastType === HYPHENMINUS) {
                    count--;
                    // look one token further back to catch `@-prefix-ident`
                    lastType = count === 0 ? 0 : packed[count - 1] >> TYPE_SHIFT;
                }

                // `@` followed by an identifier becomes a single at-keyword
                if (lastType === COMMERCIALAT) {
                    count--;
                    type = ATKEYWORD;
                }
        }

        packed[count++] = (type << TYPE_SHIFT) | pos;
        lastType = type;
    }

    // terminate the arrays
    packed[count] = pos;
    pairs[count] = end;
    pairs[end] = end; // prevents false positive balance match with any token

    // brackets left open never got a pair: mark them as unbalanced
    while (openEntry !== 0) {
        openIdx = openEntry & OFFSET_MASK;
        openEntry = pairs[openIdx];
        pairs[openIdx] = end;
    }

    tokenizer.offsetAndType = packed;
    tokenizer.tokenCount = count;
    tokenizer.balance = pairs;
}
| |
| // |
| // tokenizer |
| // |
| |
// Cursor over the tokens of a source string.
//
// source      - text to tokenize; falsy values are treated as an empty string
// startOffset - added to every offset reported in locations (default 0)
// startLine   - line of the first character (default 1)
// startColumn - column of the first character (default 1)
//
// The token list is produced by tokenLayout() as packed integers:
// offsetAndType[i] = (type << TYPE_SHIFT) | end offset of token i, so the start
// of token i is the end of token i - 1 (or firstCharOffset for the first token).
var Tokenizer = function(source, startOffset, startLine, startColumn) {
    // buffers are kept between setSource() calls so they can be reused
    this.offsetAndType = null;
    this.balance = null;
    this.lines = null;
    this.columns = null;

    this.setSource(source, startOffset, startLine, startColumn);
};

Tokenizer.prototype = {
    // Replaces the source, re-tokenizes it and positions the cursor on the first token.
    setSource: function(source, startOffset, startLine, startColumn) {
        var safeSource = String(source || '');
        var start = firstCharOffset(safeSource);

        this.source = safeSource;
        this.firstCharOffset = start;
        this.startOffset = typeof startOffset === 'undefined' ? 0 : startOffset;
        this.startLine = typeof startLine === 'undefined' ? 1 : startLine;
        this.startColumn = typeof startColumn === 'undefined' ? 1 : startColumn;
        // line/column tables are built on first demand, see getLocation()
        this.linesAnsColumnsComputed = false;

        this.eof = false;
        this.currentToken = -1;
        this.tokenType = 0;
        this.tokenStart = start;
        this.tokenEnd = start;

        tokenLayout(this, safeSource, start);
        this.next();
    },

    // Type of the token `offset` positions away from the current one, NULL past the end.
    lookupType: function(offset) {
        offset += this.currentToken;

        if (offset < this.tokenCount) {
            return this.offsetAndType[offset] >> TYPE_SHIFT;
        }

        return NULL;
    },
    // Type of the first non-whitespace token at or after the given relative
    // position, NULL when there is none.
    lookupNonWSType: function(offset) {
        offset += this.currentToken;

        for (var type; offset < this.tokenCount; offset++) {
            type = this.offsetAndType[offset] >> TYPE_SHIFT;

            if (type !== WHITESPACE) {
                return type;
            }
        }

        return NULL;
    },
    // Compares the text of the token `offset` positions away from the current
    // one with referenceStr using cmpStr(); false past the end.
    lookupValue: function(offset, referenceStr) {
        offset += this.currentToken;

        if (offset < this.tokenCount) {
            return cmpStr(
                this.source,
                // the first token has no predecessor entry to take its start from
                offset > 0
                    ? this.offsetAndType[offset - 1] & OFFSET_MASK
                    : this.firstCharOffset,
                this.offsetAndType[offset] & OFFSET_MASK,
                referenceStr
            );
        }

        return false;
    },
    // Start offset of the token with absolute index tokenNum.
    // Indexes at or past tokenCount give the offset stored after the last token.
    getTokenStart: function(tokenNum) {
        if (tokenNum === this.currentToken) {
            return this.tokenStart;
        }

        if (tokenNum > 0) {
            return tokenNum < this.tokenCount
                ? this.offsetAndType[tokenNum - 1] & OFFSET_MASK
                : this.offsetAndType[this.tokenCount] & OFFSET_MASK;
        }

        return this.firstCharOffset;
    },
    // Start of the current token, or the start of the whitespace token right
    // before it when there is one.
    getOffsetExcludeWS: function() {
        if (this.currentToken > 0) {
            if ((this.offsetAndType[this.currentToken - 1] >> TYPE_SHIFT) === WHITESPACE) {
                return this.currentToken > 1
                    ? this.offsetAndType[this.currentToken - 2] & OFFSET_MASK
                    : this.firstCharOffset;
            }
        }
        return this.tokenStart;
    },
    // Scans forward from startToken until a stop token is met and returns the
    // distance from the CURRENT token to the stop position, in tokens.
    // Scanning stops at a token of type endTokenType1 or endTokenType2 (the
    // latter is counted in when includeTokenType2 is truthy), at a token whose
    // balance entry points before startToken, or at the end of the token list.
    // Balanced bracket blocks are jumped over as a whole.
    getRawLength: function(startToken, endTokenType1, endTokenType2, includeTokenType2) {
        var cursor = startToken;
        var balanceEnd;

        loop:
        for (; cursor < this.tokenCount; cursor++) {
            balanceEnd = this.balance[cursor];

            // balance end points to offset before start
            if (balanceEnd < startToken) {
                break loop;
            }

            // check token is stop type
            switch (this.offsetAndType[cursor] >> TYPE_SHIFT) {
                case endTokenType1:
                    break loop;

                case endTokenType2:
                    if (includeTokenType2) {
                        cursor++;
                    }
                    break loop;

                default:
                    // fast forward to the end of balanced block
                    if (this.balance[balanceEnd] === cursor) {
                        cursor = balanceEnd;
                    }
            }

        }

        return cursor - this.currentToken;
    },
    // True when the balance entry of the current token is less than pos.
    isBalanceEdge: function(pos) {
        var balanceStart = this.balance[this.currentToken];
        return balanceStart < pos;
    },

    // Source text of the current token.
    getTokenValue: function() {
        return this.source.substring(this.tokenStart, this.tokenEnd);
    },
    // Source text from `start` up to the beginning of the current token.
    substrToCursor: function(start) {
        return this.source.substring(start, this.tokenStart);
    },

    // Moves the cursor over a run of whitespace tokens, if it stands on one.
    skipWS: function() {
        for (var i = this.currentToken, skipTokenCount = 0; i < this.tokenCount; i++, skipTokenCount++) {
            if ((this.offsetAndType[i] >> TYPE_SHIFT) !== WHITESPACE) {
                break;
            }
        }

        if (skipTokenCount > 0) {
            this.skip(skipTokenCount);
        }
    },
    // Moves the cursor over whitespace and comment tokens.
    skipSC: function() {
        while (this.tokenType === WHITESPACE || this.tokenType === COMMENT) {
            this.next();
        }
    },
    // Moves the cursor by tokenCount tokens; going past the last token ends in the EOF state.
    skip: function(tokenCount) {
        var next = this.currentToken + tokenCount;

        if (next < this.tokenCount) {
            this.currentToken = next;
            // the first token has no predecessor entry to take its start from
            this.tokenStart = next > 0
                ? this.offsetAndType[next - 1] & OFFSET_MASK
                : this.firstCharOffset;
            next = this.offsetAndType[next];
            this.tokenType = next >> TYPE_SHIFT;
            this.tokenEnd = next & OFFSET_MASK;
        } else {
            this.currentToken = this.tokenCount;
            this.next();
        }
    },
    // Moves the cursor to the next token, or into the EOF state after the last one:
    // eof is true, tokenType is NULL, tokenStart and tokenEnd equal source.length.
    next: function() {
        var next = this.currentToken + 1;

        if (next < this.tokenCount) {
            this.currentToken = next;
            this.tokenStart = this.tokenEnd;
            next = this.offsetAndType[next];
            this.tokenType = next >> TYPE_SHIFT;
            this.tokenEnd = next & OFFSET_MASK;
        } else {
            this.currentToken = this.tokenCount;
            this.eof = true;
            this.tokenType = NULL;
            this.tokenStart = this.tokenEnd = this.source.length;
        }
    },

    // Moves past the current token when it has the given type,
    // otherwise throws a CssSyntaxError via error().
    eat: function(tokenType) {
        if (this.tokenType !== tokenType) {
            var offset = this.tokenStart;
            var message = NAME[tokenType] + ' is expected';

            // tweak message and offset
            if (tokenType === IDENTIFIER) {
                // when identifier is expected but there is a function or url
                if (this.tokenType === FUNCTION || this.tokenType === URL) {
                    offset = this.tokenEnd - 1;
                    message += ' but function found';
                }
            } else {
                // when test type is part of another token show error for current position + 1
                // e.g. eat(HYPHENMINUS) will fail on "-foo", but pointing on "-" is odd
                if (this.source.charCodeAt(this.tokenStart) === tokenType) {
                    offset = offset + 1;
                }
            }

            this.error(message, offset);
        }

        this.next();
    },
    // eat() preceded by skipWS().
    eatNonWS: function(tokenType) {
        this.skipWS();
        this.eat(tokenType);
    },

    // eat() that returns the text of the consumed token.
    consume: function(tokenType) {
        var value = this.getTokenValue();

        this.eat(tokenType);

        return value;
    },
    // Consumes a function token and returns its text without the last character.
    consumeFunctionName: function() {
        var name = this.source.substring(this.tokenStart, this.tokenEnd - 1);

        this.eat(FUNCTION);

        return name;
    },
    // consume() preceded by skipWS().
    consumeNonWS: function(tokenType) {
        this.skipWS();

        return this.consume(tokenType);
    },

    // Moves past the current token when it is an identifier matching `name`
    // (as decided by cmpStr), otherwise throws a CssSyntaxError via error().
    expectIdentifier: function(name) {
        if (this.tokenType !== IDENTIFIER || cmpStr(this.source, this.tokenStart, this.tokenEnd, name) === false) {
            this.error('Identifier `' + name + '` is expected');
        }

        this.next();
    },

    // Location { source, offset, line, column } of a source offset.
    // The reported offset is shifted by startOffset; `filename` is passed
    // through as `source`. Builds the line/column tables on first use.
    getLocation: function(offset, filename) {
        if (!this.linesAnsColumnsComputed) {
            computeLinesAndColumns(this, this.source);
        }

        return {
            source: filename,
            offset: this.startOffset + offset,
            line: this.lines[offset],
            column: this.columns[offset]
        };
    },

    // Location range { source, start, end } for a pair of source offsets,
    // with `start` and `end` shaped like the result of getLocation().
    getLocationRange: function(start, end, filename) {
        if (!this.linesAnsColumnsComputed) {
            computeLinesAndColumns(this, this.source);
        }

        return {
            source: filename,
            start: {
                offset: this.startOffset + start,
                line: this.lines[start],
                column: this.columns[start]
            },
            end: {
                offset: this.startOffset + end,
                line: this.lines[end],
                column: this.columns[end]
            }
        };
    },

    // Throws a CssSyntaxError with `message` (default 'Unexpected input').
    // The reported position is `offset` when it is given and lies inside the
    // source; otherwise it is the result of findWhiteSpaceStart() for the last
    // character when the cursor is at EOF, or the start of the current token.
    error: function(message, offset) {
        var location = typeof offset !== 'undefined' && offset < this.source.length
            ? this.getLocation(offset)
            : this.eof
                ? this.getLocation(findWhiteSpaceStart(this.source, this.source.length - 1))
                : this.getLocation(this.tokenStart);

        throw new CssSyntaxError(
            message || 'Unexpected input',
            this.source,
            location.offset,
            location.line,
            location.column
        );
    },

    // Debug helper: list of { idx, type, chunk, balance } for every token,
    // where `type` is the type name and `chunk` is the token text.
    dump: function() {
        // the first token starts at firstCharOffset, same as getTokenStart(0)
        var offset = this.firstCharOffset;

        return Array.prototype.slice.call(this.offsetAndType, 0, this.tokenCount).map(function(item, idx) {
            var start = offset;
            var end = item & OFFSET_MASK;

            offset = end;

            return {
                idx: idx,
                type: NAME[item >> TYPE_SHIFT],
                chunk: this.source.substring(start, end),
                balance: this.balance[idx]
            };
        }, this);
    }
};
| |
// expose the error class as a static property of the tokenizer
Tokenizer.CssSyntaxError = CssSyntaxError;

// mirror every constant and then every utils helper as a static property;
// utils is copied last, so it wins whenever a key is shared
[constants, utils].forEach(function(statics) {
    Object.keys(statics).forEach(function(name) {
        Tokenizer[name] = statics[name];
    });
});

// warm up the tokenizer to eliminate code branches that never execute
// and fix soft deoptimizations (insufficient type feedback)
new Tokenizer('\n\r\r\n\f<!---->//""\'\'/*\r\n\f*/1a;.\\31\t\+2{url(a);func();+1.2e3 -.4e-5 .6e+7}').getLocation();

module.exports = Tokenizer;