blob: 60457938e1d65d198e07cbe2fc2120df4bda4c00 [file] [log] [blame]
'use strict';
var CssSyntaxError = require('./error');
var constants = require('./const');
var TYPE = constants.TYPE;
var NAME = constants.NAME;
var SYMBOL_TYPE = constants.SYMBOL_TYPE;
var utils = require('./utils');
var firstCharOffset = utils.firstCharOffset;
var cmpStr = utils.cmpStr;
var isNumber = utils.isNumber;
var findWhiteSpaceStart = utils.findWhiteSpaceStart;
var findWhiteSpaceEnd = utils.findWhiteSpaceEnd;
var findCommentEnd = utils.findCommentEnd;
var findStringEnd = utils.findStringEnd;
var findNumberEnd = utils.findNumberEnd;
var findIdentifierEnd = utils.findIdentifierEnd;
var findUrlRawEnd = utils.findUrlRawEnd;
// token type returned by lookups when there is no token at the requested position
var NULL = 0;
// token types produced by the tokenizer
var WHITESPACE = TYPE.WhiteSpace;
var IDENTIFIER = TYPE.Identifier;
var NUMBER = TYPE.Number;
var STRING = TYPE.String;
var COMMENT = TYPE.Comment;
var PUNCTUATOR = TYPE.Punctuator;
var CDO = TYPE.CDO;
var CDC = TYPE.CDC;
var ATRULE = TYPE.Atrule;
var FUNCTION = TYPE.Function;
var URL = TYPE.Url;
var RAW = TYPE.Raw;
// char codes treated as line breaks when computing lines and columns
var N = 10; // \n
var F = 12; // \f
var R = 13; // \r
// the constants below are compared directly against char codes of the source;
// a single-char punctuator token also uses its char code as the token type
var STAR = TYPE.Asterisk;
var SLASH = TYPE.Solidus;
var FULLSTOP = TYPE.FullStop;
var PLUSSIGN = TYPE.PlusSign;
var HYPHENMINUS = TYPE.HyphenMinus;
var GREATERTHANSIGN = TYPE.GreaterThanSign;
var LESSTHANSIGN = TYPE.LessThanSign;
var EXCLAMATIONMARK = TYPE.ExclamationMark;
var COMMERCIALAT = TYPE.CommercialAt;
var QUOTATIONMARK = TYPE.QuotationMark;
var APOSTROPHE = TYPE.Apostrophe;
var LEFTPARENTHESIS = TYPE.LeftParenthesis;
var RIGHTPARENTHESIS = TYPE.RightParenthesis;
var LEFTCURLYBRACKET = TYPE.LeftCurlyBracket;
var RIGHTCURLYBRACKET = TYPE.RightCurlyBracket;
var LEFTSQUAREBRACKET = TYPE.LeftSquareBracket;
var RIGHTSQUAREBRACKET = TYPE.RightSquareBracket;
// lower bound for the size of the line/column lookup buffers
var MIN_BUFFER_SIZE = 16 * 1024;
// every token is stored as a single integer: (type << TYPE_SHIFT) | endOffset,
// i.e. the low 24 bits hold the offset where the token ends and the bits above hold the type
var OFFSET_MASK = 0x00FFFFFF;
var TYPE_SHIFT = 24;
var SafeUint32Array = typeof Uint32Array !== 'undefined' ? Uint32Array : Array; // fallback on Array when TypedArray is not supported
/**
 * Fills the offset -> line and offset -> column lookup tables of a tokenizer.
 *
 * Counting starts at `tokenizer.startLine` / `tokenizer.startColumn` from the
 * offset returned by `firstCharOffset(source)`. `\n`, `\f`, `\r` and the pair
 * `\r\n` each end a line; both chars of `\r\n` stay on the line they terminate.
 * One extra entry is written for the position right after the last char.
 * Existing tables are reused when they are big enough, otherwise new ones are
 * allocated. The tables are stored back on the tokenizer and
 * `tokenizer.linesAnsColumnsComputed` is set to true.
 *
 * @param {Object} tokenizer Object providing `lines`, `columns`, `startLine` and `startColumn`.
 * @param {string} source Text to index.
 */
function computeLinesAndColumns(tokenizer, source) {
    var length = source.length;
    var pos = firstCharOffset(source);
    var lineTable = tokenizer.lines;
    var columnTable = tokenizer.columns;
    var currentLine = tokenizer.startLine;
    var currentColumn = tokenizer.startColumn;
    var tablesFit = lineTable !== null && lineTable.length >= length + 1;

    if (!tablesFit) {
        lineTable = new SafeUint32Array(Math.max(length + 1024, MIN_BUFFER_SIZE));
        columnTable = new SafeUint32Array(lineTable.length);
    }

    while (pos < length) {
        var charCode = source.charCodeAt(pos);
        var isLineBreak = charCode === N || charCode === R || charCode === F;

        lineTable[pos] = currentLine;
        columnTable[pos] = currentColumn++;

        if (isLineBreak) {
            var isCRLF =
                charCode === R &&
                pos + 1 < length &&
                source.charCodeAt(pos + 1) === N;

            // \r\n is a single line break: record \n on the same line and step over it
            if (isCRLF) {
                pos++;
                lineTable[pos] = currentLine;
                columnTable[pos] = currentColumn;
            }

            currentLine++;
            currentColumn = 1;
        }

        pos++;
    }

    // entry for the position right after the last char
    lineTable[pos] = currentLine;
    columnTable[pos] = currentColumn;

    tokenizer.linesAnsColumnsComputed = true;
    tokenizer.lines = lineTable;
    tokenizer.columns = columnTable;
}
// Splits `source` into tokens starting at `startPos` and stores the result on `tokenizer`:
//   - tokenizer.offsetAndType[i] is `(type << TYPE_SHIFT) | endOffset` of token i;
//     the entry at index tokenCount holds the offset where scanning stopped
//   - tokenizer.balance[i] holds bracket pairing info for token i (see below)
//   - tokenizer.tokenCount is the number of tokens
// Existing buffers on the tokenizer are reused when they are big enough.
//
// Balance values once scanning is finished:
//   - an opening bracket (or function/url token) with a matching closing bracket
//     holds the index of that closing token, and the closing token holds the index of the opener
//   - a token between a matched pair that did not get a pair of its own holds
//     the index of the enclosing closing token
//   - everything else, including openers that are never closed, holds sourceLength
function tokenLayout(tokenizer, source, startPos) {
var sourceLength = source.length;
var offsetAndType = tokenizer.offsetAndType;
var balance = tokenizer.balance;
var tokenCount = 0;
var prevType = 0;
var offset = startPos;
// start offset of the most recent identifier (used to detect `url(`)
var anchor = 0;
// char code that closes the innermost open bracket, 0 when nothing is open
var balanceCloseCode = 0;
// innermost open bracket packed as (closeCode << TYPE_SHIFT) | tokenIndex, 0 when nothing is open;
// while a bracket is open, balance[its index] keeps the previous balanceStart value,
// so open brackets form a stack
var balanceStart = 0;
var balancePrev = 0;
if (offsetAndType === null || offsetAndType.length < sourceLength + 1) {
offsetAndType = new SafeUint32Array(sourceLength + 1024);
balance = new SafeUint32Array(sourceLength + 1024);
}
while (offset < sourceLength) {
var code = source.charCodeAt(offset);
// chars outside of ASCII are treated as a part of an identifier
var type = code < 0x80 ? SYMBOL_TYPE[code] : IDENTIFIER;
// sourceLength is the "no balance pair" marker
balance[tokenCount] = sourceLength;
switch (type) {
case WHITESPACE:
offset = findWhiteSpaceEnd(source, offset + 1);
break;
case PUNCTUATOR:
switch (code) {
// closing bracket that matches the innermost open bracket:
// pop the stack, link opener and closer to each other and
// point every unpaired token between them to the closer
case balanceCloseCode:
balancePrev = balanceStart & OFFSET_MASK;
balanceStart = balance[balancePrev];
balanceCloseCode = balanceStart >> TYPE_SHIFT;
balance[tokenCount] = balancePrev;
balance[balancePrev++] = tokenCount;
for (; balancePrev < tokenCount; balancePrev++) {
if (balance[balancePrev] === sourceLength) {
balance[balancePrev] = tokenCount;
}
}
break;
// opening brackets: push on the stack, saving the previous stack top in balance[tokenCount]
case LEFTSQUAREBRACKET:
balance[tokenCount] = balanceStart;
balanceCloseCode = RIGHTSQUAREBRACKET;
balanceStart = (balanceCloseCode << TYPE_SHIFT) | tokenCount;
break;
case LEFTCURLYBRACKET:
balance[tokenCount] = balanceStart;
balanceCloseCode = RIGHTCURLYBRACKET;
balanceStart = (balanceCloseCode << TYPE_SHIFT) | tokenCount;
break;
case LEFTPARENTHESIS:
balance[tokenCount] = balanceStart;
balanceCloseCode = RIGHTPARENTHESIS;
balanceStart = (balanceCloseCode << TYPE_SHIFT) | tokenCount;
break;
}
// the checks below merge the current char with the previous token into a longer token;
// `tokenCount--` makes the write after the switch overwrite the previous token entry
// /*
if (code === STAR && prevType === SLASH) {
type = COMMENT;
offset = findCommentEnd(source, offset + 1);
tokenCount--; // rewrite prev token
break;
}
// edge case for -.123 and +.123
if (code === FULLSTOP && (prevType === PLUSSIGN || prevType === HYPHENMINUS)) {
if (offset + 1 < sourceLength && isNumber(source.charCodeAt(offset + 1))) {
type = NUMBER;
offset = findNumberEnd(source, offset + 2, false);
tokenCount--; // rewrite prev token
break;
}
}
// <!--
if (code === EXCLAMATIONMARK && prevType === LESSTHANSIGN) {
if (offset + 2 < sourceLength &&
source.charCodeAt(offset + 1) === HYPHENMINUS &&
source.charCodeAt(offset + 2) === HYPHENMINUS) {
type = CDO;
offset = offset + 3;
tokenCount--; // rewrite prev token
break;
}
}
// -->
if (code === HYPHENMINUS && prevType === HYPHENMINUS) {
if (offset + 1 < sourceLength && source.charCodeAt(offset + 1) === GREATERTHANSIGN) {
type = CDC;
offset = offset + 2;
tokenCount--; // rewrite prev token
break;
}
}
// ident(
if (code === LEFTPARENTHESIS && prevType === IDENTIFIER) {
offset = offset + 1;
tokenCount--; // rewrite prev token
// the parenthesis was pushed on the balance stack above under its own index;
// move the saved stack link to the merged token and shift the stored index by one
balance[tokenCount] = balance[tokenCount + 1];
balanceStart--;
// 4 char length identifier and equal to `url(` (case insensitive)
if (offset - anchor === 4 && cmpStr(source, anchor, offset, 'url(')) {
// special case for url() because it can contain any symbols sequence with few exceptions
anchor = findWhiteSpaceEnd(source, offset);
code = source.charCodeAt(anchor);
if (code !== LEFTPARENTHESIS &&
code !== RIGHTPARENTHESIS &&
code !== QUOTATIONMARK &&
code !== APOSTROPHE) {
// unquoted url content: emit `url(`, optional whitespace and the raw value as separate tokens
// url(
offsetAndType[tokenCount++] = (URL << TYPE_SHIFT) | offset;
balance[tokenCount] = sourceLength;
// ws*
if (anchor !== offset) {
offsetAndType[tokenCount++] = (WHITESPACE << TYPE_SHIFT) | anchor;
balance[tokenCount] = sourceLength;
}
// raw
type = RAW;
offset = findUrlRawEnd(source, anchor);
} else {
type = URL;
}
} else {
type = FUNCTION;
}
break;
}
// a single char punctuator: its char code is used as the token type
type = code;
offset = offset + 1;
break;
case NUMBER:
offset = findNumberEnd(source, offset + 1, prevType !== FULLSTOP);
// merge number with a preceding dot, dash or plus
if (prevType === FULLSTOP ||
prevType === HYPHENMINUS ||
prevType === PLUSSIGN) {
tokenCount--; // rewrite prev token
}
break;
case STRING:
offset = findStringEnd(source, offset + 1, code);
break;
default:
// any other type is scanned as an identifier
anchor = offset;
offset = findIdentifierEnd(source, offset);
// merge identifier with a preceding dash
if (prevType === HYPHENMINUS) {
// rewrite prev token
tokenCount--;
// restore prev prev token type
// for case @-prefix-ident
prevType = tokenCount === 0 ? 0 : offsetAndType[tokenCount - 1] >> TYPE_SHIFT;
}
if (prevType === COMMERCIALAT) {
// rewrite prev token and change type to <at-keyword-token>
tokenCount--;
type = ATRULE;
}
}
// store the token: its type together with the offset where it ends
offsetAndType[tokenCount++] = (type << TYPE_SHIFT) | offset;
prevType = type;
}
// finalize arrays
offsetAndType[tokenCount] = offset;
balance[tokenCount] = sourceLength;
balance[sourceLength] = sourceLength; // prevents false positive balance match with any token
// brackets that are still open have no pair: walk the stack and reset
// their entries (which hold stack links) to the "no pair" marker
while (balanceStart !== 0) {
balancePrev = balanceStart & OFFSET_MASK;
balanceStart = balance[balancePrev];
balance[balancePrev] = sourceLength;
}
tokenizer.offsetAndType = offsetAndType;
tokenizer.tokenCount = tokenCount;
tokenizer.balance = balance;
}
//
// tokenizer
//
/**
 * Token stream over a CSS source string.
 *
 * The whole source is tokenized at once by `setSource()`; the instance then acts
 * as a cursor over the resulting tokens. The current token is described by
 * `currentToken` (index), `tokenType`, `tokenStart` and `tokenEnd` (offsets in source).
 * `eof` becomes true when the cursor moves past the last token.
 *
 * @param {string} source CSS text; any value is converted with String(), falsy values become ''.
 * @param {number} [startOffset=0] Value added to offsets in reported locations.
 * @param {number} [startLine=1] Line number of the first char.
 * @param {number} [startColumn=1] Column number of the first char.
 */
var Tokenizer = function(source, startOffset, startLine, startColumn) {
    // buffers are created on demand by the layout and location routines
    this.offsetAndType = null;
    this.balance = null;
    this.lines = null;
    this.columns = null;

    this.setSource(source, startOffset, startLine, startColumn);
};

Tokenizer.prototype = {
    /**
     * Replaces the source, tokenizes it and moves the cursor to the first token.
     * Parameters are the same as for the constructor.
     */
    setSource: function(source, startOffset, startLine, startColumn) {
        var safeSource = String(source || '');
        var start = firstCharOffset(safeSource);

        this.source = safeSource;
        this.firstCharOffset = start;
        this.startOffset = typeof startOffset === 'undefined' ? 0 : startOffset;
        this.startLine = typeof startLine === 'undefined' ? 1 : startLine;
        this.startColumn = typeof startColumn === 'undefined' ? 1 : startColumn;
        // line/column tables are computed lazily on the first location request
        this.linesAnsColumnsComputed = false;

        this.eof = false;
        this.currentToken = -1;
        this.tokenType = 0;
        this.tokenStart = start;
        this.tokenEnd = start;

        tokenLayout(this, safeSource, start);
        this.next();
    },

    /**
     * Returns the type of the token `offset` tokens away from the current one,
     * or NULL when there is no such token.
     */
    lookupType: function(offset) {
        offset += this.currentToken;

        if (offset < this.tokenCount) {
            return this.offsetAndType[offset] >> TYPE_SHIFT;
        }

        return NULL;
    },

    /**
     * Returns the type of the first non-whitespace token at or after the token
     * `offset` tokens away from the current one, or NULL when there is none.
     */
    lookupNonWSType: function(offset) {
        offset += this.currentToken;

        for (var type; offset < this.tokenCount; offset++) {
            type = this.offsetAndType[offset] >> TYPE_SHIFT;

            if (type !== WHITESPACE) {
                return type;
            }
        }

        return NULL;
    },

    /**
     * Compares (using cmpStr) the text of the token `offset` tokens away from
     * the current one with `referenceStr`. Returns false when there is no such token.
     */
    lookupValue: function(offset, referenceStr) {
        offset += this.currentToken;

        if (offset < this.tokenCount) {
            // the very first token has no predecessor to take the start from;
            // it starts at firstCharOffset (not at 0)
            var start = offset > 0
                ? this.offsetAndType[offset - 1] & OFFSET_MASK
                : this.firstCharOffset;

            return cmpStr(
                this.source,
                start,
                this.offsetAndType[offset] & OFFSET_MASK,
                referenceStr
            );
        }

        return false;
    },

    /**
     * Returns the source offset where the token with index `tokenNum` starts.
     * For an index at or beyond tokenCount the offset where tokenizing stopped is returned.
     */
    getTokenStart: function(tokenNum) {
        if (tokenNum === this.currentToken) {
            return this.tokenStart;
        }

        if (tokenNum > 0) {
            // a token starts where the previous one ends
            return tokenNum < this.tokenCount
                ? this.offsetAndType[tokenNum - 1] & OFFSET_MASK
                : this.offsetAndType[this.tokenCount] & OFFSET_MASK;
        }

        return this.firstCharOffset;
    },

    /**
     * Returns the start offset of the current token or, when the previous token
     * is whitespace, the start offset of that whitespace token.
     */
    getOffsetExcludeWS: function() {
        if (this.currentToken > 0) {
            if ((this.offsetAndType[this.currentToken - 1] >> TYPE_SHIFT) === WHITESPACE) {
                return this.currentToken > 1
                    ? this.offsetAndType[this.currentToken - 2] & OFFSET_MASK
                    : this.firstCharOffset;
            }
        }

        return this.tokenStart;
    },

    /**
     * Scans tokens from index `startToken` until a stop token is met and returns
     * the distance from the CURRENT token to the token where scanning stopped
     * (suitable to pass to skip() when startToken is the current token).
     *
     * Scanning stops at a token of type `endTokenType1`, at a token of type
     * `endTokenType2` (which is included in the result when `includeTokenType2`
     * is truthy), at a token whose balance pair is located before `startToken`,
     * or at the end of the token list. Balanced blocks are skipped as a whole.
     */
    getRawLength: function(startToken, endTokenType1, endTokenType2, includeTokenType2) {
        var cursor = startToken;
        var balanceEnd;

        loop:
        for (; cursor < this.tokenCount; cursor++) {
            balanceEnd = this.balance[cursor];

            // balance end points to offset before start
            if (balanceEnd < startToken) {
                break loop;
            }

            // check token is stop type
            switch (this.offsetAndType[cursor] >> TYPE_SHIFT) {
                case endTokenType1:
                    break loop;

                case endTokenType2:
                    if (includeTokenType2) {
                        cursor++;
                    }
                    break loop;

                default:
                    // fast forward to the end of balanced block
                    if (this.balance[balanceEnd] === cursor) {
                        cursor = balanceEnd;
                    }
            }
        }

        return cursor - this.currentToken;
    },

    /**
     * Returns true when the balance value of the current token is less than `pos`.
     */
    isBalanceEdge: function(pos) {
        var balanceStart = this.balance[this.currentToken];

        return balanceStart < pos;
    },

    /**
     * Returns the text of the current token.
     */
    getTokenValue: function() {
        return this.source.substring(this.tokenStart, this.tokenEnd);
    },

    /**
     * Returns the source text from offset `start` up to the start of the current token.
     */
    substrToCursor: function(start) {
        return this.source.substring(start, this.tokenStart);
    },

    /**
     * Moves the cursor past consecutive whitespace tokens starting with the current one.
     */
    skipWS: function() {
        for (var i = this.currentToken, skipTokenCount = 0; i < this.tokenCount; i++, skipTokenCount++) {
            if ((this.offsetAndType[i] >> TYPE_SHIFT) !== WHITESPACE) {
                break;
            }
        }

        if (skipTokenCount > 0) {
            this.skip(skipTokenCount);
        }
    },

    /**
     * Moves the cursor past consecutive whitespace and comment tokens.
     */
    skipSC: function() {
        while (this.tokenType === WHITESPACE || this.tokenType === COMMENT) {
            this.next();
        }
    },

    /**
     * Moves the cursor `tokenCount` tokens forward; moving past the last token
     * puts the tokenizer into the eof state.
     */
    skip: function(tokenCount) {
        var next = this.currentToken + tokenCount;

        if (next < this.tokenCount) {
            this.currentToken = next;
            // the very first token has no predecessor to take the start from;
            // it starts at firstCharOffset (not at 0)
            this.tokenStart = next > 0
                ? this.offsetAndType[next - 1] & OFFSET_MASK
                : this.firstCharOffset;
            next = this.offsetAndType[next];
            this.tokenType = next >> TYPE_SHIFT;
            this.tokenEnd = next & OFFSET_MASK;
        } else {
            this.currentToken = this.tokenCount;
            this.next();
        }
    },

    /**
     * Moves the cursor to the next token. After the last token `eof` is set,
     * `tokenType` becomes NULL and both token offsets point to the end of source.
     */
    next: function() {
        var next = this.currentToken + 1;

        if (next < this.tokenCount) {
            this.currentToken = next;
            this.tokenStart = this.tokenEnd;
            next = this.offsetAndType[next];
            this.tokenType = next >> TYPE_SHIFT;
            this.tokenEnd = next & OFFSET_MASK;
        } else {
            this.currentToken = this.tokenCount;
            this.eof = true;
            this.tokenType = NULL;
            this.tokenStart = this.tokenEnd = this.source.length;
        }
    },

    /**
     * Moves to the next token when the current token is of type `tokenType`,
     * otherwise throws a CssSyntaxError (via error()).
     */
    eat: function(tokenType) {
        if (this.tokenType !== tokenType) {
            var offset = this.tokenStart;
            var message = NAME[tokenType] + ' is expected';

            // tweak message and offset
            if (tokenType === IDENTIFIER) {
                // when identifier is expected but there is a function or url
                if (this.tokenType === FUNCTION || this.tokenType === URL) {
                    offset = this.tokenEnd - 1;
                    message += ' but function found';
                }
            } else {
                // when test type is part of another token show error for current position + 1
                // e.g. eat(HYPHENMINUS) will fail on "-foo", but pointing on "-" is odd
                if (this.source.charCodeAt(this.tokenStart) === tokenType) {
                    offset = offset + 1;
                }
            }

            this.error(message, offset);
        }

        this.next();
    },

    /**
     * Skips whitespace tokens and then behaves like eat().
     */
    eatNonWS: function(tokenType) {
        this.skipWS();
        this.eat(tokenType);
    },

    /**
     * Returns the text of the current token and moves to the next one;
     * throws (via eat()) when the current token is not of type `tokenType`.
     */
    consume: function(tokenType) {
        var value = this.getTokenValue();

        this.eat(tokenType);

        return value;
    },

    /**
     * Returns the text of the current token without its last char and moves
     * to the next token; throws (via eat()) when the current token is not a function token.
     */
    consumeFunctionName: function() {
        var name = this.source.substring(this.tokenStart, this.tokenEnd - 1);

        this.eat(FUNCTION);

        return name;
    },

    /**
     * Skips whitespace tokens and then behaves like consume().
     */
    consumeNonWS: function(tokenType) {
        this.skipWS();

        return this.consume(tokenType);
    },

    /**
     * Moves to the next token when the current token is an identifier that
     * matches `name` (compared using cmpStr), otherwise throws a CssSyntaxError.
     */
    expectIdentifier: function(name) {
        if (this.tokenType !== IDENTIFIER || cmpStr(this.source, this.tokenStart, this.tokenEnd, name) === false) {
            this.error('Identifier `' + name + '` is expected');
        }

        this.next();
    },

    /**
     * Returns `{ source, offset, line, column }` for an offset in the source.
     * `offset` in the result is shifted by `startOffset`; `source` is `filename` as given.
     */
    getLocation: function(offset, filename) {
        if (!this.linesAnsColumnsComputed) {
            computeLinesAndColumns(this, this.source);
        }

        return {
            source: filename,
            offset: this.startOffset + offset,
            line: this.lines[offset],
            column: this.columns[offset]
        };
    },

    /**
     * Returns `{ source, start, end }` where `start` and `end` are
     * `{ offset, line, column }` objects for the given offsets in the source.
     */
    getLocationRange: function(start, end, filename) {
        if (!this.linesAnsColumnsComputed) {
            computeLinesAndColumns(this, this.source);
        }

        return {
            source: filename,
            start: {
                offset: this.startOffset + start,
                line: this.lines[start],
                column: this.columns[start]
            },
            end: {
                offset: this.startOffset + end,
                line: this.lines[end],
                column: this.columns[end]
            }
        };
    },

    /**
     * Throws a CssSyntaxError with `message` ('Unexpected input' by default).
     * The error points to `offset` when it is given and lies inside the source;
     * otherwise, at eof, to the offset returned by
     * findWhiteSpaceStart(source, source.length - 1), else to the start of the current token.
     */
    error: function(message, offset) {
        var location = typeof offset !== 'undefined' && offset < this.source.length
            ? this.getLocation(offset)
            : this.eof
                ? this.getLocation(findWhiteSpaceStart(this.source, this.source.length - 1))
                : this.getLocation(this.tokenStart);

        throw new CssSyntaxError(
            message || 'Unexpected input',
            this.source,
            location.offset,
            location.line,
            location.column
        );
    },

    /**
     * Returns an array describing every token: `{ idx, type, chunk, balance }`,
     * where `type` is the type name, `chunk` is the token text and `balance`
     * is the balance value of the token.
     */
    dump: function() {
        // the first token starts at firstCharOffset (not at 0)
        var offset = this.firstCharOffset;

        return Array.prototype.slice.call(this.offsetAndType, 0, this.tokenCount).map(function(item, idx) {
            var start = offset;
            var end = item & OFFSET_MASK;

            offset = end;

            return {
                idx: idx,
                type: NAME[item >> TYPE_SHIFT],
                chunk: this.source.substring(start, end),
                balance: this.balance[idx]
            };
        }, this);
    }
};
// expose the error class as a static member
Tokenizer.CssSyntaxError = CssSyntaxError;

// expose every constant and then every utils function as static members
// (the order is kept, so on a name clash the utils member is the one that stays)
[constants, utils].forEach(function(members) {
    var names = Object.keys(members);

    for (var i = 0; i < names.length; i++) {
        Tokenizer[names[i]] = members[names[i]];
    }
});

// warm up tokenizer to eliminate code branches that never execute
// and to fix soft deoptimizations (insufficient type feedback)
new Tokenizer('\n\r\r\n\f<!---->//""\'\'/*\r\n\f*/1a;.\\31\t\+2{url(a);func();+1.2e3 -.4e-5 .6e+7}').getLocation();

module.exports = Tokenizer;