| var Tokenizer = require('./tokenizer'); |
// ASCII character codes referenced by the scanner functions in this file.
// Written in hex so each value reads directly as its Unicode code point.
var TAB = 0x09;                // U+0009 horizontal tab
var N = 0x0A;                  // U+000A line feed (\n)
var F = 0x0C;                  // U+000C form feed (\f)
var R = 0x0D;                  // U+000D carriage return (\r)
var SPACE = 0x20;              // U+0020 space
var EXCLAMATIONMARK = 0x21;    // !
var NUMBERSIGN = 0x23;         // #
var AMPERSAND = 0x26;          // &
var APOSTROPHE = 0x27;         // '
var LEFTPARENTHESIS = 0x28;    // (
var RIGHTPARENTHESIS = 0x29;   // )
var ASTERISK = 0x2A;           // *
var PLUSSIGN = 0x2B;           // +
var COMMA = 0x2C;              // ,
var LESSTHANSIGN = 0x3C;       // <
var GREATERTHANSIGN = 0x3E;    // >
var QUESTIONMARK = 0x3F;       // ?
var COMMERCIALAT = 0x40;       // @
var LEFTSQUAREBRACKET = 0x5B;  // [
var RIGHTSQUAREBRACKET = 0x5D; // ]
var LEFTCURLYBRACKET = 0x7B;   // {
var VERTICALLINE = 0x7C;       // |
var RIGHTCURLYBRACKET = 0x7D;  // }
/**
 * Build a lookup table covering the 128 ASCII character codes.
 *
 * @param {function(string): *} fn Predicate invoked once per code (0..127)
 *     with the one-character string for that code; a truthy result marks it.
 * @returns {Uint32Array|Array<number>} Table of 128 entries, each 1 (marked)
 *     or 0 (not marked).
 */
function createCharMap(fn) {
    // use a plain array where typed arrays are not available
    var hasTypedArrays = typeof Uint32Array === 'function';
    var map = hasTypedArrays ? new Uint32Array(128) : new Array(128);
    var code = 0;

    while (code < 128) {
        map[code] = fn(String.fromCharCode(code)) ? 1 : 0;
        code++;
    }

    return map;
}

// NAME_CHAR[code] is 1 for ASCII letters, digits and hyphen, 0 for everything else
var NAME_CHAR = createCharMap(function(ch) {
    return /[a-zA-Z0-9\-]/.test(ch);
});

// Order in which regroupTerms() processes combinators: lowest number first
var COMBINATOR_PRECEDENCE = {
    ' ': 1,
    '&&': 2,
    '||': 3,
    '|': 4
};
| |
/**
 * Consume a run of whitespace beginning at the current position.
 *
 * The end of the run is searched from `tokenizer.pos + 1`, i.e. the character
 * at the current position is taken to be whitespace already.
 *
 * @param {Tokenizer} tokenizer
 * @returns {string} Result of `tokenizer.substringToPos()` for the end offset
 *     (presumably the consumed text, with `pos` moved to that offset; confirm
 *     in ./tokenizer).
 */
function scanSpaces(tokenizer) {
    var wsEnd = tokenizer.findWsEnd(tokenizer.pos + 1);

    return tokenizer.substringToPos(wsEnd);
}
| |
/**
 * Consume the longest run of name characters (per NAME_CHAR) that starts at
 * the current position.
 *
 * @param {Tokenizer} tokenizer
 * @returns {string} Result of `tokenizer.substringToPos()` for the end of the run.
 *     Calls `tokenizer.error('Expect a keyword')` when the run is empty
 *     (presumably throws; confirm in ./tokenizer).
 */
function scanWord(tokenizer) {
    var source = tokenizer.str;
    var end = tokenizer.pos;

    while (end < source.length) {
        var code = source.charCodeAt(end);
        // anything outside ASCII can never be part of a name
        var isNameChar = code < 128 && NAME_CHAR[code] !== 0;

        if (!isNameChar) {
            break;
        }

        end++;
    }

    if (end === tokenizer.pos) {
        tokenizer.error('Expect a keyword');
    }

    return tokenizer.substringToPos(end);
}
| |
/**
 * Consume the longest run of decimal digits that starts at the current position.
 *
 * @param {Tokenizer} tokenizer
 * @returns {string} Result of `tokenizer.substringToPos()` for the end of the run
 *     (the digits as text, not a number).
 *     Calls `tokenizer.error('Expect a number')` when there is no digit at the
 *     current position (presumably throws; confirm in ./tokenizer).
 */
function scanNumber(tokenizer) {
    var source = tokenizer.str;
    var end = tokenizer.pos;

    while (end < source.length) {
        var code = source.charCodeAt(end);
        var isDigit = code >= 48 && code <= 57; // '0'..'9'

        if (!isDigit) {
            break;
        }

        end++;
    }

    if (end === tokenizer.pos) {
        tokenizer.error('Expect a number');
    }

    return tokenizer.substringToPos(end);
}
| |
/**
 * Consume an apostrophe-delimited string. The current position is expected to
 * be on the opening apostrophe; the closing one is searched from the next
 * character on.
 *
 * @param {Tokenizer} tokenizer
 * @returns {string} Result of `tokenizer.substringToPos()` for the offset just
 *     past the closing apostrophe.
 *     When no closing apostrophe exists, `pos` is moved to the end of input and
 *     `tokenizer.error('Expect an apostrophe')` is called (presumably throws;
 *     confirm in ./tokenizer).
 */
function scanString(tokenizer) {
    var closing = tokenizer.str.indexOf('\'', tokenizer.pos + 1);
    var isUnterminated = closing === -1;

    if (isUnterminated) {
        // report the problem at the end of the input
        tokenizer.pos = tokenizer.str.length;
        tokenizer.error('Expect an apostrophe');
    }

    return tokenizer.substringToPos(closing + 1);
}
| |
/**
 * Read a curly-bracket range: `{n}`, `{n,}` or `{n,m}`.
 *
 * @param {Tokenizer} tokenizer Positioned on the opening `{`.
 * @returns {{min: number, max: number}} `min` is the first number; `max` is
 *     the same number for `{n}`, the second number for `{n,m}`, and 0 when no
 *     upper bound is written (`{n,}`).
 */
function readMultiplierRange(tokenizer) {
    var min;
    var max;

    tokenizer.eat(LEFTCURLYBRACKET);
    min = scanNumber(tokenizer);

    if (tokenizer.charCode() !== COMMA) {
        // {n}
        max = min;
    } else {
        tokenizer.pos++;
        // {n,} leaves the upper bound unset, {n,m} reads it
        max = tokenizer.charCode() === RIGHTCURLYBRACKET
            ? null
            : scanNumber(tokenizer);
    }

    tokenizer.eat(RIGHTCURLYBRACKET);

    return {
        min: Number(min),
        max: max ? Number(max) : 0
    };
}
| |
/**
 * Read an optional multiplier at the current position.
 *
 * Recognised forms and the resulting values:
 *   `*`      min 0, max 0
 *   `+`      min 1, max 0
 *   `?`      min 0, max 1
 *   `#`      min 1, max 0, comma: true
 *   `#{...}` range from readMultiplierRange(), comma: true
 *   `{...}`  range from readMultiplierRange()
 *
 * @param {Tokenizer} tokenizer
 * @returns {?Object} A `Multiplier` node with `term: null` (the caller fills
 *     it in), or null when the current character starts no multiplier; in that
 *     case nothing is consumed.
 */
function readMultiplier(tokenizer) {
    var code = tokenizer.charCode();
    var comma = false;
    var range;

    if (code === ASTERISK) {
        tokenizer.pos++;
        range = { min: 0, max: 0 };
    } else if (code === PLUSSIGN) {
        tokenizer.pos++;
        range = { min: 1, max: 0 };
    } else if (code === QUESTIONMARK) {
        tokenizer.pos++;
        range = { min: 0, max: 1 };
    } else if (code === NUMBERSIGN) {
        tokenizer.pos++;
        comma = true;
        // `#` may be followed directly by an explicit range
        range = tokenizer.charCode() === LEFTCURLYBRACKET
            ? readMultiplierRange(tokenizer)
            : { min: 1, max: 0 };
    } else if (code === LEFTCURLYBRACKET) {
        range = readMultiplierRange(tokenizer);
    } else {
        return null;
    }

    return {
        type: 'Multiplier',
        comma: comma,
        min: range.min,
        max: range.max,
        term: null
    };
}
| |
/**
 * Wrap `node` into a multiplier when one follows at the current position.
 *
 * @param {Tokenizer} tokenizer
 * @param {Object} node Node that was just read.
 * @returns {Object} The `Multiplier` node from readMultiplier() with `term`
 *     set to `node`, or `node` itself when readMultiplier() returns null.
 */
function maybeMultiplied(tokenizer, node) {
    var multiplier = readMultiplier(tokenizer);

    if (multiplier === null) {
        return node;
    }

    multiplier.term = node;

    return multiplier;
}
| |
/**
 * Turn whatever `tokenizer.peek()` returns into a `Token` node.
 *
 * @param {Tokenizer} tokenizer
 * @returns {?{type: string, value: string}} `{ type: 'Token', value }`, or
 *     null when `tokenizer.peek()` returns an empty string (presumably the end
 *     of input; confirm in ./tokenizer).
 */
function maybeToken(tokenizer) {
    var ch = tokenizer.peek();

    return ch === '' ? null : { type: 'Token', value: ch };
}
| |
/**
 * Read a property reference written as `<'name'>`, optionally followed by a
 * multiplier.
 *
 * @param {Tokenizer} tokenizer Positioned on the opening `<`.
 * @returns {Object} `{ type: 'Property', name }`, passed through
 *     maybeMultiplied().
 */
function readProperty(tokenizer) {
    var name;
    var node;

    // opening <'
    tokenizer.eat(LESSTHANSIGN);
    tokenizer.eat(APOSTROPHE);

    name = scanWord(tokenizer);

    // closing '>
    tokenizer.eat(APOSTROPHE);
    tokenizer.eat(GREATERTHANSIGN);

    node = {
        type: 'Property',
        name: name
    };

    return maybeMultiplied(tokenizer, node);
}
| |
/**
 * Read a type reference written as `<name>` or `<name()>`, optionally followed
 * by a multiplier.
 *
 * @param {Tokenizer} tokenizer Positioned on the opening `<`.
 * @returns {Object} `{ type: 'Type', name }`, passed through maybeMultiplied();
 *     for the `<name()>` form the name keeps its `()` suffix.
 */
function readType(tokenizer) {
    var name;
    var hasEmptyParens;

    tokenizer.eat(LESSTHANSIGN);
    name = scanWord(tokenizer);

    // only an immediately closed pair of parentheses is accepted
    hasEmptyParens =
        tokenizer.charCode() === LEFTPARENTHESIS &&
        tokenizer.nextCharCode() === RIGHTPARENTHESIS;

    if (hasEmptyParens) {
        tokenizer.pos += 2;
        name = name + '()';
    }

    tokenizer.eat(GREATERTHANSIGN);

    return maybeMultiplied(tokenizer, {
        type: 'Type',
        name: name
    });
}
| |
/**
 * Read a name and classify it by the character that follows it.
 *
 * @param {Tokenizer} tokenizer Positioned on the first character of the name.
 * @returns {Object} `{ type: 'Function', name }` when the name is immediately
 *     followed by `(` (the parenthesis is consumed and no multiplier is looked
 *     for); otherwise `{ type: 'Keyword', name }` passed through
 *     maybeMultiplied().
 */
function readKeywordOrFunction(tokenizer) {
    var name = scanWord(tokenizer);

    if (tokenizer.charCode() !== LEFTPARENTHESIS) {
        return maybeMultiplied(tokenizer, {
            type: 'Keyword',
            name: name
        });
    }

    // step over the opening parenthesis
    tokenizer.pos++;

    return {
        type: 'Function',
        name: name
    };
}
| |
/**
 * Fold a flat list of terms and `Combinator` nodes into nested groups,
 * modifying `terms` in place.
 *
 * Combinators are processed one kind at a time, ordered by
 * COMBINATOR_PRECEDENCE (lowest number first). Nodes of the kind being
 * processed are removed from the list; every run of terms they joined is
 * replaced by a single implicit `Group` node, except when that kind is the
 * last one to process, whose terms stay directly in `terms`.
 *
 * @param {Array<Object>} terms Terms interleaved with `Combinator` nodes.
 * @param {Object} combinators Object whose keys are the combinator values
 *     present in `terms`.
 * @returns {string|undefined} The combinator processed last (the one left
 *     joining the top-level `terms`), or undefined when `combinators` has no keys.
 */
function regroupTerms(terms, combinators) {
    // replace terms[start, end) with one implicit group holding those terms
    function collapse(start, end, combinator) {
        terms.splice(start, end - start, {
            type: 'Group',
            terms: terms.slice(start, end),
            combinator: combinator,
            disallowEmpty: false,
            explicit: false
        });
    }

    var ordered = Object.keys(combinators).sort(function(a, b) {
        return COMBINATOR_PRECEDENCE[a] - COMBINATOR_PRECEDENCE[b];
    });
    var combinator;

    for (var k = 0; k < ordered.length; k++) {
        var isLast = k === ordered.length - 1;
        var runStart = 0; // -1 while no run of the current combinator is open
        var index = 0;

        combinator = ordered[k];

        while (index < terms.length) {
            var term = terms[index];

            if (term.type !== 'Combinator') {
                index++;
                continue;
            }

            if (term.value === combinator) {
                if (runStart === -1) {
                    // the run begins at the term in front of this combinator
                    runStart = index - 1;
                }

                // drop the combinator; the following term moves into `index`
                terms.splice(index, 1);
                continue;
            }

            // any other combinator closes the open run
            if (runStart !== -1 && index - runStart > 1) {
                collapse(runStart, index, combinator);
                // the closing combinator now sits right behind the new group
                index = runStart + 1;
            }

            runStart = -1;
            index++;
        }

        // a run reaching the end of the list is wrapped too, unless this is
        // the last combinator kind
        if (runStart !== -1 && !isLast) {
            collapse(runStart, index, combinator);
        }
    }

    return combinator;
}
| |
/**
 * Read terms until peek() returns a falsy value and collect them into an
 * implicit group.
 *
 * `Spaces` tokens are skipped. Two neighbouring terms with no explicit
 * combinator between them get a ' ' (juxtaposition) combinator inserted.
 * The collected list is then regrouped in place by regroupTerms().
 *
 * `tokenizer.error('Unexpected combinator')` is called (presumably throws;
 * confirm in ./tokenizer) when a combinator opens the group, directly follows
 * another combinator, or closes the group. In each case `tokenizer.pos` is
 * first moved back to the offset recorded after the previous token, so the
 * error is reported next to the offending combinator.
 *
 * @param {Tokenizer} tokenizer
 * @returns {{type: string, terms: Array<Object>, combinator: string,
 *     disallowEmpty: boolean, explicit: boolean}} A `Group` node; `combinator`
 *     is the value returned by regroupTerms(), or ' ' when it returns nothing.
 */
function readImplicitGroup(tokenizer) {
    var terms = [];
    var combinators = {};
    var token;
    var prevToken = null;
    var prevTokenPos = tokenizer.pos;

    while ((token = peek(tokenizer))) {
        if (token.type === 'Spaces') {
            continue;
        }

        if (token.type === 'Combinator') {
            // check for combinator in group beginning and double combinator sequence
            if (prevToken === null || prevToken.type === 'Combinator') {
                tokenizer.pos = prevTokenPos;
                tokenizer.error('Unexpected combinator');
            }

            combinators[token.value] = true;
        } else if (prevToken !== null && prevToken.type !== 'Combinator') {
            combinators[' '] = true; // a b
            terms.push({
                type: 'Combinator',
                value: ' '
            });
        }

        terms.push(token);
        prevToken = token;
        prevTokenPos = tokenizer.pos;
    }

    // check for combinator in group ending
    if (prevToken !== null && prevToken.type === 'Combinator') {
        // restore the offset right after the dangling combinator; this used to
        // subtract the offset from the position, which gave a meaningless location
        tokenizer.pos = prevTokenPos;
        tokenizer.error('Unexpected combinator');
    }

    return {
        type: 'Group',
        terms: terms,
        combinator: regroupTerms(terms, combinators) || ' ',
        disallowEmpty: false,
        explicit: false
    };
}
| |
/**
 * Read an explicit group: `[ ... ]`, optionally followed by `!`.
 *
 * @param {Tokenizer} tokenizer Positioned on the opening `[`.
 * @returns {Object} The group produced by readImplicitGroup() with `explicit`
 *     set to true, and `disallowEmpty` set to true when `!` directly follows
 *     the closing `]`.
 */
function readGroup(tokenizer) {
    var group;
    var isNonEmptyRequired;

    tokenizer.eat(LEFTSQUAREBRACKET);
    group = readImplicitGroup(tokenizer);
    tokenizer.eat(RIGHTSQUAREBRACKET);

    group.explicit = true;

    isNonEmptyRequired = tokenizer.charCode() === EXCLAMATIONMARK;

    if (isNonEmptyRequired) {
        tokenizer.pos++;
        group.disallowEmpty = true;
    }

    return group;
}
| |
/**
 * Read the next token or term at the current position, dispatching on the
 * current character code.
 *
 * @param {Tokenizer} tokenizer
 * @returns {Object|null|undefined} The node that was read; undefined (nothing
 *     consumed) when the character must end the scan: `]`, a character that
 *     may only start a multiplier (`*`, `+`, `?`, `#`, `!`), or `{` followed
 *     by a digit; null when maybeToken() returns null.
 */
function peek(tokenizer) {
    var code = tokenizer.charCode();
    var nextCode;
    var isDoubleBar;

    // a name character starts a keyword or a function
    if (code < 128 && NAME_CHAR[code] === 1) {
        return readKeywordOrFunction(tokenizer);
    }

    // `]` is not eaten, it stops the scan of a group;
    // the rest are prohibited tokens (used as a multiplier start)
    if (code === RIGHTSQUAREBRACKET ||
        code === ASTERISK ||
        code === PLUSSIGN ||
        code === QUESTIONMARK ||
        code === NUMBERSIGN ||
        code === EXCLAMATIONMARK) {
        return;
    }

    if (code === LEFTSQUAREBRACKET) {
        return maybeMultiplied(tokenizer, readGroup(tokenizer));
    }

    if (code === LESSTHANSIGN) {
        // <'property'> or <type>
        if (tokenizer.nextCharCode() === APOSTROPHE) {
            return readProperty(tokenizer);
        }

        return readType(tokenizer);
    }

    if (code === VERTICALLINE) {
        // `||` or `|`
        isDoubleBar = tokenizer.nextCharCode() === VERTICALLINE;

        return {
            type: 'Combinator',
            value: tokenizer.substringToPos(tokenizer.pos + (isDoubleBar ? 2 : 1))
        };
    }

    if (code === AMPERSAND) {
        // a single `&` is not valid, the second one is required
        tokenizer.pos++;
        tokenizer.eat(AMPERSAND);

        return {
            type: 'Combinator',
            value: '&&'
        };
    }

    if (code === COMMA) {
        tokenizer.pos++;

        return {
            type: 'Comma'
        };
    }

    if (code === APOSTROPHE) {
        return maybeMultiplied(tokenizer, {
            type: 'String',
            value: scanString(tokenizer)
        });
    }

    if (code === SPACE || code === TAB || code === N || code === R || code === F) {
        return {
            type: 'Spaces',
            value: scanSpaces(tokenizer)
        };
    }

    if (code === COMMERCIALAT) {
        nextCode = tokenizer.nextCharCode();

        if (nextCode < 128 && NAME_CHAR[nextCode] === 1) {
            // skip the `@`, then read the name
            tokenizer.pos++;

            return {
                type: 'AtKeyword',
                name: scanWord(tokenizer)
            };
        }

        return maybeToken(tokenizer);
    }

    if (code === LEFTCURLYBRACKET) {
        // LEFTCURLYBRACKET is allowed since mdn/data uses it w/o quoting
        // check next char isn't a number, because it's likely a disjoined multiplier
        nextCode = tokenizer.nextCharCode();

        if (nextCode < 48 || nextCode > 57) {
            return maybeToken(tokenizer);
        }

        return;
    }

    return maybeToken(tokenizer);
}
| |
/**
 * Parse a grammar string into a tree of nodes.
 *
 * @param {string} str Source text to parse.
 * @returns {Object} The top-level `Group` node. When that group holds exactly
 *     one term and the term is itself a `Group`, the inner group is returned
 *     instead.
 *     Calls `tokenizer.error('Unexpected input')` when reading stops before
 *     the end of `str` (presumably throws; confirm in ./tokenizer).
 */
function parse(str) {
    var tokenizer = new Tokenizer(str);
    var group = readImplicitGroup(tokenizer);
    var onlyTerm;

    if (tokenizer.pos !== str.length) {
        tokenizer.error('Unexpected input');
    }

    if (group.terms.length !== 1) {
        return group;
    }

    // reduce redundant groups with single group term
    onlyTerm = group.terms[0];

    return onlyTerm.type === 'Group' ? onlyTerm : group;
}
| |
| // warm up parse to eliminate code branches that never execute |
| // fix soft deoptimizations (insufficient type feedback) |
| parse('[a&&<b>#|<\'c\'>*||e() f{2} /,(% g#{1,2} h{2,})]!'); |
| |
| module.exports = parse; |