| /** |
| * The MIT License (MIT) |
| * Copyright (c) 2017-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com> |
| */ |
| |
| 'use strict'; |
| |
| /** |
| * A regexp-tree plugin to merge class ranges. |
| * |
| * [a-ec] -> [a-e] |
| * [a-ec-e] -> [a-e] |
| * [\w\da-f] -> [\w] |
| * [abcdef] -> [a-f] |
| */ |
| |
| module.exports = { |
| _hasIUFlags: false, |
| init: function init(ast) { |
| this._hasIUFlags = ast.flags.includes('i') && ast.flags.includes('u'); |
| }, |
| CharacterClass: function CharacterClass(path) { |
| var node = path.node; |
| |
| var expressions = node.expressions; |
| |
| var metas = []; |
| // Extract metas |
| expressions.forEach(function (expression) { |
| if (isMeta(expression)) { |
| metas.push(expression.value); |
| } |
| }); |
| |
| expressions.sort(sortCharClass); |
| |
| for (var i = 0; i < expressions.length; i++) { |
| var expression = expressions[i]; |
| if (fitsInMetas(expression, metas, this._hasIUFlags) || combinesWithPrecedingClassRange(expression, expressions[i - 1]) || combinesWithFollowingClassRange(expression, expressions[i + 1])) { |
| expressions.splice(i, 1); |
| i--; |
| } else { |
| var nbMergedChars = charCombinesWithPrecedingChars(expression, i, expressions); |
| expressions.splice(i - nbMergedChars + 1, nbMergedChars); |
| i -= nbMergedChars; |
| } |
| } |
| } |
| }; |
| |
/**
 * Comparator that sorts expressions in a char class in the following order:
 * - meta chars, ordered alphabetically by value
 * - chars (except `control` kind) and class ranges, ordered alphabetically (`from` char is used for class ranges)
 * - if ambiguous, class range comes before char
 * - if ambiguous between two class ranges, orders by `to` char
 * - control chars, ordered alphabetically by value
 *
 * Two nodes that compare as equal yield 0, so the result does not depend on
 * the argument order.
 * @param {Object} a - Left Char or ClassRange node
 * @param {Object} b - Right Char or ClassRange node
 * @returns {number} Negative if `a` sorts first, positive if `b` does, 0 if equal
 */
function sortCharClass(a, b) {
  var aValue = getSortValue(a);
  var bValue = getSortValue(b);

  if (aValue === bValue) {
    var aIsRange = a.type === 'ClassRange';
    var bIsRange = b.type === 'ClassRange';

    // We want ClassRange before Char
    // [bb-d] -> [b-db]
    if (aIsRange && !bIsRange) {
      return -1;
    }
    if (bIsRange && !aIsRange) {
      return 1;
    }
    if (aIsRange && bIsRange) {
      // Compare the upper bounds with the same rules as any two chars.
      // A plain subtraction yields NaN when both bounds are control chars
      // (Infinity - Infinity).
      return sortCharClass(a.to, b.to);
    }
    if (isMeta(a) && isMeta(b) || isControl(a) && isControl(b)) {
      if (a.value === b.value) {
        return 0;
      }
      return a.value < b.value ? -1 : 1;
    }
  }
  return aValue - bValue;
}
| |
/**
 * Numeric key used to order the members of a character class.
 * - control chars sort last (Infinity)
 * - meta chars without a code point sort first (-1)
 * - any other char sorts by its code point
 * - anything that is not a Char is treated as a class range and sorts by
 *   the code point of its `from` char
 * @param {Object} expression - Char or ClassRange node
 * @returns {number}
 */
function getSortValue(expression) {
  if (expression.type !== 'Char') {
    // ClassRange
    return expression.from.codePoint;
  }

  switch (expression.kind) {
    case 'control':
      return Infinity;
    case 'meta':
      return isNaN(expression.codePoint) ? -1 : expression.codePoint;
    default:
      return expression.codePoint;
  }
}
| |
/**
 * Tells whether a node is a meta char.
 * Without `value`, only the set \d\w\s\D\W\S is accepted; with `value`,
 * the node's value must be exactly that string.
 * @param {Object} expression - Char or ClassRange node
 * @param {?string} value - Optional exact meta value to match, e.g. "\\w"
 * @returns {boolean}
 */
function isMeta(expression) {
  // Optional second argument, read from `arguments` so the declared arity stays 1.
  var value = arguments[1];

  if (expression.type !== 'Char' || expression.kind !== 'meta') {
    return false;
  }
  if (value) {
    return expression.value === value;
  }
  return /^\\[dws]$/i.test(expression.value);
}
| |
/**
 * Tells whether a node is a Char of kind `control`.
 * @param {Object} expression - Char or ClassRange node
 * @returns {boolean}
 */
function isControl(expression) {
  if (expression.type !== 'Char') {
    return false;
  }
  return expression.kind === 'control';
}
| |
/**
 * Tells whether an expression is covered by at least one of the given metas.
 * @param {Object} expression - Char or ClassRange node
 * @param {string[]} metas - Array of meta chars, e.g. ["\\w", "\\s"]
 * @param {boolean} hasIUFlags
 * @returns {boolean}
 */
function fitsInMetas(expression, metas, hasIUFlags) {
  return metas.some(function (meta) {
    return fitsInMeta(expression, meta, hasIUFlags);
  });
}
| |
/**
 * Tells whether everything matched by `expression` is also matched by `meta`.
 *
 * A class range fits only when both bounds AND every code point between them
 * fit. Checking the bounds alone is wrong: `A-z` starts and ends in \w but
 * also spans `[`, `\`, `]`, `^` and the backtick.
 * @param {Object} expression - Char or ClassRange node
 * @param {string} meta - e.g. "\\w"
 * @param {boolean} hasIUFlags
 * @returns {boolean}
 */
function fitsInMeta(expression, meta, hasIUFlags) {
  if (expression.type === 'ClassRange') {
    var from = expression.from;
    var to = expression.to;

    if (!fitsInMeta(from, meta, hasIUFlags) || !fitsInMeta(to, meta, hasIUFlags)) {
      return false;
    }

    // Walk the interior of the range with a reusable probe char.
    // For \d, \w and \s the walk ends after a few steps (first gap in the
    // set); for \D, \W and \S it may visit the whole range.
    var probe = { type: 'Char', kind: 'simple', value: '', codePoint: NaN };
    for (var codePoint = from.codePoint + 1; codePoint < to.codePoint; codePoint++) {
      probe.codePoint = codePoint;
      probe.value = String.fromCodePoint(codePoint);
      if (!fitsInMeta(probe, meta, hasIUFlags)) {
        return false;
      }
    }
    return true;
  }

  // Special cases:
  // \S contains \w and \d
  if (meta === '\\S' && (isMeta(expression, '\\w') || isMeta(expression, '\\d'))) {
    return true;
  }
  // \D contains \W and \s
  if (meta === '\\D' && (isMeta(expression, '\\W') || isMeta(expression, '\\s'))) {
    return true;
  }
  // \w contains \d
  if (meta === '\\w' && isMeta(expression, '\\d')) {
    return true;
  }
  // \W contains \s
  if (meta === '\\W' && isMeta(expression, '\\s')) {
    return true;
  }

  // From here on only chars with a known code point can be classified.
  if (expression.type !== 'Char' || isNaN(expression.codePoint)) {
    return false;
  }

  if (meta === '\\s') {
    return fitsInMetaS(expression);
  }
  if (meta === '\\S') {
    return !fitsInMetaS(expression);
  }
  if (meta === '\\d') {
    return fitsInMetaD(expression);
  }
  if (meta === '\\D') {
    return !fitsInMetaD(expression);
  }
  if (meta === '\\w') {
    return fitsInMetaW(expression, hasIUFlags);
  }
  if (meta === '\\W') {
    return !fitsInMetaW(expression, hasIUFlags);
  }
  return false;
}
| |
/**
 * Tells whether a char's code point belongs to the whitespace set (\s).
 * @param {Object} expression - Char node with codePoint
 * @returns {boolean}
 */
function fitsInMetaS(expression) {
  var codePoint = expression.codePoint;

  // Contiguous block, part of Zs
  if (codePoint >= 0x2000 && codePoint <= 0x200a) {
    return true;
  }

  switch (codePoint) {
    case 0x0009: // \t
    case 0x000a: // \n
    case 0x000b: // \v
    case 0x000c: // \f
    case 0x000d: // \r
    case 0x0020: // space
    case 0x00a0: // nbsp
    case 0x1680: // part of Zs
    case 0x2028: // line separator
    case 0x2029: // paragraph separator
    case 0x202f: // part of Zs
    case 0x205f: // part of Zs
    case 0x3000: // part of Zs
    case 0xfeff: // zwnbsp
      return true;
    default:
      return false;
  }
}
| |
/**
 * Tells whether a char's code point is an ASCII digit (\d).
 * @param {Object} expression - Char node with codePoint
 * @returns {boolean}
 */
function fitsInMetaD(expression) {
  var codePoint = expression.codePoint;
  var ZERO = 0x30; // '0'
  var NINE = 0x39; // '9'
  return codePoint >= ZERO && codePoint <= NINE;
}
| |
/**
 * Tells whether a char belongs to the word set (\w): digits, A-Z, a-z and
 * a literal `_`. When `hasIUFlags` is set, U+017F and U+212A are accepted
 * as well.
 * @param {Object} expression - Char node with codePoint
 * @param {boolean} hasIUFlags
 * @returns {boolean}
 */
function fitsInMetaW(expression, hasIUFlags) {
  var codePoint = expression.codePoint;

  if (fitsInMetaD(expression)) {
    return true;
  }
  // A-Z
  if (codePoint >= 0x41 && codePoint <= 0x5a) {
    return true;
  }
  // a-z
  if (codePoint >= 0x61 && codePoint <= 0x7a) {
    return true;
  }
  if (expression.value === '_') {
    return true;
  }
  return hasIUFlags && (codePoint === 0x017f || codePoint === 0x212a);
}
| |
/**
 * Tries to fold `expression` into the class range that precedes it.
 * When the fold succeeds the range may be widened in place and the caller
 * is expected to drop `expression`.
 * @param {Object} expression - Char or ClassRange node
 * @param {Object} classRange - Preceding node (Char, ClassRange or undefined)
 * @returns {boolean} true when `expression` is now covered by `classRange`
 */
function combinesWithPrecedingClassRange(expression, classRange) {
  if (!classRange || classRange.type !== 'ClassRange') {
    return false;
  }

  // Already covered:
  // [a-gc] -> [a-g]
  // [a-gc-e] -> [a-g]
  if (fitsInClassRange(expression, classRange)) {
    return true;
  }

  // Char right after the range (only \w chars or char codes, to keep readability):
  // [a-de] -> [a-e]
  if (isMetaWCharOrCode(expression) && classRange.to.codePoint === expression.codePoint - 1) {
    classRange.to = expression;
    return true;
  }

  // Overlapping or adjacent ranges:
  // [a-db-f] -> [a-f]
  // [b-fa-d] -> [a-f]
  // [a-cd-f] -> [a-f]
  var touchesRange =
    expression.type === 'ClassRange' &&
    expression.from.codePoint <= classRange.to.codePoint + 1 &&
    expression.to.codePoint >= classRange.from.codePoint - 1;
  if (!touchesRange) {
    return false;
  }

  if (expression.from.codePoint < classRange.from.codePoint) {
    classRange.from = expression.from;
  }
  if (expression.to.codePoint > classRange.to.codePoint) {
    classRange.to = expression.to;
  }
  return true;
}
| |
/**
 * Tries to fold a char into the class range that follows it.
 * Because the elements are sorted, the only case to handle is a char
 * sitting right before the range's lower bound: [ab-e] -> [a-e].
 * When the fold succeeds the range's `from` is replaced in place.
 * @param {Object} expression - Char or ClassRange node
 * @param {Object} classRange - Following node (Char, ClassRange or undefined)
 * @returns {boolean} true when `expression` is now covered by `classRange`
 */
function combinesWithFollowingClassRange(expression, classRange) {
  var followedByRange = Boolean(classRange) && classRange.type === 'ClassRange';

  // We only want \w chars or char codes to keep readability
  if (followedByRange && isMetaWCharOrCode(expression) && classRange.from.codePoint === expression.codePoint + 1) {
    classRange.from = expression;
    return true;
  }

  return false;
}
| |
/**
 * Tells whether a char, or both bounds of a range, lie within `classRange`.
 * Chars without a numeric code point never fit.
 * @param {Object} expression - Char or ClassRange node
 * @param {Object} classRange - ClassRange node
 * @returns {boolean}
 */
function fitsInClassRange(expression, classRange) {
  if (expression.type === 'ClassRange') {
    return fitsInClassRange(expression.from, classRange) && fitsInClassRange(expression.to, classRange);
  }
  if (expression.type === 'Char' && isNaN(expression.codePoint)) {
    return false;
  }

  var codePoint = expression.codePoint;
  var lowest = classRange.from.codePoint;
  var highest = classRange.to.codePoint;
  return codePoint >= lowest && codePoint <= highest;
}
| |
/**
 * Looks backwards from `index` for a run of chars with consecutive code
 * points ending at `expression`. When at least two preceding chars belong
 * to the run, the first char of the run is replaced in `expressions` by a
 * ClassRange going from that char to `expression`; removing the remaining
 * chars of the run is left to the caller.
 * @param {Object} expression - Char or ClassRange node
 * @param {Number} index - Position of `expression` in `expressions`
 * @param {Object[]} expressions - expressions in CharClass
 * @returns {number} - Number of preceding chars merged with expression (0 when nothing was merged)
 */
function charCombinesWithPrecedingChars(expression, index, expressions) {
  // We only want \w chars or char codes to keep readability
  if (!isMetaWCharOrCode(expression)) {
    return 0;
  }

  // Move `runStart` back while the previous element directly precedes the current one.
  var runStart = index;
  while (runStart > 0) {
    var previous = expressions[runStart - 1];
    var extendsRun =
      isMetaWCharOrCode(previous) &&
      previous.codePoint === expressions[runStart].codePoint - 1;
    if (!extendsRun) {
      break;
    }
    runStart--;
  }

  var mergedCount = index - runStart;
  // Two neighbouring chars are kept as they are; a range needs at least three.
  if (mergedCount <= 1) {
    return 0;
  }

  expressions[runStart] = {
    type: 'ClassRange',
    from: expressions[runStart],
    to: expression
  };
  return mergedCount;
}
| |
/**
 * Tells whether a node is a Char with a numeric code point that is either
 * a \w char or written as a char code (`unicode`, `hex`, `oct` or
 * `decimal` kind). Only such chars are merged into ranges, to keep the
 * result readable.
 * @param {?Object} expression - Char or ClassRange node, possibly missing
 * @returns {boolean} - A missing (falsy) `expression` is returned as is
 */
function isMetaWCharOrCode(expression) {
  if (!expression) {
    return expression;
  }
  if (expression.type !== 'Char' || isNaN(expression.codePoint)) {
    return false;
  }
  if (fitsInMetaW(expression, false)) {
    return true;
  }
  return ['unicode', 'hex', 'oct', 'decimal'].indexOf(expression.kind) !== -1;
}