| /*! https://mths.be/regenerate v1.4.2 by @mathias | MIT license */ |
| ;(function(root) { |
| |
| // Detect free variables `exports`. |
| var freeExports = typeof exports == 'object' && exports; |
| |
| // Detect free variable `module`. |
| var freeModule = typeof module == 'object' && module && |
| module.exports == freeExports && module; |
| |
| // Detect free variable `global`, from Node.js/io.js or Browserified code, |
| // and use it as `root`. |
| var freeGlobal = typeof global == 'object' && global; |
| if (freeGlobal.global === freeGlobal || freeGlobal.window === freeGlobal) { |
| root = freeGlobal; |
| } |
| |
| /*--------------------------------------------------------------------------*/ |
| |
// Messages used for the exceptions thrown on invalid ranges / code points.
var ERRORS = {
  rangeOrder:
    'A range\u2019s `stop` value must be greater than or equal to the ' +
    '`start` value.',
  codePointRange:
    'Invalid code point value. Code points range from U+000000 to U+10FFFF.'
};

// Boundaries of the UTF-16 surrogate code unit ranges.
// https://mathiasbynens.be/notes/javascript-encoding#surrogate-pairs
var HIGH_SURROGATE_MIN = 0xD800;
var HIGH_SURROGATE_MAX = 0xDBFF;
var LOW_SURROGATE_MIN = 0xDC00;
var LOW_SURROGATE_MAX = 0xDFFF;

// Matches an escaped `\x00` that is not followed by a decimal digit, i.e. one
// that can be shortened to `\0`. Output is sorted by code point value, so the
// `\x00` escape is never itself preceded by a backslash and the pattern can
// stay this simple.
var regexNull = /\\x00([^0123456789]|$)/g;
| |
var object = {};
var hasOwnProperty = object.hasOwnProperty;

// Copies every own enumerable property of `source` onto `destination`
// (inherited properties are skipped) and returns `destination`.
var extend = function(destination, source) {
  for (var name in source) {
    if (!hasOwnProperty.call(source, name)) {
      continue;
    }
    destination[name] = source[name];
  }
  return destination;
};
| |
// Calls `callback(item, index)` for each element of `array`, in order.
// The length is read once, before iterating.
var forEach = function(array, callback) {
  var total = array.length;
  for (var position = 0; position < total; position++) {
    callback(array[position], position);
  }
};
| |
var toString = object.toString;

// True when `value` reports the `[object Array]` class tag.
var isArray = function(value) {
  var tag = toString.call(value);
  return tag == '[object Array]';
};

// True for number primitives as well as boxed `Number` objects.
var isNumber = function(value) {
  if (typeof value == 'number') {
    return true;
  }
  return toString.call(value) == '[object Number]';
};
| |
// Left-pads `number` with zeroes up to `totalCharacters` characters. Values
// that are already long enough are returned unchanged (as a string). At most
// four zeroes are available, which covers the two- and four-digit escapes
// produced in this file.
var zeroes = '0000';
var pad = function(number, totalCharacters) {
  var text = String(number);
  if (text.length >= totalCharacters) {
    return text;
  }
  return (zeroes + text).slice(-totalCharacters);
};
| |
// Formats `number` as an uppercase hexadecimal string (no prefix, no padding).
var hex = function(number) {
  var numeric = Number(number);
  var digits = numeric.toString(16);
  return digits.toUpperCase();
};

var slice = [].slice;
| |
| /*--------------------------------------------------------------------------*/ |
| |
// Converts a list of code points into the flat range representation used
// throughout this file: `[start1, end1, start2, end2, …]` where every `end`
// is exclusive. Runs of consecutive values (each one exactly one more than
// its predecessor) are collapsed into a single pair.
var dataFromCodePoints = function(codePoints) {
  var total = codePoints.length;
  var finalIndex = total - 1;
  var data = [];
  var expectingStart = true;
  var current;
  var previous = 0;
  for (var i = 0; i < total; i++) {
    current = codePoints[i];
    if (expectingStart) {
      // Open a new range at this value.
      data.push(current);
      previous = current;
      expectingStart = false;
      continue;
    }
    if (current != previous + 1) {
      // Gap: close the running range and open a new one.
      data.push(previous + 1, current);
      previous = current;
      continue;
    }
    if (i != finalIndex) {
      // Still inside a consecutive run.
      previous = current;
      continue;
    }
    // The run reaches the end of the input; close it here.
    expectingStart = true;
    data.push(current + 1);
  }
  if (!expectingStart) {
    // A range is still open after the last element.
    data.push(current + 1);
  }
  return data;
};
| |
// Removes a single code point from `data` (flat `[start, end)` pairs),
// mutating and returning the same array. Nothing changes when the code point
// is not part of any pair.
var dataRemove = function(data, codePoint) {
  var total = data.length;
  for (var i = 0; i < total; i += 2) {
    var first = data[i];
    var limit = data[i + 1]; // exclusive
    if (!(codePoint >= first && codePoint < limit)) {
      continue;
    }
    if (codePoint == first) {
      if (limit == first + 1) {
        // The pair held only this code point; drop it entirely.
        data.splice(i, 2);
      } else {
        // Shrink the pair from the left.
        data[i] = codePoint + 1;
      }
    } else if (codePoint == limit - 1) {
      // Shrink the pair from the right.
      data[i + 1] = codePoint;
    } else {
      // Punch a hole: one pair becomes two.
      data.splice(i, 2, first, codePoint, codePoint + 1, limit);
    }
    return data;
  }
  return data;
};
| |
// Removes the inclusive range `rangeStart`–`rangeEnd` from `data` (flat
// `[start, end)` pairs), mutating and returning the same array.
// Throws when `rangeEnd` is smaller than `rangeStart`.
var dataRemoveRange = function(data, rangeStart, rangeEnd) {
  if (rangeEnd < rangeStart) {
    throw Error(ERRORS.rangeOrder);
  }
  var i = 0;
  // `data.length` is re-read on purpose: pairs may be spliced out below.
  while (i < data.length) {
    var first = data[i];
    var last = data[i + 1] - 1; // inclusive upper bound of this pair

    // Pairs are visited in ascending order; nothing further can match.
    if (first > rangeEnd) {
      return data;
    }

    // The pair lies completely inside the removed range: drop it and look at
    // whatever slid into this position.
    // E.g. `[0, 11, 40, 51]` minus 0-10 → `[40, 51]`.
    if (rangeStart <= first && rangeEnd >= last) {
      data.splice(i, 2);
      continue;
    }

    // The removed range lies strictly inside the pair (it does not reach the
    // pair's last code point).
    // E.g. `[0, 11]` minus 4-6 → `[0, 4, 7, 11]`.
    if (rangeStart >= first && rangeEnd < last) {
      if (rangeStart == first) {
        // Only the head of the pair goes away.
        data[i] = rangeEnd + 1;
        data[i + 1] = last + 1;
      } else {
        // Split the pair in two around the removed range.
        data.splice(i, 2, first, rangeStart, rangeEnd + 1, last + 1);
      }
      return data;
    }

    if (rangeStart >= first && rangeStart <= last) {
      // Only the tail of the pair is removed. Keep scanning, later pairs may
      // be affected too. E.g. `[0, 11, 14, 31]` minus 4-20 → `[0, 4, 21, 31]`.
      data[i + 1] = rangeStart;
    } else if (rangeEnd >= first && rangeEnd <= last) {
      // Only the head of the pair is removed.
      // E.g. `[14, 31]` minus 4-20 → `[21, 31]`.
      data[i] = rangeEnd + 1;
      return data;
    }

    i += 2;
  }
  return data;
};
| |
// Adds a single code point to `data` (flat `[start, end)` pairs), mutating
// and returning the same array. Adjacent pairs are merged when the new code
// point closes the gap between them.
// Throws a `RangeError` for values outside U+0000..U+10FFFF.
var dataAdd = function(data, codePoint) {
  var total = data.length;
  if (codePoint < 0x0 || codePoint > 0x10FFFF) {
    throw RangeError(ERRORS.codePointRange);
  }
  var previousIndex = null;
  for (var i = 0; i < total; i += 2) {
    var first = data[i];
    var limit = data[i + 1]; // exclusive

    // Already part of the set.
    if (codePoint >= first && codePoint < limit) {
      return data;
    }

    // Directly in front of this pair: grow the pair to the left.
    if (codePoint == first - 1) {
      data[i] = codePoint;
      return data;
    }

    // The pair starts beyond the code point: insert a fresh singleton pair
    // right after the previously visited pair (or at the very front).
    if (first > codePoint) {
      var insertAt = previousIndex != null ? previousIndex + 2 : 0;
      data.splice(insertAt, 0, codePoint, codePoint + 1);
      return data;
    }

    // Directly behind this pair: grow it to the right, fusing it with the
    // next pair when that one starts immediately afterwards,
    // e.g. `dataAdd([0, 4, 5, 10], 4)` → `[0, 10]`.
    if (codePoint == limit) {
      if (codePoint + 1 == data[i + 2]) {
        data.splice(i, 4, first, data[i + 3]);
      } else {
        data[i + 1] = codePoint + 1;
      }
      return data;
    }

    previousIndex = i;
  }
  // Larger than everything present so far: append.
  data.push(codePoint, codePoint + 1);
  return data;
};
| |
// Returns the union of two range-pair arrays. `dataA` is copied first, so
// neither argument is modified; each pair of `dataB` is then added through
// `dataAdd` (single code point) or `dataAddRange` (longer range).
var dataAddData = function(dataA, dataB) {
  var union = dataA.slice();
  var total = dataB.length;
  for (var i = 0; i < total; i += 2) {
    var first = dataB[i];
    var last = dataB[i + 1] - 1; // inclusive
    union = first == last
      ? dataAdd(union, first)
      : dataAddRange(union, first, last);
  }
  return union;
};
| |
// Returns `dataA` minus everything in `dataB`. `dataA` is copied first, so
// neither argument is modified; each pair of `dataB` is then removed through
// `dataRemove` (single code point) or `dataRemoveRange` (longer range).
var dataRemoveData = function(dataA, dataB) {
  var remaining = dataA.slice();
  var total = dataB.length;
  for (var i = 0; i < total; i += 2) {
    var first = dataB[i];
    var last = dataB[i + 1] - 1; // inclusive
    remaining = first == last
      ? dataRemove(remaining, first)
      : dataRemoveRange(remaining, first, last);
  }
  return remaining;
};
| |
// Adds the inclusive range `rangeStart`–`rangeEnd` to `data`, mutating and
// returning the same array.
//
// `data` is a flat, ascending list of `[start, end)` pairs (each `end` is
// exclusive). The pairs are scanned from left to right; as soon as the new
// range has been written into a pair (`added`), the remaining iterations only
// swallow or fuse the following pairs that now overlap it.
//
// Throws an `Error` when `rangeEnd < rangeStart` and a `RangeError` when
// either bound lies outside U+0000..U+10FFFF.
var dataAddRange = function(data, rangeStart, rangeEnd) {
  if (rangeEnd < rangeStart) {
    throw Error(ERRORS.rangeOrder);
  }
  if (
    rangeStart < 0x0 || rangeStart > 0x10FFFF ||
    rangeEnd < 0x0 || rangeEnd > 0x10FFFF
  ) {
    throw RangeError(ERRORS.codePointRange);
  }
  // Iterate over the data per `(start, end)` pair.
  var index = 0;
  var start;
  var end;
  var added = false;
  // `data.length` is re-read on every pass because pairs get spliced out
  // below; a cached length would make the loop walk past the end.
  while (index < data.length) {
    start = data[index];
    end = data[index + 1];

    if (added) {
      // The range has already been added to the set; at this point, we just
      // need to get rid of the following ranges in case they overlap.

      // Check if this range can be combined with the previous range.
      if (start == rangeEnd + 1) {
        data.splice(index - 1, 2);
        return data;
      }

      // Exit as soon as no more possibly overlapping pairs can be found.
      if (start > rangeEnd) {
        return data;
      }

      // E.g. `[0, 11, 12, 16]` and we've added 5-15, so we now have
      // `[0, 16, 12, 16]`. Remove the `12,16` part, as it lies within the
      // `0,16` range that was previously added.
      if (start >= rangeStart && start <= rangeEnd) {
        // `start` lies within the range that was previously added.

        if (end > rangeStart && end - 1 <= rangeEnd) {
          // `end` lies within the range that was previously added as well,
          // so remove this pair.
          data.splice(index, 2);
          index -= 2;
          // Note: we cannot `return` just yet, as there may still be other
          // overlapping pairs.
        } else {
          // `start` lies within the range that was previously added, but
          // `end` doesn't. E.g. `[0, 11, 12, 31]` and we've added 5-15, so
          // now we have `[0, 16, 12, 31]`. This must be written as `[0, 31]`.
          // Remove the previously added `end` and the current `start`.
          data.splice(index - 1, 2);
          index -= 2;
        }

        // Note: we cannot return yet.
      }

    }

    // The new range ends right before, or exactly on, the first code point
    // of this pair: extend the pair to the left.
    else if (start == rangeEnd + 1 || start == rangeEnd) {
      data[index] = rangeStart;
      return data;
    }

    // The new range starts before this pair and ends strictly inside it,
    // e.g. `[3, 10]` and you add 0-4 → `[0, 10]`. Only the pair's start has
    // to move; nothing after this pair can be affected because the new range
    // stops short of this pair's end. Without this branch the range matched
    // no case at all and was appended at the end, leaving `data` unsorted and
    // overlapping.
    else if (rangeStart < start && rangeEnd > start && rangeEnd + 1 < end) {
      data[index] = rangeStart;
      return data;
    }

    // Check if a new pair must be inserted *before* the current one.
    else if (start > rangeEnd) {
      data.splice(index, 0, rangeStart, rangeEnd + 1);
      return data;
    }

    else if (rangeStart >= start && rangeStart < end && rangeEnd + 1 <= end) {
      // The new range lies entirely within an existing range pair. No action
      // needed.
      return data;
    }

    else if (
      // E.g. `[0, 11]` and you add 5-15 → `[0, 16]`.
      (rangeStart >= start && rangeStart < end) ||
      // E.g. `[0, 3]` and you add 3-6 → `[0, 7]`.
      end == rangeStart
    ) {
      // Replace `end` with the new value.
      data[index + 1] = rangeEnd + 1;
      // Make sure the next range pair doesn't overlap, e.g. `[0, 11, 12, 14]`
      // and you add 5-15 → `[0, 16]`, i.e. remove the `12,14` part.
      added = true;
      // Note: we cannot `return` just yet.
    }

    else if (rangeStart <= start && rangeEnd + 1 >= end) {
      // The new range is a superset of the old range.
      data[index] = rangeStart;
      data[index + 1] = rangeEnd + 1;
      added = true;
    }

    index += 2;
  }
  // The loop has finished without doing anything; add the new pair to the end
  // of the data set.
  if (!added) {
    data.push(rangeStart, rangeEnd + 1);
  }
  return data;
};
| |
// Reports whether `codePoint` falls inside any `[start, end)` pair of `data`.
var dataContains = function(data, codePoint) {
  var total = data.length;
  // Cheap rejection: outside the span covered by the first and last values.
  if (total >= 2 && (codePoint < data[0] || codePoint > data[total - 1])) {
    return false;
  }
  for (var i = 0; i < total; i += 2) {
    if (codePoint >= data[i] && codePoint < data[i + 1]) {
      return true;
    }
  }
  return false;
};
| |
// Keeps the entries of `codePoints` that are contained in `data` (in their
// original order) and converts the survivors back into range pairs.
var dataIntersection = function(data, codePoints) {
  var kept = [];
  var total = codePoints.length;
  for (var i = 0; i < total; ++i) {
    var candidate = codePoints[i];
    if (dataContains(data, candidate)) {
      kept.push(candidate);
    }
  }
  return dataFromCodePoints(kept);
};
| |
// True when `data` holds no range pairs at all.
var dataIsEmpty = function(data) {
  var size = data.length;
  return !size;
};
| |
// True when `data` is exactly one pair that spans a single code point.
var dataIsSingleton = function(data) {
  if (data.length != 2) {
    return false;
  }
  return data[0] + 1 == data[1];
};
| |
// Expands the `[start, end)` pairs of `data` into a flat list that contains
// every individual code point, in ascending pair order.
var dataToArray = function(data) {
  var codePoints = [];
  var total = data.length;
  for (var i = 0; i < total; i += 2) {
    var limit = data[i + 1]; // exclusive
    for (var codePoint = data[i]; codePoint < limit; ++codePoint) {
      codePoints.push(codePoint);
    }
  }
  return codePoints;
};
| |
| /*--------------------------------------------------------------------------*/ |
| |
// UTF-16 surrogate formulae, see
// https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
var floor = Math.floor;

// High (leading) surrogate code unit of an astral code point.
var highSurrogate = function(codePoint) {
  var offset = codePoint - 0x10000;
  var unit = floor(offset / 0x400) + HIGH_SURROGATE_MIN;
  return parseInt(unit, 10);
};

// Low (trailing) surrogate code unit of an astral code point.
var lowSurrogate = function(codePoint) {
  var offset = codePoint - 0x10000;
  var unit = offset % 0x400 + LOW_SURROGATE_MIN;
  return parseInt(unit, 10);
};
| |
var stringFromCharCode = String.fromCharCode;

// Returns the regular-expression source text for one BMP code point.
//
// The checks run in this order:
//   1. tab, line feed, form feed and carriage return use their single
//      character escapes. (`\b` means "word boundary" in a regular
//      expression and IE < 9 treats `'\v'` as `'v'`, so U+0008 and U+000B
//      are not given short escapes.)
//   2. `-` becomes `\x2D` rather than `\-`, since `/\-/u` throws outside of
//      a character class whereas `/\x2D/u` is fine.
//   3. `\` becomes `\\`.
//   4. the remaining syntax characters `$()*+./?[]^{|}` are
//      backslash-escaped.
//   5. any other printable ASCII character (U+0020..U+007E) is emitted
//      literally.
//   6. everything else up to U+00FF becomes `\xHH`, and the rest `\uHHHH`.
//
// Astral code points are not handled here; callers pass BMP values only.
var codePointToString = function(codePoint) {
  if (codePoint == 0x09) {
    return '\\t';
  }
  if (codePoint == 0x0A) {
    return '\\n';
  }
  if (codePoint == 0x0C) {
    return '\\f';
  }
  if (codePoint == 0x0D) {
    return '\\r';
  }
  if (codePoint == 0x2D) {
    return '\\x2D';
  }
  if (codePoint == 0x5C) {
    return '\\\\';
  }

  var isSyntaxCharacter =
    codePoint == 0x24 ||
    (codePoint >= 0x28 && codePoint <= 0x2B) ||
    codePoint == 0x2E || codePoint == 0x2F ||
    codePoint == 0x3F ||
    (codePoint >= 0x5B && codePoint <= 0x5E) ||
    (codePoint >= 0x7B && codePoint <= 0x7D);
  if (isSyntaxCharacter) {
    return '\\' + stringFromCharCode(codePoint);
  }

  if (codePoint >= 0x20 && codePoint <= 0x7E) {
    return stringFromCharCode(codePoint);
  }

  if (codePoint <= 0xFF) {
    // https://mathiasbynens.be/notes/javascript-escapes#hexadecimal
    return '\\x' + pad(hex(codePoint), 2);
  }

  // https://mathiasbynens.be/notes/javascript-escapes#unicode
  return '\\u' + pad(hex(codePoint), 4);
};
| |
// Like `codePointToString`, but for `u`-flag patterns: values above U+FFFF
// are written as a single `\u{…}` code point escape.
var codePointToStringUnicode = function(codePoint) {
  var fitsInBMP = codePoint <= 0xFFFF;
  if (fitsInBMP) {
    return codePointToString(codePoint);
  }
  var digits = codePoint.toString(16).toUpperCase();
  return '\\u{' + digits + '}';
};
| |
// Returns the code point at the start of `symbol`. When the first code unit
// is a high surrogate and another code unit follows, the two are combined
// with the surrogate formula (the second unit is assumed to be a low
// surrogate); otherwise the first code unit is returned as-is.
// https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
var symbolToCodePoint = function(symbol) {
  var size = symbol.length;
  var lead = symbol.charCodeAt(0);
  var leadIsHighSurrogate =
    lead >= HIGH_SURROGATE_MIN && lead <= HIGH_SURROGATE_MAX;
  if (!(leadIsHighSurrogate && size > 1)) {
    return lead;
  }
  var trail = symbol.charCodeAt(1);
  return (lead - HIGH_SURROGATE_MIN) * 0x400 +
    trail - LOW_SURROGATE_MIN + 0x10000;
};
| |
// Builds regular-expression source for BMP-only range data. A set with
// exactly one code point yields just its escaped form; anything else is
// wrapped in `[...]`. Two-element ranges are written without a hyphen.
var createBMPCharacterClasses = function(data) {
  if (dataIsSingleton(data)) {
    return codePointToString(data[0]);
  }
  var body = '';
  var total = data.length;
  for (var i = 0; i < total; i += 2) {
    var first = data[i];
    var last = data[i + 1] - 1; // inclusive
    body += codePointToString(first);
    if (first == last) {
      continue;
    }
    if (first + 1 != last) {
      body += '-';
    }
    body += codePointToString(last);
  }
  return '[' + body + ']';
};
| |
// Same as `createBMPCharacterClasses`, except that code points are rendered
// with `codePointToStringUnicode`, so the data may contain astral values
// (for patterns that use the `u` flag).
var createUnicodeCharacterClasses = function(data) {
  if (dataIsSingleton(data)) {
    return codePointToStringUnicode(data[0]);
  }
  var pieces = [];
  var total = data.length;
  for (var i = 0; i < total; i += 2) {
    var first = data[i];
    var last = data[i + 1] - 1; // inclusive
    pieces.push(codePointToStringUnicode(first));
    if (first == last) {
      continue;
    }
    if (first + 1 != last) {
      pieces.push('-');
    }
    pieces.push(codePointToStringUnicode(last));
  }
  return '[' + pieces.join('') + ']';
};
| |
// Splits range data into four independent range lists:
//
//   bmp                 – BMP code points that are not surrogates
//   loneHighSurrogates  – U+D800..U+DBFF
//   loneLowSurrogates   – U+DC00..U+DFFF
//   astral              – everything from U+10000 upwards
//
// Every input pair is clipped against each region in ascending order and the
// non-empty overlap is appended to that region's list, so a pair that spans
// several regions contributes one piece to each of them.
var splitAtBMP = function(data) {
  var loneHighSurrogates = [];
  var loneLowSurrogates = [];
  var bmp = [];
  var astral = [];

  // Each region: [lower bound, exclusive upper bound, destination list].
  var regions = [
    [-Infinity, HIGH_SURROGATE_MIN, bmp],
    [HIGH_SURROGATE_MIN, HIGH_SURROGATE_MAX + 1, loneHighSurrogates],
    [LOW_SURROGATE_MIN, LOW_SURROGATE_MAX + 1, loneLowSurrogates],
    [LOW_SURROGATE_MAX + 1, 0xFFFF + 1, bmp],
    [0xFFFF + 1, Infinity, astral]
  ];
  var regionCount = regions.length;

  var total = data.length;
  for (var i = 0; i < total; i += 2) {
    var from = data[i];
    var to = data[i + 1]; // exclusive
    for (var r = 0; r < regionCount; r++) {
      var region = regions[r];
      var lower = Math.max(from, region[0]);
      var upper = Math.min(to, region[1]);
      if (lower < upper) {
        region[2].push(lower, upper);
      }
    }
  }

  return {
    'loneHighSurrogates': loneHighSurrogates,
    'loneLowSurrogates': loneLowSurrogates,
    'bmp': bmp,
    'astral': astral
  };
};
| |
// Merges consecutive `[highSurrogates, lowSurrogates]` mappings that share
// the same first high-surrogate pair by folding their low surrogates together,
// then hands the result to `optimizeByLowSurrogates`.
//
// Note: the low-surrogate array of the first mapping in a run is extended in
// place while merging.
var optimizeSurrogateMappings = function(surrogateMappings) {
  var merged = [];
  var total = surrogateMappings.length;
  var position = 0;
  while (position < total) {
    var current = surrogateMappings[position];
    var following = surrogateMappings[position + 1];

    // The final mapping has nothing to merge with; keep it untouched.
    if (!following) {
      merged.push(current);
      position++;
      continue;
    }

    var highs = current[0];
    var lows = current[1];
    var combinedLows = lows;
    var didMerge = false;

    // Absorb every subsequent mapping with identical high surrogates.
    while (
      following && following[0] &&
      highs[0] == following[0][0] &&
      highs[1] == following[0][1]
    ) {
      var followingLows = following[1];
      combinedLows = dataIsSingleton(followingLows)
        ? dataAdd(combinedLows, followingLows[0])
        : dataAddRange(combinedLows, followingLows[0], followingLows[1] - 1);
      position++;
      current = surrogateMappings[position];
      highs = current[0];
      lows = current[1];
      following = surrogateMappings[position + 1];
      didMerge = true;
    }

    merged.push([highs, didMerge ? combinedLows : lows]);
    position++;
  }
  return optimizeByLowSurrogates(merged);
};
| |
// Merges mappings that have the exact same, single low-surrogate range by
// combining their high surrogates into one mapping, so that e.g.
// `[\uD800][\uDC00-\uDFFF]|[\uD802][\uDC00-\uDFFF]` can be written as
// `[\uD800\uD802][\uDC00-\uDFFF]`. The array (and the high-surrogate data of
// the surviving mappings) is modified in place and returned.
//
// Both mappings must consist of exactly ONE low-surrogate range. If only the
// later mapping were checked, a mapping whose lows are e.g. `[a, b, c, d]`
// would absorb the highs of a later mapping whose lows are just `[a, b]`,
// and those highs would then wrongly match the `c-d` lows as well.
var optimizeByLowSurrogates = function(surrogateMappings) {
  if (surrogateMappings.length == 1) {
    return surrogateMappings;
  }
  var index = -1;
  var innerIndex = -1;
  while (++index < surrogateMappings.length) {
    var mapping = surrogateMappings[index];
    var lowSurrogates = mapping[1];
    if (lowSurrogates.length !== 2) {
      // Multi-range lows can never be merged safely by comparing only the
      // first range; leave this mapping as it is.
      continue;
    }
    var lowSurrogateStart = lowSurrogates[0];
    var lowSurrogateEnd = lowSurrogates[1];
    innerIndex = index; // Note: the loop starts at the next index.
    while (++innerIndex < surrogateMappings.length) {
      var otherMapping = surrogateMappings[innerIndex];
      var otherLowSurrogates = otherMapping[1];
      var otherLowSurrogateStart = otherLowSurrogates[0];
      var otherLowSurrogateEnd = otherLowSurrogates[1];
      if (
        lowSurrogateStart == otherLowSurrogateStart &&
        lowSurrogateEnd == otherLowSurrogateEnd &&
        otherLowSurrogates.length === 2
      ) {
        // Add the code points in the other item to this one.
        if (dataIsSingleton(otherMapping[0])) {
          mapping[0] = dataAdd(mapping[0], otherMapping[0][0]);
        } else {
          mapping[0] = dataAddRange(
            mapping[0],
            otherMapping[0][0],
            otherMapping[0][1] - 1
          );
        }
        // Remove the other, now redundant, item and re-examine the mapping
        // that moved into its slot.
        surrogateMappings.splice(innerIndex, 1);
        --innerIndex;
      }
    }
  }
  return surrogateMappings;
};
| |
// Translates astral range data into surrogate mappings. The result has the
// shape
//
//   [
//     [ highSurrogates1, lowSurrogates1 ],
//     [ highSurrogates2, lowSurrogates2 ]
//   ]
//
// where both members of a mapping are `[start, end)` range data, and is
// passed through `optimizeSurrogateMappings` before being returned.
//
// A range is emitted as one mapping when it stays within one high surrogate,
// or when it covers complete low-surrogate blocks from start to end.
// Otherwise it is cut into up to three pieces: the tail of the first block,
// the complete blocks in between, and the head of the last block.
var surrogateSet = function(data) {
  // Exit early if `data` is an empty set.
  if (!data.length) {
    return [];
  }

  var mappings = [];
  var total = data.length;
  for (var i = 0; i < total; i += 2) {
    var first = data[i];
    var last = data[i + 1] - 1; // inclusive

    var firstHigh = highSurrogate(first);
    var firstLow = lowSurrogate(first);
    var lastHigh = highSurrogate(last);
    var lastLow = lowSurrogate(last);

    var beginsAtLowestLow = firstLow == LOW_SURROGATE_MIN;
    var endsAtHighestLow = lastLow == LOW_SURROGATE_MAX;

    // Whole range expressible as a single mapping.
    if (firstHigh == lastHigh || (beginsAtLowestLow && endsAtHighestLow)) {
      mappings.push([
        [firstHigh, lastHigh + 1],
        [firstLow, lastLow + 1]
      ]);
      continue;
    }

    // Piece 1: `(firstHigh, firstLow)` to `(firstHigh, LOW_SURROGATE_MAX)`.
    mappings.push([
      [firstHigh, firstHigh + 1],
      [firstLow, LOW_SURROGATE_MAX + 1]
    ]);

    // Piece 2: the complete blocks strictly between the two high surrogates.
    if (firstHigh + 1 < lastHigh) {
      if (endsAtHighestLow) {
        // The last block is complete as well, so pieces 2 and 3 collapse
        // into one mapping.
        mappings.push([
          [firstHigh + 1, lastHigh + 1],
          [LOW_SURROGATE_MIN, lastLow + 1]
        ]);
        continue;
      }
      mappings.push([
        [firstHigh + 1, lastHigh],
        [LOW_SURROGATE_MIN, LOW_SURROGATE_MAX + 1]
      ]);
    }

    // Piece 3: `(lastHigh, LOW_SURROGATE_MIN)` to `(lastHigh, lastLow)`.
    mappings.push([
      [lastHigh, lastHigh + 1],
      [LOW_SURROGATE_MIN, lastLow + 1]
    ]);
  }

  return optimizeSurrogateMappings(mappings);
};
| |
// Renders surrogate mappings as alternatives of the form
// `<high surrogate class><low surrogate class>`, joined with `|`.
var createSurrogateCharacterClasses = function(surrogateMappings) {
  var alternatives = [];
  var total = surrogateMappings.length;
  for (var i = 0; i < total; i++) {
    var mapping = surrogateMappings[i];
    var highClass = createBMPCharacterClasses(mapping[0]);
    var lowClass = createBMPCharacterClasses(mapping[1]);
    alternatives.push(highClass + lowClass);
  }
  return alternatives.join('|');
};
| |
// Produces the complete regular-expression source for `data`.
//
// With `hasUnicodeFlag` the data is rendered as one `u`-mode character
// class. Otherwise it is split into BMP, lone-surrogate and astral parts that
// are rendered separately and joined with `|`:
//   - BMP code points as an ordinary character class,
//   - astral code points as surrogate-pair classes,
//   - lone high surrogates followed by a negative lookahead, so they do not
//     match the first half of a surrogate pair,
//   - lone low surrogates preceded by "not a high surrogate, or start of
//     input" (an approximation, as no lookbehind is used).
// With `bmpOnly` the lone surrogates are folded into the BMP class instead.
var createCharacterClassesFromData = function(data, bmpOnly, hasUnicodeFlag) {
  if (hasUnicodeFlag) {
    return createUnicodeCharacterClasses(data);
  }

  var parts = splitAtBMP(data);
  var bmp = parts.bmp;
  var loneHighs = parts.loneHighSurrogates;
  var loneLows = parts.loneLowSurrogates;
  var emitLoneHighs = !dataIsEmpty(loneHighs);
  var emitLoneLows = !dataIsEmpty(loneLows);

  var surrogateMappings = surrogateSet(parts.astral);

  if (bmpOnly) {
    bmp = dataAddData(bmp, loneHighs);
    bmp = dataAddData(bmp, loneLows);
    emitLoneHighs = false;
    emitLoneLows = false;
  }

  var alternatives = [];
  if (!dataIsEmpty(bmp)) {
    alternatives.push(createBMPCharacterClasses(bmp));
  }
  if (surrogateMappings.length) {
    alternatives.push(createSurrogateCharacterClasses(surrogateMappings));
  }
  // https://gist.github.com/mathiasbynens/bbe7f870208abcfec860
  if (emitLoneHighs) {
    var highClass = createBMPCharacterClasses(loneHighs);
    alternatives.push(highClass + '(?![\\uDC00-\\uDFFF])');
  }
  if (emitLoneLows) {
    var lowClass = createBMPCharacterClasses(loneLows);
    alternatives.push('(?:[^\\uD800-\\uDBFF]|^)' + lowClass);
  }
  return alternatives.join('|');
};
| |
| /*--------------------------------------------------------------------------*/ |
| |
// `regenerate` can be used as a constructor (and new methods can be added to
// its prototype) but also as a regular function, the latter of which is the
// documented and most common usage. For that reason, it's not capitalized.
//
// Any arguments are treated as the initial contents of the set: a single
// argument is handed to `add()` as-is, several arguments are collected into
// an array first. Returns the (new) set.
var regenerate = function(value) {
  if (arguments.length > 1) {
    value = slice.call(arguments);
  }
  if (this instanceof regenerate) {
    this.data = [];
    // `0` (U+0000) is a valid code point and must not be skipped just because
    // it is falsy; `regenerate(0)` and `new regenerate(0)` have to agree.
    // Other falsy values (no argument, `null`, `''`, …) still yield an empty
    // set without calling `add()`.
    return value || value === 0 ? this.add(value) : this;
  }
  return (new regenerate).add(value);
};
| |
| regenerate.version = '1.4.2'; |
| |
var proto = regenerate.prototype;
extend(proto, {
  // Adds code points to the set. Accepts a number, a symbol (string), another
  // `regenerate` instance, an array of any of these, or several arguments.
  // `null`/`undefined` is ignored. Returns the set for chaining.
  add: function(value) {
    var set = this;
    if (value == null) {
      return set;
    }
    if (value instanceof regenerate) {
      set.data = dataAddData(set.data, value.data);
      return set;
    }
    if (arguments.length > 1) {
      value = slice.call(arguments);
    }
    if (isArray(value)) {
      var itemCount = value.length;
      for (var i = 0; i < itemCount; i++) {
        set.add(value[i]);
      }
      return set;
    }
    var codePoint = isNumber(value) ? value : symbolToCodePoint(value);
    set.data = dataAdd(set.data, codePoint);
    return set;
  },

  // Removes code points from the set; accepts the same kinds of arguments as
  // `add`. Returns the set for chaining.
  remove: function(value) {
    var set = this;
    if (value == null) {
      return set;
    }
    if (value instanceof regenerate) {
      set.data = dataRemoveData(set.data, value.data);
      return set;
    }
    if (arguments.length > 1) {
      value = slice.call(arguments);
    }
    if (isArray(value)) {
      var itemCount = value.length;
      for (var i = 0; i < itemCount; i++) {
        set.remove(value[i]);
      }
      return set;
    }
    var codePoint = isNumber(value) ? value : symbolToCodePoint(value);
    set.data = dataRemove(set.data, codePoint);
    return set;
  },

  // Adds the inclusive range `start`–`end`; each bound may be a number or a
  // symbol.
  addRange: function(start, end) {
    var from = isNumber(start) ? start : symbolToCodePoint(start);
    var to = isNumber(end) ? end : symbolToCodePoint(end);
    this.data = dataAddRange(this.data, from, to);
    return this;
  },

  // Removes the inclusive range `start`–`end`; each bound may be a number or
  // a symbol.
  removeRange: function(start, end) {
    var from = isNumber(start) ? start : symbolToCodePoint(start);
    var to = isNumber(end) ? end : symbolToCodePoint(end);
    this.data = dataRemoveRange(this.data, from, to);
    return this;
  },

  // Keeps only the code points that also occur in `argument`, which is either
  // another `regenerate` instance or an array of code points.
  // TODO: Optimize this by writing and using `dataIntersectionData()`.
  intersection: function(argument) {
    var codePoints = argument;
    if (argument instanceof regenerate) {
      codePoints = dataToArray(argument.data);
    }
    this.data = dataIntersection(this.data, codePoints);
    return this;
  },

  // Tells whether the given code point (number or symbol) is in the set.
  contains: function(codePoint) {
    var value = isNumber(codePoint) ? codePoint : symbolToCodePoint(codePoint);
    return dataContains(this.data, value);
  },

  // Returns a new set holding a copy of this set's data.
  clone: function() {
    var copy = new regenerate;
    copy.data = this.data.slice(0);
    return copy;
  },

  // Returns regular-expression source text matching exactly this set.
  // `options.bmpOnly` and `options.hasUnicodeFlag` are forwarded to the
  // pattern builder.
  toString: function(options) {
    var bmpOnly = false;
    var hasUnicodeFlag = false;
    if (options) {
      bmpOnly = options.bmpOnly;
      hasUnicodeFlag = options.hasUnicodeFlag;
    }
    var source = createCharacterClassesFromData(
      this.data,
      bmpOnly,
      hasUnicodeFlag
    );
    if (!source) {
      // For an empty set, return something that can be inserted `/here/` to
      // form a valid regular expression. Avoid `(?:)` since that matches the
      // empty string.
      return '[]';
    }
    // Use `\0` instead of `\x00` where possible.
    return source.replace(regexNull, '\\0$1');
  },

  // Compiles the set into a `RegExp`. When `flags` contains `u`, the pattern
  // is generated in Unicode mode.
  toRegExp: function(flags) {
    var wantsUnicode = flags && flags.indexOf('u') != -1;
    var options = wantsUnicode ? { 'hasUnicodeFlag': true } : null;
    var pattern = this.toString(options);
    return RegExp(pattern, flags || '');
  },

  // Returns every code point of the set as an array.
  // Note: `valueOf` is aliased as `toArray`.
  valueOf: function() {
    return dataToArray(this.data);
  }
});

proto.toArray = proto.valueOf;
| |
// Expose `regenerate` to whichever module system is present.
//
// Some AMD build optimizers, like r.js, check for specific condition patterns
// like the one below, so that first test is kept in exactly this form.
if (
  typeof define == 'function' &&
  typeof define.amd == 'object' &&
  define.amd
) {
  define(function() {
    return regenerate;
  });
} else if (!freeExports || freeExports.nodeType) {
  // No usable `exports` object (Rhino or a web browser): attach to `root`.
  root.regenerate = regenerate;
} else if (freeModule) {
  // Node.js, io.js, or RingoJS v0.8.0+
  freeModule.exports = regenerate;
} else {
  // Narwhal or RingoJS v0.7.0-
  freeExports.regenerate = regenerate;
}
| |
| }(this)); |