var types = require('./types');
var sets = require('./sets');


// All of these are private and only used by randexp.
// It's assumed that they will always be called with the correct input.

| var CTRL = '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^ ?'; |
| var SLSH = { '0': 0, 't': 9, 'n': 10, 'v': 11, 'f': 12, 'r': 13 }; |
| |
| /** |
| * Finds character representations in str and convert all to |
| * their respective characters |
| * |
| * @param {String} str |
| * @return {String} |
| */ |
| exports.strToChars = function(str) { |
| /* jshint maxlen: false */ |
| var chars_regex = /(\[\\b\])|(\\)?\\(?:u([A-F0-9]{4})|x([A-F0-9]{2})|(0?[0-7]{2})|c([@A-Z\[\\\]\^?])|([0tnvfr]))/g; |
| str = str.replace(chars_regex, function(s, b, lbs, a16, b16, c8, dctrl, eslsh) { |
| if (lbs) { |
| return s; |
| } |
| |
| var code = b ? 8 : |
| a16 ? parseInt(a16, 16) : |
| b16 ? parseInt(b16, 16) : |
| c8 ? parseInt(c8, 8) : |
| dctrl ? CTRL.indexOf(dctrl) : |
| SLSH[eslsh]; |
| |
| var c = String.fromCharCode(code); |
| |
| // Escape special regex characters. |
| if (/[\[\]{}\^$.|?*+()]/.test(c)) { |
| c = '\\' + c; |
| } |
| |
| return c; |
| }); |
| |
| return str; |
| }; |
| |
| |
| /** |
| * turns class into tokens |
| * reads str until it encounters a ] not preceeded by a \ |
| * |
| * @param {String} str |
| * @param {String} regexpStr |
| * @return {Array.<Array.<Object>, Number>} |
| */ |
| exports.tokenizeClass = function(str, regexpStr) { |
| /* jshint maxlen: false */ |
| var tokens = []; |
| var regexp = /\\(?:(w)|(d)|(s)|(W)|(D)|(S))|((?:(?:\\)(.)|([^\]\\]))-(?:\\)?([^\]]))|(\])|(?:\\)?(.)/g; |
| var rs, c; |
| |
| |
| while ((rs = regexp.exec(str)) != null) { |
| if (rs[1]) { |
| tokens.push(sets.words()); |
| |
| } else if (rs[2]) { |
| tokens.push(sets.ints()); |
| |
| } else if (rs[3]) { |
| tokens.push(sets.whitespace()); |
| |
| } else if (rs[4]) { |
| tokens.push(sets.notWords()); |
| |
| } else if (rs[5]) { |
| tokens.push(sets.notInts()); |
| |
| } else if (rs[6]) { |
| tokens.push(sets.notWhitespace()); |
| |
| } else if (rs[7]) { |
| tokens.push({ |
| type: types.RANGE, |
| from: (rs[8] || rs[9]).charCodeAt(0), |
| to: rs[10].charCodeAt(0), |
| }); |
| |
| } else if (c = rs[12]) { |
| tokens.push({ |
| type: types.CHAR, |
| value: c.charCodeAt(0), |
| }); |
| |
| } else { |
| return [tokens, regexp.lastIndex]; |
| } |
| } |
| |
| exports.error(regexpStr, 'Unterminated character class'); |
| }; |
| |
| |
| /** |
| * Shortcut to throw errors. |
| * |
| * @param {String} regexp |
| * @param {String} msg |
| */ |
| exports.error = function(regexp, msg) { |
| throw new SyntaxError('Invalid regular expression: /' + regexp + '/: ' + msg); |
| }; |