| var util = require('./util'); |
| var types = require('./types'); |
| var sets = require('./sets'); |
| var positions = require('./positions'); |
| |
| |
| module.exports = function(regexpStr) { |
| var i = 0, l, c, |
| start = { type: types.ROOT, stack: []}, |
| |
| // Keep track of last clause/group and stack. |
| lastGroup = start, |
| last = start.stack, |
| groupStack = []; |
| |
| |
| var repeatErr = function(i) { |
| util.error(regexpStr, 'Nothing to repeat at column ' + (i - 1)); |
| }; |
| |
| // Decode a few escaped characters. |
| var str = util.strToChars(regexpStr); |
| l = str.length; |
| |
| // Iterate through each character in string. |
| while (i < l) { |
| c = str[i++]; |
| |
| switch (c) { |
| // Handle escaped characters, inclues a few sets. |
| case '\\': |
| c = str[i++]; |
| |
| switch (c) { |
| case 'b': |
| last.push(positions.wordBoundary()); |
| break; |
| |
| case 'B': |
| last.push(positions.nonWordBoundary()); |
| break; |
| |
| case 'w': |
| last.push(sets.words()); |
| break; |
| |
| case 'W': |
| last.push(sets.notWords()); |
| break; |
| |
| case 'd': |
| last.push(sets.ints()); |
| break; |
| |
| case 'D': |
| last.push(sets.notInts()); |
| break; |
| |
| case 's': |
| last.push(sets.whitespace()); |
| break; |
| |
| case 'S': |
| last.push(sets.notWhitespace()); |
| break; |
| |
| default: |
| // Check if c is integer. |
| // In which case it's a reference. |
| if (/\d/.test(c)) { |
| last.push({ type: types.REFERENCE, value: parseInt(c, 10) }); |
| |
| // Escaped character. |
| } else { |
| last.push({ type: types.CHAR, value: c.charCodeAt(0) }); |
| } |
| } |
| |
| break; |
| |
| |
| // Positionals. |
| case '^': |
| last.push(positions.begin()); |
| break; |
| |
| case '$': |
| last.push(positions.end()); |
| break; |
| |
| |
| // Handle custom sets. |
| case '[': |
| // Check if this class is 'anti' i.e. [^abc]. |
| var not; |
| if (str[i] === '^') { |
| not = true; |
| i++; |
| } else { |
| not = false; |
| } |
| |
| // Get all the characters in class. |
| var classTokens = util.tokenizeClass(str.slice(i), regexpStr); |
| |
| // Increase index by length of class. |
| i += classTokens[1]; |
| last.push({ |
| type: types.SET, |
| set: classTokens[0], |
| not: not, |
| }); |
| |
| break; |
| |
| |
| // Class of any character except \n. |
| case '.': |
| last.push(sets.anyChar()); |
| break; |
| |
| |
| // Push group onto stack. |
| case '(': |
| // Create group. |
| var group = { |
| type: types.GROUP, |
| stack: [], |
| remember: true, |
| }; |
| |
| c = str[i]; |
| |
| // If if this is a special kind of group. |
| if (c === '?') { |
| c = str[i + 1]; |
| i += 2; |
| |
| // Match if followed by. |
| if (c === '=') { |
| group.followedBy = true; |
| |
| // Match if not followed by. |
| } else if (c === '!') { |
| group.notFollowedBy = true; |
| |
| } else if (c !== ':') { |
| util.error(regexpStr, |
| 'Invalid group, character \'' + c + |
| '\' after \'?\' at column ' + (i - 1)); |
| } |
| |
| group.remember = false; |
| } |
| |
| // Insert subgroup into current group stack. |
| last.push(group); |
| |
| // Remember the current group for when the group closes. |
| groupStack.push(lastGroup); |
| |
| // Make this new group the current group. |
| lastGroup = group; |
| last = group.stack; |
| break; |
| |
| |
| // Pop group out of stack. |
| case ')': |
| if (groupStack.length === 0) { |
| util.error(regexpStr, 'Unmatched ) at column ' + (i - 1)); |
| } |
| lastGroup = groupStack.pop(); |
| |
| // Check if this group has a PIPE. |
| // To get back the correct last stack. |
| last = lastGroup.options ? |
| lastGroup.options[lastGroup.options.length - 1] : lastGroup.stack; |
| break; |
| |
| |
| // Use pipe character to give more choices. |
| case '|': |
| // Create array where options are if this is the first PIPE |
| // in this clause. |
| if (!lastGroup.options) { |
| lastGroup.options = [lastGroup.stack]; |
| delete lastGroup.stack; |
| } |
| |
| // Create a new stack and add to options for rest of clause. |
| var stack = []; |
| lastGroup.options.push(stack); |
| last = stack; |
| break; |
| |
| |
| // Repetition. |
| // For every repetition, remove last element from last stack |
| // then insert back a RANGE object. |
| // This design is chosen because there could be more than |
| // one repetition symbols in a regex i.e. `a?+{2,3}`. |
| case '{': |
| var rs = /^(\d+)(,(\d+)?)?\}/.exec(str.slice(i)), min, max; |
| if (rs !== null) { |
| if (last.length === 0) { |
| repeatErr(i); |
| } |
| min = parseInt(rs[1], 10); |
| max = rs[2] ? rs[3] ? parseInt(rs[3], 10) : Infinity : min; |
| i += rs[0].length; |
| |
| last.push({ |
| type: types.REPETITION, |
| min: min, |
| max: max, |
| value: last.pop(), |
| }); |
| } else { |
| last.push({ |
| type: types.CHAR, |
| value: 123, |
| }); |
| } |
| break; |
| |
| case '?': |
| if (last.length === 0) { |
| repeatErr(i); |
| } |
| last.push({ |
| type: types.REPETITION, |
| min: 0, |
| max: 1, |
| value: last.pop(), |
| }); |
| break; |
| |
| case '+': |
| if (last.length === 0) { |
| repeatErr(i); |
| } |
| last.push({ |
| type: types.REPETITION, |
| min: 1, |
| max: Infinity, |
| value: last.pop(), |
| }); |
| break; |
| |
| case '*': |
| if (last.length === 0) { |
| repeatErr(i); |
| } |
| last.push({ |
| type: types.REPETITION, |
| min: 0, |
| max: Infinity, |
| value: last.pop(), |
| }); |
| break; |
| |
| |
| // Default is a character that is not `\[](){}?+*^$`. |
| default: |
| last.push({ |
| type: types.CHAR, |
| value: c.charCodeAt(0), |
| }); |
| } |
| |
| } |
| |
| // Check if any groups have not been closed. |
| if (groupStack.length !== 0) { |
| util.error(regexpStr, 'Unterminated group'); |
| } |
| |
| return start; |
| }; |
| |
| module.exports.types = types; |