var TokenStream = require('../common/TokenStream');
var adoptBuffer = require('../common/adopt-buffer');
var constants = require('./const');
var TYPE = constants.TYPE;
var charCodeDefinitions = require('./char-code-definitions');
var isNewline = charCodeDefinitions.isNewline;
var isName = charCodeDefinitions.isName;
var isValidEscape = charCodeDefinitions.isValidEscape;
var isNumberStart = charCodeDefinitions.isNumberStart;
var isIdentifierStart = charCodeDefinitions.isIdentifierStart;
var charCodeCategory = charCodeDefinitions.charCodeCategory;
var isBOM = charCodeDefinitions.isBOM;
var utils = require('./utils');
var cmpStr = utils.cmpStr;
var getNewlineLength = utils.getNewlineLength;
var findWhiteSpaceEnd = utils.findWhiteSpaceEnd;
var consumeEscaped = utils.consumeEscaped;
var consumeName = utils.consumeName;
var consumeNumber = utils.consumeNumber;
var consumeBadUrlRemnants = utils.consumeBadUrlRemnants;
var OFFSET_MASK = 0x00FFFFFF;
var TYPE_SHIFT = 24;
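// Each offsetAndType entry packs a token into a single value: the token type
// in the upper bits (type << TYPE_SHIFT) and the offset right after the token
// in the lower 24 bits (value & OFFSET_MASK), e.g. an <ident-token> ending at
// offset 10 is stored as (TYPE.Ident << TYPE_SHIFT) | 10. The balance buffer
// uses the same packing to remember an expected closing token type together
// with the index of its opening token.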
function tokenize(source, stream) {
function getCharCode(offset) {
return offset < sourceLength ? source.charCodeAt(offset) : 0;
}
// § 4.3.3. Consume a numeric token
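// e.g. "12px" -> <dimension-token>, "50%" -> <percentage-token>, "1.5" -> <number-token>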
function consumeNumericToken() {
// Consume a number and let number be the result.
offset = consumeNumber(source, offset);
// If the next 3 input code points would start an identifier, then:
if (isIdentifierStart(getCharCode(offset), getCharCode(offset + 1), getCharCode(offset + 2))) {
// Create a <dimension-token> with the same value and type flag as number, and a unit set initially to the empty string.
// Consume a name. Set the <dimension-token>’s unit to the returned value.
// Return the <dimension-token>.
type = TYPE.Dimension;
offset = consumeName(source, offset);
return;
}
// Otherwise, if the next input code point is U+0025 PERCENTAGE SIGN (%), consume it.
if (getCharCode(offset) === 0x0025) {
// Create a <percentage-token> with the same value as number, and return it.
type = TYPE.Percentage;
offset++;
return;
}
// Otherwise, create a <number-token> with the same value and type flag as number, and return it.
type = TYPE.Number;
}
// § 4.3.4. Consume an ident-like token
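// e.g. "red" -> <ident-token>, "calc(" -> <function-token>, "url(#id)" -> <url-token>;
// a quoted value like url("foo") yields a <function-token> for "url(" followed by a <string-token>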
function consumeIdentLikeToken() {
const nameStartOffset = offset;
// Consume a name, and let string be the result.
offset = consumeName(source, offset);
// If string’s value is an ASCII case-insensitive match for "url",
// and the next input code point is U+0028 LEFT PARENTHESIS ((), consume it.
if (cmpStr(source, nameStartOffset, offset, 'url') && getCharCode(offset) === 0x0028) {
// While the next two input code points are whitespace, consume the next input code point.
offset = findWhiteSpaceEnd(source, offset + 1);
// If the next one or two input code points are U+0022 QUOTATION MARK ("), U+0027 APOSTROPHE ('),
// or whitespace followed by U+0022 QUOTATION MARK (") or U+0027 APOSTROPHE ('),
// then create a <function-token> with its value set to string and return it.
if (getCharCode(offset) === 0x0022 ||
getCharCode(offset) === 0x0027) {
type = TYPE.Function;
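// only "url(" (4 code units) is consumed; the whitespace and the quoted value
// that follow are tokenized separately, the latter as a <string-token>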
offset = nameStartOffset + 4;
return;
}
// Otherwise, consume a url token, and return it.
consumeUrlToken();
return;
}
// Otherwise, if the next input code point is U+0028 LEFT PARENTHESIS ((), consume it.
// Create a <function-token> with its value set to string and return it.
if (getCharCode(offset) === 0x0028) {
type = TYPE.Function;
offset++;
return;
}
// Otherwise, create an <ident-token> with its value set to string and return it.
type = TYPE.Ident;
}
// § 4.3.5. Consume a string token
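// e.g. '"foo"' or "'foo'" -> <string-token>; an unescaped newline before the
// closing quote produces a <bad-string-token> instead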
function consumeStringToken(endingCodePoint) {
// This algorithm may be called with an ending code point, which denotes the code point
// that ends the string. If an ending code point is not specified,
// the current input code point is used.
if (!endingCodePoint) {
endingCodePoint = getCharCode(offset++);
}
// Initially create a <string-token> with its value set to the empty string.
type = TYPE.String;
// Repeatedly consume the next input code point from the stream:
for (; offset < source.length; offset++) {
var code = source.charCodeAt(offset);
switch (charCodeCategory(code)) {
// ending code point
case endingCodePoint:
// Return the <string-token>.
offset++;
return;
// EOF
case charCodeCategory.Eof:
// This is a parse error. Return the <string-token>.
return;
// newline
case charCodeCategory.WhiteSpace:
if (isNewline(code)) {
// This is a parse error. Reconsume the current input code point,
// create a <bad-string-token>, and return it.
offset += getNewlineLength(source, offset, code);
type = TYPE.BadString;
return;
}
break;
// U+005C REVERSE SOLIDUS (\)
case 0x005C:
// If the next input code point is EOF, do nothing.
if (offset === source.length - 1) {
break;
}
var nextCode = getCharCode(offset + 1);
// Otherwise, if the next input code point is a newline, consume it.
if (isNewline(nextCode)) {
offset += getNewlineLength(source, offset + 1, nextCode);
} else if (isValidEscape(code, nextCode)) {
// Otherwise, (the stream starts with a valid escape) consume
// an escaped code point and append the returned code point to
// the <string-token>’s value.
offset = consumeEscaped(source, offset) - 1;
}
break;
// anything else
// Append the current input code point to the <string-token>’s value.
}
}
}
// § 4.3.6. Consume a url token
// Note: This algorithm assumes that the initial "url(" has already been consumed.
// This algorithm also assumes that it’s being called to consume an "unquoted" value, like url(foo).
// A quoted value, like url("foo"), is parsed as a <function-token>. Consume an ident-like token
// automatically handles this distinction; this algorithm shouldn’t be called directly otherwise.
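// e.g. "url(image.png)" -> <url-token>; a quote, "(", a non-printable code point,
// or whitespace not followed by the closing ")" produces a <bad-url-token> instead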
function consumeUrlToken() {
// Initially create a <url-token> with its value set to the empty string.
type = TYPE.Url;
// Consume as much whitespace as possible.
offset = findWhiteSpaceEnd(source, offset);
// Repeatedly consume the next input code point from the stream:
for (; offset < source.length; offset++) {
var code = source.charCodeAt(offset);
switch (charCodeCategory(code)) {
// U+0029 RIGHT PARENTHESIS ())
case 0x0029:
// Return the <url-token>.
offset++;
return;
// EOF
case charCodeCategory.Eof:
// This is a parse error. Return the <url-token>.
return;
// whitespace
case charCodeCategory.WhiteSpace:
// Consume as much whitespace as possible.
offset = findWhiteSpaceEnd(source, offset);
// If the next input code point is U+0029 RIGHT PARENTHESIS ()) or EOF,
// consume it and return the <url-token>
// (if EOF was encountered, this is a parse error);
if (getCharCode(offset) === 0x0029 || offset >= source.length) {
if (offset < source.length) {
offset++;
}
return;
}
// otherwise, consume the remnants of a bad url, create a <bad-url-token>,
// and return it.
offset = consumeBadUrlRemnants(source, offset);
type = TYPE.BadUrl;
return;
// U+0022 QUOTATION MARK (")
// U+0027 APOSTROPHE (')
// U+0028 LEFT PARENTHESIS (()
// non-printable code point
case 0x0022:
case 0x0027:
case 0x0028:
case charCodeCategory.NonPrintable:
// This is a parse error. Consume the remnants of a bad url,
// create a <bad-url-token>, and return it.
offset = consumeBadUrlRemnants(source, offset);
type = TYPE.BadUrl;
return;
// U+005C REVERSE SOLIDUS (\)
case 0x005C:
// If the stream starts with a valid escape, consume an escaped code point and
// append the returned code point to the <url-token>’s value.
if (isValidEscape(code, getCharCode(offset + 1))) {
offset = consumeEscaped(source, offset) - 1;
break;
}
// Otherwise, this is a parse error. Consume the remnants of a bad url,
// create a <bad-url-token>, and return it.
offset = consumeBadUrlRemnants(source, offset);
type = TYPE.BadUrl;
return;
// anything else
// Append the current input code point to the <url-token>’s value.
}
}
}
if (!stream) {
stream = new TokenStream();
}
// ensure source is a string
source = String(source || '');
var sourceLength = source.length;
var offsetAndType = adoptBuffer(stream.offsetAndType, sourceLength + 1); // +1 because of eof-token
var balance = adoptBuffer(stream.balance, sourceLength + 1);
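// the buffers hold one entry per code unit in the worst case, plus one for the <EOF-token>;
// adoptBuffer() is expected to reuse the stream's previous buffers when they are large enough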
var tokenCount = 0;
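// skip a leading byte order mark if present (isBOM() is expected to return its length
// in code units, or 0 when there is none)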
var start = isBOM(getCharCode(0));
var offset = start;
var balanceCloseType = 0;
var balanceStart = 0;
var balancePrev = 0;
// https://drafts.csswg.org/css-syntax-3/#consume-token
// § 4.3.1. Consume a token
while (offset < sourceLength) {
var code = source.charCodeAt(offset);
var type = 0;
balance[tokenCount] = sourceLength;
switch (charCodeCategory(code)) {
// whitespace
case charCodeCategory.WhiteSpace:
// Consume as much whitespace as possible. Return a <whitespace-token>.
type = TYPE.WhiteSpace;
offset = findWhiteSpaceEnd(source, offset + 1);
break;
// U+0022 QUOTATION MARK (")
case 0x0022:
// Consume a string token and return it.
consumeStringToken();
break;
// U+0023 NUMBER SIGN (#)
case 0x0023:
// If the next input code point is a name code point or the next two input code points are a valid escape, then:
if (isName(getCharCode(offset + 1)) || isValidEscape(getCharCode(offset + 1), getCharCode(offset + 2))) {
// Create a <hash-token>.
type = TYPE.Hash;
// If the next 3 input code points would start an identifier, set the <hash-token>’s type flag to "id".
// if (isIdentifierStart(getCharCode(offset + 1), getCharCode(offset + 2), getCharCode(offset + 3))) {
// // TODO: set id flag
// }
// Consume a name, and set the <hash-token>’s value to the returned string.
offset = consumeName(source, offset + 1);
// Return the <hash-token>.
} else {
// Otherwise, return a <delim-token> with its value set to the current input code point.
type = TYPE.Delim;
offset++;
}
break;
// U+0027 APOSTROPHE (')
case 0x0027:
// Consume a string token and return it.
consumeStringToken();
break;
// U+0028 LEFT PARENTHESIS (()
case 0x0028:
// Return a <(-token>.
type = TYPE.LeftParenthesis;
offset++;
break;
// U+0029 RIGHT PARENTHESIS ())
case 0x0029:
// Return a <)-token>.
type = TYPE.RightParenthesis;
offset++;
break;
// U+002B PLUS SIGN (+)
case 0x002B:
// If the input stream starts with a number, ...
if (isNumberStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
// ... reconsume the current input code point, consume a numeric token, and return it.
consumeNumericToken();
} else {
// Otherwise, return a <delim-token> with its value set to the current input code point.
type = TYPE.Delim;
offset++;
}
break;
// U+002C COMMA (,)
case 0x002C:
// Return a <comma-token>.
type = TYPE.Comma;
offset++;
break;
// U+002D HYPHEN-MINUS (-)
case 0x002D:
// If the input stream starts with a number, reconsume the current input code point, consume a numeric token, and return it.
if (isNumberStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
consumeNumericToken();
} else {
// Otherwise, if the next 2 input code points are U+002D HYPHEN-MINUS U+003E GREATER-THAN SIGN (->), consume them and return a <CDC-token>.
if (getCharCode(offset + 1) === 0x002D &&
getCharCode(offset + 2) === 0x003E) {
type = TYPE.CDC;
offset = offset + 3;
} else {
// Otherwise, if the input stream starts with an identifier, ...
if (isIdentifierStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
// ... reconsume the current input code point, consume an ident-like token, and return it.
consumeIdentLikeToken();
} else {
// Otherwise, return a <delim-token> with its value set to the current input code point.
type = TYPE.Delim;
offset++;
}
}
}
break;
// U+002E FULL STOP (.)
case 0x002E:
// If the input stream starts with a number, ...
if (isNumberStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
// ... reconsume the current input code point, consume a numeric token, and return it.
consumeNumericToken();
} else {
// Otherwise, return a <delim-token> with its value set to the current input code point.
type = TYPE.Delim;
offset++;
}
break;
// U+002F SOLIDUS (/)
case 0x002F:
// If the next two input code points are U+002F SOLIDUS (/) followed by a U+002A ASTERISK (*),
if (getCharCode(offset + 1) === 0x002A) {
// ... consume them and all following code points up to and including the first U+002A ASTERISK (*)
// followed by a U+002F SOLIDUS (/), or up to an EOF code point.
type = TYPE.Comment;
offset = source.indexOf('*/', offset + 2) + 2;
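// indexOf() returns -1 when "*/" is not found, making offset === 1 here;
// in that case the comment is unterminated and runs to the end of the input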
if (offset === 1) {
offset = source.length;
}
} else {
type = TYPE.Delim;
offset++;
}
break;
// U+003A COLON (:)
case 0x003A:
// Return a <colon-token>.
type = TYPE.Colon;
offset++;
break;
// U+003B SEMICOLON (;)
case 0x003B:
// Return a <semicolon-token>.
type = TYPE.Semicolon;
offset++;
break;
// U+003C LESS-THAN SIGN (<)
case 0x003C:
// If the next 3 input code points are U+0021 EXCLAMATION MARK U+002D HYPHEN-MINUS U+002D HYPHEN-MINUS (!--), ...
if (getCharCode(offset + 1) === 0x0021 &&
getCharCode(offset + 2) === 0x002D &&
getCharCode(offset + 3) === 0x002D) {
// ... consume them and return a <CDO-token>.
type = TYPE.CDO;
offset = offset + 4;
} else {
// Otherwise, return a <delim-token> with its value set to the current input code point.
type = TYPE.Delim;
offset++;
}
break;
// U+0040 COMMERCIAL AT (@)
case 0x0040:
// If the next 3 input code points would start an identifier, ...
if (isIdentifierStart(getCharCode(offset + 1), getCharCode(offset + 2), getCharCode(offset + 3))) {
// ... consume a name, create an <at-keyword-token> with its value set to the returned value, and return it.
type = TYPE.AtKeyword;
offset = consumeName(source, offset + 1);
} else {
// Otherwise, return a <delim-token> with its value set to the current input code point.
type = TYPE.Delim;
offset++;
}
break;
// U+005B LEFT SQUARE BRACKET ([)
case 0x005B:
// Return a <[-token>.
type = TYPE.LeftSquareBracket;
offset++;
break;
// U+005C REVERSE SOLIDUS (\)
case 0x005C:
// If the input stream starts with a valid escape, ...
if (isValidEscape(code, getCharCode(offset + 1))) {
// ... reconsume the current input code point, consume an ident-like token, and return it.
consumeIdentLikeToken();
} else {
// Otherwise, this is a parse error. Return a <delim-token> with its value set to the current input code point.
type = TYPE.Delim;
offset++;
}
break;
// U+005D RIGHT SQUARE BRACKET (])
case 0x005D:
// Return a <]-token>.
type = TYPE.RightSquareBracket;
offset++;
break;
// U+007B LEFT CURLY BRACKET ({)
case 0x007B:
// Return a <{-token>.
type = TYPE.LeftCurlyBracket;
offset++;
break;
// U+007D RIGHT CURLY BRACKET (})
case 0x007D:
// Return a <}-token>.
type = TYPE.RightCurlyBracket;
offset++;
break;
// digit
case charCodeCategory.Digit:
// Reconsume the current input code point, consume a numeric token, and return it.
consumeNumericToken();
break;
// name-start code point
case charCodeCategory.NameStart:
// Reconsume the current input code point, consume an ident-like token, and return it.
consumeIdentLikeToken();
break;
// EOF
case charCodeCategory.Eof:
// Return an <EOF-token>.
break;
// anything else
default:
// Return a <delim-token> with its value set to the current input code point.
type = TYPE.Delim;
offset++;
}
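// bracket balance bookkeeping: an opening token ("(", "[", "{" or a <function-token>)
// saves the previous open state in balance[tokenCount] and packs its expected closing
// type together with its own index into balanceStart; when the matching closing token
// arrives, the pair is linked to each other and every token in between still holding
// the sourceLength sentinel is pointed at the closing token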
switch (type) {
case balanceCloseType:
balancePrev = balanceStart & OFFSET_MASK;
balanceStart = balance[balancePrev];
balanceCloseType = balanceStart >> TYPE_SHIFT;
balance[tokenCount] = balancePrev;
balance[balancePrev++] = tokenCount;
for (; balancePrev < tokenCount; balancePrev++) {
if (balance[balancePrev] === sourceLength) {
balance[balancePrev] = tokenCount;
}
}
break;
case TYPE.LeftParenthesis:
case TYPE.Function:
balance[tokenCount] = balanceStart;
balanceCloseType = TYPE.RightParenthesis;
balanceStart = (balanceCloseType << TYPE_SHIFT) | tokenCount;
break;
case TYPE.LeftSquareBracket:
balance[tokenCount] = balanceStart;
balanceCloseType = TYPE.RightSquareBracket;
balanceStart = (balanceCloseType << TYPE_SHIFT) | tokenCount;
break;
case TYPE.LeftCurlyBracket:
balance[tokenCount] = balanceStart;
balanceCloseType = TYPE.RightCurlyBracket;
balanceStart = (balanceCloseType << TYPE_SHIFT) | tokenCount;
break;
}
offsetAndType[tokenCount++] = (type << TYPE_SHIFT) | offset;
}
// finalize buffers
offsetAndType[tokenCount] = (TYPE.EOF << TYPE_SHIFT) | offset; // <EOF-token>
balance[tokenCount] = sourceLength;
balance[sourceLength] = sourceLength; // prevents false positive balance match with any token
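// unwind brackets that were never closed: walk the chain of open states and reset
// their opening tokens back to the sourceLength sentinel ("no matching pair")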
while (balanceStart !== 0) {
balancePrev = balanceStart & OFFSET_MASK;
balanceStart = balance[balancePrev];
balance[balancePrev] = sourceLength;
}
// update stream
stream.source = source;
stream.firstCharOffset = start;
stream.offsetAndType = offsetAndType;
stream.tokenCount = tokenCount;
stream.balance = balance;
stream.reset();
stream.next();
return stream;
}
// extend tokenizer with constants
Object.keys(constants).forEach(function(key) {
tokenize[key] = constants[key];
});
// extend tokenizer with static methods from char-code-definitions and utils
Object.keys(charCodeDefinitions).forEach(function(key) {
tokenize[key] = charCodeDefinitions[key];
});
Object.keys(utils).forEach(function(key) {
tokenize[key] = utils[key];
});
module.exports = tokenize;
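// Usage sketch (illustrative; it assumes the TokenStream instance returned above
// exposes tokenType, getTokenValue() and next(), and that ./const also exports
// a NAME map alongside TYPE):
//
//   var stream = tokenize('.a { color: red }');
//
//   while (stream.tokenType !== tokenize.TYPE.EOF) {
//       console.log(tokenize.NAME[stream.tokenType], JSON.stringify(stream.getTokenValue()));
//       stream.next();
//   }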