node_modules/parse-entities/index.js - nifi-fds - Git at Google

 'use strict'

 var legacy = require('character-entities-legacy')
 var invalid = require('character-reference-invalid')
 var decimal = require('is-decimal')
 var hexadecimal = require('is-hexadecimal')
 var alphanumerical = require('is-alphanumerical')
 var decodeEntity = require('./decode-entity')

 module.exports = parseEntities

 // Cached built-ins.
 var own = Object.prototype.hasOwnProperty
 var fromCharCode = String.fromCharCode
 // `Function.prototype` can be called with any arguments and returns
 // `undefined`, so it is used as a do-nothing stand-in for handlers.
 var noop = Function.prototype

 // Fallback value for every supported option.
 // The keys of this object are also the only keys copied into the settings.
 var defaults = {
   warning: null,
   reference: null,
   text: null,
   warningContext: null,
   referenceContext: null,
   textContext: null,
   position: {},
   additional: null,
   attribute: false,
   nonTerminated: true
 }

 // Character codes compared against the results of `charCodeAt`.
 var tab = 0x09 // '\t'
 var lineFeed = 0x0a // '\n'
 var formFeed = 0x0c // '\f'
 var space = 0x20 // ' '
 var numberSign = 0x23 // '#'
 var ampersand = 0x26 // '&'
 var semicolon = 0x3b // ';'
 var lessThan = 0x3c // '<'
 var equalsTo = 0x3d // '='
 var uppercaseX = 0x58 // 'X'
 var lowercaseX = 0x78 // 'x'
 var replacementCharacter = 0xfffd // '�'

 // Kinds of character reference.
 var name = 'named'
 var hexa = 'hexadecimal'
 var deci = 'decimal'

 // Radix handed to `parseInt` for each numeric kind (keyed by `hexa` / `deci`).
 var bases = {hexadecimal: 16, decimal: 10}

 // Predicate per kind (keyed by `name` / `deci` / `hexa`) that says whether a
 // character code may still belong to the reference body.
 // A failing predicate marks the end of the reference, which matters because
 // the closing semicolon is not strictly needed.
 var tests = {
   named: alphanumerical,
   decimal: decimal,
   hexadecimal: hexadecimal
 }

 // Warning codes.
 // `parse` hands the code to the warning handler together with its message.
 var namedNotTerminated = 1
 var numericNotTerminated = 2
 var namedEmpty = 3
 var numericEmpty = 4
 var namedUnknown = 5
 var numericDisallowed = 6
 var numericProhibited = 7

 // Human-readable reason for each warning code, keyed by the codes above.
 var messages = {
   // namedNotTerminated
   1: 'Named character references must be terminated by a semicolon',
   // numericNotTerminated
   2: 'Numeric character references must be terminated by a semicolon',
   // namedEmpty
   3: 'Named character references cannot be empty',
   // numericEmpty
   4: 'Numeric character references cannot be empty',
   // namedUnknown
   5: 'Named character references must be known',
   // numericDisallowed
   6: 'Numeric character references cannot be disallowed',
   // numericProhibited
   7: 'Numeric character references cannot be outside the permissible Unicode range'
 }

 // Public entry point: normalise `options` before delegating to `parse`.
 //
 // Every key of `defaults` ends up on the settings object; a caller-supplied
 // value wins unless it is `null` or `undefined`.  Keys that are not in
 // `defaults` are ignored.  Returns whatever `parse` returns.
 function parseEntities(value, options) {
   var given = options || {}
   var settings = {}
   var supplied
   var place
   var key

   for (key in defaults) {
     supplied = given[key]

     if (supplied === null || supplied === undefined) {
       settings[key] = defaults[key]
     } else {
       settings[key] = supplied
     }
   }

   // A position carrying `start` and/or `indent` is split up: the start point
   // becomes the position, and the indent list moves to `settings.indent`.
   place = settings.position

   if (place.indent || place.start) {
     settings.indent = place.indent || []
     settings.position = place.start
   }

   return parse(value, settings)
 }

 // Parse entities.
 //
 // Walks over `value` one UTF-16 code unit at a time (plus one extra step past
 // the end, so that trailing text is flushed), decodes named, decimal and
 // hexadecimal character references, and returns the decoded string.
 //
 // `settings` is expected to carry every key of `defaults`, plus an optional
 // `indent` list.  When handlers are given they are invoked as:
 //
 //   text.call(textContext, text, {start, end})
 //   reference.call(referenceContext, decoded, {start, end}, source)
 //   warning.call(warningContext, message, position, code)
 // eslint-disable-next-line complexity
 function parse(value, settings) {
   var additional = settings.additional
   var nonTerminated = settings.nonTerminated
   var handleText = settings.text
   var handleReference = settings.reference
   var handleWarning = settings.warning
   var textContext = settings.textContext
   var referenceContext = settings.referenceContext
   var warningContext = settings.warningContext
   var pos = settings.position
   var indent = settings.indent || []
   var length = value.length
   var index = 0
   var lines = -1
   var column = pos.column || 1
   var line = pos.line || 1
   var queue = ''
   var result = []
   var entityCharacters
   var namedEntity
   var terminated
   var characters
   var character
   var reference
   var following
   var warning
   var reason
   var output
   var entity
   var begin
   var start
   var type
   var test
   var prev
   var next
   var diff
   var end

   // `additional` may be given as a string; only its first code unit is used.
   if (typeof additional === 'string') {
     additional = additional.charCodeAt(0)
   }

   // Cache the current point.
   prev = now()

   // Wrap `handleWarning`.
   warning = handleWarning ? parseError : noop

   // Ensure the algorithm walks over the first character and the end (inclusive).
   index--
   length++

   while (++index < length) {
     // If the previous character was a newline.
     if (character === lineFeed) {
       column = indent[lines] || 1
     }

     // `NaN` past the end of `value`; detected below with `x !== x` checks.
     character = value.charCodeAt(index)

     if (character === ampersand) {
       following = value.charCodeAt(index + 1)

       // The behaviour depends on the identity of the next character.
       if (
         following === tab ||
         following === lineFeed ||
         following === formFeed ||
         following === space ||
         following === ampersand ||
         following === lessThan ||
         following !== following ||
         (additional && following === additional)
       ) {
         // Not a character reference.
         // No characters are consumed, and nothing is returned.
         // This is not an error, either.
         queue += fromCharCode(character)
         column++

         continue
       }

       // `start` is the index right after the ampersand, `begin` the index of
       // the first body character (after `#` / `#x`), `end` the scan cursor.
       start = index + 1
       begin = start
       end = start

       if (following === numberSign) {
         // Numerical entity.
         end = ++begin

         // The behaviour further depends on the next character.
         following = value.charCodeAt(end)

         if (following === uppercaseX || following === lowercaseX) {
           // ASCII hex digits.
           type = hexa
           end = ++begin
         } else {
           // ASCII digits.
           type = deci
         }
       } else {
         // Named entity.
         type = name
       }

       entityCharacters = ''
       entity = ''
       characters = ''
       // Reset per ampersand: two of the branches below assign nothing, and
       // would otherwise reuse the value decoded for a previous reference.
       reference = ''
       test = tests[type]
       end--

       while (++end < length) {
         following = value.charCodeAt(end)

         if (!test(following)) {
           break
         }

         characters += fromCharCode(following)

         // Check if we can match a legacy named reference.
         // If so, we cache that as the last viable named reference.
         // This ensures we do not need to walk backwards later.
         if (type === name && own.call(legacy, characters)) {
           entityCharacters = characters
           entity = legacy[characters]
         }
       }

       terminated = value.charCodeAt(end) === semicolon

       if (terminated) {
         end++

         namedEntity = type === name ? decodeEntity(characters) : false

         if (namedEntity) {
           entityCharacters = characters
           entity = namedEntity
         }
       }

       diff = 1 + end - start

       if (!terminated && !nonTerminated) {
         // Empty.
       } else if (!characters) {
         // An empty (possible) entity is valid, unless it’s numeric (thus an
         // ampersand followed by an octothorp).
         if (type !== name) {
           warning(numericEmpty, diff)
         }
       } else if (type === name) {
         // An ampersand followed by anything unknown, and not terminated, is
         // invalid.
         if (terminated && !entity) {
           warning(namedUnknown, 1)
         } else {
           // If there’s something after an entity name which is not known, cap
           // the reference.
           if (entityCharacters !== characters) {
             end = begin + entityCharacters.length
             diff = 1 + end - begin
             terminated = false
           }

           // If the reference is not terminated, warn.
           if (!terminated) {
             reason = entityCharacters ? namedNotTerminated : namedEmpty

             if (settings.attribute) {
               following = value.charCodeAt(end)

               // In attribute mode a non-terminated name followed by `=` or an
               // alphanumerical is left undecoded (only `=` warns).
               if (following === equalsTo) {
                 warning(reason, diff)
                 entity = null
               } else if (alphanumerical(following)) {
                 entity = null
               } else {
                 warning(reason, diff)
               }
             } else {
               warning(reason, diff)
             }
           }
         }

         reference = entity
       } else {
         if (!terminated) {
           // All non-terminated numeric entities are not rendered, and trigger a
           // warning.
           warning(numericNotTerminated, diff)
         }

         // When terminated and number, parse as either hexadecimal or decimal.
         reference = parseInt(characters, bases[type])

         // Trigger a warning when the parsed number is prohibited, and replace
         // with replacement character.
         if (prohibited(reference)) {
           warning(numericProhibited, diff)
           reference = fromCharCode(replacementCharacter)
         } else if (reference in invalid) {
           // Trigger a warning when the parsed number is disallowed, and replace
           // by an alternative.
           warning(numericDisallowed, diff)
           reference = invalid[reference]
         } else {
           // Parse the number.
           output = ''

           // Trigger a warning when the parsed number should not be used.
           if (disallowed(reference)) {
             warning(numericDisallowed, diff)
           }

           // Stringify the number.
           // Values above 0xFFFF need two code units: a high surrogate built
           // from the upper 10 bits and a low surrogate from the lower 10.
           if (reference > 0xffff) {
             reference -= 0x10000
             output += fromCharCode(((reference >>> 10) & 0x3ff) | 0xd800)
             reference = 0xdc00 | (reference & 0x3ff)
           }

           reference = output + fromCharCode(reference)
         }
       }

       // Found it!
       // First eat the queued characters as normal text, then eat an entity.
       if (reference) {
         flush()

         prev = now()
         index = end - 1
         column += end - start + 1
         result.push(reference)
         next = now()
         next.offset++

         if (handleReference) {
           handleReference.call(
             referenceContext,
             reference,
             {start: prev, end: next},
             value.slice(start - 1, end)
           )
         }

         prev = next
       } else {
         // If we could not find a reference, queue the checked characters (as
         // normal characters), and move the pointer to their end.
         // This is possible because we can be certain neither newlines nor
         // ampersands are included.
         characters = value.slice(start - 1, end)
         queue += characters
         column += characters.length
         index = end - 1
       }
     } else {
       // Handle anything other than an ampersand, including newlines and EOF.
       if (character === lineFeed) {
         line++
         lines++
         column = 0
       }

       // `NaN` (the step past the end) is the only value not equal to itself.
       if (character === character) {
         queue += fromCharCode(character)
         column++
       } else {
         flush()
       }
     }
   }

   // Return the reduced nodes, and any possible warnings.
   return result.join('')

   // Get current position.
   function now() {
     return {
       line: line,
       column: column,
       offset: index + (pos.offset || 0)
     }
   }

   // “Throw” a parse-error: a warning.
   // `offset` is added to both the column and the offset of the current point.
   function parseError(code, offset) {
     var position = now()

     position.column += offset
     position.offset += offset

     handleWarning.call(warningContext, messages[code], position, code)
   }

   // Flush `queue` (normal text).
   // Macro invoked before each entity and at the end of `value`.
   // Does nothing when `queue` is empty.
   function flush() {
     if (queue) {
       result.push(queue)

       if (handleText) {
         handleText.call(textContext, queue, {start: prev, end: now()})
       }

       queue = ''
     }
   }
 }

 // Tell whether `code` cannot be used as a code point at all: it is either
 // above 0x10FFFF or inside the surrogate range 0xD800-0xDFFF.
 function prohibited(code) {
   if (code > 0x10ffff) {
     return true
   }

   return code >= 0xd800 && code <= 0xdfff
 }

 // Tell whether `code` is in one of the disallowed ranges:
 // 0x01-0x08, 0x0B, 0x0D-0x1F, 0x7F-0x9F, 0xFDD0-0xFDEF, or any value whose
 // low 16 bits are 0xFFFE or 0xFFFF.
 function disallowed(code) {
   var low = code & 0xffff

   if (low === 0xffff || low === 0xfffe) {
     return true
   }

   if (code === 0x000b) {
     return true
   }

   if (code >= 0x0001 && code <= 0x0008) {
     return true
   }

   if (code >= 0x000d && code <= 0x001f) {
     return true
   }

   if (code >= 0x007f && code <= 0x009f) {
     return true
   }

   return code >= 0xfdd0 && code <= 0xfdef
 }
	'use strict'

	var legacy = require('character-entities-legacy')
	var invalid = require('character-reference-invalid')
	var decimal = require('is-decimal')
	var hexadecimal = require('is-hexadecimal')
	var alphanumerical = require('is-alphanumerical')
	var decodeEntity = require('./decode-entity')

	module.exports = parseEntities

	// Cached built-ins.
	var own = Object.prototype.hasOwnProperty
	var fromCharCode = String.fromCharCode
	// `Function.prototype` can be called with any arguments and returns
	// `undefined`, so it is used as a do-nothing stand-in for handlers.
	var noop = Function.prototype

	// Fallback value for every supported option.
	// The keys of this object are also the only keys copied into the settings.
	var defaults = {
	  warning: null,
	  reference: null,
	  text: null,
	  warningContext: null,
	  referenceContext: null,
	  textContext: null,
	  position: {},
	  additional: null,
	  attribute: false,
	  nonTerminated: true
	}

	// Character codes compared against the results of `charCodeAt`.
	var tab = 0x09 // '\t'
	var lineFeed = 0x0a // '\n'
	var formFeed = 0x0c // '\f'
	var space = 0x20 // ' '
	var numberSign = 0x23 // '#'
	var ampersand = 0x26 // '&'
	var semicolon = 0x3b // ';'
	var lessThan = 0x3c // '<'
	var equalsTo = 0x3d // '='
	var uppercaseX = 0x58 // 'X'
	var lowercaseX = 0x78 // 'x'
	var replacementCharacter = 0xfffd // '�'

	// Kinds of character reference.
	var name = 'named'
	var hexa = 'hexadecimal'
	var deci = 'decimal'

	// Radix handed to `parseInt` for each numeric kind (keyed by `hexa` / `deci`).
	var bases = {hexadecimal: 16, decimal: 10}

	// Predicate per kind (keyed by `name` / `deci` / `hexa`) that says whether a
	// character code may still belong to the reference body.
	// A failing predicate marks the end of the reference, which matters because
	// the closing semicolon is not strictly needed.
	var tests = {
	  named: alphanumerical,
	  decimal: decimal,
	  hexadecimal: hexadecimal
	}

	// Warning codes.
	// `parse` hands the code to the warning handler together with its message.
	var namedNotTerminated = 1
	var numericNotTerminated = 2
	var namedEmpty = 3
	var numericEmpty = 4
	var namedUnknown = 5
	var numericDisallowed = 6
	var numericProhibited = 7

	// Human-readable reason for each warning code, keyed by the codes above.
	var messages = {
	  // namedNotTerminated
	  1: 'Named character references must be terminated by a semicolon',
	  // numericNotTerminated
	  2: 'Numeric character references must be terminated by a semicolon',
	  // namedEmpty
	  3: 'Named character references cannot be empty',
	  // numericEmpty
	  4: 'Numeric character references cannot be empty',
	  // namedUnknown
	  5: 'Named character references must be known',
	  // numericDisallowed
	  6: 'Numeric character references cannot be disallowed',
	  // numericProhibited
	  7: 'Numeric character references cannot be outside the permissible Unicode range'
	}

	// Public entry point: normalise `options` before delegating to `parse`.
	//
	// Every key of `defaults` ends up on the settings object; a caller-supplied
	// value wins unless it is `null` or `undefined`.  Keys that are not in
	// `defaults` are ignored.  Returns whatever `parse` returns.
	function parseEntities(value, options) {
	  var settings = {}
	  var option
	  var key

	  if (!options) {
	    options = {}
	  }

	  for (key in defaults) {
	    option = options[key]
	    settings[key] =
	      option === null || option === undefined ? defaults[key] : option
	  }

	  // A position carrying `start` and/or `indent` is split up: the start point
	  // becomes the position, and the indent list moves to `settings.indent`.
	  if (settings.position.indent || settings.position.start) {
	    settings.indent = settings.position.indent || []
	    settings.position = settings.position.start
	  }

	  return parse(value, settings)
	}

	// Parse entities.
	//
	// Walks over `value` one UTF-16 code unit at a time (plus one extra step past
	// the end, so that trailing text is flushed), decodes named, decimal and
	// hexadecimal character references, and returns the decoded string.
	//
	// `settings` is expected to carry every key of `defaults`, plus an optional
	// `indent` list.  When handlers are given they are invoked as:
	//
	//   text.call(textContext, text, {start, end})
	//   reference.call(referenceContext, decoded, {start, end}, source)
	//   warning.call(warningContext, message, position, code)
	// eslint-disable-next-line complexity
	function parse(value, settings) {
	  var additional = settings.additional
	  var nonTerminated = settings.nonTerminated
	  var handleText = settings.text
	  var handleReference = settings.reference
	  var handleWarning = settings.warning
	  var textContext = settings.textContext
	  var referenceContext = settings.referenceContext
	  var warningContext = settings.warningContext
	  var pos = settings.position
	  var indent = settings.indent || []
	  var length = value.length
	  var index = 0
	  var lines = -1
	  var column = pos.column || 1
	  var line = pos.line || 1
	  var queue = ''
	  var result = []
	  var entityCharacters
	  var namedEntity
	  var terminated
	  var characters
	  var character
	  var reference
	  var following
	  var warning
	  var reason
	  var output
	  var entity
	  var begin
	  var start
	  var type
	  var test
	  var prev
	  var next
	  var diff
	  var end

	  // `additional` may be given as a string; only its first code unit is used.
	  if (typeof additional === 'string') {
	    additional = additional.charCodeAt(0)
	  }

	  // Cache the current point.
	  prev = now()

	  // Wrap `handleWarning`.
	  warning = handleWarning ? parseError : noop

	  // Ensure the algorithm walks over the first character and the end (inclusive).
	  index--
	  length++

	  while (++index < length) {
	    // If the previous character was a newline.
	    if (character === lineFeed) {
	      column = indent[lines] || 1
	    }

	    // `NaN` past the end of `value`; detected below with `x !== x` checks.
	    character = value.charCodeAt(index)

	    if (character === ampersand) {
	      following = value.charCodeAt(index + 1)

	      // The behaviour depends on the identity of the next character.
	      if (
	        following === tab ||
	        following === lineFeed ||
	        following === formFeed ||
	        following === space ||
	        following === ampersand ||
	        following === lessThan ||
	        following !== following ||
	        (additional && following === additional)
	      ) {
	        // Not a character reference.
	        // No characters are consumed, and nothing is returned.
	        // This is not an error, either.
	        queue += fromCharCode(character)
	        column++

	        continue
	      }

	      // `start` is the index right after the ampersand, `begin` the index of
	      // the first body character (after `#` / `#x`), `end` the scan cursor.
	      start = index + 1
	      begin = start
	      end = start

	      if (following === numberSign) {
	        // Numerical entity.
	        end = ++begin

	        // The behaviour further depends on the next character.
	        following = value.charCodeAt(end)

	        if (following === uppercaseX || following === lowercaseX) {
	          // ASCII hex digits.
	          type = hexa
	          end = ++begin
	        } else {
	          // ASCII digits.
	          type = deci
	        }
	      } else {
	        // Named entity.
	        type = name
	      }

	      entityCharacters = ''
	      entity = ''
	      characters = ''
	      // Reset per ampersand: two of the branches below assign nothing, and
	      // would otherwise reuse the value decoded for a previous reference.
	      reference = ''
	      test = tests[type]
	      end--

	      while (++end < length) {
	        following = value.charCodeAt(end)

	        if (!test(following)) {
	          break
	        }

	        characters += fromCharCode(following)

	        // Check if we can match a legacy named reference.
	        // If so, we cache that as the last viable named reference.
	        // This ensures we do not need to walk backwards later.
	        if (type === name && own.call(legacy, characters)) {
	          entityCharacters = characters
	          entity = legacy[characters]
	        }
	      }

	      terminated = value.charCodeAt(end) === semicolon

	      if (terminated) {
	        end++

	        namedEntity = type === name ? decodeEntity(characters) : false

	        if (namedEntity) {
	          entityCharacters = characters
	          entity = namedEntity
	        }
	      }

	      diff = 1 + end - start

	      if (!terminated && !nonTerminated) {
	        // Empty.
	      } else if (!characters) {
	        // An empty (possible) entity is valid, unless it’s numeric (thus an
	        // ampersand followed by an octothorp).
	        if (type !== name) {
	          warning(numericEmpty, diff)
	        }
	      } else if (type === name) {
	        // An ampersand followed by anything unknown, and not terminated, is
	        // invalid.
	        if (terminated && !entity) {
	          warning(namedUnknown, 1)
	        } else {
	          // If there’s something after an entity name which is not known, cap
	          // the reference.
	          if (entityCharacters !== characters) {
	            end = begin + entityCharacters.length
	            diff = 1 + end - begin
	            terminated = false
	          }

	          // If the reference is not terminated, warn.
	          if (!terminated) {
	            reason = entityCharacters ? namedNotTerminated : namedEmpty

	            if (settings.attribute) {
	              following = value.charCodeAt(end)

	              // In attribute mode a non-terminated name followed by `=` or an
	              // alphanumerical is left undecoded (only `=` warns).
	              if (following === equalsTo) {
	                warning(reason, diff)
	                entity = null
	              } else if (alphanumerical(following)) {
	                entity = null
	              } else {
	                warning(reason, diff)
	              }
	            } else {
	              warning(reason, diff)
	            }
	          }
	        }

	        reference = entity
	      } else {
	        if (!terminated) {
	          // All non-terminated numeric entities are not rendered, and trigger a
	          // warning.
	          warning(numericNotTerminated, diff)
	        }

	        // When terminated and number, parse as either hexadecimal or decimal.
	        reference = parseInt(characters, bases[type])

	        // Trigger a warning when the parsed number is prohibited, and replace
	        // with replacement character.
	        if (prohibited(reference)) {
	          warning(numericProhibited, diff)
	          reference = fromCharCode(replacementCharacter)
	        } else if (reference in invalid) {
	          // Trigger a warning when the parsed number is disallowed, and replace
	          // by an alternative.
	          warning(numericDisallowed, diff)
	          reference = invalid[reference]
	        } else {
	          // Parse the number.
	          output = ''

	          // Trigger a warning when the parsed number should not be used.
	          if (disallowed(reference)) {
	            warning(numericDisallowed, diff)
	          }

	          // Stringify the number.
	          // Values above 0xFFFF need two code units: a high surrogate built
	          // from the upper 10 bits and a low surrogate from the lower 10.
	          if (reference > 0xffff) {
	            reference -= 0x10000
	            output += fromCharCode(((reference >>> 10) & 0x3ff) | 0xd800)
	            reference = 0xdc00 | (reference & 0x3ff)
	          }

	          reference = output + fromCharCode(reference)
	        }
	      }

	      // Found it!
	      // First eat the queued characters as normal text, then eat an entity.
	      if (reference) {
	        flush()

	        prev = now()
	        index = end - 1
	        column += end - start + 1
	        result.push(reference)
	        next = now()
	        next.offset++

	        if (handleReference) {
	          handleReference.call(
	            referenceContext,
	            reference,
	            {start: prev, end: next},
	            value.slice(start - 1, end)
	          )
	        }

	        prev = next
	      } else {
	        // If we could not find a reference, queue the checked characters (as
	        // normal characters), and move the pointer to their end.
	        // This is possible because we can be certain neither newlines nor
	        // ampersands are included.
	        characters = value.slice(start - 1, end)
	        queue += characters
	        column += characters.length
	        index = end - 1
	      }
	    } else {
	      // Handle anything other than an ampersand, including newlines and EOF.
	      if (character === lineFeed) {
	        line++
	        lines++
	        column = 0
	      }

	      // `NaN` (the step past the end) is the only value not equal to itself.
	      if (character === character) {
	        queue += fromCharCode(character)
	        column++
	      } else {
	        flush()
	      }
	    }
	  }

	  // Return the reduced nodes, and any possible warnings.
	  return result.join('')

	  // Get current position.
	  function now() {
	    return {
	      line: line,
	      column: column,
	      offset: index + (pos.offset || 0)
	    }
	  }

	  // “Throw” a parse-error: a warning.
	  // `offset` is added to both the column and the offset of the current point.
	  function parseError(code, offset) {
	    var position = now()

	    position.column += offset
	    position.offset += offset

	    handleWarning.call(warningContext, messages[code], position, code)
	  }

	  // Flush `queue` (normal text).
	  // Macro invoked before each entity and at the end of `value`.
	  // Does nothing when `queue` is empty.
	  function flush() {
	    if (queue) {
	      result.push(queue)

	      if (handleText) {
	        handleText.call(textContext, queue, {start: prev, end: now()})
	      }

	      queue = ''
	    }
	  }
	}

	// Tell whether `code` cannot be used as a code point at all: it is either
	// above 0x10FFFF or inside the surrogate range 0xD800-0xDFFF.
	function prohibited(code) {
	  return (code >= 0xd800 && code <= 0xdfff) || code > 0x10ffff
	}

	// Tell whether `code` is in one of the disallowed ranges:
	// 0x01-0x08, 0x0B, 0x0D-0x1F, 0x7F-0x9F, 0xFDD0-0xFDEF, or any value whose
	// low 16 bits are 0xFFFE or 0xFFFF.
	function disallowed(code) {
	  return (
	    (code >= 0x0001 && code <= 0x0008) ||
	    code === 0x000b ||
	    (code >= 0x000d && code <= 0x001f) ||
	    (code >= 0x007f && code <= 0x009f) ||
	    (code >= 0xfdd0 && code <= 0xfdef) ||
	    (code & 0xffff) === 0xffff ||
	    (code & 0xffff) === 0xfffe
	  )
	}