env2/lib/python2.7/site-packages/pip/_vendor/webencodings/__init__.py - incubator-flagon-tap - Git at Google

 # coding: utf8
 """

     webencodings
     ~~~~~~~~~~~~

     This is a Python implementation of the `WHATWG Encoding standard
     <http://encoding.spec.whatwg.org/>`_. See README for details.

     :copyright: Copyright 2012 by Simon Sapin
     :license: BSD, see LICENSE for details.

 """

 from __future__ import unicode_literals

 import codecs

 from .labels import LABELS


 VERSION = '0.5'


 # A few canonical names from the Encoding standard are not accepted by
 # Python's codec lookup; map each of them to a name Python does know.
 PYTHON_NAMES = {
     'iso-8859-8-i': 'iso-8859-8',
     'x-mac-cyrillic': 'mac-cyrillic',
     'macintosh': 'mac-roman',
     'windows-874': 'cp874',
 }

 # Encoding objects already built by lookup(), keyed by canonical name.
 CACHE = {}


 def ascii_lower(string):
     r"""Lower-case the ASCII letters A-Z of a string and nothing else.

     :param string: An Unicode string.
     :returns: A new Unicode string.

     Encoding labels are matched `ASCII case-insensitively
     <http://encoding.spec.whatwg.org/#ascii-case-insensitive>`_,
     and this function implements that folding.
     The same matching is also used, among other things,
     for `CSS keywords <http://dev.w3.org/csswg/css-values/#keywords>`_.

     The :meth:`~py:str.lower` method of Unicode strings is not suitable:
     it also changes non-ASCII characters,
     sometimes mapping them into the ASCII range:

         >>> keyword = u'Bac\N{KELVIN SIGN}ground'
         >>> assert keyword.lower() == u'background'
         >>> assert ascii_lower(keyword) != keyword.lower()
         >>> assert ascii_lower(keyword) == u'bac\N{KELVIN SIGN}ground'

     """
     # Lower-case the UTF-8 bytes rather than the text, so that non-ASCII
     # characters come back unchanged (see the doctest above).
     # The original author found this faster than unicode.translate().
     utf8_bytes = string.encode('utf8')
     lowered = utf8_bytes.lower()
     return lowered.decode('utf8')


 def lookup(label):
     """
     Resolve an encoding label to an :class:`Encoding` object.
     This is the spec's `get an encoding
     <http://encoding.spec.whatwg.org/#concept-encoding-get>`_ algorithm.
     Supported labels are listed there.

     :param label: A string.
     :returns:
         An :class:`Encoding` object, or :obj:`None` for an unknown label.

     """
     # Only ASCII whitespace is stripped:
     # U+0009, U+000A, U+000C, U+000D, and U+0020.
     canonical = LABELS.get(ascii_lower(label.strip('\t\n\f\r ')))
     if canonical is None:
         return None

     cached = CACHE.get(canonical)
     if cached is not None:
         return cached

     if canonical == 'x-user-defined':
         # Taken from this package's own module rather than looked up
         # through the ``codecs`` registry.
         from .x_user_defined import codec_info
     else:
         # Any name that gets to here should be valid for codecs.lookup().
         codec_info = codecs.lookup(PYTHON_NAMES.get(canonical, canonical))
     encoding = Encoding(canonical, codec_info)
     CACHE[canonical] = encoding
     return encoding


 def _get_encoding(encoding_or_label):
     """
     Accept either an encoding object or label.

     :param encoding: An :class:`Encoding` object or a label string.
     :returns: An :class:`Encoding` object.
     :raises: :exc:`~exceptions.LookupError` for an unknown label.

     """
     if hasattr(encoding_or_label, 'codec_info'):
         return encoding_or_label

     encoding = lookup(encoding_or_label)
     if encoding is None:
         raise LookupError('Unknown encoding label: %r' % encoding_or_label)
     return encoding


 class Encoding(object):
     """Represents a character encoding such as UTF-8,
     that can be used for decoding or encoding.

     .. attribute:: name

         Canonical name of the encoding

     .. attribute:: codec_info

         The actual implementation of the encoding,
         a stdlib :class:`~codecs.CodecInfo` object.
         See :func:`codecs.register`.

     """
     def __init__(self, name, codec_info):
         """Store the canonical *name* and its *codec_info* as given."""
         self.codec_info = codec_info
         self.name = name

     def __repr__(self):
         """Return ``<Encoding NAME>`` for debugging output."""
         return '<Encoding %s>' % self.name


 #: The UTF-8 encoding. Should be used for new content and formats.
 UTF8 = lookup('utf-8')

 # Private encoding objects that _detect_bom() returns when the input
 # starts with a UTF-16 little-endian or big-endian byte order mark.
 _UTF16LE = lookup('utf-16le')
 _UTF16BE = lookup('utf-16be')


 def decode(input, fallback_encoding, errors='replace'):
     """
     Decode a single byte string, honouring a leading BOM if there is one.

     :param input: A byte string
     :param fallback_encoding:
         An :class:`Encoding` object or a label string.
         The encoding to use if :obj:`input` does not have a BOM.
     :param errors: Type of error handling. See :func:`codecs.register`.
     :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
     :return:
         A ``(output, encoding)`` tuple of an Unicode string
         and an :obj:`Encoding`.

     """
     # Resolve the fallback first so that an invalid label fails early,
     # even when a BOM would have made the fallback unnecessary.
     fallback_encoding = _get_encoding(fallback_encoding)
     bom_encoding, input = _detect_bom(input)
     if bom_encoding:
         encoding = bom_encoding
     else:
         encoding = fallback_encoding
     # The codec's decode() result is indexed for the decoded text only.
     output = encoding.codec_info.decode(input, errors)[0]
     return output, encoding


 def _detect_bom(input):
     """Return (bom_encoding, input), with any BOM removed from the input."""
     if input.startswith(b'\xFF\xFE'):
         return _UTF16LE, input[2:]
     if input.startswith(b'\xFE\xFF'):
         return _UTF16BE, input[2:]
     if input.startswith(b'\xEF\xBB\xBF'):
         return UTF8, input[3:]
     return None, input


 def encode(input, encoding=UTF8, errors='strict'):
     """
     Encode a single Unicode string to bytes.

     :param input: An Unicode string.
     :param encoding: An :class:`Encoding` object or a label string.
     :param errors: Type of error handling. See :func:`codecs.register`.
     :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
     :return: A byte string.

     """
     codec_info = _get_encoding(encoding).codec_info
     # The codec's encode() result is indexed for the encoded bytes only.
     return codec_info.encode(input, errors)[0]


 def iter_decode(input, fallback_encoding, errors='replace'):
     """
     "Pull"-based decoder.

     :param input:
         An iterable of byte strings.

         The input is first consumed just enough to determine the encoding
         based on the presence of a BOM,
         then consumed on demand when the return value is.
     :param fallback_encoding:
         An :class:`Encoding` object or a label string.
         The encoding to use if :obj:`input` does not have a BOM.
     :param errors: Type of error handling. See :func:`codecs.register`.
     :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
     :returns:
         An ``(output, encoding)`` tuple.
         :obj:`output` is an iterable of Unicode strings,
         :obj:`encoding` is the :obj:`Encoding` that is being used.

     """
     chunks = _iter_decode_generator(
         input, IncrementalDecoder(fallback_encoding, errors))
     # The generator's first item is the Encoding, not text: take it off
     # here so that the caller only ever iterates over decoded strings.
     encoding = next(chunks)
     return chunks, encoding


 def _iter_decode_generator(input, decoder):
     """Return a generator that first yields the :obj:`Encoding`,
     then yields output chukns as Unicode strings.

     """
     decode = decoder.decode
     input = iter(input)
     for chunck in input:
         output = decode(chunck)
         if output:
             assert decoder.encoding is not None
             yield decoder.encoding
             yield output
             break
     else:
         # Input exhausted without determining the encoding
         output = decode(b'', final=True)
         assert decoder.encoding is not None
         yield decoder.encoding
         if output:
             yield output
         return

     for chunck in input:
         output = decode(chunck)
         if output:
             yield output
     output = decode(b'', final=True)
     if output:
         yield output


 def iter_encode(input, encoding=UTF8, errors='strict'):
     """
     "Pull"-based encoder.

     :param input: An iterable of Unicode strings.
     :param encoding: An :class:`Encoding` object or a label string.
     :param errors: Type of error handling. See :func:`codecs.register`.
     :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
     :returns: An iterable of byte strings.

     """
     # The encoder is built here, not inside the generator, so that an
     # invalid `encoding` label fails early instead of on first iteration.
     encoder = IncrementalEncoder(encoding, errors)
     return _iter_encode_generator(input, encoder.encode)


 def _iter_encode_generator(input, encode):
     for chunck in input:
         output = encode(chunck)
         if output:
             yield output
     output = encode('', final=True)
     if output:
         yield output


 class IncrementalDecoder(object):
     """
     "Push"-based decoder.

     :param fallback_encoding:
         An :class:`Encoding` object or a label string.
         The encoding to use if the input does not have a BOM.
     :param errors: Type of error handling. See :func:`codecs.register`.
     :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.

     """
     def __init__(self, fallback_encoding, errors='replace'):
         # Resolve the label right away so that an invalid one fails early.
         self._fallback_encoding = _get_encoding(fallback_encoding)
         self._errors = errors
         # Bytes held back while there is too little input to tell whether
         # it starts with a BOM.
         self._buffer = b''
         # ``decode`` method of the codec's incremental decoder; created
         # once the encoding has been determined.
         self._decoder = None
         #: The actual :class:`Encoding` that is being used,
         #: or :obj:`None` if that is not determined yet.
         #: (I.e. if there is not enough input yet to determine
         #: if there is a BOM.)
         self.encoding = None  # Not known yet.

     def decode(self, input, final=False):
         """Decode one chunk of the input.

         :param input: A byte string.
         :param final:
             Indicate that no more input is available.
             Must be :obj:`True` if this is the last call.
         :returns: An Unicode string.

         """
         # Fast path: the encoding was settled by an earlier call.
         if self._decoder is not None:
             return self._decoder(input, final)

         data = self._buffer + input
         encoding, data = _detect_bom(data)
         if encoding is None:
             if not final and len(data) < 3:
                 # Not enough data yet to rule out a BOM: keep the bytes
                 # for the next call.
                 self._buffer = data
                 return ''
             # No BOM.
             encoding = self._fallback_encoding
         decode_chunk = encoding.codec_info.incrementaldecoder(
             self._errors).decode
         self._decoder = decode_chunk
         self.encoding = encoding
         return decode_chunk(data, final)


 class IncrementalEncoder(object):
     """
     "Push"-based encoder.

     :param encoding: An :class:`Encoding` object or a label string.
     :param errors: Type of error handling. See :func:`codecs.register`.
     :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.

     .. method:: encode(input, final=False)

         :param input: An Unicode string.
         :param final:
             Indicate that no more input is available.
             Must be :obj:`True` if this is the last call.
         :returns: A byte string.

     """
     def __init__(self, encoding=UTF8, errors='strict'):
         codec_info = _get_encoding(encoding).codec_info
         # ``encode`` is an instance attribute: the bound method of the
         # codec's own incremental encoder, exposed without a wrapper.
         self.encode = codec_info.incrementalencoder(errors).encode
	# coding: utf8
	"""

	webencodings
	~~~~~~~~~~~~

	This is a Python implementation of the `WHATWG Encoding standard
	<http://encoding.spec.whatwg.org/>`_. See README for details.

	:copyright: Copyright 2012 by Simon Sapin
	:license: BSD, see LICENSE for details.

	"""

	from __future__ import unicode_literals

	import codecs

	from .labels import LABELS


	VERSION = '0.5'


	# A few canonical names from the Encoding standard are not accepted by
	# Python's codec lookup; map each of them to a name Python does know.
	PYTHON_NAMES = {
	    'iso-8859-8-i': 'iso-8859-8',
	    'x-mac-cyrillic': 'mac-cyrillic',
	    'macintosh': 'mac-roman',
	    'windows-874': 'cp874',
	}

	# Encoding objects already built by lookup(), keyed by canonical name.
	CACHE = {}


	def ascii_lower(string):
	    r"""Lower-case the ASCII letters A-Z of a string and nothing else.

	    :param string: An Unicode string.
	    :returns: A new Unicode string.

	    Encoding labels are matched `ASCII case-insensitively
	    <http://encoding.spec.whatwg.org/#ascii-case-insensitive>`_,
	    and this function implements that folding.
	    The same matching is also used, among other things,
	    for `CSS keywords <http://dev.w3.org/csswg/css-values/#keywords>`_.

	    The :meth:`~py:str.lower` method of Unicode strings is not suitable:
	    it also changes non-ASCII characters,
	    sometimes mapping them into the ASCII range:

	        >>> keyword = u'Bac\N{KELVIN SIGN}ground'
	        >>> assert keyword.lower() == u'background'
	        >>> assert ascii_lower(keyword) != keyword.lower()
	        >>> assert ascii_lower(keyword) == u'bac\N{KELVIN SIGN}ground'

	    """
	    # Lower-case the UTF-8 bytes rather than the text, so that non-ASCII
	    # characters come back unchanged (see the doctest above).
	    # The original author found this faster than unicode.translate().
	    utf8_bytes = string.encode('utf8')
	    lowered = utf8_bytes.lower()
	    return lowered.decode('utf8')


	def lookup(label):
	    """
	    Resolve an encoding label to an :class:`Encoding` object.
	    This is the spec's `get an encoding
	    <http://encoding.spec.whatwg.org/#concept-encoding-get>`_ algorithm.
	    Supported labels are listed there.

	    :param label: A string.
	    :returns:
	        An :class:`Encoding` object, or :obj:`None` for an unknown label.

	    """
	    # Only ASCII whitespace is stripped:
	    # U+0009, U+000A, U+000C, U+000D, and U+0020.
	    canonical = LABELS.get(ascii_lower(label.strip('\t\n\f\r ')))
	    if canonical is None:
	        return None

	    cached = CACHE.get(canonical)
	    if cached is not None:
	        return cached

	    if canonical == 'x-user-defined':
	        # Taken from this package's own module rather than looked up
	        # through the ``codecs`` registry.
	        from .x_user_defined import codec_info
	    else:
	        # Any name that gets to here should be valid for codecs.lookup().
	        codec_info = codecs.lookup(PYTHON_NAMES.get(canonical, canonical))
	    encoding = Encoding(canonical, codec_info)
	    CACHE[canonical] = encoding
	    return encoding


	def _get_encoding(encoding_or_label):
	"""
	Accept either an encoding object or label.

	:param encoding: An :class:`Encoding` object or a label string.
	:returns: An :class:`Encoding` object.
	:raises: :exc:`~exceptions.LookupError` for an unknown label.

	"""
	if hasattr(encoding_or_label, 'codec_info'):
	return encoding_or_label

	encoding = lookup(encoding_or_label)
	if encoding is None:
	raise LookupError('Unknown encoding label: %r' % encoding_or_label)
	return encoding


	class Encoding(object):
	    """Represents a character encoding such as UTF-8,
	    that can be used for decoding or encoding.

	    .. attribute:: name

	        Canonical name of the encoding

	    .. attribute:: codec_info

	        The actual implementation of the encoding,
	        a stdlib :class:`~codecs.CodecInfo` object.
	        See :func:`codecs.register`.

	    """
	    def __init__(self, name, codec_info):
	        """Store the canonical *name* and its *codec_info* as given."""
	        self.name = name
	        self.codec_info = codec_info

	    def __repr__(self):
	        """Return ``<Encoding NAME>`` for debugging output."""
	        return '<Encoding %s>' % self.name


	#: The UTF-8 encoding. Should be used for new content and formats.
	UTF8 = lookup('utf-8')

	# Private encoding objects that _detect_bom() returns when the input
	# starts with a UTF-16 little-endian or big-endian byte order mark.
	_UTF16LE = lookup('utf-16le')
	_UTF16BE = lookup('utf-16be')


	def decode(input, fallback_encoding, errors='replace'):
	    """
	    Decode a single byte string, honouring a leading BOM if there is one.

	    :param input: A byte string
	    :param fallback_encoding:
	        An :class:`Encoding` object or a label string.
	        The encoding to use if :obj:`input` does not have a BOM.
	    :param errors: Type of error handling. See :func:`codecs.register`.
	    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
	    :return:
	        A ``(output, encoding)`` tuple of an Unicode string
	        and an :obj:`Encoding`.

	    """
	    # Resolve the fallback first so that an invalid label fails early,
	    # even when a BOM would have made the fallback unnecessary.
	    fallback_encoding = _get_encoding(fallback_encoding)
	    bom_encoding, input = _detect_bom(input)
	    encoding = bom_encoding or fallback_encoding
	    # The codec's decode() result is indexed for the decoded text only.
	    output = encoding.codec_info.decode(input, errors)[0]
	    return output, encoding


	def _detect_bom(input):
	"""Return (bom_encoding, input), with any BOM removed from the input."""
	if input.startswith(b'\xFF\xFE'):
	return _UTF16LE, input[2:]
	if input.startswith(b'\xFE\xFF'):
	return _UTF16BE, input[2:]
	if input.startswith(b'\xEF\xBB\xBF'):
	return UTF8, input[3:]
	return None, input


	def encode(input, encoding=UTF8, errors='strict'):
	    """
	    Encode a single Unicode string to bytes.

	    :param input: An Unicode string.
	    :param encoding: An :class:`Encoding` object or a label string.
	    :param errors: Type of error handling. See :func:`codecs.register`.
	    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
	    :return: A byte string.

	    """
	    codec_info = _get_encoding(encoding).codec_info
	    # The codec's encode() result is indexed for the encoded bytes only.
	    return codec_info.encode(input, errors)[0]


	def iter_decode(input, fallback_encoding, errors='replace'):
	    """
	    "Pull"-based decoder.

	    :param input:
	        An iterable of byte strings.

	        The input is first consumed just enough to determine the encoding
	        based on the presence of a BOM,
	        then consumed on demand when the return value is.
	    :param fallback_encoding:
	        An :class:`Encoding` object or a label string.
	        The encoding to use if :obj:`input` does not have a BOM.
	    :param errors: Type of error handling. See :func:`codecs.register`.
	    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
	    :returns:
	        An ``(output, encoding)`` tuple.
	        :obj:`output` is an iterable of Unicode strings,
	        :obj:`encoding` is the :obj:`Encoding` that is being used.

	    """
	    decoder = IncrementalDecoder(fallback_encoding, errors)
	    generator = _iter_decode_generator(input, decoder)
	    # The generator's first item is the Encoding, not text: take it off
	    # here so that the caller only ever iterates over decoded strings.
	    encoding = next(generator)
	    return generator, encoding


	def _iter_decode_generator(input, decoder):
	"""Return a generator that first yields the :obj:`Encoding`,
	then yields output chukns as Unicode strings.

	"""
	decode = decoder.decode
	input = iter(input)
	for chunck in input:
	output = decode(chunck)
	if output:
	assert decoder.encoding is not None
	yield decoder.encoding
	yield output
	break
	else:
	# Input exhausted without determining the encoding
	output = decode(b'', final=True)
	assert decoder.encoding is not None
	yield decoder.encoding
	if output:
	yield output
	return

	for chunck in input:
	output = decode(chunck)
	if output:
	yield output
	output = decode(b'', final=True)
	if output:
	yield output


	def iter_encode(input, encoding=UTF8, errors='strict'):
	    """
	    "Pull"-based encoder.

	    :param input: An iterable of Unicode strings.
	    :param encoding: An :class:`Encoding` object or a label string.
	    :param errors: Type of error handling. See :func:`codecs.register`.
	    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
	    :returns: An iterable of byte strings.

	    """
	    # The encoder is built here, not inside the generator, so that an
	    # invalid `encoding` label fails early instead of on first iteration.
	    encode = IncrementalEncoder(encoding, errors).encode
	    return _iter_encode_generator(input, encode)


	def _iter_encode_generator(input, encode):
	for chunck in input:
	output = encode(chunck)
	if output:
	yield output
	output = encode('', final=True)
	if output:
	yield output


	class IncrementalDecoder(object):
	    """
	    "Push"-based decoder.

	    :param fallback_encoding:
	        An :class:`Encoding` object or a label string.
	        The encoding to use if the input does not have a BOM.
	    :param errors: Type of error handling. See :func:`codecs.register`.
	    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.

	    """
	    def __init__(self, fallback_encoding, errors='replace'):
	        # Resolve the label right away so that an invalid one fails early.
	        self._fallback_encoding = _get_encoding(fallback_encoding)
	        self._errors = errors
	        # Bytes held back while there is too little input to tell whether
	        # it starts with a BOM.
	        self._buffer = b''
	        # ``decode`` method of the codec's incremental decoder; created
	        # once the encoding has been determined.
	        self._decoder = None
	        #: The actual :class:`Encoding` that is being used,
	        #: or :obj:`None` if that is not determined yet.
	        #: (I.e. if there is not enough input yet to determine
	        #: if there is a BOM.)
	        self.encoding = None  # Not known yet.

	    def decode(self, input, final=False):
	        """Decode one chunk of the input.

	        :param input: A byte string.
	        :param final:
	            Indicate that no more input is available.
	            Must be :obj:`True` if this is the last call.
	        :returns: An Unicode string.

	        """
	        # Fast path: the encoding was settled by an earlier call.
	        decoder = self._decoder
	        if decoder is not None:
	            return decoder(input, final)

	        input = self._buffer + input
	        encoding, input = _detect_bom(input)
	        if encoding is None:
	            if len(input) < 3 and not final:
	                # Not enough data yet to rule out a BOM: keep the bytes
	                # for the next call.
	                self._buffer = input
	                return ''
	            # No BOM.
	            encoding = self._fallback_encoding
	        decoder = encoding.codec_info.incrementaldecoder(self._errors).decode
	        self._decoder = decoder
	        self.encoding = encoding
	        return decoder(input, final)


	class IncrementalEncoder(object):
	    """
	    "Push"-based encoder.

	    :param encoding: An :class:`Encoding` object or a label string.
	    :param errors: Type of error handling. See :func:`codecs.register`.
	    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.

	    .. method:: encode(input, final=False)

	        :param input: An Unicode string.
	        :param final:
	            Indicate that no more input is available.
	            Must be :obj:`True` if this is the last call.
	        :returns: A byte string.

	    """
	    def __init__(self, encoding=UTF8, errors='strict'):
	        encoding = _get_encoding(encoding)
	        # ``encode`` is an instance attribute: the bound method of the
	        # codec's own incremental encoder, exposed without a wrapper.
	        self.encode = encoding.codec_info.incrementalencoder(errors).encode