python-docs-venv/lib/python3.11/site-packages/pygments/lexers/special.py - datasketches-python - Git at Google

 """
     pygments.lexers.special
     ~~~~~~~~~~~~~~~~~~~~~~~

     Special lexers.

     :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
     :license: BSD, see LICENSE for details.
 """

 import ast

 from pygments.lexer import Lexer, line_re
 from pygments.token import Token, Error, Text, Generic
 from pygments.util import get_choice_opt


 # Public names exported by a star-import of this module.
 __all__ = ['TextLexer', 'OutputLexer', 'RawTokenLexer']


 class TextLexer(Lexer):
     """
     Pass-through ("null") lexer: the input is emitted unchanged as one
     ``Text`` token, so nothing is highlighted.
     """
     name = 'Text only'
     aliases = ['text']
     filenames = ['*.txt']
     mimetypes = ['text/plain']
     # Also the score reported by `analyse_text` below.
     priority = 0.01

     def get_tokens_unprocessed(self, text):
         """Yield all of *text* as a single ``Text`` token at offset 0."""
         whole_input = (0, Text, text)
         yield whole_input

     def analyse_text(text):
         """Return the class-level ``priority``, whatever *text* contains."""
         # NOTE(review): declared without ``self``; presumably the Lexer
         # machinery treats this as a static hook -- confirm.
         score = TextLexer.priority
         return score


 class OutputLexer(Lexer):
     """
     Minimal lexer that tags its entire input as ``Token.Generic.Output``.

     .. versionadded:: 2.10
     """
     name = 'Text output'
     aliases = ['output']

     def get_tokens_unprocessed(self, text):
         """Yield all of *text* as one ``Generic.Output`` token at offset 0."""
         whole_input = (0, Generic.Output, text)
         yield whole_input


 # Module-wide cache filled by RawTokenLexer.get_tokens_unprocessed: maps a
 # dotted token-type string from the input to the token type it resolved to.
 _ttype_cache = {}


 class RawTokenLexer(Lexer):
     """
     Recreate a token stream formatted with the `RawTokenFormatter`.

     Each input line is expected to hold a dotted token type, a tab, and a
     Python string literal.  Lines that do not parse that way are emitted
     as ``Error`` tokens instead of raising.

     Additional options accepted:

     `compress`
         If set to ``"gz"`` or ``"bz2"``, decompress the token stream with
         the given compression algorithm before lexing (default: ``""``).
         ``"none"`` is accepted as well and means no decompression.
     """
     name = 'Raw token data'
     aliases = []
     filenames = []
     mimetypes = ['application/x-pygments-tokens']

     def __init__(self, **options):
         """Read the ``compress`` option, then initialise the base lexer."""
         self.compress = get_choice_opt(options, 'compress',
                                        ['', 'none', 'gz', 'bz2'], '')
         Lexer.__init__(self, **options)

     def get_tokens(self, text):
         """
         Yield ``(tokentype, value)`` pairs parsed from *text*.

         *text* may be ``str`` or ``bytes``.  When ``compress`` is ``'gz'``
         or ``'bz2'`` the data is decompressed first; if that fails, the
         raw input is yielded once as a single ``Error`` token and
         iteration stops.
         """
         if self.compress == 'gz':
             import gzip
             import zlib
             decompress = gzip.decompress
             # Bad header -> OSError, truncated stream -> EOFError,
             # corrupt deflate data -> zlib.error.
             failures = (OSError, EOFError, zlib.error)
         elif self.compress == 'bz2':
             import bz2
             decompress = bz2.decompress
             # Invalid stream -> OSError, truncated stream -> ValueError.
             failures = (OSError, ValueError)
         else:
             # '' and 'none' both mean the input is not compressed, so the
             # text is left alone (no latin-1 round trip).
             decompress = None

         if decompress is not None:
             if isinstance(text, str):
                 text = text.encode('latin1')
             try:
                 text = decompress(text)
             except failures:
                 yield Error, text.decode('latin1')
                 # Stop here: lexing the undecompressed payload as well
                 # would emit the same data a second time.
                 return
         if isinstance(text, bytes):
             text = text.decode('latin1')

         # do not call Lexer.get_tokens() because stripping is not optional.
         text = text.strip('\n') + '\n'
         for i, t, v in self.get_tokens_unprocessed(text):
             yield t, v

     def get_tokens_unprocessed(self, text):
         """
         Yield ``(position, tokentype, value)`` for every match of
         ``line_re`` in *text*.

         ``position`` is the running total of the lengths of the values
         yielded so far, not an offset into *text*.  A line that cannot be
         parsed is yielded verbatim as an ``Error`` token.
         """
         length = 0
         for match in line_re.finditer(text):
             try:
                 # Unpacking raises ValueError when the line has no tab.
                 ttypestr, val = match.group().rstrip().split('\t', 1)
                 ttype = _ttype_cache.get(ttypestr)
                 # Compare with None so that a cached token type that is
                 # falsy still counts as a cache hit.
                 if ttype is None:
                     ttype = Token
                     # The first dotted component is skipped; the rest are
                     # resolved as attributes starting from ``Token``.
                     for ttype_ in ttypestr.split('.')[1:]:
                         if not ttype_ or not ttype_[0].isupper():
                             raise ValueError('malformed token name')
                         ttype = getattr(ttype, ttype_)
                     _ttype_cache[ttypestr] = ttype
                 val = ast.literal_eval(val)
                 if not isinstance(val, str):
                     raise ValueError('expected str')
             except (SyntaxError, ValueError, TypeError,
                     MemoryError, RecursionError):
                 # literal_eval may raise any of these on malformed input;
                 # report the whole line as an error rather than crash.
                 val = match.group()
                 ttype = Error
             yield length, ttype, val
             length += len(val)
	"""
	pygments.lexers.special
	~~~~~~~~~~~~~~~~~~~~~~~

	Special lexers.

	:copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
	:license: BSD, see LICENSE for details.
	"""

	import ast

	from pygments.lexer import Lexer, line_re
	from pygments.token import Token, Error, Text, Generic
	from pygments.util import get_choice_opt


	# Public names exported by a star-import of this module.
	__all__ = ['TextLexer', 'OutputLexer', 'RawTokenLexer']


	class TextLexer(Lexer):
	    """
	    Pass-through ("null") lexer: the input is emitted unchanged as one
	    ``Text`` token, so nothing is highlighted.
	    """
	    name = 'Text only'
	    aliases = ['text']
	    filenames = ['*.txt']
	    mimetypes = ['text/plain']
	    # Also the score reported by `analyse_text` below.
	    priority = 0.01

	    def get_tokens_unprocessed(self, text):
	        """Yield all of *text* as a single ``Text`` token at offset 0."""
	        whole_input = (0, Text, text)
	        yield whole_input

	    def analyse_text(text):
	        """Return the class-level ``priority``, whatever *text* contains."""
	        # NOTE(review): declared without ``self``; presumably the Lexer
	        # machinery treats this as a static hook -- confirm.
	        score = TextLexer.priority
	        return score


	class OutputLexer(Lexer):
	    """
	    Minimal lexer that tags its entire input as ``Token.Generic.Output``.

	    .. versionadded:: 2.10
	    """
	    name = 'Text output'
	    aliases = ['output']

	    def get_tokens_unprocessed(self, text):
	        """Yield all of *text* as one ``Generic.Output`` token at offset 0."""
	        whole_input = (0, Generic.Output, text)
	        yield whole_input


	# Module-wide cache filled by RawTokenLexer.get_tokens_unprocessed: maps a
	# dotted token-type string from the input to the token type it resolved to.
	_ttype_cache = {}


	class RawTokenLexer(Lexer):
	    """
	    Recreate a token stream formatted with the `RawTokenFormatter`.

	    Each input line is expected to hold a dotted token type, a tab, and a
	    Python string literal.  Lines that do not parse that way are emitted
	    as ``Error`` tokens instead of raising.

	    Additional options accepted:

	    `compress`
	        If set to ``"gz"`` or ``"bz2"``, decompress the token stream with
	        the given compression algorithm before lexing (default: ``""``).
	        ``"none"`` is accepted as well and means no decompression.
	    """
	    name = 'Raw token data'
	    aliases = []
	    filenames = []
	    mimetypes = ['application/x-pygments-tokens']

	    def __init__(self, **options):
	        """Read the ``compress`` option, then initialise the base lexer."""
	        self.compress = get_choice_opt(options, 'compress',
	                                       ['', 'none', 'gz', 'bz2'], '')
	        Lexer.__init__(self, **options)

	    def get_tokens(self, text):
	        """
	        Yield ``(tokentype, value)`` pairs parsed from *text*.

	        *text* may be ``str`` or ``bytes``.  When ``compress`` is ``'gz'``
	        or ``'bz2'`` the data is decompressed first; if that fails, the
	        raw input is yielded once as a single ``Error`` token and
	        iteration stops.
	        """
	        if self.compress == 'gz':
	            import gzip
	            import zlib
	            decompress = gzip.decompress
	            # Bad header -> OSError, truncated stream -> EOFError,
	            # corrupt deflate data -> zlib.error.
	            failures = (OSError, EOFError, zlib.error)
	        elif self.compress == 'bz2':
	            import bz2
	            decompress = bz2.decompress
	            # Invalid stream -> OSError, truncated stream -> ValueError.
	            failures = (OSError, ValueError)
	        else:
	            # '' and 'none' both mean the input is not compressed, so the
	            # text is left alone (no latin-1 round trip).
	            decompress = None

	        if decompress is not None:
	            if isinstance(text, str):
	                text = text.encode('latin1')
	            try:
	                text = decompress(text)
	            except failures:
	                yield Error, text.decode('latin1')
	                # Stop here: lexing the undecompressed payload as well
	                # would emit the same data a second time.
	                return
	        if isinstance(text, bytes):
	            text = text.decode('latin1')

	        # do not call Lexer.get_tokens() because stripping is not optional.
	        text = text.strip('\n') + '\n'
	        for i, t, v in self.get_tokens_unprocessed(text):
	            yield t, v

	    def get_tokens_unprocessed(self, text):
	        """
	        Yield ``(position, tokentype, value)`` for every match of
	        ``line_re`` in *text*.

	        ``position`` is the running total of the lengths of the values
	        yielded so far, not an offset into *text*.  A line that cannot be
	        parsed is yielded verbatim as an ``Error`` token.
	        """
	        length = 0
	        for match in line_re.finditer(text):
	            try:
	                # Unpacking raises ValueError when the line has no tab.
	                ttypestr, val = match.group().rstrip().split('\t', 1)
	                ttype = _ttype_cache.get(ttypestr)
	                # Compare with None so that a cached token type that is
	                # falsy still counts as a cache hit.
	                if ttype is None:
	                    ttype = Token
	                    # The first dotted component is skipped; the rest are
	                    # resolved as attributes starting from ``Token``.
	                    for ttype_ in ttypestr.split('.')[1:]:
	                        if not ttype_ or not ttype_[0].isupper():
	                            raise ValueError('malformed token name')
	                        ttype = getattr(ttype, ttype_)
	                    _ttype_cache[ttypestr] = ttype
	                val = ast.literal_eval(val)
	                if not isinstance(val, str):
	                    raise ValueError('expected str')
	            except (SyntaxError, ValueError, TypeError,
	                    MemoryError, RecursionError):
	                # literal_eval may raise any of these on malformed input;
	                # report the whole line as an error rather than crash.
	                val = match.group()
	                ttype = Error
	            yield length, ttype, val
	            length += len(val)