python-docs-venv/lib/python3.11/site-packages/pygments/lexers/grammar_notation.py - datasketches-python - Git at Google

 """
     pygments.lexers.grammar_notation
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

     Lexers for grammar notations like BNF.

     :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
     :license: BSD, see LICENSE for details.
 """

 from pygments.lexer import RegexLexer, bygroups, include, this, using, words
 from pygments.token import Comment, Keyword, Literal, Name, Number, \
     Operator, Punctuation, String, Text, Whitespace

 __all__ = ['BnfLexer', 'AbnfLexer', 'JsgfLexer', 'PegLexer']


 class BnfLexer(RegexLexer):
     """
     This lexer is for grammar notations which are similar to
     original BNF.

     In order to maximize a number of targets of this lexer,
     let's decide some designs:

     * We don't distinguish `Terminal Symbol`.

     * We do assume that `NonTerminal Symbol` are always enclosed
       with arrow brackets.

     * We do assume that `NonTerminal Symbol` may include
       any printable characters except arrow brackets and ASCII 0x20.
       This assumption is for `RBNF <http://www.rfc-base.org/txt/rfc-5511.txt>`_.

     * We do assume that target notation doesn't support comment.

     * We don't distinguish any operators and punctuation except
       `::=`.

     Though these decision making might cause too minimal highlighting
     and you might be disappointed, but it is reasonable for us.

     .. versionadded:: 2.1
     """

     name = 'BNF'
     aliases = ['bnf']
     filenames = ['*.bnf']
     mimetypes = ['text/x-bnf']

     tokens = {
         'root': [
             (r'(<)([ -;=?-~]+)(>)',
              bygroups(Punctuation, Name.Class, Punctuation)),

             # an only operator
             (r'::=', Operator),

             # fallback
             (r'[^<>:]+', Text),  # for performance
             (r'.', Text),
         ],
     }


 class AbnfLexer(RegexLexer):
     """
     Lexer for IETF 7405 ABNF.

     (Updates `5234 <http://www.ietf.org/rfc/rfc5234.txt>`_) grammars.

     .. versionadded:: 2.1
     """

     name = 'ABNF'
     url = 'http://www.ietf.org/rfc/rfc7405.txt'
     aliases = ['abnf']
     filenames = ['*.abnf']
     mimetypes = ['text/x-abnf']

     _core_rules = (
         'ALPHA', 'BIT', 'CHAR', 'CR', 'CRLF', 'CTL', 'DIGIT',
         'DQUOTE', 'HEXDIG', 'HTAB', 'LF', 'LWSP', 'OCTET',
         'SP', 'VCHAR', 'WSP')

     tokens = {
         'root': [
             # comment
             (r';.*$', Comment.Single),

             # quoted
             #   double quote itself in this state, it is as '%x22'.
             (r'(%[si])?"[^"]*"', Literal),

             # binary (but i have never seen...)
             (r'%b[01]+\-[01]+\b', Literal),  # range
             (r'%b[01]+(\.[01]+)*\b', Literal),  # concat

             # decimal
             (r'%d[0-9]+\-[0-9]+\b', Literal),  # range
             (r'%d[0-9]+(\.[0-9]+)*\b', Literal),  # concat

             # hexadecimal
             (r'%x[0-9a-fA-F]+\-[0-9a-fA-F]+\b', Literal),  # range
             (r'%x[0-9a-fA-F]+(\.[0-9a-fA-F]+)*\b', Literal),  # concat

             # repetition (<a>*<b>element) including nRule
             (r'\b[0-9]+\*[0-9]+', Operator),
             (r'\b[0-9]+\*', Operator),
             (r'\b[0-9]+', Operator),
             (r'\*', Operator),

             # Strictly speaking, these are not keyword but
             # are called `Core Rule'.
             (words(_core_rules, suffix=r'\b'), Keyword),

             # nonterminals (ALPHA *(ALPHA / DIGIT / "-"))
             (r'[a-zA-Z][a-zA-Z0-9-]*\b', Name.Class),

             # operators
             (r'(=/|=|/)', Operator),

             # punctuation
             (r'[\[\]()]', Punctuation),

             # fallback
             (r'\s+', Whitespace),
             (r'.', Text),
         ],
     }


 class JsgfLexer(RegexLexer):
     """
     For JSpeech Grammar Format grammars.

     .. versionadded:: 2.2
     """
     name = 'JSGF'
     url = 'https://www.w3.org/TR/jsgf/'
     aliases = ['jsgf']
     filenames = ['*.jsgf']
     mimetypes = ['application/jsgf', 'application/x-jsgf', 'text/jsgf']

     tokens = {
         'root': [
             include('comments'),
             include('non-comments'),
         ],
         'comments': [
             (r'/\*\*(?!/)', Comment.Multiline, 'documentation comment'),
             (r'/\*[\w\W]*?\*/', Comment.Multiline),
             (r'//.*$', Comment.Single),
         ],
         'non-comments': [
             (r'\A#JSGF[^;]*', Comment.Preproc),
             (r'\s+', Whitespace),
             (r';', Punctuation),
             (r'[=|()\[\]*+]', Operator),
             (r'/[^/]+/', Number.Float),
             (r'"', String.Double, 'string'),
             (r'\{', String.Other, 'tag'),
             (words(('import', 'public'), suffix=r'\b'), Keyword.Reserved),
             (r'grammar\b', Keyword.Reserved, 'grammar name'),
             (r'(<)(NULL|VOID)(>)',
              bygroups(Punctuation, Name.Builtin, Punctuation)),
             (r'<', Punctuation, 'rulename'),
             (r'\w+|[^\s;=|()\[\]*+/"{<\w]+', Text),
         ],
         'string': [
             (r'"', String.Double, '#pop'),
             (r'\\.', String.Escape),
             (r'[^\\"]+', String.Double),
         ],
         'tag': [
             (r'\}', String.Other, '#pop'),
             (r'\\.', String.Escape),
             (r'[^\\}]+', String.Other),
         ],
         'grammar name': [
             (r';', Punctuation, '#pop'),
             (r'\s+', Whitespace),
             (r'\.', Punctuation),
             (r'[^;\s.]+', Name.Namespace),
         ],
         'rulename': [
             (r'>', Punctuation, '#pop'),
             (r'\*', Punctuation),
             (r'\s+', Whitespace),
             (r'([^.>]+)(\s*)(\.)', bygroups(Name.Namespace, Text, Punctuation)),
             (r'[^.>]+', Name.Constant),
         ],
         'documentation comment': [
             (r'\*/', Comment.Multiline, '#pop'),
             (r'^(\s*)(\*?)(\s*)(@(?:example|see))(\s+)'
              r'([\w\W]*?(?=(?:^\s*\*?\s*@|\*/)))',
              bygroups(Whitespace, Comment.Multiline, Whitespace, Comment.Special,
                       Whitespace, using(this, state='example'))),
             (r'(^\s*\*?\s*)(@\S*)',
              bygroups(Comment.Multiline, Comment.Special)),
             (r'[^*\n@]+|\w|\W', Comment.Multiline),
         ],
         'example': [
             (r'(\n\s*)(\*)', bygroups(Whitespace, Comment.Multiline)),
             include('non-comments'),
             (r'.', Comment.Multiline),
         ],
     }


 class PegLexer(RegexLexer):
     """
     This lexer is for Parsing Expression Grammars (PEG).

     Various implementations of PEG have made different decisions
     regarding the syntax, so let's try to be accommodating:

     * `<-`, `←`, `:`, and `=` are all accepted as rule operators.

     * Both `|` and `/` are choice operators.

     * `^`, `↑`, and `~` are cut operators.

     * A single `a-z` character immediately before a string, or
       multiple `a-z` characters following a string, are part of the
       string (e.g., `r"..."` or `"..."ilmsuxa`).

     .. versionadded:: 2.6
     """

     name = 'PEG'
     url = 'https://bford.info/pub/lang/peg.pdf'
     aliases = ['peg']
     filenames = ['*.peg']
     mimetypes = ['text/x-peg']

     tokens = {
         'root': [
             # Comments
             (r'#.*$', Comment.Single),

             # All operators
             (r'<-|[←:=/|&!?*+^↑~]', Operator),

             # Other punctuation
             (r'[()]', Punctuation),

             # Keywords
             (r'\.', Keyword),

             # Character classes
             (r'(\[)([^\]]*(?:\\.[^\]\\]*)*)(\])',
              bygroups(Punctuation, String, Punctuation)),

             # Single and double quoted strings (with optional modifiers)
             (r'[a-z]?"[^"\\]*(?:\\.[^"\\]*)*"[a-z]*', String.Double),
             (r"[a-z]?'[^'\\]*(?:\\.[^'\\]*)*'[a-z]*", String.Single),

             # Nonterminals are not whitespace, operators, or punctuation
             (r'[^\s<←:=/|&!?*+\^↑~()\[\]"\'#]+', Name.Class),

             # Fallback
             (r'.', Text),
         ],
     }
	"""
	pygments.lexers.grammar_notation
	~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

	Lexers for grammar notations like BNF.

	:copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
	:license: BSD, see LICENSE for details.
	"""

	from pygments.lexer import RegexLexer, bygroups, include, this, using, words
	from pygments.token import Comment, Keyword, Literal, Name, Number, \
	Operator, Punctuation, String, Text, Whitespace

	__all__ = ['BnfLexer', 'AbnfLexer', 'JsgfLexer', 'PegLexer']


	class BnfLexer(RegexLexer):
	"""
	This lexer is for grammar notations which are similar to
	original BNF.

	In order to maximize a number of targets of this lexer,
	let's decide some designs:

	* We don't distinguish `Terminal Symbol`.

	* We do assume that `NonTerminal Symbol` are always enclosed
	with arrow brackets.

	* We do assume that `NonTerminal Symbol` may include
	any printable characters except arrow brackets and ASCII 0x20.
	This assumption is for `RBNF <http://www.rfc-base.org/txt/rfc-5511.txt>`_.

	* We do assume that target notation doesn't support comment.

	* We don't distinguish any operators and punctuation except
	`::=`.

	Though these decision making might cause too minimal highlighting
	and you might be disappointed, but it is reasonable for us.

	.. versionadded:: 2.1
	"""

	name = 'BNF'
	aliases = ['bnf']
	filenames = ['*.bnf']
	mimetypes = ['text/x-bnf']

	tokens = {
	'root': [
	(r'(<)([ -;=?-~]+)(>)',
	bygroups(Punctuation, Name.Class, Punctuation)),

	# an only operator
	(r'::=', Operator),

	# fallback
	(r'[^<>:]+', Text), # for performance
	(r'.', Text),
	],
	}


	class AbnfLexer(RegexLexer):
	"""
	Lexer for IETF 7405 ABNF.

	(Updates `5234 <http://www.ietf.org/rfc/rfc5234.txt>`_) grammars.

	.. versionadded:: 2.1
	"""

	name = 'ABNF'
	url = 'http://www.ietf.org/rfc/rfc7405.txt'
	aliases = ['abnf']
	filenames = ['*.abnf']
	mimetypes = ['text/x-abnf']

	_core_rules = (
	'ALPHA', 'BIT', 'CHAR', 'CR', 'CRLF', 'CTL', 'DIGIT',
	'DQUOTE', 'HEXDIG', 'HTAB', 'LF', 'LWSP', 'OCTET',
	'SP', 'VCHAR', 'WSP')

	tokens = {
	'root': [
	# comment
	(r';.*$', Comment.Single),

	# quoted
	# double quote itself in this state, it is as '%x22'.
	(r'(%[si])?"[^"]*"', Literal),

	# binary (but i have never seen...)
	(r'%b[01]+\-[01]+\b', Literal), # range
	(r'%b[01]+(\.[01]+)*\b', Literal), # concat

	# decimal
	(r'%d[0-9]+\-[0-9]+\b', Literal), # range
	(r'%d[0-9]+(\.[0-9]+)*\b', Literal), # concat

	# hexadecimal
	(r'%x[0-9a-fA-F]+\-[0-9a-fA-F]+\b', Literal), # range
	(r'%x[0-9a-fA-F]+(\.[0-9a-fA-F]+)*\b', Literal), # concat

	# repetition (<a>*<b>element) including nRule
	(r'\b[0-9]+\*[0-9]+', Operator),
	(r'\b[0-9]+\*', Operator),
	(r'\b[0-9]+', Operator),
	(r'\*', Operator),

	# Strictly speaking, these are not keyword but
	# are called `Core Rule'.
	(words(_core_rules, suffix=r'\b'), Keyword),

	# nonterminals (ALPHA *(ALPHA / DIGIT / "-"))
	(r'[a-zA-Z][a-zA-Z0-9-]*\b', Name.Class),

	# operators
	(r'(=/\|=\|/)', Operator),

	# punctuation
	(r'[\[\]()]', Punctuation),

	# fallback
	(r'\s+', Whitespace),
	(r'.', Text),
	],
	}


	class JsgfLexer(RegexLexer):
	"""
	For JSpeech Grammar Format grammars.

	.. versionadded:: 2.2
	"""
	name = 'JSGF'
	url = 'https://www.w3.org/TR/jsgf/'
	aliases = ['jsgf']
	filenames = ['*.jsgf']
	mimetypes = ['application/jsgf', 'application/x-jsgf', 'text/jsgf']

	tokens = {
	'root': [
	include('comments'),
	include('non-comments'),
	],
	'comments': [
	(r'/\\(?!/)', Comment.Multiline, 'documentation comment'),
	(r'/\[\w\W]?\*/', Comment.Multiline),
	(r'//.*$', Comment.Single),
	],
	'non-comments': [
	(r'\A#JSGF[^;]*', Comment.Preproc),
	(r'\s+', Whitespace),
	(r';', Punctuation),
	(r'[=\|()\[\]*+]', Operator),
	(r'/[^/]+/', Number.Float),
	(r'"', String.Double, 'string'),
	(r'\{', String.Other, 'tag'),
	(words(('import', 'public'), suffix=r'\b'), Keyword.Reserved),
	(r'grammar\b', Keyword.Reserved, 'grammar name'),
	(r'(<)(NULL\|VOID)(>)',
	bygroups(Punctuation, Name.Builtin, Punctuation)),
	(r'<', Punctuation, 'rulename'),
	(r'\w+\|[^\s;=\|()\[\]*+/"{<\w]+', Text),
	],
	'string': [
	(r'"', String.Double, '#pop'),
	(r'\\.', String.Escape),
	(r'[^\\"]+', String.Double),
	],
	'tag': [
	(r'\}', String.Other, '#pop'),
	(r'\\.', String.Escape),
	(r'[^\\}]+', String.Other),
	],
	'grammar name': [
	(r';', Punctuation, '#pop'),
	(r'\s+', Whitespace),
	(r'\.', Punctuation),
	(r'[^;\s.]+', Name.Namespace),
	],
	'rulename': [
	(r'>', Punctuation, '#pop'),
	(r'\*', Punctuation),
	(r'\s+', Whitespace),
	(r'([^.>]+)(\s*)(\.)', bygroups(Name.Namespace, Text, Punctuation)),
	(r'[^.>]+', Name.Constant),
	],
	'documentation comment': [
	(r'\*/', Comment.Multiline, '#pop'),
	(r'^(\s)(\?)(\s*)(@(?:example\|see))(\s+)'
	r'([\w\W]?(?=(?:^\s\?\s@\|\*/)))',
	bygroups(Whitespace, Comment.Multiline, Whitespace, Comment.Special,
	Whitespace, using(this, state='example'))),
	(r'(^\s\?\s)(@\S)',
	bygroups(Comment.Multiline, Comment.Special)),
	(r'[^*\n@]+\|\w\|\W', Comment.Multiline),
	],
	'example': [
	(r'(\n\s)(\)', bygroups(Whitespace, Comment.Multiline)),
	include('non-comments'),
	(r'.', Comment.Multiline),
	],
	}


	class PegLexer(RegexLexer):
	"""
	This lexer is for Parsing Expression Grammars (PEG).

	Various implementations of PEG have made different decisions
	regarding the syntax, so let's try to be accommodating:

	* `<-`, `←`, `:`, and `=` are all accepted as rule operators.

	* Both `\|` and `/` are choice operators.

	* `^`, `↑`, and `~` are cut operators.

	* A single `a-z` character immediately before a string, or
	multiple `a-z` characters following a string, are part of the
	string (e.g., `r"..."` or `"..."ilmsuxa`).

	.. versionadded:: 2.6
	"""

	name = 'PEG'
	url = 'https://bford.info/pub/lang/peg.pdf'
	aliases = ['peg']
	filenames = ['*.peg']
	mimetypes = ['text/x-peg']

	tokens = {
	'root': [
	# Comments
	(r'#.*$', Comment.Single),

	# All operators
	(r'<-\|[←:=/\|&!?*+^↑~]', Operator),

	# Other punctuation
	(r'[()]', Punctuation),

	# Keywords
	(r'\.', Keyword),

	# Character classes
	(r'(\[)([^\]](?:\\.[^\]\\])*)(\])',
	bygroups(Punctuation, String, Punctuation)),

	# Single and double quoted strings (with optional modifiers)
	(r'[a-z]?"[^"\\](?:\\.[^"\\])"[a-z]', String.Double),
	(r"[a-z]?'[^'\\](?:\\.[^'\\])'[a-z]", String.Single),

	# Nonterminals are not whitespace, operators, or punctuation
	(r'[^\s<←:=/\|&!?*+\^↑~()\[\]"\'#]+', Name.Class),

	# Fallback
	(r'.', Text),
	],
	}