| """ |
| pygments.lexers.markup |
| ~~~~~~~~~~~~~~~~~~~~~~ |
| |
| Lexers for non-HTML markup languages. |
| |
| :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS. |
| :license: BSD, see LICENSE for details. |
| """ |
| |
| import re |
| |
| from pygments.lexers.html import XmlLexer |
| from pygments.lexers.javascript import JavascriptLexer |
| from pygments.lexers.css import CssLexer |
| from pygments.lexers.lilypond import LilyPondLexer |
| from pygments.lexers.data import JsonLexer |
| |
| from pygments.lexer import RegexLexer, DelegatingLexer, include, bygroups, \ |
| using, this, do_insertions, default, words |
| from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ |
| Number, Punctuation, Generic, Other, Whitespace |
| from pygments.util import get_bool_opt, ClassNotFound |
| |
| __all__ = ['BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'TexLexer', 'GroffLexer', |
| 'MozPreprocHashLexer', 'MozPreprocPercentLexer', |
| 'MozPreprocXulLexer', 'MozPreprocJavascriptLexer', |
| 'MozPreprocCssLexer', 'MarkdownLexer', 'TiddlyWiki5Lexer', 'WikitextLexer'] |
| |
| |
| class BBCodeLexer(RegexLexer): |
| """ |
| A lexer that highlights BBCode(-like) syntax. |
| |
| .. versionadded:: 0.6 |
| """ |
| |
| name = 'BBCode' |
| aliases = ['bbcode'] |
| mimetypes = ['text/x-bbcode'] |
| |
| tokens = { |
| 'root': [ |
| (r'[^[]+', Text), |
| # tag/end tag begin |
| (r'\[/?\w+', Keyword, 'tag'), |
| # stray bracket |
| (r'\[', Text), |
| ], |
| 'tag': [ |
| (r'\s+', Text), |
| # attribute with value |
| (r'(\w+)(=)("?[^\s"\]]+"?)', |
| bygroups(Name.Attribute, Operator, String)), |
| # tag argument (a la [color=green]) |
| (r'(=)("?[^\s"\]]+"?)', |
| bygroups(Operator, String)), |
| # tag end |
| (r'\]', Keyword, '#pop'), |
| ], |
| } |
| |
| |
| class MoinWikiLexer(RegexLexer): |
| """ |
| For MoinMoin (and Trac) Wiki markup. |
| |
| .. versionadded:: 0.7 |
| """ |
| |
| name = 'MoinMoin/Trac Wiki markup' |
| aliases = ['trac-wiki', 'moin'] |
| filenames = [] |
| mimetypes = ['text/x-trac-wiki'] |
| flags = re.MULTILINE | re.IGNORECASE |
| |
| tokens = { |
| 'root': [ |
| (r'^#.*$', Comment), |
| (r'(!)(\S+)', bygroups(Keyword, Text)), # Ignore-next |
| # Titles |
| (r'^(=+)([^=]+)(=+)(\s*#.+)?$', |
| bygroups(Generic.Heading, using(this), Generic.Heading, String)), |
| # Literal code blocks, with optional shebang |
| (r'(\{\{\{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'), |
| (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment), # Formatting |
| # Lists |
| (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)), |
| (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)), |
| # Other Formatting |
| (r'\[\[\w+.*?\]\]', Keyword), # Macro |
| (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])', |
| bygroups(Keyword, String, Keyword)), # Link |
| (r'^----+$', Keyword), # Horizontal rules |
| (r'[^\n\'\[{!_~^,|]+', Text), |
| (r'\n', Text), |
| (r'.', Text), |
| ], |
| 'codeblock': [ |
| (r'\}\}\}', Name.Builtin, '#pop'), |
| # these blocks are allowed to be nested in Trac, but not MoinMoin |
| (r'\{\{\{', Text, '#push'), |
| (r'[^{}]+', Comment.Preproc), # slurp boring text |
| (r'.', Comment.Preproc), # allow loose { or } |
| ], |
| } |
| |
| |
| class RstLexer(RegexLexer): |
| """ |
| For reStructuredText markup. |
| |
| .. versionadded:: 0.7 |
| |
| Additional options accepted: |
| |
| `handlecodeblocks` |
| Highlight the contents of ``.. sourcecode:: language``, |
| ``.. code:: language`` and ``.. code-block:: language`` |
| directives with a lexer for the given language (default: |
| ``True``). |
| |
| .. versionadded:: 0.8 |
| """ |
| name = 'reStructuredText' |
| url = 'https://docutils.sourceforge.io/rst.html' |
| aliases = ['restructuredtext', 'rst', 'rest'] |
| filenames = ['*.rst', '*.rest'] |
| mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"] |
| flags = re.MULTILINE |
| |
| def _handle_sourcecode(self, match): |
| from pygments.lexers import get_lexer_by_name |
| |
| # section header |
| yield match.start(1), Punctuation, match.group(1) |
| yield match.start(2), Text, match.group(2) |
| yield match.start(3), Operator.Word, match.group(3) |
| yield match.start(4), Punctuation, match.group(4) |
| yield match.start(5), Text, match.group(5) |
| yield match.start(6), Keyword, match.group(6) |
| yield match.start(7), Text, match.group(7) |
| |
| # lookup lexer if wanted and existing |
| lexer = None |
| if self.handlecodeblocks: |
| try: |
| lexer = get_lexer_by_name(match.group(6).strip()) |
| except ClassNotFound: |
| pass |
| indention = match.group(8) |
| indention_size = len(indention) |
| code = (indention + match.group(9) + match.group(10) + match.group(11)) |
| |
| # no lexer for this language. handle it like it was a code block |
| if lexer is None: |
| yield match.start(8), String, code |
| return |
| |
| # highlight the lines with the lexer. |
| ins = [] |
| codelines = code.splitlines(True) |
| code = '' |
| for line in codelines: |
| if len(line) > indention_size: |
| ins.append((len(code), [(0, Text, line[:indention_size])])) |
| code += line[indention_size:] |
| else: |
| code += line |
| yield from do_insertions(ins, lexer.get_tokens_unprocessed(code)) |
| |
| # from docutils.parsers.rst.states |
| closers = '\'")]}>\u2019\u201d\xbb!?' |
| unicode_delimiters = '\u2010\u2011\u2012\u2013\u2014\u00a0' |
| end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))' |
| % (re.escape(unicode_delimiters), |
| re.escape(closers))) |
| |
| tokens = { |
| 'root': [ |
| # Heading with overline |
| (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)' |
| r'(.+)(\n)(\1)(\n)', |
| bygroups(Generic.Heading, Text, Generic.Heading, |
| Text, Generic.Heading, Text)), |
| # Plain heading |
| (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|' |
| r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)', |
| bygroups(Generic.Heading, Text, Generic.Heading, Text)), |
| # Bulleted lists |
| (r'^(\s*)([-*+])( .+\n(?:\1 .+\n)*)', |
| bygroups(Text, Number, using(this, state='inline'))), |
| # Numbered lists |
| (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1 .+\n)*)', |
| bygroups(Text, Number, using(this, state='inline'))), |
| (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1 .+\n)*)', |
| bygroups(Text, Number, using(this, state='inline'))), |
| # Numbered, but keep words at BOL from becoming lists |
| (r'^(\s*)([A-Z]+\.)( .+\n(?:\1 .+\n)+)', |
| bygroups(Text, Number, using(this, state='inline'))), |
| (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)', |
| bygroups(Text, Number, using(this, state='inline'))), |
| # Line blocks |
| (r'^(\s*)(\|)( .+\n(?:\| .+\n)*)', |
| bygroups(Text, Operator, using(this, state='inline'))), |
| # Sourcecode directives |
| (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)' |
| r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*)?\n)+)', |
| _handle_sourcecode), |
| # A directive |
| (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))', |
| bygroups(Punctuation, Text, Operator.Word, Punctuation, Text, |
| using(this, state='inline'))), |
| # A reference target |
| (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$', |
| bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))), |
| # A footnote/citation target |
| (r'^( *\.\.)(\s*)(\[.+\])(.*?)$', |
| bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))), |
| # A substitution def |
| (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))', |
| bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word, |
| Punctuation, Text, using(this, state='inline'))), |
| # Comments |
| (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc), |
| # Field list marker |
| (r'^( *)(:(?:\\\\|\\:|[^:\n])+:(?=\s))([ \t]*)', |
| bygroups(Text, Name.Class, Text)), |
| # Definition list |
| (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)', |
| bygroups(using(this, state='inline'), using(this, state='inline'))), |
| # Code blocks |
| (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*)?\n)+)', |
| bygroups(String.Escape, Text, String, String, Text, String)), |
| include('inline'), |
| ], |
| 'inline': [ |
| (r'\\.', Text), # escape |
| (r'``', String, 'literal'), # code |
| (r'(`.+?)(<.+?>)(`__?)', # reference with inline target |
| bygroups(String, String.Interpol, String)), |
| (r'`.+?`__?', String), # reference |
| (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?', |
| bygroups(Name.Variable, Name.Attribute)), # role |
| (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)', |
| bygroups(Name.Attribute, Name.Variable)), # role (content first) |
| (r'\*\*.+?\*\*', Generic.Strong), # Strong emphasis |
| (r'\*.+?\*', Generic.Emph), # Emphasis |
| (r'\[.*?\]_', String), # Footnote or citation |
| (r'<.+?>', Name.Tag), # Hyperlink |
| (r'[^\\\n\[*`:]+', Text), |
| (r'.', Text), |
| ], |
| 'literal': [ |
| (r'[^`]+', String), |
| (r'``' + end_string_suffix, String, '#pop'), |
| (r'`', String), |
| ] |
| } |
| |
| def __init__(self, **options): |
| self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True) |
| RegexLexer.__init__(self, **options) |
| |
| def analyse_text(text): |
| if text[:2] == '..' and text[2:3] != '.': |
| return 0.3 |
| p1 = text.find("\n") |
| p2 = text.find("\n", p1 + 1) |
| if (p2 > -1 and # has two lines |
| p1 * 2 + 1 == p2 and # they are the same length |
| text[p1+1] in '-=' and # the next line both starts and ends with |
| text[p1+1] == text[p2-1]): # ...a sufficiently high header |
| return 0.5 |
| |
| |
| class TexLexer(RegexLexer): |
| """ |
| Lexer for the TeX and LaTeX typesetting languages. |
| """ |
| |
| name = 'TeX' |
| aliases = ['tex', 'latex'] |
| filenames = ['*.tex', '*.aux', '*.toc'] |
| mimetypes = ['text/x-tex', 'text/x-latex'] |
| |
| tokens = { |
| 'general': [ |
| (r'%.*?\n', Comment), |
| (r'[{}]', Name.Builtin), |
| (r'[&_^]', Name.Builtin), |
| ], |
| 'root': [ |
| (r'\\\[', String.Backtick, 'displaymath'), |
| (r'\\\(', String, 'inlinemath'), |
| (r'\$\$', String.Backtick, 'displaymath'), |
| (r'\$', String, 'inlinemath'), |
| (r'\\([a-zA-Z]+|.)', Keyword, 'command'), |
| (r'\\$', Keyword), |
| include('general'), |
| (r'[^\\$%&_^{}]+', Text), |
| ], |
| 'math': [ |
| (r'\\([a-zA-Z]+|.)', Name.Variable), |
| include('general'), |
| (r'[0-9]+', Number), |
| (r'[-=!+*/()\[\]]', Operator), |
| (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin), |
| ], |
| 'inlinemath': [ |
| (r'\\\)', String, '#pop'), |
| (r'\$', String, '#pop'), |
| include('math'), |
| ], |
| 'displaymath': [ |
| (r'\\\]', String, '#pop'), |
| (r'\$\$', String, '#pop'), |
| (r'\$', Name.Builtin), |
| include('math'), |
| ], |
| 'command': [ |
| (r'\[.*?\]', Name.Attribute), |
| (r'\*', Keyword), |
| default('#pop'), |
| ], |
| } |
| |
| def analyse_text(text): |
| for start in ("\\documentclass", "\\input", "\\documentstyle", |
| "\\relax"): |
| if text[:len(start)] == start: |
| return True |
| |
| |
| class GroffLexer(RegexLexer): |
| """ |
| Lexer for the (g)roff typesetting language, supporting groff |
| extensions. Mainly useful for highlighting manpage sources. |
| |
| .. versionadded:: 0.6 |
| """ |
| |
| name = 'Groff' |
| aliases = ['groff', 'nroff', 'man'] |
| filenames = ['*.[1-9]', '*.man', '*.1p', '*.3pm'] |
| mimetypes = ['application/x-troff', 'text/troff'] |
| |
| tokens = { |
| 'root': [ |
| (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'), |
| (r'\.', Punctuation, 'request'), |
| # Regular characters, slurp till we find a backslash or newline |
| (r'[^\\\n]+', Text, 'textline'), |
| default('textline'), |
| ], |
| 'textline': [ |
| include('escapes'), |
| (r'[^\\\n]+', Text), |
| (r'\n', Text, '#pop'), |
| ], |
| 'escapes': [ |
| # groff has many ways to write escapes. |
| (r'\\"[^\n]*', Comment), |
| (r'\\[fn]\w', String.Escape), |
| (r'\\\(.{2}', String.Escape), |
| (r'\\.\[.*\]', String.Escape), |
| (r'\\.', String.Escape), |
| (r'\\\n', Text, 'request'), |
| ], |
| 'request': [ |
| (r'\n', Text, '#pop'), |
| include('escapes'), |
| (r'"[^\n"]+"', String.Double), |
| (r'\d+', Number), |
| (r'\S+', String), |
| (r'\s+', Text), |
| ], |
| } |
| |
| def analyse_text(text): |
| if text[:1] != '.': |
| return False |
| if text[:3] == '.\\"': |
| return True |
| if text[:4] == '.TH ': |
| return True |
| if text[1:3].isalnum() and text[3].isspace(): |
| return 0.9 |
| |
| |
| class MozPreprocHashLexer(RegexLexer): |
| """ |
| Lexer for Mozilla Preprocessor files (with '#' as the marker). |
| |
| Other data is left untouched. |
| |
| .. versionadded:: 2.0 |
| """ |
| name = 'mozhashpreproc' |
| aliases = [name] |
| filenames = [] |
| mimetypes = [] |
| |
| tokens = { |
| 'root': [ |
| (r'^#', Comment.Preproc, ('expr', 'exprstart')), |
| (r'.+', Other), |
| ], |
| 'exprstart': [ |
| (r'(literal)(.*)', bygroups(Comment.Preproc, Text), '#pop:2'), |
| (words(( |
| 'define', 'undef', 'if', 'ifdef', 'ifndef', 'else', 'elif', |
| 'elifdef', 'elifndef', 'endif', 'expand', 'filter', 'unfilter', |
| 'include', 'includesubst', 'error')), |
| Comment.Preproc, '#pop'), |
| ], |
| 'expr': [ |
| (words(('!', '!=', '==', '&&', '||')), Operator), |
| (r'(defined)(\()', bygroups(Keyword, Punctuation)), |
| (r'\)', Punctuation), |
| (r'[0-9]+', Number.Decimal), |
| (r'__\w+?__', Name.Variable), |
| (r'@\w+?@', Name.Class), |
| (r'\w+', Name), |
| (r'\n', Text, '#pop'), |
| (r'\s+', Text), |
| (r'\S', Punctuation), |
| ], |
| } |
| |
| |
| class MozPreprocPercentLexer(MozPreprocHashLexer): |
| """ |
| Lexer for Mozilla Preprocessor files (with '%' as the marker). |
| |
| Other data is left untouched. |
| |
| .. versionadded:: 2.0 |
| """ |
| name = 'mozpercentpreproc' |
| aliases = [name] |
| filenames = [] |
| mimetypes = [] |
| |
| tokens = { |
| 'root': [ |
| (r'^%', Comment.Preproc, ('expr', 'exprstart')), |
| (r'.+', Other), |
| ], |
| } |
| |
| |
| class MozPreprocXulLexer(DelegatingLexer): |
| """ |
| Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the |
| `XmlLexer`. |
| |
| .. versionadded:: 2.0 |
| """ |
| name = "XUL+mozpreproc" |
| aliases = ['xul+mozpreproc'] |
| filenames = ['*.xul.in'] |
| mimetypes = [] |
| |
| def __init__(self, **options): |
| super().__init__(XmlLexer, MozPreprocHashLexer, **options) |
| |
| |
| class MozPreprocJavascriptLexer(DelegatingLexer): |
| """ |
| Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the |
| `JavascriptLexer`. |
| |
| .. versionadded:: 2.0 |
| """ |
| name = "Javascript+mozpreproc" |
| aliases = ['javascript+mozpreproc'] |
| filenames = ['*.js.in'] |
| mimetypes = [] |
| |
| def __init__(self, **options): |
| super().__init__(JavascriptLexer, MozPreprocHashLexer, **options) |
| |
| |
| class MozPreprocCssLexer(DelegatingLexer): |
| """ |
| Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the |
| `CssLexer`. |
| |
| .. versionadded:: 2.0 |
| """ |
| name = "CSS+mozpreproc" |
| aliases = ['css+mozpreproc'] |
| filenames = ['*.css.in'] |
| mimetypes = [] |
| |
| def __init__(self, **options): |
| super().__init__(CssLexer, MozPreprocPercentLexer, **options) |
| |
| |
| class MarkdownLexer(RegexLexer): |
| """ |
| For Markdown markup. |
| |
| .. versionadded:: 2.2 |
| """ |
| name = 'Markdown' |
| url = 'https://daringfireball.net/projects/markdown/' |
| aliases = ['markdown', 'md'] |
| filenames = ['*.md', '*.markdown'] |
| mimetypes = ["text/x-markdown"] |
| flags = re.MULTILINE |
| |
| def _handle_codeblock(self, match): |
| from pygments.lexers import get_lexer_by_name |
| |
| yield match.start('initial'), String.Backtick, match.group('initial') |
| yield match.start('lang'), String.Backtick, match.group('lang') |
| if match.group('afterlang') is not None: |
| yield match.start('whitespace'), Whitespace, match.group('whitespace') |
| yield match.start('extra'), Text, match.group('extra') |
| yield match.start('newline'), Whitespace, match.group('newline') |
| |
| # lookup lexer if wanted and existing |
| lexer = None |
| if self.handlecodeblocks: |
| try: |
| lexer = get_lexer_by_name(match.group('lang').strip()) |
| except ClassNotFound: |
| pass |
| code = match.group('code') |
| # no lexer for this language. handle it like it was a code block |
| if lexer is None: |
| yield match.start('code'), String, code |
| else: |
| # FIXME: aren't the offsets wrong? |
| yield from do_insertions([], lexer.get_tokens_unprocessed(code)) |
| |
| yield match.start('terminator'), String.Backtick, match.group('terminator') |
| |
| tokens = { |
| 'root': [ |
| # heading with '#' prefix (atx-style) |
| (r'(^#[^#].+)(\n)', bygroups(Generic.Heading, Text)), |
| # subheading with '#' prefix (atx-style) |
| (r'(^#{2,6}[^#].+)(\n)', bygroups(Generic.Subheading, Text)), |
| # heading with '=' underlines (Setext-style) |
| (r'^(.+)(\n)(=+)(\n)', bygroups(Generic.Heading, Text, Generic.Heading, Text)), |
| # subheading with '-' underlines (Setext-style) |
| (r'^(.+)(\n)(-+)(\n)', bygroups(Generic.Subheading, Text, Generic.Subheading, Text)), |
| # task list |
| (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)', |
| bygroups(Whitespace, Keyword, Keyword, using(this, state='inline'))), |
| # bulleted list |
| (r'^(\s*)([*-])(\s)(.+\n)', |
| bygroups(Whitespace, Keyword, Whitespace, using(this, state='inline'))), |
| # numbered list |
| (r'^(\s*)([0-9]+\.)( .+\n)', |
| bygroups(Whitespace, Keyword, using(this, state='inline'))), |
| # quote |
| (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)), |
| # code block fenced by 3 backticks |
| (r'^(\s*```\n[\w\W]*?^\s*```$\n)', String.Backtick), |
| # code block with language |
| # Some tools include extra stuff after the language name, just |
| # highlight that as text. For example: https://docs.enola.dev/use/execmd |
| (r'''(?x) |
| ^(?P<initial>\s*```) |
| (?P<lang>[\w\-]+) |
| (?P<afterlang> |
| (?P<whitespace>[^\S\n]+) |
| (?P<extra>.*))? |
| (?P<newline>\n) |
| (?P<code>(.|\n)*?) |
| (?P<terminator>^\s*```$\n) |
| ''', |
| _handle_codeblock), |
| |
| include('inline'), |
| ], |
| 'inline': [ |
| # escape |
| (r'\\.', Text), |
| # inline code |
| (r'([^`]?)(`[^`\n]+`)', bygroups(Text, String.Backtick)), |
| # warning: the following rules eat outer tags. |
| # eg. **foo _bar_ baz** => foo and baz are not recognized as bold |
| # bold fenced by '**' |
| (r'([^\*]?)(\*\*[^* \n][^*\n]*\*\*)', bygroups(Text, Generic.Strong)), |
| # bold fenced by '__' |
| (r'([^_]?)(__[^_ \n][^_\n]*__)', bygroups(Text, Generic.Strong)), |
| # italics fenced by '*' |
| (r'([^\*]?)(\*[^* \n][^*\n]*\*)', bygroups(Text, Generic.Emph)), |
| # italics fenced by '_' |
| (r'([^_]?)(_[^_ \n][^_\n]*_)', bygroups(Text, Generic.Emph)), |
| # strikethrough |
| (r'([^~]?)(~~[^~ \n][^~\n]*~~)', bygroups(Text, Generic.Deleted)), |
| # mentions and topics (twitter and github stuff) |
| (r'[@#][\w/:]+', Name.Entity), |
| # (image?) links eg:  |
| (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))', |
| bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)), |
| # reference-style links, e.g.: |
| # [an example][id] |
| # [id]: http://example.com/ |
| (r'(\[)([^]]+)(\])(\[)([^]]*)(\])', |
| bygroups(Text, Name.Tag, Text, Text, Name.Label, Text)), |
| (r'^(\s*\[)([^]]*)(\]:\s*)(.+)', |
| bygroups(Text, Name.Label, Text, Name.Attribute)), |
| |
| # general text, must come last! |
| (r'[^\\\s]+', Text), |
| (r'.', Text), |
| ], |
| } |
| |
| def __init__(self, **options): |
| self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True) |
| RegexLexer.__init__(self, **options) |
| |
| |
| class TiddlyWiki5Lexer(RegexLexer): |
| """ |
| For TiddlyWiki5 markup. |
| |
| .. versionadded:: 2.7 |
| """ |
| name = 'tiddler' |
| url = 'https://tiddlywiki.com/#TiddlerFiles' |
| aliases = ['tid'] |
| filenames = ['*.tid'] |
| mimetypes = ["text/vnd.tiddlywiki"] |
| flags = re.MULTILINE |
| |
| def _handle_codeblock(self, match): |
| """ |
| match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks |
| """ |
| from pygments.lexers import get_lexer_by_name |
| |
| # section header |
| yield match.start(1), String, match.group(1) |
| yield match.start(2), String, match.group(2) |
| yield match.start(3), Text, match.group(3) |
| |
| # lookup lexer if wanted and existing |
| lexer = None |
| if self.handlecodeblocks: |
| try: |
| lexer = get_lexer_by_name(match.group(2).strip()) |
| except ClassNotFound: |
| pass |
| code = match.group(4) |
| |
| # no lexer for this language. handle it like it was a code block |
| if lexer is None: |
| yield match.start(4), String, code |
| return |
| |
| yield from do_insertions([], lexer.get_tokens_unprocessed(code)) |
| |
| yield match.start(5), String, match.group(5) |
| |
| def _handle_cssblock(self, match): |
| """ |
| match args: 1:style tag 2:newline, 3:code, 4:closing style tag |
| """ |
| from pygments.lexers import get_lexer_by_name |
| |
| # section header |
| yield match.start(1), String, match.group(1) |
| yield match.start(2), String, match.group(2) |
| |
| lexer = None |
| if self.handlecodeblocks: |
| try: |
| lexer = get_lexer_by_name('css') |
| except ClassNotFound: |
| pass |
| code = match.group(3) |
| |
| # no lexer for this language. handle it like it was a code block |
| if lexer is None: |
| yield match.start(3), String, code |
| return |
| |
| yield from do_insertions([], lexer.get_tokens_unprocessed(code)) |
| |
| yield match.start(4), String, match.group(4) |
| |
| tokens = { |
| 'root': [ |
| # title in metadata section |
| (r'^(title)(:\s)(.+\n)', bygroups(Keyword, Text, Generic.Heading)), |
| # headings |
| (r'^(!)([^!].+\n)', bygroups(Generic.Heading, Text)), |
| (r'^(!{2,6})(.+\n)', bygroups(Generic.Subheading, Text)), |
| # bulleted or numbered lists or single-line block quotes |
| # (can be mixed) |
| (r'^(\s*)([*#>]+)(\s*)(.+\n)', |
| bygroups(Text, Keyword, Text, using(this, state='inline'))), |
| # multi-line block quotes |
| (r'^(<<<.*\n)([\w\W]*?)(^<<<.*$)', bygroups(String, Text, String)), |
| # table header |
| (r'^(\|.*?\|h)$', bygroups(Generic.Strong)), |
| # table footer or caption |
| (r'^(\|.*?\|[cf])$', bygroups(Generic.Emph)), |
| # table class |
| (r'^(\|.*?\|k)$', bygroups(Name.Tag)), |
| # definitions |
| (r'^(;.*)$', bygroups(Generic.Strong)), |
| # text block |
| (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)), |
| # code block with language |
| (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock), |
| # CSS style block |
| (r'^(<style>)(\n)([\w\W]*?)(^</style>$)', _handle_cssblock), |
| |
| include('keywords'), |
| include('inline'), |
| ], |
| 'keywords': [ |
| (words(( |
| '\\define', '\\end', 'caption', 'created', 'modified', 'tags', |
| 'title', 'type'), prefix=r'^', suffix=r'\b'), |
| Keyword), |
| ], |
| 'inline': [ |
| # escape |
| (r'\\.', Text), |
| # created or modified date |
| (r'\d{17}', Number.Integer), |
| # italics |
| (r'(\s)(//[^/]+//)((?=\W|\n))', |
| bygroups(Text, Generic.Emph, Text)), |
| # superscript |
| (r'(\s)(\^\^[^\^]+\^\^)', bygroups(Text, Generic.Emph)), |
| # subscript |
| (r'(\s)(,,[^,]+,,)', bygroups(Text, Generic.Emph)), |
| # underscore |
| (r'(\s)(__[^_]+__)', bygroups(Text, Generic.Strong)), |
| # bold |
| (r"(\s)(''[^']+'')((?=\W|\n))", |
| bygroups(Text, Generic.Strong, Text)), |
| # strikethrough |
| (r'(\s)(~~[^~]+~~)((?=\W|\n))', |
| bygroups(Text, Generic.Deleted, Text)), |
| # TiddlyWiki variables |
| (r'<<[^>]+>>', Name.Tag), |
| (r'\$\$[^$]+\$\$', Name.Tag), |
| (r'\$\([^)]+\)\$', Name.Tag), |
| # TiddlyWiki style or class |
| (r'^@@.*$', Name.Tag), |
| # HTML tags |
| (r'</?[^>]+>', Name.Tag), |
| # inline code |
| (r'`[^`]+`', String.Backtick), |
| # HTML escaped symbols |
| (r'&\S*?;', String.Regex), |
| # Wiki links |
| (r'(\[{2})([^]\|]+)(\]{2})', bygroups(Text, Name.Tag, Text)), |
| # External links |
| (r'(\[{2})([^]\|]+)(\|)([^]\|]+)(\]{2})', |
| bygroups(Text, Name.Tag, Text, Name.Attribute, Text)), |
| # Transclusion |
| (r'(\{{2})([^}]+)(\}{2})', bygroups(Text, Name.Tag, Text)), |
| # URLs |
| (r'(\b.?.?tps?://[^\s"]+)', bygroups(Name.Attribute)), |
| |
| # general text, must come last! |
| (r'[\w]+', Text), |
| (r'.', Text) |
| ], |
| } |
| |
| def __init__(self, **options): |
| self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True) |
| RegexLexer.__init__(self, **options) |
| |
| |
| class WikitextLexer(RegexLexer): |
| """ |
| For MediaWiki Wikitext. |
| |
| Parsing Wikitext is tricky, and results vary between different MediaWiki |
| installations, so we only highlight common syntaxes (built-in or from |
| popular extensions), and also assume templates produce no unbalanced |
| syntaxes. |
| |
| .. versionadded:: 2.15 |
| """ |
| name = 'Wikitext' |
| url = 'https://www.mediawiki.org/wiki/Wikitext' |
| aliases = ['wikitext', 'mediawiki'] |
| filenames = [] |
| mimetypes = ['text/x-wiki'] |
| flags = re.MULTILINE |
| |
| def nowiki_tag_rules(tag_name): |
| return [ |
| (r'(?i)(</)({})(\s*)(>)'.format(tag_name), bygroups(Punctuation, |
| Name.Tag, Whitespace, Punctuation), '#pop'), |
| include('entity'), |
| include('text'), |
| ] |
| |
| def plaintext_tag_rules(tag_name): |
| return [ |
| (r'(?si)(.*?)(</)({})(\s*)(>)'.format(tag_name), bygroups(Text, |
| Punctuation, Name.Tag, Whitespace, Punctuation), '#pop'), |
| ] |
| |
| def delegate_tag_rules(tag_name, lexer): |
| return [ |
| (r'(?i)(</)({})(\s*)(>)'.format(tag_name), bygroups(Punctuation, |
| Name.Tag, Whitespace, Punctuation), '#pop'), |
| (r'(?si).+?(?=</{}\s*>)'.format(tag_name), using(lexer)), |
| ] |
| |
| def text_rules(token): |
| return [ |
| (r'\w+', token), |
| (r'[^\S\n]+', token), |
| (r'(?s).', token), |
| ] |
| |
| def handle_syntaxhighlight(self, match, ctx): |
| from pygments.lexers import get_lexer_by_name |
| |
| attr_content = match.group() |
| start = 0 |
| index = 0 |
| while True: |
| index = attr_content.find('>', start) |
| # Exclude comment end (-->) |
| if attr_content[index-2:index] != '--': |
| break |
| start = index + 1 |
| |
| if index == -1: |
| # No tag end |
| yield from self.get_tokens_unprocessed(attr_content, stack=['root', 'attr']) |
| return |
| attr = attr_content[:index] |
| yield from self.get_tokens_unprocessed(attr, stack=['root', 'attr']) |
| yield match.start(3) + index, Punctuation, '>' |
| |
| lexer = None |
| content = attr_content[index+1:] |
| lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr) |
| |
| if len(lang_match) >= 1: |
| # Pick the last match in case of multiple matches |
| lang = lang_match[-1][1] |
| try: |
| lexer = get_lexer_by_name(lang) |
| except ClassNotFound: |
| pass |
| |
| if lexer is None: |
| yield match.start() + index + 1, Text, content |
| else: |
| yield from lexer.get_tokens_unprocessed(content) |
| |
| def handle_score(self, match, ctx): |
| attr_content = match.group() |
| start = 0 |
| index = 0 |
| while True: |
| index = attr_content.find('>', start) |
| # Exclude comment end (-->) |
| if attr_content[index-2:index] != '--': |
| break |
| start = index + 1 |
| |
| if index == -1: |
| # No tag end |
| yield from self.get_tokens_unprocessed(attr_content, stack=['root', 'attr']) |
| return |
| attr = attr_content[:index] |
| content = attr_content[index+1:] |
| yield from self.get_tokens_unprocessed(attr, stack=['root', 'attr']) |
| yield match.start(3) + index, Punctuation, '>' |
| |
| lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr) |
| # Pick the last match in case of multiple matches |
| lang = lang_match[-1][1] if len(lang_match) >= 1 else 'lilypond' |
| |
| if lang == 'lilypond': # Case sensitive |
| yield from LilyPondLexer().get_tokens_unprocessed(content) |
| else: # ABC |
| # FIXME: Use ABC lexer in the future |
| yield match.start() + index + 1, Text, content |
| |
| # a-z removed to prevent linter from complaining, REMEMBER to use (?i) |
| title_char = r' %!"$&\'()*,\-./0-9:;=?@A-Z\\\^_`~+\u0080-\uFFFF' |
| nbsp_char = r'(?:\t| |&\#0*160;|&\#[Xx]0*[Aa]0;|[ \xA0\u1680\u2000-\u200A\u202F\u205F\u3000])' |
| link_address = r'(?:[0-9.]+|\[[0-9a-f:.]+\]|[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD])' |
| link_char_class = r'[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD]' |
| double_slashes_i = { |
| '__FORCETOC__', '__NOCONTENTCONVERT__', '__NOCC__', '__NOEDITSECTION__', '__NOGALLERY__', |
| '__NOTITLECONVERT__', '__NOTC__', '__NOTOC__', '__TOC__', |
| } |
| double_slashes = { |
| '__EXPECTUNUSEDCATEGORY__', '__HIDDENCAT__', '__INDEX__', '__NEWSECTIONLINK__', |
| '__NOINDEX__', '__NONEWSECTIONLINK__', '__STATICREDIRECT__', '__NOGLOBAL__', |
| '__DISAMBIG__', '__EXPECTED_UNCONNECTED_PAGE__', |
| } |
| protocols = { |
| 'bitcoin:', 'ftp://', 'ftps://', 'geo:', 'git://', 'gopher://', 'http://', 'https://', |
| 'irc://', 'ircs://', 'magnet:', 'mailto:', 'mms://', 'news:', 'nntp://', 'redis://', |
| 'sftp://', 'sip:', 'sips:', 'sms:', 'ssh://', 'svn://', 'tel:', 'telnet://', 'urn:', |
| 'worldwind://', 'xmpp:', '//', |
| } |
| non_relative_protocols = protocols - {'//'} |
| html_tags = { |
| 'abbr', 'b', 'bdi', 'bdo', 'big', 'blockquote', 'br', 'caption', 'center', 'cite', 'code', |
| 'data', 'dd', 'del', 'dfn', 'div', 'dl', 'dt', 'em', 'font', 'h1', 'h2', 'h3', 'h4', 'h5', |
| 'h6', 'hr', 'i', 'ins', 'kbd', 'li', 'link', 'mark', 'meta', 'ol', 'p', 'q', 'rb', 'rp', |
| 'rt', 'rtc', 'ruby', 's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup', |
| 'table', 'td', 'th', 'time', 'tr', 'tt', 'u', 'ul', 'var', 'wbr', |
| } |
| parser_tags = { |
| 'graph', 'charinsert', 'rss', 'chem', 'categorytree', 'nowiki', 'inputbox', 'math', |
| 'hiero', 'score', 'pre', 'ref', 'translate', 'imagemap', 'templatestyles', 'languages', |
| 'noinclude', 'mapframe', 'section', 'poem', 'syntaxhighlight', 'includeonly', 'tvar', |
| 'onlyinclude', 'templatedata', 'langconvert', 'timeline', 'dynamicpagelist', 'gallery', |
| 'maplink', 'ce', 'references', |
| } |
| variant_langs = { |
| # ZhConverter.php |
| 'zh', 'zh-hans', 'zh-hant', 'zh-cn', 'zh-hk', 'zh-mo', 'zh-my', 'zh-sg', 'zh-tw', |
| # UnConverter.php |
| 'uz', 'uz-latn', 'uz-cyrl', |
| # TlyConverter.php |
| 'tly', 'tly-cyrl', |
| # TgConverter.php |
| 'tg', 'tg-latn', |
| # SrConverter.php |
| 'sr', 'sr-ec', 'sr-el', |
| # ShiConverter.php |
| 'shi', 'shi-tfng', 'shi-latn', |
| # ShConverter.php |
| 'sh-latn', 'sh-cyrl', |
| # KuConverter.php |
| 'ku', 'ku-arab', 'ku-latn', |
| # KkConverter.php |
| 'kk', 'kk-cyrl', 'kk-latn', 'kk-arab', 'kk-kz', 'kk-tr', 'kk-cn', |
| # IuConverter.php |
| 'iu', 'ike-cans', 'ike-latn', |
| # GanConverter.php |
| 'gan', 'gan-hans', 'gan-hant', |
| # EnConverter.php |
| 'en', 'en-x-piglatin', |
| # CrhConverter.php |
| 'crh', 'crh-cyrl', 'crh-latn', |
| # BanConverter.php |
| 'ban', 'ban-bali', 'ban-x-dharma', 'ban-x-palmleaf', 'ban-x-pku', |
| } |
| magic_vars_i = { |
| 'ARTICLEPATH', 'INT', 'PAGEID', 'SCRIPTPATH', 'SERVER', 'SERVERNAME', 'STYLEPATH', |
| } |
| magic_vars = { |
| '!', '=', 'BASEPAGENAME', 'BASEPAGENAMEE', 'CASCADINGSOURCES', 'CONTENTLANGUAGE', |
| 'CONTENTLANG', 'CURRENTDAY', 'CURRENTDAY2', 'CURRENTDAYNAME', 'CURRENTDOW', 'CURRENTHOUR', |
| 'CURRENTMONTH', 'CURRENTMONTH2', 'CURRENTMONTH1', 'CURRENTMONTHABBREV', 'CURRENTMONTHNAME', |
| 'CURRENTMONTHNAMEGEN', 'CURRENTTIME', 'CURRENTTIMESTAMP', 'CURRENTVERSION', 'CURRENTWEEK', |
| 'CURRENTYEAR', 'DIRECTIONMARK', 'DIRMARK', 'FULLPAGENAME', 'FULLPAGENAMEE', 'LOCALDAY', |
| 'LOCALDAY2', 'LOCALDAYNAME', 'LOCALDOW', 'LOCALHOUR', 'LOCALMONTH', 'LOCALMONTH2', |
| 'LOCALMONTH1', 'LOCALMONTHABBREV', 'LOCALMONTHNAME', 'LOCALMONTHNAMEGEN', 'LOCALTIME', |
| 'LOCALTIMESTAMP', 'LOCALWEEK', 'LOCALYEAR', 'NAMESPACE', 'NAMESPACEE', 'NAMESPACENUMBER', |
| 'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS', 'NUMBEROFARTICLES', 'NUMBEROFEDITS', |
| 'NUMBEROFFILES', 'NUMBEROFPAGES', 'NUMBEROFUSERS', 'PAGELANGUAGE', 'PAGENAME', 'PAGENAMEE', |
| 'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH', 'REVISIONMONTH1', |
| 'REVISIONSIZE', 'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR', 'ROOTPAGENAME', |
| 'ROOTPAGENAMEE', 'SITENAME', 'SUBJECTPAGENAME', 'ARTICLEPAGENAME', 'SUBJECTPAGENAMEE', |
| 'ARTICLEPAGENAMEE', 'SUBJECTSPACE', 'ARTICLESPACE', 'SUBJECTSPACEE', 'ARTICLESPACEE', |
| 'SUBPAGENAME', 'SUBPAGENAMEE', 'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE', |
| } |
| parser_functions_i = { |
| 'ANCHORENCODE', 'BIDI', 'CANONICALURL', 'CANONICALURLE', 'FILEPATH', 'FORMATNUM', |
| 'FULLURL', 'FULLURLE', 'GENDER', 'GRAMMAR', 'INT', r'\#LANGUAGE', 'LC', 'LCFIRST', 'LOCALURL', |
| 'LOCALURLE', 'NS', 'NSE', 'PADLEFT', 'PADRIGHT', 'PAGEID', 'PLURAL', 'UC', 'UCFIRST', |
| 'URLENCODE', |
| } |
| parser_functions = { |
| 'BASEPAGENAME', 'BASEPAGENAMEE', 'CASCADINGSOURCES', 'DEFAULTSORT', 'DEFAULTSORTKEY', |
| 'DEFAULTCATEGORYSORT', 'FULLPAGENAME', 'FULLPAGENAMEE', 'NAMESPACE', 'NAMESPACEE', |
| 'NAMESPACENUMBER', 'NUMBERINGROUP', 'NUMINGROUP', 'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS', |
| 'NUMBEROFARTICLES', 'NUMBEROFEDITS', 'NUMBEROFFILES', 'NUMBEROFPAGES', 'NUMBEROFUSERS', |
| 'PAGENAME', 'PAGENAMEE', 'PAGESINCATEGORY', 'PAGESINCAT', 'PAGESIZE', 'PROTECTIONEXPIRY', |
| 'PROTECTIONLEVEL', 'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH', |
| 'REVISIONMONTH1', 'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR', 'ROOTPAGENAME', |
| 'ROOTPAGENAMEE', 'SUBJECTPAGENAME', 'ARTICLEPAGENAME', 'SUBJECTPAGENAMEE', |
| 'ARTICLEPAGENAMEE', 'SUBJECTSPACE', 'ARTICLESPACE', 'SUBJECTSPACEE', 'ARTICLESPACEE', |
| 'SUBPAGENAME', 'SUBPAGENAMEE', 'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE', |
| 'INT', 'DISPLAYTITLE', 'PAGESINNAMESPACE', 'PAGESINNS', |
| } |
| |
| tokens = { |
| 'root': [ |
| # Redirects |
| (r"""(?xi) |
| (\A\s*?)(\#REDIRECT:?) # may contain a colon |
| (\s+)(\[\[) (?=[^\]\n]* \]\]$) |
| """, |
| bygroups(Whitespace, Keyword, Whitespace, Punctuation), 'redirect-inner'), |
| # Subheadings |
| (r'^(={2,6})(.+?)(\1)(\s*$\n)', |
| bygroups(Generic.Subheading, Generic.Subheading, Generic.Subheading, Whitespace)), |
| # Headings |
| (r'^(=.+?=)(\s*$\n)', |
| bygroups(Generic.Heading, Whitespace)), |
| # Double-slashed magic words |
| (words(double_slashes_i, prefix=r'(?i)'), Name.Function.Magic), |
| (words(double_slashes), Name.Function.Magic), |
| # Raw URLs |
| (r'(?i)\b(?:{}){}{}*'.format('|'.join(protocols), |
| link_address, link_char_class), Name.Label), |
| # Magic links |
| (r'\b(?:RFC|PMID){}+[0-9]+\b'.format(nbsp_char), |
| Name.Function.Magic), |
| (r"""(?x) |
| \bISBN {nbsp_char} |
| (?: 97[89] {nbsp_dash}? )? |
| (?: [0-9] {nbsp_dash}? ){{9}} # escape format() |
| [0-9Xx]\b |
| """.format(nbsp_char=nbsp_char, nbsp_dash=f'(?:-|{nbsp_char})'), Name.Function.Magic), |
| include('list'), |
| include('inline'), |
| include('text'), |
| ], |
| 'redirect-inner': [ |
| (r'(\]\])(\s*?\n)', bygroups(Punctuation, Whitespace), '#pop'), |
| (r'(\#)([^#]*?)', bygroups(Punctuation, Name.Label)), |
| (r'(?i)[{}]+'.format(title_char), Name.Tag), |
| ], |
| 'list': [ |
| # Description lists |
| (r'^;', Keyword, 'dt'), |
| # Ordered lists, unordered lists and indents |
| (r'^[#:*]+', Keyword), |
| # Horizontal rules |
| (r'^-{4,}', Keyword), |
| ], |
| 'inline': [ |
| # Signatures |
| (r'~{3,5}', Keyword), |
| # Entities |
| include('entity'), |
| # Bold & italic |
| (r"('')(''')(?!')", bygroups(Generic.Emph, |
| Generic.EmphStrong), 'inline-italic-bold'), |
| (r"'''(?!')", Generic.Strong, 'inline-bold'), |
| (r"''(?!')", Generic.Emph, 'inline-italic'), |
| # Comments & parameters & templates |
| include('replaceable'), |
| # Media links |
| ( |
| r"""(?xi) |
| (\[\[) |
| (File|Image) (:) |
| ((?: [%s] | \{{2,3}[^{}]*?\}{2,3} | <!--[\s\S]*?--> )*) |
| (?: (\#) ([%s]*?) )? |
| """ % (title_char, f'{title_char}#'), |
| bygroups(Punctuation, Name.Namespace, Punctuation, |
| using(this, state=['wikilink-name']), Punctuation, Name.Label), |
| 'medialink-inner' |
| ), |
| # Wikilinks |
| ( |
| r"""(?xi) |
| (\[\[)(?!%s) # Should not contain URLs |
| (?: ([%s]*) (:))? |
| ((?: [%s] | \{{2,3}[^{}]*?\}{2,3} | <!--[\s\S]*?--> )*?) |
| (?: (\#) ([%s]*?) )? |
| (\]\]) |
| """ % ('|'.join(protocols), title_char.replace('/', ''), |
| title_char, f'{title_char}#'), |
| bygroups(Punctuation, Name.Namespace, Punctuation, |
| using(this, state=['wikilink-name']), Punctuation, Name.Label, Punctuation) |
| ), |
| ( |
| r"""(?xi) |
| (\[\[)(?!%s) |
| (?: ([%s]*) (:))? |
| ((?: [%s] | \{{2,3}[^{}]*?\}{2,3} | <!--[\s\S]*?--> )*?) |
| (?: (\#) ([%s]*?) )? |
| (\|) |
| """ % ('|'.join(protocols), title_char.replace('/', ''), |
| title_char, f'{title_char}#'), |
| bygroups(Punctuation, Name.Namespace, Punctuation, |
| using(this, state=['wikilink-name']), Punctuation, Name.Label, Punctuation), |
| 'wikilink-inner' |
| ), |
| # External links |
| ( |
| r"""(?xi) |
| (\[) |
| ((?:{}) {} {}*) |
| (\s*) |
| """.format('|'.join(protocols), link_address, link_char_class), |
| bygroups(Punctuation, Name.Label, Whitespace), |
| 'extlink-inner' |
| ), |
| # Tables |
| (r'^(:*)(\s*?)(\{\|)([^\n]*)$', bygroups(Keyword, |
| Whitespace, Punctuation, using(this, state=['root', 'attr'])), 'table'), |
| # HTML tags |
| (r'(?i)(<)({})\b'.format('|'.join(html_tags)), |
| bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'), |
| (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(html_tags)), |
| bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)), |
| # <nowiki> |
| (r'(?i)(<)(nowiki)\b', bygroups(Punctuation, |
| Name.Tag), ('tag-nowiki', 'tag-inner')), |
| # <pre> |
| (r'(?i)(<)(pre)\b', bygroups(Punctuation, |
| Name.Tag), ('tag-pre', 'tag-inner')), |
| # <categorytree> |
| (r'(?i)(<)(categorytree)\b', bygroups( |
| Punctuation, Name.Tag), ('tag-categorytree', 'tag-inner')), |
| # <hiero> |
| (r'(?i)(<)(hiero)\b', bygroups(Punctuation, |
| Name.Tag), ('tag-hiero', 'tag-inner')), |
| # <math> |
| (r'(?i)(<)(math)\b', bygroups(Punctuation, |
| Name.Tag), ('tag-math', 'tag-inner')), |
| # <chem> |
| (r'(?i)(<)(chem)\b', bygroups(Punctuation, |
| Name.Tag), ('tag-chem', 'tag-inner')), |
| # <ce> |
| (r'(?i)(<)(ce)\b', bygroups(Punctuation, |
| Name.Tag), ('tag-ce', 'tag-inner')), |
| # <charinsert> |
| (r'(?i)(<)(charinsert)\b', bygroups( |
| Punctuation, Name.Tag), ('tag-charinsert', 'tag-inner')), |
| # <templatedata> |
| (r'(?i)(<)(templatedata)\b', bygroups( |
| Punctuation, Name.Tag), ('tag-templatedata', 'tag-inner')), |
| # <gallery> |
| (r'(?i)(<)(gallery)\b', bygroups( |
| Punctuation, Name.Tag), ('tag-gallery', 'tag-inner')), |
| # <graph> |
| (r'(?i)(<)(gallery)\b', bygroups( |
| Punctuation, Name.Tag), ('tag-graph', 'tag-inner')), |
| # <dynamicpagelist> |
| (r'(?i)(<)(dynamicpagelist)\b', bygroups( |
| Punctuation, Name.Tag), ('tag-dynamicpagelist', 'tag-inner')), |
| # <inputbox> |
| (r'(?i)(<)(inputbox)\b', bygroups( |
| Punctuation, Name.Tag), ('tag-inputbox', 'tag-inner')), |
| # <rss> |
| (r'(?i)(<)(rss)\b', bygroups( |
| Punctuation, Name.Tag), ('tag-rss', 'tag-inner')), |
| # <imagemap> |
| (r'(?i)(<)(imagemap)\b', bygroups( |
| Punctuation, Name.Tag), ('tag-imagemap', 'tag-inner')), |
| # <syntaxhighlight> |
| (r'(?i)(</)(syntaxhighlight)\b(\s*)(>)', |
| bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)), |
| (r'(?si)(<)(syntaxhighlight)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)', |
| bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)), |
| # <syntaxhighlight>: Fallback case for self-closing tags |
| (r'(?i)(<)(syntaxhighlight)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups( |
| Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)), |
| # <source> |
| (r'(?i)(</)(source)\b(\s*)(>)', |
| bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)), |
| (r'(?si)(<)(source)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)', |
| bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)), |
| # <source>: Fallback case for self-closing tags |
| (r'(?i)(<)(source)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups( |
| Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)), |
| # <score> |
| (r'(?i)(</)(score)\b(\s*)(>)', |
| bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)), |
| (r'(?si)(<)(score)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)', |
| bygroups(Punctuation, Name.Tag, handle_score)), |
| # <score>: Fallback case for self-closing tags |
| (r'(?i)(<)(score)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups( |
| Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)), |
| # Other parser tags |
| (r'(?i)(<)({})\b'.format('|'.join(parser_tags)), |
| bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'), |
| (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(parser_tags)), |
| bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)), |
| # LanguageConverter markups |
| ( |
| r"""(?xi) |
| (-\{{) # Escape format() |
| (?: ([^|]) (\|))? |
| (?: (\s* (?:{variants}) \s*) (=>))? |
| (\s* (?:{variants}) \s*) (:) |
| """.format(variants='|'.join(variant_langs)), |
| bygroups(Punctuation, Keyword, Punctuation, |
| Name.Label, Operator, Name.Label, Punctuation), |
| 'lc-inner' |
| ), |
| (r'-\{(?!\{)', Punctuation, 'lc-raw'), |
| ], |
| 'wikilink-name': [ |
| include('replaceable'), |
| (r'[^{<]+', Name.Tag), |
| (r'(?s).', Name.Tag), |
| ], |
| 'wikilink-inner': [ |
| # Quit in case of another wikilink |
| (r'(?=\[\[)', Punctuation, '#pop'), |
| (r'\]\]', Punctuation, '#pop'), |
| include('inline'), |
| include('text'), |
| ], |
| 'medialink-inner': [ |
| (r'\]\]', Punctuation, '#pop'), |
| (r'(\|)([^\n=|]*)(=)', |
| bygroups(Punctuation, Name.Attribute, Operator)), |
| (r'\|', Punctuation), |
| include('inline'), |
| include('text'), |
| ], |
| 'quote-common': [ |
| # Quit in case of link/template endings |
| (r'(?=\]\]|\{\{|\}\})', Punctuation, '#pop'), |
| (r'\n', Text, '#pop'), |
| ], |
| 'inline-italic': [ |
| include('quote-common'), |
| (r"('')(''')(?!')", bygroups(Generic.Emph, |
| Generic.Strong), ('#pop', 'inline-bold')), |
| (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic-bold')), |
| (r"''(?!')", Generic.Emph, '#pop'), |
| include('inline'), |
| include('text-italic'), |
| ], |
| 'inline-bold': [ |
| include('quote-common'), |
| (r"(''')('')(?!')", bygroups( |
| Generic.Strong, Generic.Emph), ('#pop', 'inline-italic')), |
| (r"'''(?!')", Generic.Strong, '#pop'), |
| (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold-italic')), |
| include('inline'), |
| include('text-bold'), |
| ], |
| 'inline-bold-italic': [ |
| include('quote-common'), |
| (r"('')(''')(?!')", bygroups(Generic.EmphStrong, |
| Generic.Strong), '#pop'), |
| (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic')), |
| (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold')), |
| include('inline'), |
| include('text-bold-italic'), |
| ], |
| 'inline-italic-bold': [ |
| include('quote-common'), |
| (r"(''')('')(?!')", bygroups( |
| Generic.EmphStrong, Generic.Emph), '#pop'), |
| (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic')), |
| (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold')), |
| include('inline'), |
| include('text-bold-italic'), |
| ], |
| 'lc-inner': [ |
| ( |
| r"""(?xi) |
| (;) |
| (?: (\s* (?:{variants}) \s*) (=>))? |
| (\s* (?:{variants}) \s*) (:) |
| """.format(variants='|'.join(variant_langs)), |
| bygroups(Punctuation, Name.Label, |
| Operator, Name.Label, Punctuation) |
| ), |
| (r';?\s*?\}-', Punctuation, '#pop'), |
| include('inline'), |
| include('text'), |
| ], |
| 'lc-raw': [ |
| (r'\}-', Punctuation, '#pop'), |
| include('inline'), |
| include('text'), |
| ], |
| 'replaceable': [ |
| # Comments |
| (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline), |
| # Parameters |
| ( |
| r"""(?x) |
| (\{{3}) |
| ([^|]*?) |
| (?=\}{3}|\|) |
| """, |
| bygroups(Punctuation, Name.Variable), |
| 'parameter-inner', |
| ), |
| # Magic variables |
| (r'(?i)(\{\{)(\s*)(%s)(\s*)(\}\})' % '|'.join(magic_vars_i), |
| bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)), |
| (r'(\{\{)(\s*)(%s)(\s*)(\}\})' % '|'.join(magic_vars), |
| bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)), |
| # Parser functions & templates |
| (r'\{\{', Punctuation, 'template-begin-space'), |
| # <tvar> legacy syntax |
| (r'(?i)(<)(tvar)\b(\|)([^>]*?)(>)', bygroups(Punctuation, |
| Name.Tag, Punctuation, String, Punctuation)), |
| (r'</>', Punctuation, '#pop'), |
| # <tvar> |
| (r'(?i)(<)(tvar)\b', bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'), |
| (r'(?i)(</)(tvar)\b(\s*)(>)', |
| bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)), |
| ], |
| 'parameter-inner': [ |
| (r'\}{3}', Punctuation, '#pop'), |
| (r'\|', Punctuation), |
| include('inline'), |
| include('text'), |
| ], |
| 'template-begin-space': [ |
| # Templates allow line breaks at the beginning, and due to how MediaWiki handles |
| # comments, an extra state is required to handle things like {{\n<!---->\n name}} |
| (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline), |
| (r'\s+', Whitespace), |
| # Parser functions |
| ( |
| r'(?i)(\#[%s]*?|%s)(:)' % (title_char, |
| '|'.join(parser_functions_i)), |
| bygroups(Name.Function, Punctuation), ('#pop', 'template-inner') |
| ), |
| ( |
| r'(%s)(:)' % ('|'.join(parser_functions)), |
| bygroups(Name.Function, Punctuation), ('#pop', 'template-inner') |
| ), |
| # Templates |
| ( |
| r'(?i)([%s]*?)(:)' % title_char, |
| bygroups(Name.Namespace, Punctuation), ('#pop', 'template-name') |
| ), |
| default(('#pop', 'template-name'),), |
| ], |
| 'template-name': [ |
| (r'(\s*?)(\|)', bygroups(Text, Punctuation), ('#pop', 'template-inner')), |
| (r'\}\}', Punctuation, '#pop'), |
| (r'\n', Text, '#pop'), |
| include('replaceable'), |
| *text_rules(Name.Tag), |
| ], |
| 'template-inner': [ |
| (r'\}\}', Punctuation, '#pop'), |
| (r'\|', Punctuation), |
| ( |
| r"""(?x) |
| (?<=\|) |
| ( (?: (?! \{\{ | \}\} )[^=\|<])*? ) # Exclude templates and tags |
| (=) |
| """, |
| bygroups(Name.Label, Operator) |
| ), |
| include('inline'), |
| include('text'), |
| ], |
| 'table': [ |
| # Use [ \t\n\r\0\x0B] instead of \s to follow PHP trim() behavior |
| # Endings |
| (r'^([ \t\n\r\0\x0B]*?)(\|\})', |
| bygroups(Whitespace, Punctuation), '#pop'), |
| # Table rows |
| (r'^([ \t\n\r\0\x0B]*?)(\|-+)(.*)$', bygroups(Whitespace, Punctuation, |
| using(this, state=['root', 'attr']))), |
| # Captions |
| ( |
| r"""(?x) |
| ^([ \t\n\r\0\x0B]*?)(\|\+) |
| # Exclude links, template and tags |
| (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|) )? |
| (.*?)$ |
| """, |
| bygroups(Whitespace, Punctuation, using(this, state=[ |
| 'root', 'attr']), Punctuation, Generic.Heading), |
| ), |
| # Table data |
| ( |
| r"""(?x) |
| ( ^(?:[ \t\n\r\0\x0B]*?)\| | \|\| ) |
| (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )? |
| """, |
| bygroups(Punctuation, using(this, state=[ |
| 'root', 'attr']), Punctuation), |
| ), |
| # Table headers |
| ( |
| r"""(?x) |
| ( ^(?:[ \t\n\r\0\x0B]*?)! ) |
| (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )? |
| """, |
| bygroups(Punctuation, using(this, state=[ |
| 'root', 'attr']), Punctuation), |
| 'table-header', |
| ), |
| include('list'), |
| include('inline'), |
| include('text'), |
| ], |
| 'table-header': [ |
| # Requires another state for || handling inside headers |
| (r'\n', Text, '#pop'), |
| ( |
| r"""(?x) |
| (!!|\|\|) |
| (?: |
| ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? ) |
| (\|)(?!\|) |
| )? |
| """, |
| bygroups(Punctuation, using(this, state=[ |
| 'root', 'attr']), Punctuation) |
| ), |
| *text_rules(Generic.Subheading), |
| ], |
| 'entity': [ |
| (r'&\S*?;', Name.Entity), |
| ], |
| 'dt': [ |
| (r'\n', Text, '#pop'), |
| include('inline'), |
| (r':', Keyword, '#pop'), |
| include('text'), |
| ], |
| 'extlink-inner': [ |
| (r'\]', Punctuation, '#pop'), |
| include('inline'), |
| include('text'), |
| ], |
| 'nowiki-ish': [ |
| include('entity'), |
| include('text'), |
| ], |
| 'attr': [ |
| include('replaceable'), |
| (r'\s+', Whitespace), |
| (r'(=)(\s*)(")', bygroups(Operator, Whitespace, String.Double), 'attr-val-2'), |
| (r"(=)(\s*)(')", bygroups(Operator, Whitespace, String.Single), 'attr-val-1'), |
| (r'(=)(\s*)', bygroups(Operator, Whitespace), 'attr-val-0'), |
| (r'[\w:-]+', Name.Attribute), |
| |
| ], |
| 'attr-val-0': [ |
| (r'\s', Whitespace, '#pop'), |
| include('replaceable'), |
| *text_rules(String), |
| ], |
| 'attr-val-1': [ |
| (r"'", String.Single, '#pop'), |
| include('replaceable'), |
| *text_rules(String.Single), |
| ], |
| 'attr-val-2': [ |
| (r'"', String.Double, '#pop'), |
| include('replaceable'), |
| *text_rules(String.Double), |
| ], |
| 'tag-inner-ordinary': [ |
| (r'/?\s*>', Punctuation, '#pop'), |
| include('tag-attr'), |
| ], |
| 'tag-inner': [ |
| # Return to root state for self-closing tags |
| (r'/\s*>', Punctuation, '#pop:2'), |
| (r'\s*>', Punctuation, '#pop'), |
| include('tag-attr'), |
| ], |
| # There states below are just like their non-tag variants, the key difference is |
| # they forcibly quit when encountering tag closing markup |
| 'tag-attr': [ |
| include('replaceable'), |
| (r'\s+', Whitespace), |
| (r'(=)(\s*)(")', bygroups(Operator, |
| Whitespace, String.Double), 'tag-attr-val-2'), |
| (r"(=)(\s*)(')", bygroups(Operator, |
| Whitespace, String.Single), 'tag-attr-val-1'), |
| (r'(=)(\s*)', bygroups(Operator, Whitespace), 'tag-attr-val-0'), |
| (r'[\w:-]+', Name.Attribute), |
| |
| ], |
| 'tag-attr-val-0': [ |
| (r'\s', Whitespace, '#pop'), |
| (r'/?>', Punctuation, '#pop:2'), |
| include('replaceable'), |
| *text_rules(String), |
| ], |
| 'tag-attr-val-1': [ |
| (r"'", String.Single, '#pop'), |
| (r'/?>', Punctuation, '#pop:2'), |
| include('replaceable'), |
| *text_rules(String.Single), |
| ], |
| 'tag-attr-val-2': [ |
| (r'"', String.Double, '#pop'), |
| (r'/?>', Punctuation, '#pop:2'), |
| include('replaceable'), |
| *text_rules(String.Double), |
| ], |
| 'tag-nowiki': nowiki_tag_rules('nowiki'), |
| 'tag-pre': nowiki_tag_rules('pre'), |
| 'tag-categorytree': plaintext_tag_rules('categorytree'), |
| 'tag-dynamicpagelist': plaintext_tag_rules('dynamicpagelist'), |
| 'tag-hiero': plaintext_tag_rules('hiero'), |
| 'tag-inputbox': plaintext_tag_rules('inputbox'), |
| 'tag-imagemap': plaintext_tag_rules('imagemap'), |
| 'tag-charinsert': plaintext_tag_rules('charinsert'), |
| 'tag-timeline': plaintext_tag_rules('timeline'), |
| 'tag-gallery': plaintext_tag_rules('gallery'), |
| 'tag-graph': plaintext_tag_rules('graph'), |
| 'tag-rss': plaintext_tag_rules('rss'), |
| 'tag-math': delegate_tag_rules('math', TexLexer), |
| 'tag-chem': delegate_tag_rules('chem', TexLexer), |
| 'tag-ce': delegate_tag_rules('ce', TexLexer), |
| 'tag-templatedata': delegate_tag_rules('templatedata', JsonLexer), |
| 'text-italic': text_rules(Generic.Emph), |
| 'text-bold': text_rules(Generic.Strong), |
| 'text-bold-italic': text_rules(Generic.EmphStrong), |
| 'text': text_rules(Text), |
| } |