| """ |
| pygments.lexers.julia |
| ~~~~~~~~~~~~~~~~~~~~~ |
| |
| Lexers for the Julia language. |
| |
| :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS. |
| :license: BSD, see LICENSE for details. |
| """ |
| |
| from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \ |
| words, include |
| from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ |
| Number, Punctuation, Generic, Whitespace |
| from pygments.util import shebang_matches |
| from pygments.lexers._julia_builtins import OPERATORS_LIST, DOTTED_OPERATORS_LIST, \ |
| KEYWORD_LIST, BUILTIN_LIST, LITERAL_LIST |
| |
| __all__ = ['JuliaLexer', 'JuliaConsoleLexer'] |
| |
| # see https://docs.julialang.org/en/v1/manual/variables/#Allowed-Variable-Names |
| allowed_variable = \ |
| '(?:[a-zA-Z_\u00A1-\U0010ffff][a-zA-Z_0-9!\u00A1-\U0010ffff]*)' |
| # see https://github.com/JuliaLang/julia/blob/master/src/flisp/julia_opsuffs.h |
| operator_suffixes = r'[²³¹ʰʲʳʷʸˡˢˣᴬᴮᴰᴱᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾᴿᵀᵁᵂᵃᵇᵈᵉᵍᵏᵐᵒᵖᵗᵘᵛᵝᵞᵟᵠᵡᵢᵣᵤᵥᵦᵧᵨᵩᵪᶜᶠᶥᶦᶫᶰᶸᶻᶿ′″‴‵‶‷⁗⁰ⁱ⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎ₐₑₒₓₕₖₗₘₙₚₛₜⱼⱽ]*' |
| |
| class JuliaLexer(RegexLexer): |
| """ |
| For Julia source code. |
| |
| .. versionadded:: 1.6 |
| """ |
| |
| name = 'Julia' |
| url = 'https://julialang.org/' |
| aliases = ['julia', 'jl'] |
| filenames = ['*.jl'] |
| mimetypes = ['text/x-julia', 'application/x-julia'] |
| |
| tokens = { |
| 'root': [ |
| (r'\n', Whitespace), |
| (r'[^\S\n]+', Whitespace), |
| (r'#=', Comment.Multiline, "blockcomment"), |
| (r'#.*$', Comment), |
| (r'[\[\](),;]', Punctuation), |
| |
| # symbols |
| # intercept range expressions first |
| (r'(' + allowed_variable + r')(\s*)(:)(' + allowed_variable + ')', |
| bygroups(Name, Whitespace, Operator, Name)), |
| # then match :name which does not follow closing brackets, digits, or the |
| # ::, <:, and :> operators |
| (r'(?<![\]):<>\d.])(:' + allowed_variable + ')', String.Symbol), |
| |
| # type assertions - excludes expressions like ::typeof(sin) and ::avec[1] |
| (r'(?<=::)(\s*)(' + allowed_variable + r')\b(?![(\[])', |
| bygroups(Whitespace, Keyword.Type)), |
| # type comparisons |
| # - MyType <: A or MyType >: A |
| ('(' + allowed_variable + r')(\s*)([<>]:)(\s*)(' + allowed_variable + r')\b(?![(\[])', |
| bygroups(Keyword.Type, Whitespace, Operator, Whitespace, Keyword.Type)), |
| # - <: B or >: B |
| (r'([<>]:)(\s*)(' + allowed_variable + r')\b(?![(\[])', |
| bygroups(Operator, Whitespace, Keyword.Type)), |
| # - A <: or A >: |
| (r'\b(' + allowed_variable + r')(\s*)([<>]:)', |
| bygroups(Keyword.Type, Whitespace, Operator)), |
| |
| # operators |
| # Suffixes aren't actually allowed on all operators, but we'll ignore that |
| # since those cases are invalid Julia code. |
| (words([*OPERATORS_LIST, *DOTTED_OPERATORS_LIST], |
| suffix=operator_suffixes), Operator), |
| (words(['.' + o for o in DOTTED_OPERATORS_LIST], |
| suffix=operator_suffixes), Operator), |
| (words(['...', '..']), Operator), |
| |
| # NOTE |
| # Patterns below work only for definition sites and thus hardly reliable. |
| # |
| # functions |
| # (r'(function)(\s+)(' + allowed_variable + ')', |
| # bygroups(Keyword, Text, Name.Function)), |
| |
| # chars |
| (r"'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,3}|\\u[a-fA-F0-9]{1,4}|" |
| r"\\U[a-fA-F0-9]{1,6}|[^\\\'\n])'", String.Char), |
| |
| # try to match trailing transpose |
| (r'(?<=[.\w)\]])(\'' + operator_suffixes + ')+', Operator), |
| |
| # raw strings |
| (r'(raw)(""")', bygroups(String.Affix, String), 'tqrawstring'), |
| (r'(raw)(")', bygroups(String.Affix, String), 'rawstring'), |
| # regular expressions |
| (r'(r)(""")', bygroups(String.Affix, String.Regex), 'tqregex'), |
| (r'(r)(")', bygroups(String.Affix, String.Regex), 'regex'), |
| # other strings |
| (r'(' + allowed_variable + ')?(""")', |
| bygroups(String.Affix, String), 'tqstring'), |
| (r'(' + allowed_variable + ')?(")', |
| bygroups(String.Affix, String), 'string'), |
| |
| # backticks |
| (r'(' + allowed_variable + ')?(```)', |
| bygroups(String.Affix, String.Backtick), 'tqcommand'), |
| (r'(' + allowed_variable + ')?(`)', |
| bygroups(String.Affix, String.Backtick), 'command'), |
| |
| # type names |
| # - names that begin a curly expression |
| ('(' + allowed_variable + r')(\{)', |
| bygroups(Keyword.Type, Punctuation), 'curly'), |
| # - names as part of bare 'where' |
| (r'(where)(\s+)(' + allowed_variable + ')', |
| bygroups(Keyword, Whitespace, Keyword.Type)), |
| # - curly expressions in general |
| (r'(\{)', Punctuation, 'curly'), |
| # - names as part of type declaration |
| (r'(abstract|primitive)([ \t]+)(type\b)([\s()]+)(' + |
| allowed_variable + r')', |
| bygroups(Keyword, Whitespace, Keyword, Text, Keyword.Type)), |
| (r'(mutable(?=[ \t]))?([ \t]+)?(struct\b)([\s()]+)(' + |
| allowed_variable + r')', |
| bygroups(Keyword, Whitespace, Keyword, Text, Keyword.Type)), |
| |
| # macros |
| (r'@' + allowed_variable, Name.Decorator), |
| (words([*OPERATORS_LIST, '..', '.', *DOTTED_OPERATORS_LIST], |
| prefix='@', suffix=operator_suffixes), Name.Decorator), |
| |
| # keywords |
| (words(KEYWORD_LIST, suffix=r'\b'), Keyword), |
| # builtin types |
| (words(BUILTIN_LIST, suffix=r'\b'), Keyword.Type), |
| # builtin literals |
| (words(LITERAL_LIST, suffix=r'\b'), Name.Builtin), |
| |
| # names |
| (allowed_variable, Name), |
| |
| # numbers |
| (r'(\d+((_\d+)+)?\.(?!\.)(\d+((_\d+)+)?)?|\.\d+((_\d+)+)?)([eEf][+-]?[0-9]+)?', Number.Float), |
| (r'\d+((_\d+)+)?[eEf][+-]?[0-9]+', Number.Float), |
| (r'0x[a-fA-F0-9]+((_[a-fA-F0-9]+)+)?(\.([a-fA-F0-9]+((_[a-fA-F0-9]+)+)?)?)?p[+-]?\d+', Number.Float), |
| (r'0b[01]+((_[01]+)+)?', Number.Bin), |
| (r'0o[0-7]+((_[0-7]+)+)?', Number.Oct), |
| (r'0x[a-fA-F0-9]+((_[a-fA-F0-9]+)+)?', Number.Hex), |
| (r'\d+((_\d+)+)?', Number.Integer), |
| |
| # single dot operator matched last to permit e.g. ".1" as a float |
| (words(['.']), Operator), |
| ], |
| |
| "blockcomment": [ |
| (r'[^=#]', Comment.Multiline), |
| (r'#=', Comment.Multiline, '#push'), |
| (r'=#', Comment.Multiline, '#pop'), |
| (r'[=#]', Comment.Multiline), |
| ], |
| |
| 'curly': [ |
| (r'\{', Punctuation, '#push'), |
| (r'\}', Punctuation, '#pop'), |
| (allowed_variable, Keyword.Type), |
| include('root'), |
| ], |
| |
| 'tqrawstring': [ |
| (r'"""', String, '#pop'), |
| (r'([^"]|"[^"][^"])+', String), |
| ], |
| 'rawstring': [ |
| (r'"', String, '#pop'), |
| (r'\\"', String.Escape), |
| (r'([^"\\]|\\[^"])+', String), |
| ], |
| |
| # Interpolation is defined as "$" followed by the shortest full |
| # expression, which is something we can't parse. Include the most |
| # common cases here: $word, and $(paren'd expr). |
| 'interp': [ |
| (r'\$' + allowed_variable, String.Interpol), |
| (r'(\$)(\()', bygroups(String.Interpol, Punctuation), 'in-intp'), |
| ], |
| 'in-intp': [ |
| (r'\(', Punctuation, '#push'), |
| (r'\)', Punctuation, '#pop'), |
| include('root'), |
| ], |
| |
| 'string': [ |
| (r'(")(' + allowed_variable + r'|\d+)?', |
| bygroups(String, String.Affix), '#pop'), |
| # FIXME: This escape pattern is not perfect. |
| (r'\\([\\"\'$nrbtfav]|(x|u|U)[a-fA-F0-9]+|\d+)', String.Escape), |
| include('interp'), |
| # @printf and @sprintf formats |
| (r'%[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?[hlL]?[E-GXc-giorsux%]', |
| String.Interpol), |
| (r'[^"$%\\]+', String), |
| (r'.', String), |
| ], |
| 'tqstring': [ |
| (r'(""")(' + allowed_variable + r'|\d+)?', |
| bygroups(String, String.Affix), '#pop'), |
| (r'\\([\\"\'$nrbtfav]|(x|u|U)[a-fA-F0-9]+|\d+)', String.Escape), |
| include('interp'), |
| (r'[^"$%\\]+', String), |
| (r'.', String), |
| ], |
| |
| 'regex': [ |
| (r'(")([imsxa]*)?', bygroups(String.Regex, String.Affix), '#pop'), |
| (r'\\"', String.Regex), |
| (r'[^\\"]+', String.Regex), |
| ], |
| |
| 'tqregex': [ |
| (r'(""")([imsxa]*)?', bygroups(String.Regex, String.Affix), '#pop'), |
| (r'[^"]+', String.Regex), |
| ], |
| |
| 'command': [ |
| (r'(`)(' + allowed_variable + r'|\d+)?', |
| bygroups(String.Backtick, String.Affix), '#pop'), |
| (r'\\[`$]', String.Escape), |
| include('interp'), |
| (r'[^\\`$]+', String.Backtick), |
| (r'.', String.Backtick), |
| ], |
| 'tqcommand': [ |
| (r'(```)(' + allowed_variable + r'|\d+)?', |
| bygroups(String.Backtick, String.Affix), '#pop'), |
| (r'\\\$', String.Escape), |
| include('interp'), |
| (r'[^\\`$]+', String.Backtick), |
| (r'.', String.Backtick), |
| ], |
| } |
| |
| def analyse_text(text): |
| return shebang_matches(text, r'julia') |
| |
| |
| class JuliaConsoleLexer(Lexer): |
| """ |
| For Julia console sessions. Modeled after MatlabSessionLexer. |
| |
| .. versionadded:: 1.6 |
| """ |
| name = 'Julia console' |
| aliases = ['jlcon', 'julia-repl'] |
| |
| def get_tokens_unprocessed(self, text): |
| jllexer = JuliaLexer(**self.options) |
| start = 0 |
| curcode = '' |
| insertions = [] |
| output = False |
| error = False |
| |
| for line in text.splitlines(keepends=True): |
| if line.startswith('julia>'): |
| insertions.append((len(curcode), [(0, Generic.Prompt, line[:6])])) |
| curcode += line[6:] |
| output = False |
| error = False |
| elif line.startswith('help?>') or line.startswith('shell>'): |
| yield start, Generic.Prompt, line[:6] |
| yield start + 6, Text, line[6:] |
| output = False |
| error = False |
| elif line.startswith(' ') and not output: |
| insertions.append((len(curcode), [(0, Whitespace, line[:6])])) |
| curcode += line[6:] |
| else: |
| if curcode: |
| yield from do_insertions( |
| insertions, jllexer.get_tokens_unprocessed(curcode)) |
| curcode = '' |
| insertions = [] |
| if line.startswith('ERROR: ') or error: |
| yield start, Generic.Error, line |
| error = True |
| else: |
| yield start, Generic.Output, line |
| output = True |
| start += len(line) |
| |
| if curcode: |
| yield from do_insertions( |
| insertions, jllexer.get_tokens_unprocessed(curcode)) |