| """ |
| pygments.lexers.ml |
| ~~~~~~~~~~~~~~~~~~ |
| |
| Lexers for ML family languages. |
| |
| :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS. |
| :license: BSD, see LICENSE for details. |
| """ |
| |
| import re |
| |
| from pygments.lexer import RegexLexer, include, bygroups, default, words |
| from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ |
| Number, Punctuation, Error |
| |
| __all__ = ['SMLLexer', 'OcamlLexer', 'OpaLexer', 'ReasonLexer', 'FStarLexer'] |
| |
| |
class SMLLexer(RegexLexer):
    """
    For the Standard ML language.

    .. versionadded:: 1.5
    """

    name = 'Standard ML'
    aliases = ['sml']
    filenames = ['*.sml', '*.sig', '*.fun']
    mimetypes = ['text/x-standardml', 'application/x-standardml']

    # Reserved words that have the lexical shape of an alphanumeric
    # identifier; they must never be colored as plain names.
    alphanumid_reserved = {
        # Core
        'abstype', 'and', 'andalso', 'as', 'case', 'datatype', 'do', 'else',
        'end', 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix',
        'infixr', 'let', 'local', 'nonfix', 'of', 'op', 'open', 'orelse',
        'raise', 'rec', 'then', 'type', 'val', 'with', 'withtype', 'while',
        # Modules
        'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature',
        'struct', 'structure', 'where',
    }

    # Reserved tokens that have the lexical shape of a symbolic identifier
    # (entries are regex-escaped where necessary, e.g. r'\|').
    symbolicid_reserved = {
        # Core
        ':', r'\|', '=', '=>', '->', '#',
        # Modules
        ':>',
    }

    # Reserved tokens that can never be part of an identifier.
    nonid_reserved = {'(', ')', '[', ']', '{', '}', ',', ';', '...', '_'}

    # SML has two identifier classes: alphanumeric (starting with a letter)
    # and symbolic (a run of operator characters).
    alphanumid_re = r"[a-zA-Z][\w']*"
    symbolicid_re = r"[!%&$#+\-/:<=>?@\\~`^|*]+"

    # A character constant is a sequence of the form #s, where s is a string
    # constant denoting a string of size one character. This setup just parses
    # the entire string as either a String.Double or a String.Char (depending
    # on the argument), even if the String.Char is an erroneous
    # multiple-character string.
    def stringy(whatkind):
        """Build the rule list for a string/char literal body state.

        Helper run at class-construction time (note: no ``self``); used
        below for both the 'string' and 'char' states.
        """
        return [
            (r'[^"\\]', whatkind),
            (r'\\[\\"abtnvfr]', String.Escape),
            # Control-character notation is used for codes < 32,
            # where \^@ == \000
            (r'\\\^[\x40-\x5e]', String.Escape),
            # Docs say 'decimal digits'
            (r'\\[0-9]{3}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            (r'\\\s+\\', String.Interpol),
            (r'"', whatkind, '#pop'),
        ]

    # Callbacks for distinguishing tokens and reserved words
    def long_id_callback(self, match):
        """Yield tokens for one 'Module.' component of a long identifier.

        Reserved words are illegal as structure names here, so they are
        emitted as Error.
        """
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        else:
            token = Name.Namespace
        yield match.start(1), token, match.group(1)
        yield match.start(2), Punctuation, match.group(2)

    def end_id_callback(self, match):
        """Yield the final component of a long identifier (after the dots).

        Reserved words (alphanumeric or symbolic) cannot end a long
        identifier, so they are emitted as Error.
        """
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        elif match.group(1) in self.symbolicid_reserved:
            token = Error
        else:
            token = Name
        yield match.start(1), token, match.group(1)

    def id_callback(self, match):
        """Yield a plain identifier, distinguishing reserved words."""
        str = match.group(1)
        if str in self.alphanumid_reserved:
            token = Keyword.Reserved
        elif str in self.symbolicid_reserved:
            token = Punctuation
        else:
            token = Name
        yield match.start(1), token, str

    tokens = {
        # Whitespace and comments are (almost) everywhere
        'whitespace': [
            (r'\s+', Text),
            (r'\(\*', Comment.Multiline, 'comment'),
        ],

        'delimiters': [
            # This lexer treats these delimiters specially:
            # Delimiters define scopes, and the scope is how the meaning of
            # the `|' is resolved - is it a case/handle expression, or function
            # definition by cases? (This is not how the Definition works, but
            # it's how MLton behaves, see http://mlton.org/SMLNJDeviations)
            (r'\(|\[|\{', Punctuation, 'main'),
            (r'\)|\]|\}', Punctuation, '#pop'),
            (r'\b(let|if|local)\b(?!\')', Keyword.Reserved, ('main', 'main')),
            (r'\b(struct|sig|while)\b(?!\')', Keyword.Reserved, 'main'),
            (r'\b(do|else|end|in|then)\b(?!\')', Keyword.Reserved, '#pop'),
        ],

        'core': [
            # Punctuation that doesn't overlap symbolic identifiers
            (r'(%s)' % '|'.join(re.escape(z) for z in nonid_reserved),
             Punctuation),

            # Special constants: strings, floats, numbers in decimal and hex
            # (note: SML uses `~' rather than `-' for negation)
            (r'#"', String.Char, 'char'),
            (r'"', String.Double, 'string'),
            (r'~?0x[0-9a-fA-F]+', Number.Hex),
            (r'0wx[0-9a-fA-F]+', Number.Hex),
            (r'0w\d+', Number.Integer),
            (r'~?\d+\.\d+[eE]~?\d+', Number.Float),
            (r'~?\d+\.\d+', Number.Float),
            (r'~?\d+[eE]~?\d+', Number.Float),
            (r'~?\d+', Number.Integer),

            # Labels
            (r'#\s*[1-9][0-9]*', Name.Label),
            (r'#\s*(%s)' % alphanumid_re, Name.Label),
            (r'#\s+(%s)' % symbolicid_re, Name.Label),
            # Some reserved words trigger a special, local lexer state change
            (r'\b(datatype|abstype)\b(?!\')', Keyword.Reserved, 'dname'),
            (r'\b(exception)\b(?!\')', Keyword.Reserved, 'ename'),
            (r'\b(functor|include|open|signature|structure)\b(?!\')',
             Keyword.Reserved, 'sname'),
            (r'\b(type|eqtype)\b(?!\')', Keyword.Reserved, 'tname'),

            # Regular identifiers, long and otherwise
            (r'\'[\w\']*', Name.Decorator),
            (r'(%s)(\.)' % alphanumid_re, long_id_callback, "dotted"),
            (r'(%s)' % alphanumid_re, id_callback),
            (r'(%s)' % symbolicid_re, id_callback),
        ],
        'dotted': [
            (r'(%s)(\.)' % alphanumid_re, long_id_callback),
            (r'(%s)' % alphanumid_re, end_id_callback, "#pop"),
            (r'(%s)' % symbolicid_re, end_id_callback, "#pop"),
            # Whitespace may not appear inside a long identifier.
            (r'\s+', Error),
            (r'\S+', Error),
        ],


        # Main parser (prevents errors in files that have scoping errors)
        'root': [
            default('main')
        ],

        # In this scope, I expect '|' to not be followed by a function name,
        # and I expect 'and' to be followed by a binding site
        'main': [
            include('whitespace'),

            # Special behavior of val/and/fun
            (r'\b(val|and)\b(?!\')', Keyword.Reserved, 'vname'),
            (r'\b(fun)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main-fun', 'fname')),

            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # In this scope, I expect '|' and 'and' to be followed by a function
        'main-fun': [
            include('whitespace'),

            (r'\s', Text),
            (r'\(\*', Comment.Multiline, 'comment'),

            # Special behavior of val/and/fun
            (r'\b(fun|and)\b(?!\')', Keyword.Reserved, 'fname'),
            (r'\b(val)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main', 'vname')),

            # Special behavior of '|' and '|'-manipulating keywords
            (r'\|', Punctuation, 'fname'),
            (r'\b(case|handle)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main')),

            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # Character and string parsers
        'char': stringy(String.Char),
        'string': stringy(String.Double),

        # Zero-width rule: popping out of a sub-state as soon as the next
        # token is a reserved word, without consuming it.
        'breakout': [
            (r'(?=\b(%s)\b(?!\'))' % '|'.join(alphanumid_reserved), Text, '#pop'),
        ],

        # Dealing with what comes after module system keywords
        'sname': [
            include('whitespace'),
            include('breakout'),

            (r'(%s)' % alphanumid_re, Name.Namespace),
            default('#pop'),
        ],

        # Dealing with what comes after the 'fun' (or 'and' or '|') keyword
        'fname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),

            (r'(%s)' % alphanumid_re, Name.Function, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Function, '#pop'),

            # Ignore interesting function declarations like "fun (x + y) = ..."
            default('#pop'),
        ],

        # Dealing with what comes after the 'val' (or 'and') keyword
        'vname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),

            (r'(%s)(\s*)(=(?!%s))' % (alphanumid_re, symbolicid_re),
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (r'(%s)(\s*)(=(?!%s))' % (symbolicid_re, symbolicid_re),
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (r'(%s)' % alphanumid_re, Name.Variable, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Variable, '#pop'),

            # Ignore interesting patterns like 'val (x, y)'
            default('#pop'),
        ],

        # Dealing with what comes after the 'type' (or 'and') keyword
        'tname': [
            include('whitespace'),
            include('breakout'),

            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (r'=(?!%s)' % symbolicid_re, Punctuation, ('#pop', 'typbind')),

            (r'(%s)' % alphanumid_re, Keyword.Type),
            (r'(%s)' % symbolicid_re, Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # A type binding includes most identifiers
        'typbind': [
            include('whitespace'),

            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),

            include('breakout'),
            include('core'),
            (r'\S+', Error, '#pop'),
        ],

        # Dealing with what comes after the 'datatype' (or 'and') keyword
        'dname': [
            include('whitespace'),
            include('breakout'),

            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            # "datatype t = datatype u" is a datatype replication, not a bind
            (r'(=)(\s*)(datatype)',
             bygroups(Punctuation, Text, Keyword.Reserved), '#pop'),
            (r'=(?!%s)' % symbolicid_re, Punctuation,
             ('#pop', 'datbind', 'datcon')),

            (r'(%s)' % alphanumid_re, Keyword.Type),
            (r'(%s)' % symbolicid_re, Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # common case - A | B | C of int
        'datbind': [
            include('whitespace'),

            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'dname')),
            (r'\b(withtype)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),
            (r'\b(of)\b(?!\')', Keyword.Reserved),

            (r'(\|)(\s*)(%s)' % alphanumid_re,
             bygroups(Punctuation, Text, Name.Class)),
            (r'(\|)(\s+)(%s)' % symbolicid_re,
             bygroups(Punctuation, Text, Name.Class)),

            include('breakout'),
            include('core'),
            (r'\S+', Error),
        ],

        # Dealing with what comes after an exception
        'ename': [
            include('whitespace'),

            (r'(and\b)(\s+)(%s)' % alphanumid_re,
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (r'(and\b)(\s*)(%s)' % symbolicid_re,
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (r'\b(of)\b(?!\')', Keyword.Reserved),
            (r'(%s)|(%s)' % (alphanumid_re, symbolicid_re), Name.Class),

            default('#pop'),
        ],

        # The first constructor of a datatype binding.
        'datcon': [
            include('whitespace'),
            (r'(%s)' % alphanumid_re, Name.Class, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Class, '#pop'),
            (r'\S+', Error, '#pop'),
        ],

        # Series of type variables
        'tyvarseq': [
            (r'\s', Text),
            (r'\(\*', Comment.Multiline, 'comment'),

            (r'\'[\w\']*', Name.Decorator),
            (alphanumid_re, Name),
            (r',', Punctuation),
            (r'\)', Punctuation, '#pop'),
            (symbolicid_re, Name),
        ],

        # SML comments nest, hence the '#push'.
        'comment': [
            (r'[^(*)]', Comment.Multiline),
            (r'\(\*', Comment.Multiline, '#push'),
            (r'\*\)', Comment.Multiline, '#pop'),
            (r'[(*)]', Comment.Multiline),
        ],
    }
| |
| |
class OcamlLexer(RegexLexer):
    """
    For the OCaml language.

    .. versionadded:: 0.7
    """

    name = 'OCaml'
    url = 'https://ocaml.org/'
    aliases = ['ocaml']
    filenames = ['*.ml', '*.mli', '*.mll', '*.mly']
    mimetypes = ['text/x-ocaml']

    keywords = (
        'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done',
        'downto', 'else', 'end', 'exception', 'external', 'false',
        'for', 'fun', 'function', 'functor', 'if', 'in', 'include',
        'inherit', 'initializer', 'lazy', 'let', 'match', 'method',
        'module', 'mutable', 'new', 'object', 'of', 'open', 'private',
        'raise', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
        'type', 'value', 'val', 'virtual', 'when', 'while', 'with',
    )
    # Operator/punctuation tokens, already regex-escaped where needed.
    keyopts = (
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
        r'-\.', '->', r'\.', r'\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
        '<-', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
        r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|]', r'\}', '~'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ('and', 'asr', 'land', 'lor', 'lsl', 'lxor', 'mod', 'or')
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'\(\*(?![)])', Comment, 'comment'),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            # Reversed so that longer alternatives win over their prefixes.
            (r'(%s)' % '|'.join(keyopts[::-1]), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            # A float literal requires a fractional part and/or an exponent.
            # The dot is escaped (it previously matched any character) and
            # the exponent is optional, so plain decimals like `3.14` and
            # trailing-dot forms like `1.` lex as Number.Float.
            (r'-?\d[\d_]*(\.[\d_]*([eE][+\-]?\d[\d_]*)?|[eE][+\-]?\d[\d_]*)',
             Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element

            (r'"', String.Double, 'string'),

            # Labeled/optional arguments: ~foo: and ?foo:
            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        # OCaml comments nest, hence the '#push'.
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        # A dotted path, e.g. `List.map` or `A.B.t`.
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }
| |
| |
class OpaLexer(RegexLexer):
    """
    Lexer for the Opa language.

    .. versionadded:: 1.5
    """

    name = 'Opa'
    aliases = ['opa']
    filenames = ['*.opa']
    mimetypes = ['text/x-opa']

    # most of these aren't strictly keywords
    # but if you color only real keywords, you might just
    # as well not color anything
    keywords = (
        'and', 'as', 'begin', 'case', 'client', 'css', 'database', 'db', 'do',
        'else', 'end', 'external', 'forall', 'function', 'if', 'import',
        'match', 'module', 'or', 'package', 'parser', 'rec', 'server', 'then',
        'type', 'val', 'with', 'xml_parser',
    )

    # matches both stuff and `stuff`
    ident_re = r'(([a-zA-Z_]\w*)|(`[^`]*`))'

    op_re = r'[.=\-<>,@~%/+?*&^!]'
    punc_re = r'[()\[\],;|]'  # '{' and '}' are treated elsewhere
    # because they are also used for inserts

    tokens = {
        # copied from the caml lexer, should be adapted
        'escape-sequence': [
            (r'\\[\\"\'ntr}]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],

        # factorizing these rules, because they are inserted many times
        'comments': [
            (r'/\*', Comment, 'nested-comment'),
            (r'//.*?$', Comment),
        ],
        'comments-and-spaces': [
            include('comments'),
            (r'\s+', Text),
        ],

        'root': [
            include('comments-and-spaces'),
            # keywords
            (words(keywords, prefix=r'\b', suffix=r'\b'), Keyword),
            # directives
            # we could parse the actual set of directives instead of anything
            # starting with @, but this is troublesome
            # because it needs to be adjusted all the time
            # and assuming we parse only sources that compile, it is useless
            (r'@' + ident_re + r'\b', Name.Builtin.Pseudo),

            # number literals
            # The dots are escaped: with a bare `.` the first rule could
            # lex e.g. `a1e5` as a float. The exponent is now optional on
            # the dotted forms so `1.5` is recognized as a float too.
            (r'-?\.\d+([eE][+\-]?\d+)?', Number.Float),
            (r'-?\d+\.\d*([eE][+\-]?\d+)?', Number.Float),
            (r'-?\d+[eE][+\-]?\d+', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[bB][01]+', Number.Bin),
            (r'\d+', Number.Integer),
            # color literals
            (r'#[\da-fA-F]{3,6}', Number.Integer),

            # string literals
            (r'"', String.Double, 'string'),
            # char literal, should be checked because this is the regexp from
            # the caml lexer
            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2})|.)'",
             String.Char),

            # this is meant to deal with embedded exprs in strings
            # every time we find a '}' we pop a state so that if we were
            # inside a string, we are back in the string state
            # as a consequence, we must also push a state every time we find a
            # '{' or else we will have errors when parsing {} for instance
            (r'\{', Operator, '#push'),
            (r'\}', Operator, '#pop'),

            # html literals
            # this is much stricter than the actual parser,
            # since a<b would not be parsed as html
            # but then again, the parser is way too lax, and we can't hope
            # to have something as tolerant
            (r'<(?=[a-zA-Z>])', String.Single, 'html-open-tag'),

            # db path
            # matching the '[_]' in '/a[_]' because it is a part
            # of the syntax of the db path definition
            # unfortunately, i don't know how to match the ']' in
            # /a[1], so this is somewhat inconsistent
            (r'[@?!]?(/\w+)+(\[_\])?', Name.Variable),
            # putting the same color on <- as on db path, since
            # it can be used only to mean Db.write
            (r'<-(?!'+op_re+r')', Name.Variable),

            # 'modules'
            # although modules are not distinguished by their names as in caml
            # the standard library seems to follow the convention that only
            # modules are capitalized
            (r'\b([A-Z]\w*)(?=\.)', Name.Namespace),

            # operators
            # = has a special role because this is the only
            # way to syntactically distinguish binding constructions
            # unfortunately, this colors the equal in {x=2} too
            (r'=(?!'+op_re+r')', Keyword),
            (r'(%s)+' % op_re, Operator),
            (r'(%s)+' % punc_re, Operator),

            # coercions
            (r':', Operator, 'type'),
            # type variables
            # we need this rule because we don't parse specially type
            # definitions so in "type t('a) = ...", "'a" is parsed by 'root'
            ("'"+ident_re, Keyword.Type),

            # id literal, #something, or #{expr}
            (r'#'+ident_re, String.Single),
            (r'#(?=\{)', String.Single),

            # identifiers
            # this avoids to color '2' in 'a2' as an integer
            (ident_re, Text),

            # default, not sure if that is needed or not
            # (r'.', Text),
        ],

        # it is quite painful to have to parse types to know where they end
        # this is the general rule for a type
        # a type is either:
        # * -> ty
        # * type-with-slash
        # * type-with-slash -> ty
        # * type-with-slash (, type-with-slash)+ -> ty
        #
        # the code is pretty funky in here, but this code would roughly
        # translate in caml to:
        # let rec type stream =
        # match stream with
        # | [< "->"; stream >] -> type stream
        # | [< ""; stream >] ->
        # type_with_slash stream
        # type_lhs_1 stream;
        # and type_1 stream = ...
        'type': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type),
            default(('#pop', 'type-lhs-1', 'type-with-slash')),
        ],

        # parses all the atomic or closed constructions in the syntax of type
        # expressions: record types, tuple types, type constructors, basic type
        # and type variables
        'type-1': [
            include('comments-and-spaces'),
            (r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (r'~?\{', Keyword.Type, ('#pop', 'type-record')),
            (ident_re+r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (ident_re, Keyword.Type, '#pop'),
            ("'"+ident_re, Keyword.Type),
            # this case is not in the syntax but sometimes
            # we think we are parsing types when in fact we are parsing
            # some css, so we just pop the states until we get back into
            # the root state
            default('#pop'),
        ],

        # type-with-slash is either:
        # * type-1
        # * type-1 (/ type-1)+
        'type-with-slash': [
            include('comments-and-spaces'),
            default(('#pop', 'slash-type-1', 'type-1')),
        ],
        'slash-type-1': [
            include('comments-and-spaces'),
            ('/', Keyword.Type, ('#pop', 'type-1')),
            # same remark as above
            default('#pop'),
        ],

        # we go in this state after having parsed a type-with-slash
        # while trying to parse a type
        # and at this point we must determine if we are parsing an arrow
        # type (in which case we must continue parsing) or not (in which
        # case we stop)
        'type-lhs-1': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            (r'(?=,)', Keyword.Type, ('#pop', 'type-arrow')),
            default('#pop'),
        ],
        'type-arrow': [
            include('comments-and-spaces'),
            # the look ahead here allows to parse f(x : int, y : float -> truc)
            # correctly
            (r',(?=[^:]*?->)', Keyword.Type, 'type-with-slash'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            # same remark as above
            default('#pop'),
        ],

        # no need to do precise parsing for tuples and records
        # because they are closed constructions, so we can simply
        # find the closing delimiter
        # note that this function would be not work if the source
        # contained identifiers like `{)` (although it could be patched
        # to support it)
        'type-tuple': [
            include('comments-and-spaces'),
            (r'[^()/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\(', Keyword.Type, '#push'),
            (r'\)', Keyword.Type, '#pop'),
        ],
        'type-record': [
            include('comments-and-spaces'),
            (r'[^{}/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\{', Keyword.Type, '#push'),
            (r'\}', Keyword.Type, '#pop'),
        ],

        # 'type-tuple': [
        #     include('comments-and-spaces'),
        #     (r'\)', Keyword.Type, '#pop'),
        #     default(('#pop', 'type-tuple-1', 'type-1')),
        # ],
        # 'type-tuple-1': [
        #     include('comments-and-spaces'),
        #     (r',?\s*\)', Keyword.Type, '#pop'),  # ,) is a valid end of tuple, in (1,)
        #     (r',', Keyword.Type, 'type-1'),
        # ],
        # 'type-record':[
        #     include('comments-and-spaces'),
        #     (r'\}', Keyword.Type, '#pop'),
        #     (r'~?(?:\w+|`[^`]*`)', Keyword.Type, 'type-record-field-expr'),
        # ],
        # 'type-record-field-expr': [
        #
        # ],

        'nested-comment': [
            (r'[^/*]+', Comment),
            (r'/\*', Comment, '#push'),
            (r'\*/', Comment, '#pop'),
            (r'[/*]', Comment),
        ],

        # the copy pasting between string and single-string
        # is kinda sad. Is there a way to avoid that??
        'string': [
            (r'[^\\"{]+', String.Double),
            (r'"', String.Double, '#pop'),
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],
        'single-string': [
            (r'[^\\\'{]+', String.Double),
            (r'\'', String.Double, '#pop'),
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],

        # all the html stuff
        # can't really reuse some existing html parser
        # because we must be able to parse embedded expressions

        # we are in this state after someone parsed the '<' that
        # started the html literal
        'html-open-tag': [
            (r'[\w\-:]+', String.Single, ('#pop', 'html-attr')),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],

        # we are in this state after someone parsed the '</' that
        # started the end of the closing tag
        'html-end-tag': [
            # this is a star, because </> is allowed
            (r'[\w\-:]*>', String.Single, '#pop'),
        ],

        # we are in this state after having parsed '<ident(:ident)?'
        # we thus parse a possibly empty list of attributes
        'html-attr': [
            (r'\s+', Text),
            (r'[\w\-:]+=', String.Single, 'html-attr-value'),
            (r'/>', String.Single, '#pop'),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],

        'html-attr-value': [
            (r"'", String.Single, ('#pop', 'single-string')),
            (r'"', String.Single, ('#pop', 'string')),
            (r'#'+ident_re, String.Single, '#pop'),
            (r'#(?=\{)', String.Single, ('#pop', 'root')),
            (r'[^"\'{`=<>]+', String.Single, '#pop'),
            (r'\{', Operator, ('#pop', 'root')),  # this is a tail call!
        ],

        # we should probably deal with '\' escapes here
        'html-content': [
            (r'<!--', Comment, 'html-comment'),
            (r'</', String.Single, ('#pop', 'html-end-tag')),
            (r'<', String.Single, 'html-open-tag'),
            (r'\{', Operator, 'root'),
            (r'[^<{]+', String.Single),
        ],

        'html-comment': [
            (r'-->', Comment, '#pop'),
            (r'[^\-]+|-', Comment),
        ],
    }
| |
| |
class ReasonLexer(RegexLexer):
    """
    For the ReasonML language.

    .. versionadded:: 2.6
    """

    name = 'ReasonML'
    url = 'https://reasonml.github.io/'
    aliases = ['reasonml', 'reason']
    filenames = ['*.re', '*.rei']
    mimetypes = ['text/x-reasonml']

    keywords = (
        'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done', 'downto',
        'else', 'end', 'exception', 'external', 'false', 'for', 'fun', 'esfun',
        'function', 'functor', 'if', 'in', 'include', 'inherit', 'initializer', 'lazy',
        'let', 'switch', 'module', 'pub', 'mutable', 'new', 'nonrec', 'object', 'of',
        'open', 'pri', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
        'type', 'val', 'virtual', 'when', 'while', 'with',
    )
    # Operator/punctuation tokens, already regex-escaped where needed.
    keyopts = (
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
        r'-\.', '=>', r'\.', r'\.\.', r'\.\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
        '<-', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
        r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|\|', r'\|]', r'\}', '~'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ('and', 'asr', 'land', 'lor', 'lsl', 'lsr', 'lxor', 'mod', 'or')
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'//.*?\n', Comment.Single),
            (r'\/\*(?!/)', Comment.Multiline, 'comment'),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            # Reversed so that longer alternatives win over their prefixes.
            (r'(%s)' % '|'.join(keyopts[::-1]), Operator.Word),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            # A float literal requires a fractional part and/or an exponent.
            # The dot is escaped (it previously matched any character) and
            # the exponent is optional, so plain decimals like `3.14` lex
            # as Number.Float.
            (r'-?\d[\d_]*(\.[\d_]*([eE][+\-]?\d[\d_]*)?|[eE][+\-]?\d[\d_]*)',
             Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),

            (r'"', String.Double, 'string'),

            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            (r'[^/*]+', Comment.Multiline),
            (r'\/\*', Comment.Multiline, '#push'),
            (r'\*\/', Comment.Multiline, '#pop'),
            # A lone '*' or '/' inside a comment is plain comment text;
            # previously a stray '/' (e.g. in `/* a / b */`) fell through
            # to no rule at all and produced an Error token.
            (r'[*/]', Comment.Multiline),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        # A dotted path, e.g. `List.map` or `A.B.t`.
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }
| |
| |
class FStarLexer(RegexLexer):
    """
    For the F* language.

    .. versionadded:: 2.7
    """

    name = 'FStar'
    url = 'https://www.fstar-lang.org/'
    aliases = ['fstar']
    filenames = ['*.fst', '*.fsti']
    mimetypes = ['text/x-fstar']

    keywords = (
        # NOTE: the comma after 'and' was missing; implicit string
        # concatenation produced the bogus keyword 'andbegin', so neither
        # 'and' nor 'begin' was highlighted.
        'abstract', 'attributes', 'noeq', 'unopteq', 'and',
        'begin', 'by', 'default', 'effect', 'else', 'end', 'ensures',
        'exception', 'exists', 'false', 'forall', 'fun', 'function', 'if',
        'in', 'include', 'inline', 'inline_for_extraction', 'irreducible',
        'logic', 'match', 'module', 'mutable', 'new', 'new_effect', 'noextract',
        'of', 'open', 'opaque', 'private', 'range_of', 'reifiable',
        'reify', 'reflectable', 'requires', 'set_range_of', 'sub_effect',
        'synth', 'then', 'total', 'true', 'try', 'type', 'unfold', 'unfoldable',
        'val', 'when', 'with', 'not'
    )
    decl_keywords = ('let', 'rec')
    assume_keywords = ('assume', 'admit', 'assert', 'calc')
    # Operator/punctuation tokens, already regex-escaped where needed.
    keyopts = (
        r'~', r'-', r'/\\', r'\\/', r'<:', r'<@', r'\(\|', r'\|\)', r'#', r'u#',
        r'&', r'\(', r'\)', r'\(\)', r',', r'~>', r'->', r'<-', r'<--', r'<==>',
        r'==>', r'\.', r'\?', r'\?\.', r'\.\[', r'\.\(', r'\.\(\|', r'\.\[\|',
        r'\{:pattern', r':', r'::', r':=', r';', r';;', r'=', r'%\[', r'!\{',
        r'\[', r'\[@', r'\[\|', r'\|>', r'\]', r'\|\]', r'\{', r'\|', r'\}', r'\$'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|False|True|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'\(\*(?![)])', Comment, 'comment'),
            # `.*` rather than `.+` so an empty `//` is still a comment
            (r'\/\/.*$', Comment),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            (r'\b(%s)\b' % '|'.join(assume_keywords), Name.Exception),
            (r'\b(%s)\b' % '|'.join(decl_keywords), Keyword.Declaration),
            (r'(%s)' % '|'.join(keyopts[::-1]), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            # A float literal requires a fractional part and/or an exponent.
            # The dot is escaped (it previously matched any character) and
            # the exponent is optional, so plain decimals like `3.14` lex
            # as Number.Float.
            (r'-?\d[\d_]*(\.[\d_]*([eE][+\-]?\d[\d_]*)?|[eE][+\-]?\d[\d_]*)',
             Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element
            (r"\`([\w\'.]+)\`", Operator.Word),  # for infix applications
            (r"\`", Keyword),  # for quoting
            (r'"', String.Double, 'string'),

            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        # A dotted path, e.g. `FStar.List.map`.
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }