| """ |
| pygments.lexers.rdf |
| ~~~~~~~~~~~~~~~~~~~ |
| |
| Lexers for semantic web and RDF query languages and markup. |
| |
| :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS. |
| :license: BSD, see LICENSE for details. |
| """ |
| |
| import re |
| |
| from pygments.lexer import RegexLexer, bygroups, default |
| from pygments.token import Keyword, Punctuation, String, Number, Operator, \ |
| Generic, Whitespace, Name, Literal, Comment, Text |
| |
| __all__ = ['SparqlLexer', 'TurtleLexer', 'ShExCLexer'] |
| |
| |
class SparqlLexer(RegexLexer):
    """
    Lexer for `SPARQL <https://www.w3.org/TR/sparql11-query/>`_ query language.

    The class-level string constants below mirror the terminal productions of
    the SPARQL grammar and are concatenated into the regular expressions used
    by the ``tokens`` state machine.

    .. versionadded:: 2.0
    """
    name = 'SPARQL'
    aliases = ['sparql']
    filenames = ['*.rq', '*.sparql']
    mimetypes = ['application/sparql-query']

    # character group definitions ::
    #
    # The ``*_GRP`` constants are the *contents* of a character class (no
    # surrounding brackets) so that they can be combined with other ranges.

    PN_CHARS_BASE_GRP = ('a-zA-Z'
                         '\u00c0-\u00d6'
                         '\u00d8-\u00f6'
                         '\u00f8-\u02ff'
                         '\u0370-\u037d'
                         '\u037f-\u1fff'
                         '\u200c-\u200d'
                         '\u2070-\u218f'
                         '\u2c00-\u2fef'
                         '\u3001-\ud7ff'
                         '\uf900-\ufdcf'
                         '\ufdf0-\ufffd')

    PN_CHARS_U_GRP = (PN_CHARS_BASE_GRP + '_')

    PN_CHARS_GRP = (PN_CHARS_U_GRP +
                    r'\-' +
                    r'0-9' +
                    '\u00b7' +
                    '\u0300-\u036f' +
                    '\u203f-\u2040')

    HEX_GRP = '0-9A-Fa-f'

    # Characters that may follow a backslash inside a local name.  The
    # grammar lists the apostrophe here (not the double quote) and no space,
    # which is also what ShExCLexer in this module uses.
    PN_LOCAL_ESC_CHARS_GRP = r"_~.\-!$&'()*+,;=/?#@%"

    # terminal productions ::

    PN_CHARS_BASE = '[' + PN_CHARS_BASE_GRP + ']'

    PN_CHARS_U = '[' + PN_CHARS_U_GRP + ']'

    PN_CHARS = '[' + PN_CHARS_GRP + ']'

    HEX = '[' + HEX_GRP + ']'

    PN_LOCAL_ESC_CHARS = '[' + PN_LOCAL_ESC_CHARS_GRP + ']'

    IRIREF = r'<(?:[^<>"{}|^`\\\x00-\x20])*>'

    BLANK_NODE_LABEL = '_:[0-9' + PN_CHARS_U_GRP + '](?:[' + PN_CHARS_GRP + \
                       '.]*' + PN_CHARS + ')?'

    PN_PREFIX = PN_CHARS_BASE + '(?:[' + PN_CHARS_GRP + '.]*' + PN_CHARS + ')?'

    VARNAME = '[0-9' + PN_CHARS_U_GRP + '][' + PN_CHARS_U_GRP + \
              '0-9\u00b7\u0300-\u036f\u203f-\u2040]*'

    PERCENT = '%' + HEX + HEX

    PN_LOCAL_ESC = r'\\' + PN_LOCAL_ESC_CHARS

    # NOTE: PLX is a bare alternation; it is only ever spliced in directly
    # after a '|' inside an enclosing group, which keeps it well-scoped.
    PLX = '(?:' + PERCENT + ')|(?:' + PN_LOCAL_ESC + ')'

    PN_LOCAL = ('(?:[' + PN_CHARS_U_GRP + ':0-9' + ']|' + PLX + ')' +
                '(?:(?:[' + PN_CHARS_GRP + '.:]|' + PLX + ')*(?:[' +
                PN_CHARS_GRP + ':]|' + PLX + '))?')

    EXPONENT = r'[eE][+-]?\d+'

    # Lexer token definitions ::

    tokens = {
        'root': [
            (r'\s+', Text),
            # keywords ::
            (r'(?i)(select|construct|describe|ask|where|filter|group\s+by|minus|'
             r'distinct|reduced|from\s+named|from|order\s+by|desc|asc|limit|'
             r'offset|values|bindings|load|into|clear|drop|create|add|move|copy|'
             r'insert\s+data|delete\s+data|delete\s+where|with|delete|insert|'
             r'using\s+named|using|graph|default|named|all|optional|service|'
             r'silent|bind|undef|union|not\s+in|in|as|having|to|prefix|base)\b', Keyword),
            # the case-sensitive shorthand predicate 'a' ::
            (r'(a)\b', Keyword),
            # IRIs ::
            ('(' + IRIREF + ')', Name.Label),
            # blank nodes ::
            ('(' + BLANK_NODE_LABEL + ')', Name.Label),
            # variables ::
            ('[?$]' + VARNAME, Name.Variable),
            # prefixed names ::
            (r'(' + PN_PREFIX + r')?(\:)(' + PN_LOCAL + r')?',
             bygroups(Name.Namespace, Punctuation, Name.Tag)),
            # function names ::
            (r'(?i)(str|lang|langmatches|datatype|bound|iri|uri|bnode|rand|abs|'
             r'ceil|floor|round|concat|strlen|ucase|lcase|encode_for_uri|'
             r'contains|strstarts|strends|strbefore|strafter|year|month|day|'
             r'hours|minutes|seconds|timezone|tz|now|uuid|struuid|md5|sha1|sha256|sha384|'
             r'sha512|coalesce|if|strlang|strdt|sameterm|isiri|isuri|isblank|'
             r'isliteral|isnumeric|regex|substr|replace|exists|not\s+exists|'
             r'count|sum|min|max|avg|sample|group_concat|separator)\b',
             Name.Function),
            # boolean literals ::
            (r'(true|false)', Keyword.Constant),
            # double literals ::
            (r'[+\-]?(\d+\.\d*' + EXPONENT + r'|\.?\d+' + EXPONENT + ')', Number.Float),
            # decimal literals ::
            (r'[+\-]?(\d+\.\d*|\.\d+)', Number.Float),
            # integer literals ::
            (r'[+\-]?\d+', Number.Integer),
            # operators ::
            (r'(\|\||&&|=|\*|\-|\+|/|!=|<=|>=|!|<|>)', Operator),
            # punctuation characters ::
            (r'[(){}.;,:^\[\]]', Punctuation),
            # line comments ::
            (r'#[^\n]*', Comment),
            # strings (the triple-quoted forms must be tried first) ::
            (r'"""', String, 'triple-double-quoted-string'),
            (r'"', String, 'single-double-quoted-string'),
            (r"'''", String, 'triple-single-quoted-string'),
            (r"'", String, 'single-single-quoted-string'),
        ],
        'triple-double-quoted-string': [
            (r'"""', String, 'end-of-string'),
            # Consume string content but stop in front of a closing '"""';
            # a plain ``[^\\]+`` would swallow the delimiter as well and the
            # string would never be terminated.
            (r'(?:[^\\"]|"(?!""))+', String),
            (r'\\', String.Escape, 'string-escape'),
        ],
        'single-double-quoted-string': [
            (r'"', String, 'end-of-string'),
            (r'[^"\\\n]+', String),
            (r'\\', String.Escape, 'string-escape'),
        ],
        'triple-single-quoted-string': [
            (r"'''", String, 'end-of-string'),
            # Same reasoning as for the triple-double-quoted form.
            (r"(?:[^\\']|'(?!''))+", String),
            (r'\\', String.Escape, 'string-escape'),
        ],
        'single-single-quoted-string': [
            (r"'", String, 'end-of-string'),
            (r"[^'\\\n]+", String),
            (r'\\', String.Escape, 'string-escape'),
        ],
        'string-escape': [
            # Entered right after a backslash; consumes the rest of the
            # escape sequence and returns to the enclosing string state.
            (r'u' + HEX + '{4}', String.Escape, '#pop'),
            (r'U' + HEX + '{8}', String.Escape, '#pop'),
            (r'.', String.Escape, '#pop'),
        ],
        'end-of-string': [
            # Optional language tag or datatype marker directly after the
            # closing quote.  '#pop:2' leaves both this state and the string
            # state that pushed it.
            (r'(@)([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)',
             bygroups(Operator, Name.Function), '#pop:2'),
            (r'\^\^', Operator, '#pop:2'),
            default('#pop:2'),
        ],
    }
| |
| |
class TurtleLexer(RegexLexer):
    """
    Lexer for `Turtle <http://www.w3.org/TR/turtle/>`_ data language.

    The class-level string constants below mirror terminal productions of the
    Turtle grammar and are concatenated into the regular expressions used by
    the ``tokens`` state machine.

    .. versionadded:: 2.1
    """
    name = 'Turtle'
    aliases = ['turtle']
    filenames = ['*.ttl']
    mimetypes = ['text/turtle', 'application/x-turtle']

    # character group definitions ::
    #
    # The ``*_GRP`` constants are the *contents* of a character class (no
    # surrounding brackets) so that they can be combined with other ranges.
    PN_CHARS_BASE_GRP = ('a-zA-Z'
                         '\u00c0-\u00d6'
                         '\u00d8-\u00f6'
                         '\u00f8-\u02ff'
                         '\u0370-\u037d'
                         '\u037f-\u1fff'
                         '\u200c-\u200d'
                         '\u2070-\u218f'
                         '\u2c00-\u2fef'
                         '\u3001-\ud7ff'
                         '\uf900-\ufdcf'
                         '\ufdf0-\ufffd')

    PN_CHARS_U_GRP = (PN_CHARS_BASE_GRP + '_')

    PN_CHARS_GRP = (PN_CHARS_U_GRP +
                    r'\-' +
                    r'0-9' +
                    '\u00b7' +
                    '\u0300-\u036f' +
                    '\u203f-\u2040')

    PN_CHARS = '[' + PN_CHARS_GRP + ']'

    PN_CHARS_BASE = '[' + PN_CHARS_BASE_GRP + ']'

    PN_PREFIX = PN_CHARS_BASE + '(?:[' + PN_CHARS_GRP + '.]*' + PN_CHARS + ')?'

    HEX_GRP = '0-9A-Fa-f'

    HEX = '[' + HEX_GRP + ']'

    PERCENT = '%' + HEX + HEX

    # Characters that may follow a backslash inside a local name.  The
    # grammar lists the apostrophe here (not the double quote) and no space,
    # which is also what ShExCLexer in this module uses.
    PN_LOCAL_ESC_CHARS_GRP = r"_~.\-!$&'()*+,;=/?#@%"

    PN_LOCAL_ESC_CHARS = '[' + PN_LOCAL_ESC_CHARS_GRP + ']'

    PN_LOCAL_ESC = r'\\' + PN_LOCAL_ESC_CHARS

    # NOTE: PLX is a bare alternation; it is only ever spliced in directly
    # after a '|' inside an enclosing group, which keeps it well-scoped.
    PLX = '(?:' + PERCENT + ')|(?:' + PN_LOCAL_ESC + ')'

    PN_LOCAL = ('(?:[' + PN_CHARS_U_GRP + ':0-9' + ']|' + PLX + ')' +
                '(?:(?:[' + PN_CHARS_GRP + '.:]|' + PLX + ')*(?:[' +
                PN_CHARS_GRP + ':]|' + PLX + '))?')

    # Sub-patterns interpolated with ``%`` formatting into the rules below.
    # Each one is wrapped in a capturing group so it can be used with
    # ``bygroups``.
    patterns = {
        'PNAME_NS': r'((?:[a-zA-Z][\w-]*)?\:)',  # Simplified character range
        'IRIREF': r'(<[^<>"{}|^`\\\x00-\x20]*>)'
    }

    tokens = {
        'root': [
            (r'\s+', Text),

            # Base / prefix
            (r'(@base|BASE)(\s+)%(IRIREF)s(\s*)(\.?)' % patterns,
             bygroups(Keyword, Whitespace, Name.Variable, Whitespace,
                      Punctuation)),
            (r'(@prefix|PREFIX)(\s+)%(PNAME_NS)s(\s+)%(IRIREF)s(\s*)(\.?)' % patterns,
             bygroups(Keyword, Whitespace, Name.Namespace, Whitespace,
                      Name.Variable, Whitespace, Punctuation)),

            # The shorthand predicate 'a'
            (r'(?<=\s)a(?=\s)', Keyword.Type),

            # IRIREF
            (r'%(IRIREF)s' % patterns, Name.Variable),

            # PrefixedName
            (r'(' + PN_PREFIX + r')?(\:)(' + PN_LOCAL + r')?',
             bygroups(Name.Namespace, Punctuation, Name.Tag)),

            # Comment ('*' rather than '+' so that a bare '#' is a comment too)
            (r'#[^\n]*', Comment),

            (r'\b(true|false)\b', Literal),
            # Doubles must be tried before decimals and integers, otherwise
            # the mantissa is consumed on its own and the exponent is left
            # behind.  Both 'e' and 'E' introduce the exponent.
            (r'[+\-]?(?:\d+\.\d*[eE][+\-]?\d+|\.?\d+[eE][+\-]?\d+)', Number.Float),
            (r'[+\-]?\d*\.\d+', Number.Float),
            (r'[+\-]?\d+', Number.Integer),
            (r'[\[\](){}.;,:^]', Punctuation),

            # Strings (the triple-quoted forms must be tried first)
            (r'"""', String, 'triple-double-quoted-string'),
            (r'"', String, 'single-double-quoted-string'),
            (r"'''", String, 'triple-single-quoted-string'),
            (r"'", String, 'single-single-quoted-string'),
        ],
        'triple-double-quoted-string': [
            (r'"""', String, 'end-of-string'),
            # Consume string content but stop in front of a closing '"""';
            # a plain ``[^\\]+`` would swallow the delimiter as well and the
            # string would never be terminated.
            (r'(?:[^\\"]|"(?!""))+', String),
            (r'\\', String, 'string-escape'),
        ],
        'single-double-quoted-string': [
            (r'"', String, 'end-of-string'),
            (r'[^"\\\n]+', String),
            (r'\\', String, 'string-escape'),
        ],
        'triple-single-quoted-string': [
            (r"'''", String, 'end-of-string'),
            # Same reasoning as for the triple-double-quoted form.
            (r"(?:[^\\']|'(?!''))+", String),
            (r'\\', String, 'string-escape'),
        ],
        'single-single-quoted-string': [
            (r"'", String, 'end-of-string'),
            (r"[^'\\\n]+", String),
            (r'\\', String, 'string-escape'),
        ],
        'string-escape': [
            # Entered right after a backslash; consumes the escaped character
            # and returns to the enclosing string state.
            (r'.', String, '#pop'),
        ],
        'end-of-string': [
            # Optional language tag or datatype IRI directly after the
            # closing quote.  '#pop:2' leaves both this state and the string
            # state that pushed it.
            (r'(@)([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)',
             bygroups(Operator, Generic.Emph), '#pop:2'),

            (r'(\^\^)%(IRIREF)s' % patterns, bygroups(Operator, Generic.Emph), '#pop:2'),

            default('#pop:2'),

        ],
    }

    # Turtle and Tera Term macro files share the same file extension
    # but each has a recognizable and distinct syntax.
    def analyse_text(text):
        """Score how likely *text* is Turtle.

        Returns 0.80 when the first content of *text* -- after optional
        whitespace and leading ``#`` comment lines -- is an ``@base``,
        ``BASE``, ``@prefix`` or ``PREFIX`` directive followed by a space,
        and 0.0 otherwise.
        """
        directive = r'(?:\s*#[^\n]*\n)*\s*(?:@base|BASE|@prefix|PREFIX) '
        if re.match(directive, text):
            return 0.80
        return 0.0
| |
| |
class ShExCLexer(RegexLexer):
    """
    Lexer for `ShExC <https://shex.io/shex-semantics/#shexc>`_ shape expressions language syntax.

    The class-level string constants below mirror terminal productions of the
    ShExC grammar and are concatenated into the regular expressions used by
    the ``tokens`` state machine.
    """
    name = 'ShExC'
    aliases = ['shexc', 'shex']
    filenames = ['*.shex']
    mimetypes = ['text/shex']

    # character group definitions ::
    #
    # The ``*_GRP`` constants are the *contents* of a character class (no
    # surrounding brackets) so that they can be combined with other ranges.

    PN_CHARS_BASE_GRP = ('a-zA-Z'
                         '\u00c0-\u00d6'
                         '\u00d8-\u00f6'
                         '\u00f8-\u02ff'
                         '\u0370-\u037d'
                         '\u037f-\u1fff'
                         '\u200c-\u200d'
                         '\u2070-\u218f'
                         '\u2c00-\u2fef'
                         '\u3001-\ud7ff'
                         '\uf900-\ufdcf'
                         '\ufdf0-\ufffd')

    PN_CHARS_U_GRP = (PN_CHARS_BASE_GRP + '_')

    PN_CHARS_GRP = (PN_CHARS_U_GRP +
                    r'\-' +
                    r'0-9' +
                    '\u00b7' +
                    '\u0300-\u036f' +
                    '\u203f-\u2040')

    HEX_GRP = '0-9A-Fa-f'

    PN_LOCAL_ESC_CHARS_GRP = r"_~.\-!$&'()*+,;=/?#@%"

    # terminal productions ::

    PN_CHARS_BASE = '[' + PN_CHARS_BASE_GRP + ']'

    PN_CHARS_U = '[' + PN_CHARS_U_GRP + ']'

    PN_CHARS = '[' + PN_CHARS_GRP + ']'

    HEX = '[' + HEX_GRP + ']'

    PN_LOCAL_ESC_CHARS = '[' + PN_LOCAL_ESC_CHARS_GRP + ']'

    # The part of a \uXXXX / \UXXXXXXXX escape that follows the backslash;
    # kept separate because the 'string-escape' state is entered only after
    # the backslash has already been consumed.
    UCHAR_NO_BACKSLASH = '(?:u' + HEX + '{4}|U' + HEX + '{8})'

    UCHAR = r'\\' + UCHAR_NO_BACKSLASH

    IRIREF = r'<(?:[^\x00-\x20<>"{}|^`\\]|' + UCHAR + ')*>'

    BLANK_NODE_LABEL = '_:[0-9' + PN_CHARS_U_GRP + '](?:[' + PN_CHARS_GRP + \
                       '.]*' + PN_CHARS + ')?'

    PN_PREFIX = PN_CHARS_BASE + '(?:[' + PN_CHARS_GRP + '.]*' + PN_CHARS + ')?'

    PERCENT = '%' + HEX + HEX

    PN_LOCAL_ESC = r'\\' + PN_LOCAL_ESC_CHARS

    # NOTE: PLX is a bare alternation; it is only ever spliced in directly
    # after a '|' inside an enclosing group, which keeps it well-scoped.
    PLX = '(?:' + PERCENT + ')|(?:' + PN_LOCAL_ESC + ')'

    PN_LOCAL = ('(?:[' + PN_CHARS_U_GRP + ':0-9' + ']|' + PLX + ')' +
                '(?:(?:[' + PN_CHARS_GRP + '.:]|' + PLX + ')*(?:[' +
                PN_CHARS_GRP + ':]|' + PLX + '))?')

    EXPONENT = r'[eE][+-]?\d+'

    # Lexer token definitions ::

    tokens = {
        'root': [
            (r'\s+', Text),
            # keywords ::
            (r'(?i)(base|prefix|start|external|'
             r'literal|iri|bnode|nonliteral|length|minlength|maxlength|'
             r'mininclusive|minexclusive|maxinclusive|maxexclusive|'
             r'totaldigits|fractiondigits|'
             r'closed|extra)\b', Keyword),
            # the case-sensitive shorthand predicate 'a' ::
            (r'(a)\b', Keyword),
            # IRIs ::
            ('(' + IRIREF + ')', Name.Label),
            # blank nodes ::
            ('(' + BLANK_NODE_LABEL + ')', Name.Label),
            # prefixed names ::
            (r'(' + PN_PREFIX + r')?(\:)(' + PN_LOCAL + ')?',
             bygroups(Name.Namespace, Punctuation, Name.Tag)),
            # boolean literals ::
            (r'(true|false)', Keyword.Constant),
            # double literals ::
            (r'[+\-]?(\d+\.\d*' + EXPONENT + r'|\.?\d+' + EXPONENT + ')', Number.Float),
            # decimal literals ::
            (r'[+\-]?(\d+\.\d*|\.\d+)', Number.Float),
            # integer literals ::
            (r'[+\-]?\d+', Number.Integer),
            # operators ::
            (r'[@|$&=*+?^\-~]', Operator),
            # operator keywords ::
            (r'(?i)(and|or|not)\b', Operator.Word),
            # punctuation characters ::
            (r'[(){}.;,:^\[\]]', Punctuation),
            # line comments ::
            (r'#[^\n]*', Comment),
            # strings (the triple-quoted forms must be tried first) ::
            (r'"""', String, 'triple-double-quoted-string'),
            (r'"', String, 'single-double-quoted-string'),
            (r"'''", String, 'triple-single-quoted-string'),
            (r"'", String, 'single-single-quoted-string'),
        ],
        'triple-double-quoted-string': [
            (r'"""', String, 'end-of-string'),
            # Consume string content but stop in front of a closing '"""';
            # a plain ``[^\\]+`` would swallow the delimiter as well and the
            # string would never be terminated.
            (r'(?:[^\\"]|"(?!""))+', String),
            (r'\\', String.Escape, 'string-escape'),
        ],
        'single-double-quoted-string': [
            (r'"', String, 'end-of-string'),
            (r'[^"\\\n]+', String),
            (r'\\', String.Escape, 'string-escape'),
        ],
        'triple-single-quoted-string': [
            (r"'''", String, 'end-of-string'),
            # Same reasoning as for the triple-double-quoted form.
            (r"(?:[^\\']|'(?!''))+", String),
            (r'\\', String.Escape, 'string-escape'),
        ],
        'single-single-quoted-string': [
            (r"'", String, 'end-of-string'),
            (r"[^'\\\n]+", String),
            (r'\\', String.Escape, 'string-escape'),
        ],
        'string-escape': [
            # Entered right after a backslash; consumes the rest of the
            # escape sequence and returns to the enclosing string state.
            (UCHAR_NO_BACKSLASH, String.Escape, '#pop'),
            (r'.', String.Escape, '#pop'),
        ],
        'end-of-string': [
            # Optional language tag or datatype marker directly after the
            # closing quote.  '#pop:2' leaves both this state and the string
            # state that pushed it.
            (r'(@)([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)',
             bygroups(Operator, Name.Function), '#pop:2'),
            (r'\^\^', Operator, '#pop:2'),
            default('#pop:2'),
        ],
    }