| """ |
| pygments.lexers.tnt |
| ~~~~~~~~~~~~~~~~~~~ |
| |
| Lexer for Typographic Number Theory. |
| |
| :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS. |
| :license: BSD, see LICENSE for details. |
| """ |
| |
| import re |
| |
| from pygments.lexer import Lexer |
| from pygments.token import Text, Comment, Operator, Keyword, Name, Number, \ |
| Punctuation, Error |
| |
| __all__ = ['TNTLexer'] |
| |
| |
| class TNTLexer(Lexer): |
| """ |
| Lexer for Typographic Number Theory, as described in the book |
| Gödel, Escher, Bach, by Douglas R. Hofstadter |
| |
| .. versionadded:: 2.7 |
| """ |
| |
| name = 'Typographic Number Theory' |
| url = 'https://github.com/Kenny2github/language-tnt' |
| aliases = ['tnt'] |
| filenames = ['*.tnt'] |
| |
| cur = [] |
| |
| LOGIC = set('⊃→]&∧^|∨Vv') |
| OPERATORS = set('+.⋅*') |
| VARIABLES = set('abcde') |
| PRIMES = set("'′") |
| NEGATORS = set('~!') |
| QUANTIFIERS = set('AE∀∃') |
| NUMBERS = set('0123456789') |
| WHITESPACE = set('\t \v\n') |
| |
| RULES = re.compile('''(?xi) |
| joining | separation | double-tilde | fantasy\\ rule |
| | carry[- ]over(?:\\ of)?(?:\\ line)?\\ ([0-9]+) | detachment |
| | contrapositive | De\\ Morgan | switcheroo |
| | specification | generalization | interchange |
| | existence | symmetry | transitivity |
| | add\\ S | drop\\ S | induction |
| | axiom\\ ([1-5]) | premise | push | pop |
| ''') |
| LINENOS = re.compile(r'(?:[0-9]+)(?:(?:, ?|,? and )(?:[0-9]+))*') |
| COMMENT = re.compile(r'\[[^\n\]]+\]') |
| |
| def __init__(self, *args, **kwargs): |
| Lexer.__init__(self, *args, **kwargs) |
| self.cur = [] |
| |
| def whitespace(self, start, text, required=False): |
| """Tokenize whitespace.""" |
| end = start |
| try: |
| while text[end] in self.WHITESPACE: |
| end += 1 |
| except IndexError: |
| end = len(text) |
| if required and end == start: |
| raise AssertionError |
| if end != start: |
| self.cur.append((start, Text, text[start:end])) |
| return end |
| |
| def variable(self, start, text): |
| """Tokenize a variable.""" |
| if text[start] not in self.VARIABLES: |
| raise AssertionError |
| end = start+1 |
| while text[end] in self.PRIMES: |
| end += 1 |
| self.cur.append((start, Name.Variable, text[start:end])) |
| return end |
| |
| def term(self, start, text): |
| """Tokenize a term.""" |
| if text[start] == 'S': # S...S(...) or S...0 |
| end = start+1 |
| while text[end] == 'S': |
| end += 1 |
| self.cur.append((start, Number.Integer, text[start:end])) |
| return self.term(end, text) |
| if text[start] == '0': # the singleton 0 |
| self.cur.append((start, Number.Integer, text[start])) |
| return start+1 |
| if text[start] in self.VARIABLES: # a''... |
| return self.variable(start, text) |
| if text[start] == '(': # (...+...) |
| self.cur.append((start, Punctuation, text[start])) |
| start = self.term(start+1, text) |
| if text[start] not in self.OPERATORS: |
| raise AssertionError |
| self.cur.append((start, Operator, text[start])) |
| start = self.term(start+1, text) |
| if text[start] != ')': |
| raise AssertionError |
| self.cur.append((start, Punctuation, text[start])) |
| return start+1 |
| raise AssertionError # no matches |
| |
| def formula(self, start, text): |
| """Tokenize a formula.""" |
| if text[start] in self.NEGATORS: # ~<...> |
| end = start+1 |
| while text[end] in self.NEGATORS: |
| end += 1 |
| self.cur.append((start, Operator, text[start:end])) |
| return self.formula(end, text) |
| if text[start] in self.QUANTIFIERS: # Aa:<...> |
| self.cur.append((start, Keyword.Declaration, text[start])) |
| start = self.variable(start+1, text) |
| if text[start] != ':': |
| raise AssertionError |
| self.cur.append((start, Punctuation, text[start])) |
| return self.formula(start+1, text) |
| if text[start] == '<': # <...&...> |
| self.cur.append((start, Punctuation, text[start])) |
| start = self.formula(start+1, text) |
| if text[start] not in self.LOGIC: |
| raise AssertionError |
| self.cur.append((start, Operator, text[start])) |
| start = self.formula(start+1, text) |
| if text[start] != '>': |
| raise AssertionError |
| self.cur.append((start, Punctuation, text[start])) |
| return start+1 |
| # ...=... |
| start = self.term(start, text) |
| if text[start] != '=': |
| raise AssertionError |
| self.cur.append((start, Operator, text[start])) |
| start = self.term(start+1, text) |
| return start |
| |
| def rule(self, start, text): |
| """Tokenize a rule.""" |
| match = self.RULES.match(text, start) |
| if match is None: |
| raise AssertionError |
| groups = sorted(match.regs[1:]) # exclude whole match |
| for group in groups: |
| if group[0] >= 0: # this group matched |
| self.cur.append((start, Keyword, text[start:group[0]])) |
| self.cur.append((group[0], Number.Integer, |
| text[group[0]:group[1]])) |
| if group[1] != match.end(): |
| self.cur.append((group[1], Keyword, |
| text[group[1]:match.end()])) |
| break |
| else: |
| self.cur.append((start, Keyword, text[start:match.end()])) |
| return match.end() |
| |
| def lineno(self, start, text): |
| """Tokenize a line referral.""" |
| end = start |
| while text[end] not in self.NUMBERS: |
| end += 1 |
| self.cur.append((start, Punctuation, text[start])) |
| self.cur.append((start+1, Text, text[start+1:end])) |
| start = end |
| match = self.LINENOS.match(text, start) |
| if match is None: |
| raise AssertionError |
| if text[match.end()] != ')': |
| raise AssertionError |
| self.cur.append((match.start(), Number.Integer, match.group(0))) |
| self.cur.append((match.end(), Punctuation, text[match.end()])) |
| return match.end() + 1 |
| |
| def error_till_line_end(self, start, text): |
| """Mark everything from ``start`` to the end of the line as Error.""" |
| end = start |
| try: |
| while text[end] != '\n': # there's whitespace in rules |
| end += 1 |
| except IndexError: |
| end = len(text) |
| if end != start: |
| self.cur.append((start, Error, text[start:end])) |
| end = self.whitespace(end, text) |
| return end |
| |
| def get_tokens_unprocessed(self, text): |
| """Returns a list of TNT tokens.""" |
| self.cur = [] |
| start = end = self.whitespace(0, text) |
| while start <= end < len(text): |
| try: |
| # try line number |
| while text[end] in self.NUMBERS: |
| end += 1 |
| if end != start: # actual number present |
| self.cur.append((start, Number.Integer, text[start:end])) |
| # whitespace is required after a line number |
| orig = len(self.cur) |
| try: |
| start = end = self.whitespace(end, text, True) |
| except AssertionError: |
| del self.cur[orig:] |
| start = end = self.error_till_line_end(end, text) |
| continue |
| # at this point it could be a comment |
| match = self.COMMENT.match(text, start) |
| if match is not None: |
| self.cur.append((start, Comment, text[start:match.end()])) |
| start = end = match.end() |
| # anything after the closing bracket is invalid |
| start = end = self.error_till_line_end(start, text) |
| # do not attempt to process the rest |
| continue |
| del match |
| if text[start] in '[]': # fantasy push or pop |
| self.cur.append((start, Keyword, text[start])) |
| start += 1 |
| end += 1 |
| else: |
| # one formula, possibly containing subformulae |
| orig = len(self.cur) |
| try: |
| start = end = self.formula(start, text) |
| except (AssertionError, RecursionError): # not well-formed |
| del self.cur[orig:] |
| while text[end] not in self.WHITESPACE: |
| end += 1 |
| self.cur.append((start, Error, text[start:end])) |
| start = end |
| # skip whitespace after formula |
| orig = len(self.cur) |
| try: |
| start = end = self.whitespace(end, text, True) |
| except AssertionError: |
| del self.cur[orig:] |
| start = end = self.error_till_line_end(start, text) |
| continue |
| # rule proving this formula a theorem |
| orig = len(self.cur) |
| try: |
| start = end = self.rule(start, text) |
| except AssertionError: |
| del self.cur[orig:] |
| start = end = self.error_till_line_end(start, text) |
| continue |
| # skip whitespace after rule |
| start = end = self.whitespace(end, text) |
| # line marker |
| if text[start] == '(': |
| orig = len(self.cur) |
| try: |
| start = end = self.lineno(start, text) |
| except AssertionError: |
| del self.cur[orig:] |
| start = end = self.error_till_line_end(start, text) |
| continue |
| start = end = self.whitespace(start, text) |
| except IndexError: |
| try: |
| del self.cur[orig:] |
| except NameError: |
| pass # if orig was never defined, fine |
| self.error_till_line_end(start, text) |
| return self.cur |