| #!/usr/bin/env python |
| # -*- coding: utf-8 -*- |
| |
| # :Id: $Id: latex2mathml.py 8878 2021-11-05 11:10:44Z milde $ |
| # :Copyright: © 2005 Jens Jørgen Mortensen [1]_ |
| # © 2010, 2021 Günter Milde. |
| # |
| # :License: Released under the terms of the `2-Clause BSD license`_, in short: |
| # |
| # Copying and distribution of this file, with or without modification, |
| # are permitted in any medium without royalty provided the copyright |
| # notice and this notice are preserved. |
| # This file is offered as-is, without any warranty. |
| # |
| # .. _2-Clause BSD license: https://opensource.org/licenses/BSD-2-Clause |
| # |
| # .. [1] the original `rst2mathml.py` in `sandbox/jensj/latex_math` |
| |
| """Convert LaTeX maths code into presentational MathML. |
| |
| This module is provisional: |
| the API is not settled and may change with any minor Docutils version. |
| """ |
| |
| # Usage: |
| # |
| # >>> from latex2mathml import * |
| |
| import collections |
| import copy |
| import re |
| import sys |
| import unicodedata |
| if sys.version_info >= (3, 0): |
| unicode = str # noqa |
| |
| from docutils.utils.math import tex2unichar, toplevel_code |
| |
| |
| # Character data |
| # -------------- |
| |
| # LaTeX math macro to Unicode mappings. |
| # Character categories. |
| |
| # identifiers -> <mi> |
| |
# Work on a private copy: assigning into `tex2unichar.mathalpha` itself
# would silently change the shared table for every other user of that module.
letters = dict(tex2unichar.mathalpha)
letters['hbar'] = u'\u210F'  # compatibility mapping to ℏ (\hslash).
# (ħ LATIN SMALL LETTER H WITH STROKE is upright)
| |
| # special case: Capital Greek letters: (upright in TeX style) |
# Macro name -> character for the capital Greek letters handled specially
# (one entry per line, same insertion order as before).
greek_capitals = {
    'Phi': u'\u03a6',      # Φ
    'Xi': u'\u039e',       # Ξ
    'Sigma': u'\u03a3',    # Σ
    'Psi': u'\u03a8',      # Ψ
    'Delta': u'\u0394',    # Δ
    'Theta': u'\u0398',    # Θ
    'Upsilon': u'\u03d2',  # ϒ
    'Pi': u'\u03a0',       # Π
    'Omega': u'\u03a9',    # Ω
    'Gamma': u'\u0393',    # Γ
    'Lambda': u'\u039b',   # Λ
}
| |
| # functions -> <mi> |
# Macro name -> text of the function name (None: name is given as argument).
functions = {
    # functions with a space in the name
    'liminf': u'lim\u202finf',
    'limsup': u'lim\u202fsup',
    'injlim': u'inj\u202flim',
    'projlim': u'proj\u202flim',
    # embellished function names (see handle_cmd() below)
    'varlimsup': 'lim',
    'varliminf': 'lim',
    'varprojlim': 'lim',
    'varinjlim': 'lim',
    # custom function name
    'operatorname': None,
}
# Functions whose displayed name equals the macro name:
functions.update({name: name for name in (
    'arccos', 'arcsin', 'arctan', 'arg', 'cos',
    'cosh', 'cot', 'coth', 'csc', 'deg',
    'det', 'dim', 'exp', 'gcd', 'hom',
    'ker', 'lg', 'ln', 'log', 'Pr',
    'sec', 'sin', 'sinh', 'tan', 'tanh')})
| # Function with limits: 'lim', 'sup', 'inf', 'max', 'min': |
| # use <mo> to allow "movablelimits" attribute (see below). |
| |
| |
| # math font selection -> <mi mathvariant=...> or <mstyle mathvariant=...> |
# Font selection macro -> value of the MathML "mathvariant" attribute.
# The trailing comment names the LaTeX package providing the macro.
math_alphabets = {
    'boldsymbol': 'bold',
    'mathbf':     'bold',
    'mathit':     'italic',
    'mathtt':     'monospace',
    'mathrm':     'normal',
    'mathsf':     'sans-serif',
    'mathcal':    'script',
    'mathbfit':   'bold-italic',             # isomath
    'mathbb':     'double-struck',           # amssymb
    'mathfrak':   'fraktur',                 # amssymb
    'mathsfit':   'sans-serif-italic',       # isomath
    'mathsfbfit': 'sans-serif-bold-italic',  # isomath
    'mathscr':    'script',                  # mathrsfs
    # unsupported: bold-fraktur, bold-script, bold-sans-serif
}
| |
| # operator, fence, or separator -> <mo> |
| |
| |
# Extensible delimiters allowed as argument of the left/right commands:
# macro name -> delimiter character.
stretchables = {
    'backslash':   '\\',
    'uparrow':     u'\u2191',  # ↑ UPWARDS ARROW
    'downarrow':   u'\u2193',  # ↓ DOWNWARDS ARROW
    'updownarrow': u'\u2195',  # ↕ UP DOWN ARROW
    'Uparrow':     u'\u21d1',  # ⇑ UPWARDS DOUBLE ARROW
    'Downarrow':   u'\u21d3',  # ⇓ DOWNWARDS DOUBLE ARROW
    'Updownarrow': u'\u21d5',  # ⇕ UP DOWN DOUBLE ARROW
    'lmoustache':  u'\u23b0',  # ⎰ UPPER LEFT OR LOWER RIGHT CURLY BRACKET SECTION
    'rmoustache':  u'\u23b1',  # ⎱ UPPER RIGHT OR LOWER LEFT CURLY BRACKET SECTION
    'arrowvert':   u'\u23d0',  # ⏐ VERTICAL LINE EXTENSION
    'bracevert':   u'\u23aa',  # ⎪ CURLY BRACKET EXTENSION
    'lvert':       u'|',       # left |
    'lVert':       u'\u2016',  # left ‖
    'rvert':       u'|',       # right |
    'rVert':       u'\u2016',  # right ‖
    'Arrowvert':   u'\u2016',  # ‖
}
# Add the delimiter tables from `tex2unichar` (later tables win on clashes):
stretchables.update(tex2unichar.mathfence)
stretchables.update(tex2unichar.mathopen)   # Braces
stretchables.update(tex2unichar.mathclose)  # Braces
| |
| # >>> print(' '.join(sorted(set(stretchables.values())))) |
| # [ \ ] { | } ‖ ↑ ↓ ↕ ⇑ ⇓ ⇕ ⌈ ⌉ ⌊ ⌋ ⌜ ⌝ ⌞ ⌟ ⎪ ⎰ ⎱ ⏐ ⟅ ⟆ ⟦ ⟧ ⟨ ⟩ ⟮ ⟯ ⦇ ⦈ |
| |
# Macro name -> character(s) exported as <mo> element.
operators = {
    # negated symbols without pre-composed Unicode character
    'nleqq':      u'\u2266\u0338',  # ≦̸
    'ngeqq':      u'\u2267\u0338',  # ≧̸
    'nleqslant':  u'\u2a7d\u0338',  # ⩽̸
    'ngeqslant':  u'\u2a7e\u0338',  # ⩾̸
    'ngtrless':   u'\u2277\u0338',  # txfonts
    'nlessgtr':   u'\u2276\u0338',  # txfonts
    'nsubseteqq': u'\u2AC5\u0338',  # ⫅̸
    'nsupseteqq': u'\u2AC6\u0338',  # ⫆̸
    # compatibility definitions:
    'centerdot':    u'\u2B1D',  # BLACK VERY SMALL SQUARE | mathbin
    'varnothing':   u'\u2300',  # ⌀ DIAMETER SIGN | empty set
    'varpropto':    u'\u221d',  # ∝ PROPORTIONAL TO | sans serif
    'triangle':     u'\u25B3',  # WHITE UP-POINTING TRIANGLE | mathord
    'triangledown': u'\u25BD',  # WHITE DOWN-POINTING TRIANGLE | mathord
    # alias commands:
    'dotsb': u'\u22ef',  # ⋯ with binary operators/relations
    'dotsc': u'\u2026',  # … with commas
    'dotsi': u'\u22ef',  # ⋯ with integrals
    'dotsm': u'\u22ef',  # ⋯ multiplication dots
    'dotso': u'\u2026',  # … other dots
    # functions with movable limits (requires <mo>)
    'lim': 'lim',
    'sup': 'sup',
    'inf': 'inf',
    'max': 'max',
    'min': 'min',
}
# Merge the symbol tables. The order matters: on a name clash the table
# merged last provides the value.
operators.update(tex2unichar.mathbin)    # Binary symbols
operators.update(tex2unichar.mathrel)    # Relation symbols, arrow symbols
operators.update(tex2unichar.mathord)    # Miscellaneous symbols
operators.update(tex2unichar.mathpunct)  # Punctuation
operators.update(tex2unichar.mathop)     # Variable-sized symbols
operators.update(stretchables)
| |
| |
| # special cases |
| |
# Operators exported with style='font-weight: bold;'
thick_operators = {
    'thicksim':    u'\u223C',  # ∼
    'thickapprox': u'\u2248',  # ≈
}

# Operators exported with mathsize='75%'
small_operators = {
    'shortmid':       u'\u2223',  # ∣
    'shortparallel':  u'\u2225',  # ∥
    'nshortmid':      u'\u2224',  # ∤
    'nshortparallel': u'\u2226',  # ∦
    'smallfrown':     u'\u2322',  # ⌢ FROWN
    'smallsmile':     u'\u2323',  # ⌣ SMILE
    'smallint':       u'\u222b',  # ∫ INTEGRAL
}
| |
| # Operators and functions with limits above/below in display formulas |
| # and in index position inline (movablelimits=True) |
movablelimits = (
    # big operators
    'bigcap', 'bigcup', 'bigodot', 'bigoplus', 'bigotimes',
    'bigsqcup', 'biguplus', 'bigvee', 'bigwedge',
    'coprod', 'intop', 'ointop', 'prod', 'sum',
    # named functions taking limits
    'lim', 'max', 'min', 'sup', 'inf',
)
# Depending on settings, integrals may also be in this category.
# (e.g. if "amsmath" is loaded with option "intlimits", see
# http://mirror.ctan.org/macros/latex/required/amsmath/amsldoc.pdf)
# `movablelimits` is a tuple, so it would have to be extended with ``+=``:
# movablelimits += ('fint', 'iiiint', 'iiint', 'iint', 'int', 'oiint',
#                   'oint', 'ointctrclockwise', 'sqint',
#                   'varointclockwise',)
| |
| # horizontal space -> <mspace> |
| |
# Spacing macro (name or single character) -> width of the <mspace>.
spaces = {
    # named macros
    'qquad':         '2em',        # two \quad
    'quad':          '1em',        # 18 mu
    'thickspace':    '0.2778em',   # 5mu = 5/18em
    'medspace':      '0.2222em',   # 4mu = 2/9em
    'thinspace':     '0.1667em',   # 3mu = 1/6em
    'negthinspace':  '-0.1667em',  # -3mu = -1/6em
    'negmedspace':   '-0.2222em',  # -4mu = -2/9em
    'negthickspace': '-0.2778em',  # -5mu = -5/18em
    # one-character macros
    ' ': '0.25em',     # inter word space
    ';': '0.2778em',   # 5mu thickspace
    ':': '0.2222em',   # 4mu medspace
    ',': '0.1667em',   # 3mu thinspace
    '!': '-0.1667em',  # negthinspace
}
| |
| # accents -> <mover stretchy="false"> |
# Accent macro -> (spacing character, combining character)
accents = {
    'acute':    (u'´', u'\u0301'),
    'bar':      (u'ˉ', u'\u0304'),
    'breve':    (u'˘', u'\u0306'),
    'check':    (u'ˇ', u'\u030C'),
    'dot':      (u'˙', u'\u0307'),
    'ddot':     (u'¨', u'\u0308'),
    'dddot':    (u'⋯', u'\u20DB'),
    'grave':    (u'`', u'\u0300'),
    'hat':      (u'ˆ', u'\u0302'),
    'mathring': (u'˚', u'\u030A'),
    'tilde':    (u'˜', u'\u0303'),  # tilde ~ or small tilde ˜?
    'vec':      (u'→', u'\u20d7'),  # → too heavy, accents="false"
    # TODO: ddddot
}
| |
| # limits etc. -> <mover> or <munder> |
# Macro -> (character, offset-correction/em) for symbols placed above ...
over = {
    'overbrace':          (u'\u23DE', -0.2),  # DejaVu Math -0.6
    'overleftarrow':      (u'\u2190', -0.2),
    'overleftrightarrow': (u'\u2194', -0.2),
    'overline':           (u'_', -0.2),  # \u2012' FIGURE DASH does not stretch
    'overrightarrow':     (u'\u2192', -0.2),
    'widehat':            (u'^', -0.5),
    'widetilde':          (u'~', -0.3),
}
# ... and below the base.
under = {
    'underbrace':          (u'\u23DF', 0.1),  # DejaVu Math -0.7
    'underleftarrow':      (u'\u2190', -0.2),
    'underleftrightarrow': (u'\u2194', -0.2),
    'underline':           (u'_', -0.8),
    'underrightarrow':     (u'\u2192', -0.2),
}
| |
| # Character translations |
| # ---------------------- |
| # characters with preferred alternative in mathematical use |
| # cf. https://www.w3.org/TR/MathML3/chapter7.html#chars.anomalous |
anomalous_chars = {
    '-': u'\u2212',  # HYPHEN-MINUS -> MINUS SIGN
    ':': u'\u2236',  # COLON -> RATIO
    '~': u'\u00a0',  # NO-BREAK SPACE
}

# blackboard bold
# (Greek characters are not working with "mathvariant" in Firefox 78)
mathbb = {
    u'Γ': u'\u213E',  # ℾ
    u'Π': u'\u213F',  # ℿ
    u'Σ': u'\u2140',  # ⅀
    u'γ': u'\u213D',  # ℽ
    u'π': u'\u213C',  # ℼ
}
| |
| # Matrix environments |
# Environment name -> (left fence, right fence); '' means "no fence".
matrices = {
    'matrix':      ('', ''),
    'smallmatrix': ('', ''),  # smaller, see begin_environment()!
    'pmatrix':     ('(', ')'),
    'bmatrix':     ('[', ']'),
    'Bmatrix':     ('{', '}'),
    'vmatrix':     ('|', '|'),
    'Vmatrix':     (u'\u2016', u'\u2016'),  # ‖
    'cases':       ('{', ''),
}
| |
# Layout style macro -> attributes for the wrapping element.
layout_styles = {
    'displaystyle':      {'displaystyle': True, 'scriptlevel': 0},
    'textstyle':         {'displaystyle': False, 'scriptlevel': 0},
    'scriptstyle':       {'displaystyle': False, 'scriptlevel': 1},
    'scriptscriptstyle': {'displaystyle': False, 'scriptlevel': 2},
}
# See also https://www.w3.org/TR/MathML3/chapter3.html#presm.scriptlevel

# Fraction macro -> (style_attrs, frac_attrs)
# The d*/t* variants refer to the very dict objects stored in `layout_styles`.
fractions = {
    'frac':   ({}, {}),
    'cfrac':  ({'displaystyle': True,
                'scriptlevel': 0,
                'CLASS': 'cfrac'},  # in LaTeX with padding
               {}),
    'dfrac':  (layout_styles['displaystyle'], {}),
    'tfrac':  (layout_styles['textstyle'], {}),
    'binom':  ({}, {'linethickness': 0}),
    'dbinom': (layout_styles['displaystyle'], {'linethickness': 0}),
    'tbinom': (layout_styles['textstyle'], {'linethickness': 0}),
}
| |
# Delimiter sizes, indexed by the values of `bigdelimiters`
# (index 0: no explicit size).
delimiter_sizes = ['', '1.2em', '1.623em', '2.047em', '2.470em']

# Delimiter macro -> index into `delimiter_sizes`
bigdelimiters = {'left': 0, 'right': 0}
# \bigl, \bigr, \Bigl, \Bigr, \biggl, \biggr, \Biggl, \Biggr -> 1 ... 4
bigdelimiters.update({prefix + side: size
                      for size, prefix in enumerate(
                          ('big', 'Big', 'bigg', 'Bigg'), 1)
                      for side in 'lr'})
| |
| |
| # MathML element classes |
| # ---------------------- |
| |
class math(object):
    """Base class for MathML elements and root of MathML trees.

    A node keeps its child nodes in the list `children`, its XML attributes
    in the ordered mapping `attributes`, and a reference to its `parent`.
    The XML tag name is the name of the (sub)class.
    """

    nchildren = None
    """Expected number of children or None"""
    # cf. https://www.w3.org/TR/MathML3/chapter3.html#id.3.1.3.2
    parent = None
    """Parent node in MathML DOM tree."""
    _level = 0  # indentation level (static class variable)
    xml_entities = {  # for invalid and invisible characters
        # Translation table for `unicode.translate()`: markup characters
        # must be replaced by entity references to get well-formed XML.
        ord('<'): u'&lt;',
        ord('>'): u'&gt;',
        ord('&'): u'&amp;',
        0x2061: u'&ApplyFunction;',
    }
    _boolstrings = {True: 'true', False: 'false'}
    """String representation of boolean MathML attribute values."""

    html_tagname = 'span'
    """Tag name for HTML representation."""

    def __init__(self, *children, **attributes):
        r"""Set up node with `children` and `attributes`.

        Attributes are downcased: Use CLASS to set "class" value.
        >>> math(mn(3), CLASS='test')
        math(mn(3), class='test')
        >>> math(CLASS='test').toprettyxml()
        '<math class="test">\n</math>'

        Raise `SyntaxError` if more children are passed than the element
        accepts (see `nchildren`).
        """
        # Set up the attributes first: the error message generated by
        # `append()` for surplus children calls `__repr__()`, which needs
        # `self.attributes`.
        self.attributes = collections.OrderedDict()
        # sort attributes for predictable functional tests
        # as self.attributes.update(attributes) does not keep order in Python < 3.6
        for key in sorted(attributes.keys()):
            # Use .lower() to allow argument `CLASS` for attribute `class`
            # (Python keyword). MathML uses only lowercase attributes.
            self.attributes[key.lower()] = attributes[key]

        self.children = []
        self.extend(children)

    def __repr__(self):
        content = [repr(item) for item in getattr(self, 'children', [])]
        if hasattr(self, 'data'):
            content.append(repr(self.data))
        if isinstance(self, MathSchema) and self.switch:
            content.append('switch=True')
        content += ["%s=%r" % (k, v) for k, v in self.attributes.items()
                    if v is not None]

        return self.__class__.__name__ + '(%s)' % ', '.join(content)

    def __len__(self):
        """Return the number of child nodes."""
        return len(self.children)

    # emulate dictionary-like access to attributes
    # see `docutils.nodes.Element` for dict/list interface
    def __getitem__(self, key):
        return self.attributes[key]

    def __setitem__(self, key, item):
        self.attributes[key] = item

    def get(self, *args, **kwargs):
        """Return attribute value (same interface as `dict.get()`)."""
        return self.attributes.get(*args, **kwargs)

    def full(self):
        """Return boolean indicating whether children may be appended."""
        return (self.nchildren is not None
                and len(self) >= self.nchildren)

    def append(self, child):
        """Append child and return self or first non-full parent.

        If self is full, go up the tree and return first non-full node or
        `None`.

        Raise `SyntaxError` if self is already full before appending.
        """
        if self.full():
            raise SyntaxError('Node %s already full!' % self)
        self.children.append(child)
        child.parent = self
        if self.full():
            return self.close()
        return self

    def extend(self, children):
        """Append all items of `children` and return self."""
        for child in children:
            self.append(child)
        return self

    def close(self):
        """Close element and return first non-full parent or None."""
        parent = self.parent
        while parent is not None and parent.full():
            parent = parent.parent
        return parent

    def toprettyxml(self):
        """Return XML representation of self as string."""
        return ''.join(self._xml())

    def _xml(self, level=0):
        """Return list of strings: start tag, body, and end tag."""
        return ([self.xml_starttag()]
                + self._xml_body(level)
                + ['</%s>' % self.__class__.__name__])

    def xml_starttag(self):
        """Return the XML start tag with all attributes that are not None.

        Boolean values are written as "true"/"false"; all other values are
        written unchanged (only real booleans are converted, so a value
        like "TrueType" stays intact).
        """
        attrs = []
        for key, value in self.attributes.items():
            if value is None:
                continue
            if isinstance(value, bool):
                value = self._boolstrings[value]
            attrs.append('%s="%s"' % (key, value))
        return '<%s>' % ' '.join([self.__class__.__name__] + attrs)

    def _xml_body(self, level=0):
        """Return list of strings with the XML of all children.

        Every child starts on a new line, indented one step more than
        `level`; the end tag of self follows on a line indented by `level`.
        """
        xml = []
        for child in self.children:
            xml.extend(['\n', ' ' * (level+1)])
            xml.extend(child._xml(level+1))
        xml.extend(['\n', ' ' * level])
        return xml
| |
| # >>> n2 = math(mn(2)) |
| # >>> n2 |
| # math(mn(2)) |
| # >>> n2.toprettyxml() |
| # '<math>\n <mn>2</mn>\n</math>' |
| # >>> len(n2) |
| # 1 |
| # >>> eq3 = math(id='eq3', display='block') |
| # >>> eq3 |
| # math(display='block', id='eq3') |
| # >>> eq3.toprettyxml() |
| # '<math display="block" id="eq3">\n</math>' |
| # >>> len(eq3) |
| # 0 |
| # >>> math(CLASS='bold').xml_starttag() |
| # '<math class="bold">' |
| |
class mtable(math):
    """MathML ``<mtable>`` element (no behaviour beyond the base class)."""
| |
| # >>> mt = mtable(displaystyle=True) |
| # >>> mt |
| # mtable(displaystyle=True) |
| # >>> math(mt).toprettyxml() |
| # '<math>\n <mtable displaystyle="true">\n </mtable>\n</math>' |
| |
class mrow(math):
    """Group sub-expressions as a horizontal row."""

    def close(self):
        """Close element and return first non-full parent or None.

        The <mrow> is dropped from the tree in two cases:

        * its parent treats the content as an inferred mrow and expects
          exactly one child: the parent takes over the children,
        * it holds exactly one child: the child takes the place of the
          <mrow> (if there is no parent, the child is returned).
        """
        parent = self.parent
        if isinstance(parent, MathRowSchema) and parent.nchildren == 1:
            # The parent adopts our children ...
            parent.nchildren = len(parent.children)
            parent.children = self.children
            for adopted in self.children:
                adopted.parent = parent
            # ... and is closed in our place.
            return parent.close()
        if len(self) == 1:
            only_child = self.children[0]
            try:
                position = parent.children.index(self)
                parent.children[position] = only_child
                only_child.parent = parent
            except (AttributeError, ValueError):
                # no parent or self is not registered as child of parent
                return only_child
        return super(mrow, self).close()
| |
| # >>> mrow(displaystyle=False) |
| # mrow(displaystyle=False) |
| |
| # The elements <msqrt>, <mstyle>, <merror>, <mpadded>, <mphantom>, <menclose>, |
| # <mtd>, <mscarry>, and <math> treat their contents as a single inferred mrow |
| # formed from all their children. |
class MathRowSchema(math):
    """Base class for elements treating content as a single inferred mrow."""


class mtr(MathRowSchema):
    """MathML ``<mtr>`` element."""


class mtd(MathRowSchema):
    """MathML ``<mtd>`` element."""


class menclose(MathRowSchema):
    """MathML ``<menclose>`` element."""

    # \boxed expects one argument or a group
    nchildren = 1


class mphantom(MathRowSchema):
    """MathML ``<mphantom>`` element."""

    # \phantom expects one argument or a group
    nchildren = 1


class msqrt(MathRowSchema):
    """MathML ``<msqrt>`` element."""

    # \sqrt expects one argument or a group
    nchildren = 1


class mstyle(MathRowSchema):
    """MathML ``<mstyle>`` element."""

    # \mathrm, ... expect one argument or a group
    nchildren = 1
| |
| |
class MathToken(math):
    """Token Element: contains textual data instead of children.

    Base class for mo, mi, and mn.
    """

    nchildren = 0

    def __init__(self, data, **attributes):
        """Store `data` (the element content) and set up `attributes`."""
        self.data = data
        super(MathToken, self).__init__(**attributes)

    def _xml_body(self, level=0):
        """Return one-item list with `data`, translated via `xml_entities`."""
        text = unicode(self.data)
        return [text.translate(self.xml_entities)]


class mtext(MathToken):
    """MathML ``<mtext>`` element."""


class mi(MathToken):
    """MathML ``<mi>`` element."""


class mo(MathToken):
    """MathML ``<mo>`` element."""


class mn(MathToken):
    """MathML ``<mn>`` element."""
| |
| # >>> mo(u'<') |
| # mo('<') |
| # >>> mo(u'<')._xml() |
| # ['<mo>', '&lt;', '</mo>'] |
| |
class MathSchema(math):
    """Base class for schemata expecting 2 or more children.

    The special attribute `switch` indicates that the last two child
    elements are in reversed order and must be switched before XML-export.
    """

    nchildren = 2

    def __init__(self, *children, **kwargs):
        """Set up node; the keyword argument `switch` is not an attribute."""
        # `switch` must be set before the children are appended,
        # because `append()` evaluates it.
        self.switch = kwargs.pop('switch', False)
        math.__init__(self, *children, **kwargs)

    def append(self, child):
        """Append `child`; restore the child order once the node is full."""
        next_node = super(MathSchema, self).append(child)
        if self.full() and self.switch:
            # exchange the last two children
            last, previous = self.children[-1], self.children[-2]
            self.children[-2:] = [last, previous]
            self.switch = False
        return next_node


class msub(MathSchema):
    """MathML ``<msub>`` element."""


class msup(MathSchema):
    """MathML ``<msup>`` element."""


class msubsup(MathSchema):
    """MathML ``<msubsup>`` element."""

    nchildren = 3
| |
| # >>> msub(mi('x'), mo('-')) |
| # msub(mi('x'), mo('-')) |
| # >>> msubsup(mi('base'), mi('sub'), mi('super')) |
| # msubsup(mi('base'), mi('sub'), mi('super')) |
| # >>> msubsup(mi('base'), mi('super'), mi('sub'), switch=True) |
| # msubsup(mi('base'), mi('sub'), mi('super')) |
| |
class munder(msub):
    """MathML ``<munder>`` element."""


class mover(msup):
    """MathML ``<mover>`` element."""
| |
| # >>> munder(mi('lim'), mo('-'), accent=False) |
| # munder(mi('lim'), mo('-'), accent=False) |
| # >>> mu = munder(mo('-'), accent=False, switch=True) |
| # >>> mu |
| # munder(mo('-'), switch=True, accent=False) |
| # >>> mu.append(mi('lim')) |
| # >>> mu |
| # munder(mi('lim'), mo('-'), accent=False) |
| # >>> mu.append(mi('lim')) |
| # Traceback (most recent call last): |
| # SyntaxError: Node munder(mi('lim'), mo('-'), accent=False) already full! |
| # >>> munder(mo('-'), mi('lim'), accent=False, switch=True).toprettyxml() |
| # '<munder accent="false">\n <mi>lim</mi>\n <mo>-</mo>\n</munder>' |
| |
class munderover(msubsup):
    """MathML ``<munderover>`` element."""


class mroot(MathSchema):
    """MathML ``<mroot>`` element."""

    nchildren = 2


class mfrac(math):
    """MathML ``<mfrac>`` element."""

    nchildren = 2


class mspace(math):
    """MathML ``<mspace>`` element (takes no children)."""

    nchildren = 0
| |
| |
| # LaTeX to MathML translation |
| # --------------------------- |
| |
| # auxiliary functions |
| # ~~~~~~~~~~~~~~~~~~~ |
| |
def tex_cmdname(string):
    """Split `string` into leading TeX command name and remainder.

    A command name is either a run of ASCII letters (spaces following it
    are dropped) or one single character of any other kind.

    >>> tex_cmdname('mymacro2') # up to first non-letter
    ('mymacro', '2')
    >>> tex_cmdname('name 2') # strip trailing whitespace
    ('name', '2')
    >>> tex_cmdname('_2') # single non-letter character
    ('_', '2')

    """
    letters_match = re.match(r'([a-zA-Z]+) *(.*)', string)
    if letters_match is not None:
        return letters_match.groups()
    # No leading letter: the name is the first character (if there is one).
    return re.match(r'(.?)(.*)', string).groups()
| |
| # Test: |
| # |
| # >>> tex_cmdname('name_2') # first non-letter terminates |
| # ('name', '_2') |
| # >>> tex_cmdname(' next') # leading whitespace is returned |
| # (' ', 'next') |
| # >>> tex_cmdname('1 2') # whitespace after non-letter is kept |
| # ('1', ' 2') |
| # >>> tex_cmdname('') # empty string |
| # ('', '') |
| |
def tex_number(string):
    """Split `string` into leading number literal and remainder.

    The number may contain "." and "," but must end with a digit.
    Without leading number, return an empty string and `string`.

    >>> tex_number('123.4')
    ('123.4', '')
    >>> tex_number('4, 2')
    ('4', ', 2')

    """
    found = re.match(r'([0-9.,]*[0-9]+)(.*)', string)
    return found.groups() if found is not None else ('', string)
| |
| # Test: |
| # |
| # >>> tex_number(' 23.4b') # leading whitespace -> no number |
| # ('', ' 23.4b') |
| # >>> tex_number('23,400/2') # comma separator included |
| # ('23,400', '/2') |
| # >>> tex_number('23. 4/2') # trailing separator not included |
| # ('23', '. 4/2') |
| # >>> tex_number('4, 2') # trailing separator not included |
| # ('4', ', 2') |
| # >>> tex_number('1 000.4') |
| # ('1', ' 000.4') |
| |
def tex_token(string):
    r"""Split `string` into first simple TeX token and remainder.

    A token is a TeX command (backslash + letters; whitespace after the
    command is skipped), a backslash followed by one character, or the
    first character of `string` (empty for an empty string).

    >>> tex_token('\\command{without argument}')
    ('\\command', '{without argument}')
    >>> tex_token('or first character')
    ('o', 'r first character')

    """
    match = re.match(r"""((?P<cmd>\\[a-zA-Z]+)\s* # TeX command, skip whitespace
                      |(?P<chcmd>\\.)          # one-character TeX command
                      |(?P<ch>.?))             # first character (or empty)
                     (?P<remainder>.*$)        # remaining part of string
                     """, string, re.VERBOSE)
    remainder = match.group('remainder')
    for group_name in ('cmd', 'chcmd'):
        token = match.group(group_name)
        if token:
            return token, remainder
    # neither kind of command found: the "ch" alternative matched
    return match.group('ch'), remainder
| |
| # Test: |
| # |
| # >>> tex_token('{opening bracket of group}') |
| # ('{', 'opening bracket of group}') |
| # >>> tex_token('\\skip whitespace after macro name') |
| # ('\\skip', 'whitespace after macro name') |
| # >>> tex_token('. but not after single char') |
| # ('.', ' but not after single char') |
| # >>> tex_token('') # empty string. |
| # ('', '') |
| # >>> tex_token('\{escaped bracket') |
| # ('\\{', 'escaped bracket') |
| |
def tex_group(string):
    r"""Split `string` into first TeX group (or character) and remainder.

    If `string` starts with "{", return the content of the group up to the
    matching "}" (without the outer brackets) and the text behind it.
    Brackets escaped with a backslash do not count.
    Otherwise, return the first character and the rest of `string`.

    Raise `SyntaxError` if the closing bracket is missing.

    >>> tex_group('{first group} returned without brackets')
    ('first group', ' returned without brackets')

    """
    if not string.startswith('{'):
        # special case: there is no group, return first token and remainder
        return string[:1], string[1:]

    depth = 0        # level of {{nested} groups}
    escaped = False  # the current character is escaped (\)
    for position, char in enumerate(string):
        if escaped:
            escaped = False
            continue
        if char == '\\':
            escaped = True
        elif char == '{':
            depth += 1
        elif char == '}':
            depth -= 1
            if depth == 0:
                # matching bracket found: split around it
                return string[1:position], string[position+1:]
    raise SyntaxError('Group without closing bracket')
| |
| |
| |
| # >>> tex_group('{} empty group') |
| # ('', ' empty group') |
| # >>> tex_group('{group with {nested} group} ') |
| # ('group with {nested} group', ' ') |
| # >>> tex_group('{group with {nested group}} at the end') |
| # ('group with {nested group}', ' at the end') |
| # >>> tex_group('{{group} {with {{complex }nesting}} constructs}') |
| # ('{group} {with {{complex }nesting}} constructs', '') |
| # >>> tex_group('{group with \\{escaped\\} brackets}') |
| # ('group with \\{escaped\\} brackets', '') |
| # >>> tex_group('{group followed by closing bracket}} from outer group') |
| # ('group followed by closing bracket', '} from outer group') |
| # >>> tex_group('No group? Return first character.') |
| # ('N', 'o group? Return first character.') |
| # >>> tex_group(' {also whitespace}') |
| # (' ', '{also whitespace}') |
| |
| |
def tex_token_or_group(string):
    r"""Split `string` into first TeX group or token and remainder.

    If the first token (see `tex_token()`) is an opening bracket, return
    the complete group (see `tex_group()`) instead.

    >>> tex_token_or_group('\\command{without argument}')
    ('\\command', '{without argument}')
    >>> tex_token_or_group('first character')
    ('f', 'irst character')
    >>> tex_token_or_group(' also whitespace')
    (' ', 'also whitespace')
    >>> tex_token_or_group('{first group} keep rest')
    ('first group', ' keep rest')

    """
    token, rest = tex_token(string)
    if token != '{':
        return token, rest
    # An opening bracket starts a group: scan the string again for the
    # matching closing bracket.
    return tex_group(string.lstrip())
| |
| # >>> tex_token_or_group('\{no group but left bracket') |
| # ('\\{', 'no group but left bracket') |
| |
def tex_optarg(string):
    """Split `string` into optional argument and remainder.

    The optional argument is enclosed in square brackets and may follow
    after whitespace. Nested square brackets are not supported.
    If there is no optional argument, return an empty string and `string`.

    Raise `SyntaxError` if `string` starts with "[" but no complete
    optional argument is found.

    >>> tex_optarg('[optional argument] returned without brackets')
    ('optional argument', ' returned without brackets')
    >>> tex_optarg('{empty string, if there is no optional arg}')
    ('', '{empty string, if there is no optional arg}')

    """
    found = re.match(r"""\s*                            # leading whitespace
                     \[(?P<optarg>(\\]|[^\[\]]|\\])*)\] # [group] without nested groups
                     (?P<remainder>.*$)
                     """, string, re.VERBOSE)
    if found is not None:
        return found.group('optarg'), found.group('remainder')
    if string.startswith('['):
        # opening bracket without (supported) counterpart
        raise SyntaxError('Could not extract optional argument from %r' % string)
    return '', string
| |
| # Test: |
| # >>> tex_optarg(' [optional argument] after whitespace') |
| # ('optional argument', ' after whitespace') |
| # >>> tex_optarg('[missing right bracket') |
| # Traceback (most recent call last): |
| # SyntaxError: Could not extract optional argument from '[missing right bracket' |
| # >>> tex_optarg('[group with [nested group]]') |
| # Traceback (most recent call last): |
| # SyntaxError: Could not extract optional argument from '[group with [nested group]]' |
| |
| |
def parse_latex_math(node, string):
    r"""Convert `string` to MathML, attach it to `node`, and return `node`.

    The string is processed character by character; while new elements
    are appended, the "current node" moves around in the tree.
    The return value is always the node passed as argument.

    Raise `SyntaxError` for unsupported characters.

    >>> parse_latex_math(math(), r'\alpha')
    math(mi('α'))
    >>> parse_latex_math(mrow(), r'x_{n}')
    mrow(msub(mi('x'), mi('n')))

    """
    root = node  # keep a reference to the start node for the return value
    # Normalize white-space:
    string = ' '.join(string.split())

    while string:
        # Take off first character:
        char, string = string[0], string[1:]

        if char == ' ':
            # whitespace is ignored in LaTeX math mode
            continue
        if char == '\\':
            # start of a LaTeX macro
            cmdname, string = tex_cmdname(string)
            node, string = handle_cmd(cmdname, node, string)
        elif char in "_^":
            node = handle_script_or_limit(node, char)
        elif char == '{':
            # group: continue inside a new <mrow>
            group = mrow()
            node.append(group)
            node = group
        elif char == '}':
            node = node.close()
        elif char == '&':
            # close current element and continue inside a new <mtd>
            cell = mtd()
            node.close().append(cell)
            node = cell
        elif char.isalpha():
            node = node.append(mi(char))
        elif char.isdigit():
            # collect the remaining characters of the number literal
            number, string = tex_number(string)
            node = node.append(mn(char+number))
        elif char in anomalous_chars:
            # characters with a special meaning in LaTeX math mode
            # fix spacing before "unary" minus.
            attributes = {}
            if char == '-' and node.children:
                previous_node = node.children[-1]
                follows_opening = (
                    getattr(previous_node, 'data', '-') in '([='
                    or previous_node.get('class') == 'mathopen')
                if follows_opening:
                    attributes['form'] = 'prefix'
            node = node.append(mo(anomalous_chars[char], **attributes))
        elif char in "/()[]|":
            node = node.append(mo(char, stretchy=False))
        elif char in "+*=<>,.!?`';@":
            node = node.append(mo(char))
        else:
            raise SyntaxError(u'Unsupported character: "%s"' % char)
    return root
| |
| # Test: |
| |
| # >>> print(parse_latex_math(math(), '')) |
| # math() |
| # >>> parse_latex_math(math(), ' \\sqrt{ \\alpha}') |
| # math(msqrt(mi('α'))) |
| # >>> parse_latex_math(math(), '23.4x') |
| # math(mn('23.4'), mi('x')) |
| # >>> parse_latex_math(math(), '\\sqrt 2 \\ne 3') |
| # math(msqrt(mn('2')), mo('≠'), mn('3')) |
| # >>> parse_latex_math(math(), '\\sqrt{2 + 3} < 3') |
| # math(msqrt(mn('2'), mo('+'), mn('3')), mo('<'), mn('3')) |
| # >>> parse_latex_math(math(), '\\sqrt[3]{2 + 3}') |
| # math(mroot(mrow(mn('2'), mo('+'), mn('3')), mn('3'))) |
| # >>> parse_latex_math(math(), '\max_x') # function takes limits |
| # math(munder(mo('max', movablelimits=True), mi('x'))) |
| # >>> parse_latex_math(math(), 'x^j_i') # ensure correct order: base, sub, sup |
| # math(msubsup(mi('x'), mi('i'), mi('j'))) |
| # >>> parse_latex_math(math(), '\int^j_i') # ensure correct order |
| # math(msubsup(mo('∫'), mi('i'), mi('j'))) |
| # >>> parse_latex_math(math(), 'x_{\\alpha}') |
| # math(msub(mi('x'), mi('α'))) |
| # >>> parse_latex_math(math(), 'x_\\text{in}') |
| # math(msub(mi('x'), mtext('in'))) |
| |
| def handle_cmd(name, node, string): |
| """Process LaTeX command `name` followed by `string`. |
| |
| Append result to `node`. |
| If needed, parse `string` for command argument. |
| Return new current node and remainder of `string`: |
| |
| >>> handle_cmd('hbar', math(), r' \frac') |
| (math(mi('ℏ')), ' \\frac') |
| >>> handle_cmd('hspace', math(), r'{1ex} (x)') |
| (math(mspace(width='1ex')), ' (x)') |
| |
| """ |
| |
| # Token elements |
| # ============== |
| |
| # identifier -> <mi> |
| |
| if name in letters: |
| new_node = mi(letters[name]) |
| if name in greek_capitals: |
| # upright in "TeX style" but MathML sets them italic ("ISO style"). |
| # CSS styling does not change the font style in Firefox 78. |
| # Use 'mathvariant="normal"'? |
| new_node['class'] = 'capital-greek' |
| node = node.append(new_node) |
| return node, string |
| |
| if name in functions: |
| # use <mi> followed by invisible function applicator character |
| # (see https://www.w3.org/TR/MathML3/chapter3.html#presm.mi) |
| if name == 'operatorname': |
| # custom function name, e.g. ``\operatorname{abs}(x)`` |
| # TODO: \operatorname* -> with limits |
| arg, string = tex_token_or_group(string) |
| new_node = mi(arg, mathvariant='normal') |
| else: |
| new_node = mi(functions[name]) |
| # embellished function names: |
| if name == 'varliminf': # \underline\lim |
| new_node = munder(new_node, mo(u'_')) |
| elif name == 'varlimsup': # \overline\lim |
| new_node = mover(new_node, mo(u'¯'), accent=False) |
| elif name == 'varprojlim': # \underleftarrow\lim |
| new_node = munder(new_node, mo(u'\u2190')) |
| elif name == 'varinjlim': # \underrightarrow\lim |
| new_node = munder(new_node, mo(u'\u2192')) |
| |
| node = node.append(new_node) |
| # add ApplyFunction when appropriate (not \sin^2(x), say) |
| # cf. https://www.w3.org/TR/MathML3/chapter3.html#presm.mi |
| if string and string[0] not in ('^', '_'): |
| node = node.append(mo(u'\u2061')) # ⁡ |
| return node, string |
| |
| if name in math_alphabets: |
| if name == 'boldsymbol': |
| attributes = {'class': 'boldsymbol'} |
| else: |
| attributes = {'mathvariant': math_alphabets[name]} |
| if name == 'mathscr': |
| attributes['class'] = 'mathscr' |
| # Check for single symbol (letter, name, or ⅀) |
| arg, remainder = tex_token_or_group(string) |
| if arg.startswith('\\'): |
| # convert single letters (so the isalpha() test below works). |
| # TODO: convert all LICRs in a group (\matrm{\mu\Omega}) |
| arg = letters.get(arg[1:], arg) |
| if name == 'mathbb': |
| # mathvariant="double-struck" is ignored for Greek letters |
| # (tested in Firefox 78). Use literal Unicode characters. |
| arg = mathbb.get(arg, arg) |
| if arg.isalpha() or arg == u'\u2140': |
| node = node.append(mi(arg, **attributes)) |
| return node, remainder |
| # Wrap in <style> |
| style = mstyle(**attributes) |
| node.append(style) |
| return style, string |
| |
| |
| # operator, fence, or separator -> <mo> |
| |
| if name == 'colon': # trailing punctuation, not binary relation |
| node = node.append(mo(':', form='postfix', lspace='0', rspace='0.28em')) |
| return node, string |
| |
| if name == 'idotsint': |
| node = parse_latex_math(node, '\int\dotsi\int') |
| return node, string |
| |
| if name in thick_operators: |
| node = node.append(mo(thick_operators[name], style='font-weight: bold')) |
| return node, string |
| |
| if name in small_operators: |
| node = node.append(mo(small_operators[name], mathsize='75%')) |
| return node, string |
| |
| if name in operators: |
| attributes = {} |
| if name in movablelimits and string and string[0] in ' _^': |
| attributes['movablelimits'] = True |
| elif name in ('lvert', 'lVert'): |
| attributes['class'] = 'mathopen' |
| node = node.append(mo(operators[name], **attributes)) |
| return node, string |
| |
| if name in bigdelimiters: |
| delimiter_attributes = {} |
| size = delimiter_sizes[bigdelimiters[name]] |
| delimiter, string = tex_token_or_group(string) |
| if delimiter not in '()[]/|.': |
| try: |
| delimiter = stretchables[delimiter.lstrip('\\')] |
| except KeyError: |
| raise SyntaxError(u'Unsupported "\\%s" delimiter "%s"!' |
| % (name, delimiter)) |
| if size: |
| delimiter_attributes['maxsize'] = size |
| delimiter_attributes['minsize'] = size |
| delimiter_attributes['symmetric'] = True |
| if name == 'left' or name.endswith('l'): |
| row = mrow() |
| node.append(row) |
| node = row |
| if delimiter != '.': # '.' stands for "empty delimiter" |
| node.append(mo(delimiter, **delimiter_attributes)) |
| if name == 'right' or name.endswith('r'): |
| node = node.close() |
| return node, string |
| |
| if name == 'not': |
| arg, string = tex_token(string) |
| if arg == '{': |
| return node, '{\\not ' + string |
| if arg.startswith('\\'): # LaTeX macro |
| try: |
| arg = operators[arg[1:]] |
| except KeyError: |
| raise SyntaxError(u'\\not: Cannot negate: "%s"!'%arg) |
| arg = unicodedata.normalize('NFC', arg+u'\u0338') |
| node = node.append(mo(arg)) |
| return node, string |
| |
| # arbitrary text (usually comments) -> <mtext> |
| if name in ('text', 'mbox', 'textrm'): |
| arg, string = tex_token_or_group(string) |
| parts = arg.split('$') # extract inline math |
| for i, part in enumerate(parts): |
| if i % 2 == 0: # i is even |
| part = re.sub('(^ | $)', u'\u00a0', part) |
| node = node.append(mtext(part)) |
| else: |
| parse_latex_math(node, part) |
| return node, string |
| |
| # horizontal space -> <mspace> |
| if name in spaces: |
| node = node.append(mspace(width='%s'%spaces[name])) |
| return node, string |
| |
| if name in ('hspace', 'mspace'): |
| arg, string = tex_group(string) |
| if arg.endswith('mu'): |
| arg = '%sem' % (float(arg[:-2])/18) |
| node = node.append(mspace(width='%s'%arg)) |
| return node, string |
| |
| if name == 'phantom': |
| new_node = mphantom() |
| node.append(new_node) |
| return new_node, string |
| |
| if name == 'boxed': |
| new_node = menclose(notation='box') |
| node.append(new_node) |
| return new_node, string |
| |
| |
| # Complex elements (Layout schemata) |
| # ================================== |
| |
| if name == 'sqrt': |
| radix, string = tex_optarg(string) |
| if radix: |
| indexnode = mrow() |
| new_node = mroot(indexnode, switch=True) |
| parse_latex_math(indexnode, radix) |
| indexnode.close() |
| else: |
| new_node = msqrt() |
| node.append(new_node) |
| return new_node, string |
| |
| if name in fractions: |
| (style_atts, frac_atts) = fractions[name] |
| if name == 'cfrac': |
| optarg, string = tex_optarg(string) |
| optargs = {'l': 'left', 'r': 'right'} |
| if optarg in optargs: |
| frac_atts = frac_atts.copy() |
| frac_atts['numalign'] = optargs[optarg] # "numalign" is deprecated |
| frac_atts['class'] = 'numalign-' + optargs[optarg] |
| new_node = frac = mfrac(**frac_atts) |
| if name.endswith('binom'): |
| new_node = mrow(mo('('), new_node, mo(')'), CLASS='binom') |
| new_node.nchildren = 3 |
| if style_atts: |
| new_node = mstyle(new_node, **style_atts) |
| node.append(new_node) |
| return frac, string |
| |
| if name == '\\': # end of a row |
| entry = mtd() |
| new_node = mtr(entry) |
| node.close().close().append(new_node) |
| return entry, string |
| |
| if name in accents: |
| new_node = mover(mo(accents[name][0], stretchy=False), switch=True) |
| if name == 'vec': |
| new_node.children[0]['accent'] = False # scale down arrow but drop i-dot |
| new_node.tex_cmd = name # for HTML export |
| node.append(new_node) |
| return new_node, string |
| |
| if name in over: |
| # set "accent" to False (otherwise dots on i and j are dropped) |
| # but to True on the accent node to get "textstyle" (full size) symbols on top |
| new_node = mover(mo(over[name][0], accent=True), |
| switch=True, accent=False) |
| new_node.tex_cmd = name # for HTML export |
| node.append(new_node) |
| return new_node, string |
| |
| if name == 'overset': |
| new_node = mover(switch=True) |
| node.append(new_node) |
| return new_node, string |
| |
| if name in under: |
| new_node = munder(mo(under[name][0]), switch=True) |
| new_node.tex_cmd = name # for HTML export |
| node.append(new_node) |
| return new_node, string |
| |
| if name == 'underset': |
| new_node = munder(switch=True) |
| node.append(new_node) |
| return new_node, string |
| |
| if name in ('xleftarrow', 'xrightarrow'): |
| subscript, string = tex_optarg(string) |
| base = mo(operators['long'+name[1:]]) |
| if subscript: |
| new_node = munderover(base) |
| sub_node = parse_latex_math(mrow(), subscript) |
| if len(sub_node) == 1: |
| sub_node = sub_node.children[0] |
| new_node.append(sub_node) |
| else: |
| new_node = mover(base) |
| node.append(new_node) |
| return new_node, string |
| |
| if name in layout_styles: # 'displaystyle', 'textstyle', ... |
| new_node = mstyle(**layout_styles[name]) |
| new_node.nchildren = None |
| if isinstance(node, mrow) and len(node) == 0: |
| # replace node with new_node |
| node.parent.children[node.parent.children.index(node)] = new_node |
| new_node.parent = node.parent |
| elif node.__class__.__name__ == 'math': |
| node.append(new_node) |
| else: |
| raise SyntaxError(u'Declaration "\\%s" must be first command ' |
| u'in a group.' % name) |
| return new_node, string |
| |
| if name.endswith('limits'): |
| arg, remainder = tex_token(string) |
| if arg in '_^': # else ignore |
| string = remainder |
| node = handle_script_or_limit(node, arg, limits=name) |
| return node, string |
| |
| # Environments |
| |
| if name == 'begin': |
| return begin_environment(node, string) |
| |
| if name == 'end': |
| return end_environment(node, string) |
| |
| raise SyntaxError(u'Unknown LaTeX command: ' + name) |
| |
| # >>> handle_cmd('left', math(), '[a\\right]') |
| # (mrow(mo('[')), 'a\\right]') |
| # >>> handle_cmd('left', math(), '. a)') # empty \left |
| # (mrow(), ' a)') |
| # >>> handle_cmd('left', math(), '\\uparrow a)') # cmd |
| # (mrow(mo('↑')), 'a)') |
| # >>> handle_cmd('not', math(), '\\equiv \\alpha)') # cmd |
| # (math(mo('≢')), '\\alpha)') |
| # >>> handle_cmd('text', math(), '{ for } i>0') # group |
| # (math(mtext('\xa0for\xa0')), ' i>0') |
| # >>> handle_cmd('text', math(), '{B}T') # group |
| # (math(mtext('B')), 'T') |
| # >>> handle_cmd('text', math(), '{number of apples}}') # group |
| # (math(mtext('number of apples')), '}') |
| # >>> handle_cmd('text', math(), 'i \\sin(x)') # single char |
| # (math(mtext('i')), ' \\sin(x)') |
| # >>> handle_cmd('sin', math(), '(\\alpha)') |
| # (math(mi('sin'), mo('\u2061')), '(\\alpha)') |
| # >>> handle_cmd('sin', math(), ' \\alpha') |
| # (math(mi('sin'), mo('\u2061')), ' \\alpha') |
| # >>> handle_cmd('operatorname', math(), '{abs}(x)') |
| # (math(mi('abs', mathvariant='normal'), mo('\u2061')), '(x)') |
| # >>> handle_cmd('mathrm', math(), '\\alpha') |
| # (math(mi('α', mathvariant='normal')), '') |
| # >>> handle_cmd('mathrm', math(), '{out} = 3') |
| # (math(mi('out', mathvariant='normal')), ' = 3') |
| # >>> handle_cmd('overline', math(), '{981}') |
| # (mover(mo('¯', accent=True), switch=True, accent=False), '{981}') |
| # >>> handle_cmd('bar', math(), '{x}') |
| # (mover(mo('ˉ', stretchy=False), switch=True), '{x}') |
| # >>> handle_cmd('xleftarrow', math(), r'[\alpha]{10}') |
| # (munderover(mo('⟵'), mi('α')), '{10}') |
| # >>> handle_cmd('xleftarrow', math(), r'[\alpha=5]{10}') |
| # (munderover(mo('⟵'), mrow(mi('α'), mo('='), mn('5'))), '{10}') |
| |
def handle_script_or_limit(node, c, limits=''):
    """Append script or limit element to `node`.

    Remove the last child of `node`, use it as base of a new script or
    limit element, and append the new element to `node`.

    Arguments:

    :node:   element whose last child becomes the base,
    :c:      "_" (sub-/underscript) or "^" (super-/overscript),
    :limits: "" or the name of a preceding limit placement command;
             with "limits" or "movablelimits", the script is attached
             as <munder>/<mover> instead of <msub>/<msup>.

    Return the new element.

    Raise SyntaxError if `c` is neither "_" nor "^" or if `node` has no
    child that can serve as base.
    """
    # Validate first, so that a failure leaves the tree untouched and is
    # reported as SyntaxError (instead of UnboundLocalError or IndexError).
    if c not in ('_', '^'):
        raise SyntaxError(u'Expected "_" or "^", got "%s"!' % c)
    if not node.children:
        raise SyntaxError(u'Missing base for "%s"!' % c)

    base = node.children.pop()
    if limits == 'limits':
        base['movablelimits'] = False
    elif (limits == 'movablelimits'
          or getattr(base, 'data', '') in movablelimits):
        base['movablelimits'] = True

    def as_limit():
        # True, if the script is to be attached below/above the base.
        # Evaluated lazily: only for a base that is not already scripted.
        return (limits in ('limits', 'movablelimits')
                or (limits == ''
                    and base.get('movablelimits', None) == True))  # noqa

    if c == '_':
        # NOTE(review): `switch=True` presumably makes the element reorder
        # its last two children once complete -- confirm in the class def.
        if isinstance(base, mover):
            # base already has an overscript: combine both in one element
            new_node = munderover(*base.children, switch=True)
        elif isinstance(base, msup):
            # base already has a superscript: combine both in one element
            new_node = msubsup(*base.children, switch=True)
        elif as_limit():
            new_node = munder(base)
        else:
            new_node = msub(base)
    else:  # c == '^'
        if isinstance(base, munder):
            # base already has an underscript: combine both in one element
            new_node = munderover(*base.children)
        elif isinstance(base, msub):
            # base already has a subscript: combine both in one element
            new_node = msubsup(*base.children)
        elif as_limit():
            new_node = mover(base)
        else:
            new_node = msup(base)
    node.append(new_node)
    return new_node
| |
| |
def begin_environment(node, string):
    """Open the environment named by the TeX group at the start of `string`.

    For an environment listed in `matrices`, append a new <mtable> with one
    row and one cell below `node`.  The table is wrapped in an <mrow> that
    starts with the left delimiter (if the environment has one) or in an
    <mstyle> (for "smallmatrix").

    Return the first table cell and the remainder of `string`.

    Raise SyntaxError for all other environments.
    """
    env, remainder = tex_group(string)
    if env not in matrices:
        raise SyntaxError(u'Environment not supported!')

    table_atts = {}
    opening = matrices[env][0]
    if opening:
        if env == 'cases':
            # the brace of "cases" gets extra space; entries are left aligned
            fence = mo(opening, rspace='0.17em')
            table_atts['columnalign'] = 'left'
        else:
            fence = mo(opening)
        parent = mrow(fence)
        node.append(parent)
    elif env == 'smallmatrix':
        table_atts['rowspacing'] = '0.02em'
        table_atts['columnspacing'] = '0.333em'
        parent = mstyle(scriptlevel='1')
        node.append(parent)
    else:
        parent = node
    # TODO: aligned, alignedat
    # take an optional [t], [b] or the default [c]
    cell = mtd()
    parent.append(mtable(mtr(cell), **table_atts))
    return cell, remainder
| |
| |
def end_environment(node, string):
    """Close the environment named by the TeX group at the start of `string`.

    For an environment listed in `matrices`, close the current table and
    append the right delimiter (if the environment has one).

    Return the element to continue with and the remainder of `string`.

    Raise SyntaxError for all other environments.
    """
    env, remainder = tex_group(string)
    if env not in matrices:
        raise SyntaxError(u'Environment not supported!')

    # close: mtd, mtr, mtable
    for _level in range(3):
        node = node.close()
    closing = matrices[env][1]
    if closing:
        node = node.append(mo(closing))
        node = node.close()
    elif env == 'cases':
        # no right delimiter, but the wrapper around the table is closed
        node = node.close()
    return node, remainder
| |
| |
| # Return the number of "equation_columns" in `rows`. Cf. "alignat" |
| # in http://mirror.ctan.org/macros/latex/required/amsmath/amsldoc.pdf |
def tex_equation_columns(rows):
    """Return the number of "equation columns" in `rows`.

    `rows` is an iterable of strings.  For every row, the "&" characters
    that are not part of the escape sequence "\\&" are counted; the row
    with the most of them determines the result: 0 if there is none,
    else ``count // 2 + 1``.

    Return 0 for empty `rows`.
    """
    tab_counts = [row.count('&') - row.count(r'\&') for row in rows]
    if not tab_counts:
        # max() does not accept an empty sequence
        return 0
    tabs = max(tab_counts)
    if tabs == 0:
        return 0
    # integer arithmetic, same result as int(tabs/2 + 1) for tabs > 0
    return tabs // 2 + 1
| |
| # >>> tex_equation_columns(['a = b']) |
| # 0 |
| # >>> tex_equation_columns(['a &= b']) |
| # 1 |
| # >>> tex_equation_columns(['a &= b & a \in S']) |
| # 2 |
| # >>> tex_equation_columns(['a &= b & c &= d']) |
| # 2 |
| |
| |
| # Return dictionary with attributes to style an <mtable> as align environment: |
def align_attributes(rows):
    """Return attributes to style an <mtable> as "align" environment.

    `rows` is an iterable of strings.  The largest number of "&"
    characters in a row (not counting those in the escape sequence "\\&")
    determines the number of columns.  If there are such column
    separators, the result specifies

    :columnalign:   alternating "right" and "left" (one value per column),
    :columnspacing: alternating "0" and "2em" (one value per column gap).

    Without column separators (or with empty `rows`), only the "class" and
    "displaystyle" attributes are returned.
    """
    atts = {'class': 'align',
            'displaystyle': True}
    tab_counts = [row.count('&') - row.count(r'\&') for row in rows]
    # max() does not accept an empty sequence
    tabs = max(tab_counts) if tab_counts else 0
    if tabs:
        atts['columnalign'] = ' '.join(
            ('right', 'left')[i % 2] for i in range(tabs + 1))
        atts['columnspacing'] = ' '.join(
            ('0', '2em')[i % 2] for i in range(tabs))
    return atts
| |
| # >>> align_attributes(['a = b']) |
| # {'class': 'align', 'displaystyle': True} |
| # >>> align_attributes(['a &= b']) |
| # {'class': 'align', 'displaystyle': True, 'columnalign': 'right left', 'columnspacing': '0'} |
| # >>> align_attributes(['a &= b & a \in S']) |
| # {'class': 'align', 'displaystyle': True, 'columnalign': 'right left right', 'columnspacing': '0 2em'} |
| # >>> align_attributes(['a &= b & c &= d']) |
| # {'class': 'align', 'displaystyle': True, 'columnalign': 'right left right left', 'columnspacing': '0 2em 0'} |
| |
| |
def tex2mathml(tex_math, inline=True):
    """Return string with MathML code corresponding to `tex_math`.

    Set `inline` to False for displayed math.  Displayed math whose
    top-level code contains row separators ("\\\\") is put into a table
    styled with `align_attributes()`.
    """
    # Set up tree
    root = math(xmlns='http://www.w3.org/1998/Math/MathML')
    target = root  # element receiving the parsed content
    if not inline:
        root['display'] = 'block'
        top_rows = toplevel_code(tex_math).split(r'\\')
        if len(top_rows) > 1:
            # emulate align* environment with a math table
            target = mtd()
            table = mtable(mtr(target), **align_attributes(top_rows))
            root.append(table)
    parse_latex_math(target, tex_math)
    return root.toprettyxml()
| |
| # >>> print(tex2mathml('3')) |
| # <math xmlns="http://www.w3.org/1998/Math/MathML"> |
| # <mn>3</mn> |
| # </math> |
| # >>> print(tex2mathml('3', inline=False)) |
| # <math xmlns="http://www.w3.org/1998/Math/MathML" display="block"> |
| # <mn>3</mn> |
| # </math> |
| # >>> print(tex2mathml(r'a & b \\ c & d', inline=False)) |
| # <math xmlns="http://www.w3.org/1998/Math/MathML" display="block"> |
| # <mtable class="align" columnalign="right left" columnspacing="0" displaystyle="true"> |
| # <mtr> |
| # <mtd> |
| # <mi>a</mi> |
| # </mtd> |
| # <mtd> |
| # <mi>b</mi> |
| # </mtd> |
| # </mtr> |
| # <mtr> |
| # <mtd> |
| # <mi>c</mi> |
| # </mtd> |
| # <mtd> |
| # <mi>d</mi> |
| # </mtd> |
| # </mtr> |
| # </mtable> |
| # </math> |
| # >>> print(tex2mathml(r'a \\ b', inline=False)) |
| # <math xmlns="http://www.w3.org/1998/Math/MathML" display="block"> |
| # <mtable class="align" displaystyle="true"> |
| # <mtr> |
| # <mtd> |
| # <mi>a</mi> |
| # </mtd> |
| # </mtr> |
| # <mtr> |
| # <mtd> |
| # <mi>b</mi> |
| # </mtd> |
| # </mtr> |
| # </mtable> |
| # </math> |
| |
| |
| # TODO: look up more symbols from tr25, e.g. |
| # |
| # |
| # Table 2.8 Using Vertical Line or Solidus Overlay |
| # some of the negated forms of mathematical relations that can only be |
| # encoded by using either U+0338 COMBINING LONG SOLIDUS OVERLAY or U+20D2 |
| # COMBINING LONG VERTICAL LINE OVERLAY. (For issues with using 0338 in |
| # MathML, see Section 3.2.7, Combining Marks.) |
| # |
| # Table 2.9 Variants of Mathematical Symbols using VS1? |
| # |
| # Sequence Description |
| # 0030 + VS1 DIGIT ZERO - short diagonal stroke form |
| # 2205 + VS1 EMPTY SET - zero with long diagonal stroke overlay form |
| # 2229 + VS1 INTERSECTION - with serifs |
| # 222A + VS1 UNION - with serifs |
| # 2268 + VS1 LESS-THAN BUT NOT EQUAL TO - with vertical stroke |
| # 2269 + VS1 GREATER-THAN BUT NOT EQUAL TO - with vertical stroke |
| # 2272 + VS1 LESS-THAN OR EQUIVALENT TO - following the slant of the lower leg |
| # 2273 + VS1 GREATER-THAN OR EQUIVALENT TO - following the slant of the lower leg |
| # 228A + VS1 SUBSET OF WITH NOT EQUAL TO - variant with stroke through bottom members |
| # 228B + VS1 SUPERSET OF WITH NOT EQUAL TO - variant with stroke through bottom members |
| # 2293 + VS1 SQUARE CAP - with serifs |
| # 2294 + VS1 SQUARE CUP - with serifs |
| # 2295 + VS1 CIRCLED PLUS - with white rim |
| # 2297 + VS1 CIRCLED TIMES - with white rim |
| # 229C + VS1 CIRCLED EQUALS - equal sign inside and touching the circle |
| # 22DA + VS1 LESS-THAN slanted EQUAL TO OR GREATER-THAN |
| # 22DB + VS1 GREATER-THAN slanted EQUAL TO OR LESS-THAN |
| # 2A3C + VS1 INTERIOR PRODUCT - tall variant with narrow foot |
| # 2A3D + VS1 RIGHTHAND INTERIOR PRODUCT - tall variant with narrow foot |
| # 2A9D + VS1 SIMILAR OR LESS-THAN - following the slant of the upper leg |
| # 2A9E + VS1 SIMILAR OR GREATER-THAN - following the slant of the upper leg |
| # 2AAC + VS1 SMALLER THAN OR slanted EQUAL |
| # 2AAD + VS1 LARGER THAN OR slanted EQUAL |
| # 2ACB + VS1 SUBSET OF ABOVE NOT EQUAL TO - variant with stroke through bottom members |
| # 2ACC + VS1 SUPERSET OF ABOVE NOT EQUAL TO - variant with stroke through bottom members |