python-docs-venv/lib/python3.11/site-packages/docutils/utils/math/latex2mathml.py - datasketches-python - Git at Google

 #!/usr/bin/env python
 # -*- coding: utf-8 -*-

 # :Id: $Id: latex2mathml.py 8878 2021-11-05 11:10:44Z milde $
 # :Copyright: © 2005 Jens Jørgen Mortensen [1]_
 #             © 2010, 2021 Günter Milde.
 #
 # :License: Released under the terms of the `2-Clause BSD license`_, in short:
 #
 #    Copying and distribution of this file, with or without modification,
 #    are permitted in any medium without royalty provided the copyright
 #    notice and this notice are preserved.
 #    This file is offered as-is, without any warranty.
 #
 # .. _2-Clause BSD license: https://opensource.org/licenses/BSD-2-Clause
 #
 # .. [1] the original `rst2mathml.py` in `sandbox/jensj/latex_math`

 """Convert LaTex maths code into presentational MathML.

 This module is provisional:
 the API is not settled and may change with any minor Docutils version.
 """

 # Usage:
 #
 # >>> from latex2mathml import *

 import collections
 import copy
 import re
 import sys
 import unicodedata
 if sys.version_info >= (3, 0):
     unicode = str  # noqa

 from docutils.utils.math import tex2unichar, toplevel_code


 # Character data
 # --------------

 # LaTeX math macro to Unicode mappings.
 # Character categories.

 # identifiers -> <mi>

 # Work on a copy: binding `tex2unichar.mathalpha` directly and adding a key
 # would also modify the mapping inside the imported module.
 letters = dict(tex2unichar.mathalpha)
 letters['hbar'] = u'\u210F' # compatibility mapping to ℏ (\hslash).
 # (ħ LATIN SMALL LETTER H WITH STROKE is upright)

 # special case: Capital Greek letters: (upright in TeX style)
 greek_capitals = dict(
     # macro name (without backslash) = Unicode character
     Phi=u'\u03a6',
     Xi=u'\u039e',
     Sigma=u'\u03a3',
     Psi=u'\u03a8',
     Delta=u'\u0394',
     Theta=u'\u0398',
     Upsilon=u'\u03d2',
     Pi=u'\u03a0',
     Omega=u'\u03a9',
     Gamma=u'\u0393',
     Lambda=u'\u039b',
 )

 # functions -> <mi>
 functions = {
     # functions with a space in the name
     'liminf': u'lim\u202finf',
     'limsup': u'lim\u202fsup',
     'injlim': u'inj\u202flim',
     'projlim': u'proj\u202flim',
     # embellished function names (see handle_cmd() below)
     'varlimsup': 'lim',
     'varliminf': 'lim',
     'varprojlim': 'lim',
     'varinjlim': 'lim',
     # custom function name
     'operatorname': None,
 }
 # function names that are rendered as themselves
 functions.update({name: name for name in (
     'arccos', 'arcsin', 'arctan', 'arg', 'cos',
     'cosh', 'cot', 'coth', 'csc', 'deg',
     'det', 'dim', 'exp', 'gcd', 'hom',
     'ker', 'lg', 'ln', 'log', 'Pr',
     'sec', 'sin', 'sinh', 'tan', 'tanh',
 )})
 # Function with limits: 'lim', 'sup', 'inf', 'max', 'min':
 # use <mo> to allow "movablelimits" attribute (see below).


 # math font selection -> <mi mathvariant=...> or <mstyle mathvariant=...>
 math_alphabets = {
     # command name -> value of the "mathvariant" attribute
     # (trailing comments name the LaTeX package providing the command)
     'boldsymbol': 'bold',
     'mathbf': 'bold',
     'mathit': 'italic',
     'mathtt': 'monospace',
     'mathrm': 'normal',
     'mathsf': 'sans-serif',
     'mathcal': 'script',
     'mathbfit': 'bold-italic',  # isomath
     'mathbb': 'double-struck',  # amssymb
     'mathfrak': 'fraktur',  # amssymb
     'mathsfit': 'sans-serif-italic',  # isomath
     'mathsfbfit': 'sans-serif-bold-italic',  # isomath
     'mathscr': 'script',  # mathrsfs
     # unsupported: bold-fraktur, bold-script, bold-sans-serif
 }

 # operator, fence, or separator -> <mo>


 # extensible delimiters allowed in left/right cmds
 stretchables = dict(
     backslash='\\',
     uparrow=u'\u2191',      # UPWARDS ARROW
     downarrow=u'\u2193',    # DOWNWARDS ARROW
     updownarrow=u'\u2195',  # UP DOWN ARROW
     Uparrow=u'\u21d1',      # UPWARDS DOUBLE ARROW
     Downarrow=u'\u21d3',    # DOWNWARDS DOUBLE ARROW
     Updownarrow=u'\u21d5',  # UP DOWN DOUBLE ARROW
     lmoustache=u'\u23b0',   # UPPER LEFT OR LOWER RIGHT CURLY BRACKET SECTION
     rmoustache=u'\u23b1',   # UPPER RIGHT OR LOWER LEFT CURLY BRACKET SECTION
     arrowvert=u'\u23d0',    # VERTICAL LINE EXTENSION
     bracevert=u'\u23aa',    # CURLY BRACKET EXTENSION
     lvert=u'|',             # left  |
     lVert=u'\u2016',        # left  DOUBLE VERTICAL LINE
     rvert=u'|',             # right |
     rVert=u'\u2016',        # right DOUBLE VERTICAL LINE
     Arrowvert=u'\u2016',    # DOUBLE VERTICAL LINE
 )
 # add the fence and bracket tables of `tex2unichar`
 stretchables.update(tex2unichar.mathfence)
 stretchables.update(tex2unichar.mathopen)   # Braces
 stretchables.update(tex2unichar.mathclose)  # Braces

 # >>> print(' '.join(sorted(set(stretchables.values()))))
 # [ \ ] { | } ‖ ↑ ↓ ↕ ⇑ ⇓ ⇕ ⌈ ⌉ ⌊ ⌋ ⌜ ⌝ ⌞ ⌟ ⎪ ⎰ ⎱ ⏐ ⟅ ⟆ ⟦ ⟧ ⟨ ⟩ ⟮ ⟯ ⦇ ⦈

 operators = {
     # negated symbols without pre-composed Unicode character
     # (base character + U+0338 COMBINING LONG SOLIDUS OVERLAY)
     'nleqq': u'\u2266\u0338',
     'ngeqq': u'\u2267\u0338',
     'nleqslant': u'\u2a7d\u0338',
     'ngeqslant': u'\u2a7e\u0338',
     'ngtrless': u'\u2277\u0338',  # txfonts
     'nlessgtr': u'\u2276\u0338',  # txfonts
     'nsubseteqq': u'\u2AC5\u0338',
     'nsupseteqq': u'\u2AC6\u0338',
     # compatibility definitions:
     'centerdot': u'\u2B1D',  # BLACK VERY SMALL SQUARE | mathbin
     'varnothing': u'\u2300',  # DIAMETER SIGN | empty set
     'varpropto': u'\u221d',  # PROPORTIONAL TO | sans serif
     'triangle': u'\u25B3',  # WHITE UP-POINTING TRIANGLE | mathord
     'triangledown': u'\u25BD',  # WHITE DOWN-POINTING TRIANGLE | mathord
     # alias commands:
     'dotsb': u'\u22ef',  # midline dots with binary operators/relations
     'dotsc': u'\u2026',  # ellipsis with commas
     'dotsi': u'\u22ef',  # midline dots with integrals
     'dotsm': u'\u22ef',  # multiplication dots
     'dotso': u'\u2026',  # other dots
     # functions with movable limits (requires <mo>)
     'lim': 'lim',
     'sup': 'sup',
     'inf': 'inf',
     'max': 'max',
     'min': 'min',
 }
 # Merge the symbol tables of `tex2unichar`; later tables take precedence.
 operators.update(tex2unichar.mathbin)    # Binary symbols
 operators.update(tex2unichar.mathrel)    # Relation symbols, arrow symbols
 operators.update(tex2unichar.mathord)    # Miscellaneous symbols
 operators.update(tex2unichar.mathpunct)  # Punctuation
 operators.update(tex2unichar.mathop)     # Variable-sized symbols
 operators.update(stretchables)


 # special cases

 # rendered with style='font-weight: bold;'
 thick_operators = dict(
     thicksim=u'\u223C',     # TILDE OPERATOR
     thickapprox=u'\u2248',  # ALMOST EQUAL TO
 )

 # rendered with mathsize='75%'
 small_operators = dict(
     shortmid=u'\u2223',        # DIVIDES
     shortparallel=u'\u2225',   # PARALLEL TO
     nshortmid=u'\u2224',       # DOES NOT DIVIDE
     nshortparallel=u'\u2226',  # NOT PARALLEL TO
     smallfrown=u'\u2322',      # FROWN
     smallsmile=u'\u2323',      # SMILE
     smallint=u'\u222b',        # INTEGRAL
 )

 # Operators and functions with limits above/below in display formulas
 # and in index position inline (movablelimits=True)
 movablelimits = (
     # large operators
     'bigcap', 'bigcup', 'bigodot', 'bigoplus', 'bigotimes',
     'bigsqcup', 'biguplus', 'bigvee', 'bigwedge',
     'coprod', 'intop', 'ointop', 'prod', 'sum',
 ) + (
     # functions taking limits
     'lim', 'max', 'min', 'sup', 'inf',
 )
 # Depending on settings, integrals may also be in this category.
 # (e.g. if "amsmath" is loaded with option "intlimits", see
 #  http://mirror.ctan.org/macros/latex/required/amsmath/amsldoc.pdf)
 # movablelimits += ('fint', 'iiiint', 'iiint', 'iint', 'int', 'oiint',
 #                   'oint', 'ointctrclockwise', 'sqint',
 #                   'varointclockwise')

 # horizontal space -> <mspace>

 spaces = {
     # named spacing commands -> width (1em = 18mu)
     'qquad': '2em',  # two \quad
     'quad': '1em',  # 18 mu
     'thickspace': '0.2778em',  # 5mu = 5/18em
     'medspace': '0.2222em',  # 4mu = 2/9em
     'thinspace': '0.1667em',  # 3mu = 1/6em
     'negthinspace': '-0.1667em',  # -3mu = -1/6em
     'negmedspace': '-0.2222em',  # -4mu = -2/9em
     'negthickspace': '-0.2778em',  # -5mu = -5/18em
     # single-character spacing commands
     ' ': '0.25em',  # inter word space
     ';': '0.2778em',  # 5mu thickspace
     ':': '0.2222em',  # 4mu medspace
     ',': '0.1667em',  # 3mu thinspace
     '!': '-0.1667em',  # negthinspace
 }

 # accents -> <mover stretchy="false">
 accents = {
     # TeX command: (spacing character, combining character)
     'acute': (u'´', u'\u0301'),
     'bar': (u'ˉ', u'\u0304'),
     'breve': (u'˘', u'\u0306'),
     'check': (u'ˇ', u'\u030C'),
     'dot': (u'˙', u'\u0307'),
     'ddot': (u'¨', u'\u0308'),
     'dddot': (u'⋯', u'\u20DB'),
     'grave': (u'`', u'\u0300'),
     'hat': (u'ˆ', u'\u0302'),
     'mathring': (u'˚', u'\u030A'),
     'tilde': (u'˜', u'\u0303'),  # tilde ~ or small tilde ˜?
     'vec': (u'→', u'\u20d7'),  # → too heavy, accents="false"
     # TODO: ddddot
 }

 # limits etc. -> <mover> or <munder>
 # Both tables map a TeX command to (character, offset-correction/em).
 over = dict(
     overbrace=(u'\u23DE', -0.2),  # DejaVu Math -0.6
     overleftarrow=(u'\u2190', -0.2),
     overleftrightarrow=(u'\u2194', -0.2),
     overline=(u'_', -0.2),  # \u2012' FIGURE DASH does not stretch
     overrightarrow=(u'\u2192', -0.2),
     widehat=(u'^', -0.5),
     widetilde=(u'~', -0.3),
 )
 under = dict(
     underbrace=(u'\u23DF', 0.1),  # DejaVu Math -0.7
     underleftarrow=(u'\u2190', -0.2),
     underleftrightarrow=(u'\u2194', -0.2),
     underline=(u'_', -0.8),
     underrightarrow=(u'\u2192', -0.2),
 )

 # Character translations
 # ----------------------
 # characters with preferred alternative in mathematical use
 # cf. https://www.w3.org/TR/MathML3/chapter7.html#chars.anomalous
 anomalous_chars = {
     '-': u'\u2212',  # HYPHEN-MINUS -> MINUS SIGN
     ':': u'\u2236',  # COLON -> RATIO
     '~': u'\u00a0',  # NO-BREAK SPACE
 }

 # blackboard bold (Greek characters not working with "mathvariant" in Firefox 78)
 mathbb = {
     # Greek letter: double-struck counterpart
     u'Γ': u'\u213E',  # ℾ
     u'Π': u'\u213F',  # ℿ
     u'Σ': u'\u2140',  # ⅀
     u'γ': u'\u213D',  # ℽ
     u'π': u'\u213C',  # ℼ
 }

 # Matrix environments
 matrices = {
     # environment name: (left fence, right fence)
     'matrix': ('', ''),
     'smallmatrix': ('', ''),  # smaller, see begin_environment()!
     'pmatrix': ('(', ')'),
     'bmatrix': ('[', ']'),
     'Bmatrix': ('{', '}'),
     'vmatrix': ('|', '|'),
     'Vmatrix': (u'\u2016', u'\u2016'),  # DOUBLE VERTICAL LINE
     'cases': ('{', ''),
 }

 # TeX layout style command -> MathML attributes
 layout_styles = {
     'displaystyle': dict(displaystyle=True, scriptlevel=0),
     'textstyle': dict(displaystyle=False, scriptlevel=0),
     'scriptstyle': dict(displaystyle=False, scriptlevel=1),
     'scriptscriptstyle': dict(displaystyle=False, scriptlevel=2),
 }
 # See also https://www.w3.org/TR/MathML3/chapter3.html#presm.scriptlevel

 # fraction command -> (style_attrs, frac_attrs)
 # The display/text variants share the dictionaries of `layout_styles`.
 fractions = {
     'frac': ({}, {}),
     # in LaTeX with padding
     'cfrac': (dict(displaystyle=True, scriptlevel=0, CLASS='cfrac'), {}),
     'dfrac': (layout_styles['displaystyle'], {}),
     'tfrac': (layout_styles['textstyle'], {}),
     'binom': ({}, dict(linethickness=0)),
     'dbinom': (layout_styles['displaystyle'], dict(linethickness=0)),
     'tbinom': (layout_styles['textstyle'], dict(linethickness=0)),
 }

 # delimiter sizes, indexed by the values of `bigdelimiters`
 delimiter_sizes = [
     '',
     '1.2em',
     '1.623em',
     '2.047em',
     '2.470em',
 ]
 # delimiter size command -> index into `delimiter_sizes`
 bigdelimiters = dict(
     left=0, right=0,
     bigl=1, bigr=1,
     Bigl=2, Bigr=2,
     biggl=3, biggr=3,
     Biggl=4, Biggr=4,
 )


 # MathML element classes
 # ----------------------

 class math(object):
     """Base class for MathML elements and root of MathML trees.

     Instances keep their child elements in the list `children` and their
     XML attributes in the ordered mapping `attributes`.
     The XML tag name is the name of the (sub)class.
     """

     nchildren = None
     """Expected number of children or None"""
     # cf. https://www.w3.org/TR/MathML3/chapter3.html#id.3.1.3.2
     parent = None
     """Parent node in MathML DOM tree."""
     _level = 0 # indentation level (static class variable)
     xml_entities = { # for invalid and invisible characters
                     ord('<'): u'&lt;',
                     ord('>'): u'&gt;',
                     ord('&'): u'&amp;',
                     0x2061:   u'&ApplyFunction;',
                    }
     _boolstrings = {True: 'true', False: 'false'}
     """String representation of boolean MathML attribute values."""

     html_tagname = 'span'
     """Tag name for HTML representation."""

     def __init__(self, *children, **attributes):
         """Set up node with `children` and `attributes`.

         Attributes are downcased: Use CLASS to set "class" value.
         >>> math(mn(3), CLASS='test')
         math(mn(3), class='test')
         >>> math(CLASS='test').toprettyxml()
         '<math class="test">\n</math>'

         """
         self.children = []
         self.extend(children)

         self.attributes = collections.OrderedDict()
         # sort attributes for predictable functional tests
         # as self.attributes.update(attributes) does not keep order in Python < 3.6
         for key in sorted(attributes.keys()):
             # Use .lower() to allow argument `CLASS` for attribute `class`
             # (Python keyword). MathML uses only lowercase attributes.
             self.attributes[key.lower()] = attributes[key]

     def __repr__(self):
         """Return a constructor-like representation of the node.

         Lists the children, the `data` of token elements, a pending
         `switch` flag, and all attributes that are not None.
         """
         content = [repr(item) for item in getattr(self, 'children', [])]
         if hasattr(self, 'data'):
             content.append(repr(self.data))
         if isinstance(self, MathSchema) and self.switch:
             content.append('switch=True')
         content += ["%s=%r"%(k, v) for k, v in self.attributes.items()
                     if v is not None]

         return self.__class__.__name__ + '(%s)' % ', '.join(content)

     def __len__(self):
         """Return the number of children."""
         return len(self.children)

     # emulate dictionary-like access to attributes
     # see `docutils.nodes.Element` for dict/list interface
     def __getitem__(self, key):
         """Return the value of attribute `key` (KeyError if missing)."""
         return self.attributes[key]

     def __setitem__(self, key, item):
         """Set attribute `key` to `item`."""
         self.attributes[key] = item

     def get(self, *args, **kwargs):
         """Return an attribute value, see `dict.get()`."""
         return self.attributes.get(*args, **kwargs)

     def full(self):
         """Return boolean indicating whether children may be appended."""
         return (self.nchildren is not None
                 and len(self) >= self.nchildren)

     def append(self, child):
         """Append child and return self or first non-full parent.

         If self is full, go up the tree and return first non-full node or
         `None`.

         Raise SyntaxError if the node is already full before appending.
         """
         if self.full():
             raise SyntaxError('Node %s already full!' % self)
         self.children.append(child)
         child.parent = self
         if self.full():
             return self.close()
         return self

     def extend(self, children):
         """Append all items of `children` and return self."""
         for child in children:
             self.append(child)
         return self

     def close(self):
         """Close element and return first non-full parent or None."""
         parent = self.parent
         while parent is not None and parent.full():
             parent = parent.parent
         return parent

     def toprettyxml(self):
         """Return XML representation of self as string."""
         return ''.join(self._xml())

     def _xml(self, level=0):
         """Return list of strings: start tag, body, and end tag."""
         return ([self.xml_starttag()]
                 + self._xml_body(level)
                 + ['</%s>' % self.__class__.__name__])

     def xml_starttag(self):
         """Return the XML start tag, including all attributes that are set.

         Attributes with value None are skipped.  Boolean values are written
         as "true"/"false" (see `_boolstrings`); every other value is
         inserted unchanged, so a string such as "TrueType" keeps its case.
         """
         parts = [self.__class__.__name__]
         for key, value in self.attributes.items():
             if value is None:
                 continue
             # Test the type instead of looking `value` up in `_boolstrings`:
             # 0 and 1 compare equal to False and True and would match, too.
             if isinstance(value, bool):
                 value = self._boolstrings[value]
             parts.append('%s="%s"' % (key, value))
         return '<%s>' % ' '.join(parts)

     def _xml_body(self, level=0):
         """Return list of strings with the indented XML of all children.

         Every child starts on a new line, indented by two spaces per
         nesting level; the list ends with the line break and indentation
         for the end tag.
         """
         xml = []
         for child in self.children:
             xml.extend(['\n', '  ' * (level+1)])
             xml.extend(child._xml(level+1))
         xml.extend(['\n', '  ' * level])
         return xml

 # >>> n2 = math(mn(2))
 # >>> n2
 # math(mn(2))
 # >>> n2.toprettyxml()
 # '<math>\n  <mn>2</mn>\n</math>'
 # >>> len(n2)
 # 1
 # >>> eq3 = math(id='eq3', display='block')
 # >>> eq3
 # math(display='block', id='eq3')
 # >>> eq3.toprettyxml()
 # '<math display="block" id="eq3">\n</math>'
 # >>> len(eq3)
 # 0
 # >>> math(CLASS='bold').xml_starttag()
 # '<math class="bold">'

 class mtable(math):
     """MathML <mtable> element."""

 # >>> mt = mtable(displaystyle=True)
 # >>> mt
 # mtable(displaystyle=True)
 # >>> math(mt).toprettyxml()
 # '<math>\n  <mtable displaystyle="true">\n  </mtable>\n</math>'

 class mrow(math):
     """Group sub-expressions as a horizontal row."""

     def close(self):
         """Close element and return first non-full parent or None.

         The <mrow> is removed from the tree in two cases:

         * The parent is a `MathRowSchema` instance expecting exactly one
           child (it treats its content as an inferred mrow): the parent
           takes over the children of this row.
         * The row holds exactly one child: the row is replaced by this
           child in the parent's list of children.  If the row cannot be
           looked up in a parent (AttributeError or ValueError), the
           single child is returned instead.
         """
         parent = self.parent
         if isinstance(parent, MathRowSchema) and parent.nchildren == 1:
             # Store the parent's current child count as its `nchildren`,
             # then hand the children of this row over to the parent.
             parent.nchildren = len(parent.children)
             parent.children = self.children
             for child in self.children:
                 child.parent = parent
             return parent.close()
         if len(self) == 1:
             try:
                 # replace the row with its only child
                 parent.children[parent.children.index(self)] = self.children[0]
                 self.children[0].parent = parent
             except (AttributeError, ValueError):
                 # AttributeError: `parent` has no `children` (e.g. is None),
                 # ValueError: this row is not in the parent's child list.
                 return self.children[0]
         return super(mrow, self).close()

 # >>> mrow(displaystyle=False)
 # mrow(displaystyle=False)

 # The elements <msqrt>, <mstyle>, <merror>, <mpadded>, <mphantom>, <menclose>,
 # <mtd>, <mscarry>, and <math> treat their contents as a single inferred mrow
 # formed from all their children.
 class MathRowSchema(math):
     """Base class for elements treating content as a single inferred mrow."""


 class mtr(MathRowSchema):
     """MathML <mtr> element."""


 class mtd(MathRowSchema):
     """MathML <mtd> element."""


 class menclose(MathRowSchema):
     """MathML <menclose> element."""

     # \boxed expects one argument or a group
     nchildren = 1


 class mphantom(MathRowSchema):
     """MathML <mphantom> element."""

     # \phantom expects one argument or a group
     nchildren = 1


 class msqrt(MathRowSchema):
     """MathML <msqrt> element."""

     # \sqrt expects one argument or a group
     nchildren = 1


 class mstyle(MathRowSchema):
     """MathML <mstyle> element."""

     # \mathrm, ... expect one argument or a group
     nchildren = 1


 class MathToken(math):
     """Token Element: contains textual data instead of children.

     Base class for mo, mi, and mn.
     """

     # token elements do not accept child elements
     nchildren = 0

     def __init__(self, data, **attributes):
         """Store `data` and set up the node with `attributes`."""
         self.data = data
         super(MathToken, self).__init__(**attributes)

     def _xml_body(self, level=0):
         """Return list with `data` as text, mapped via `xml_entities`."""
         text = unicode(self.data)
         return [text.translate(self.xml_entities)]


 class mtext(MathToken):
     """MathML <mtext> element."""


 class mi(MathToken):
     """MathML <mi> element."""


 class mo(MathToken):
     """MathML <mo> element."""


 class mn(MathToken):
     """MathML <mn> element."""

 # >>> mo(u'<')
 # mo('<')
 # >>> mo(u'<')._xml()
 # ['<mo>', '&lt;', '</mo>']

 class MathSchema(math):
     """Base class for schemata expecting 2 or more children.

     The special attribute `switch` indicates that the last two child
     elements are in reversed order and must be switched before XML-export.
     """

     nchildren = 2

     def __init__(self, *children, **kwargs):
         """Set up node; the keyword argument `switch` defaults to False."""
         # `switch` must be set first: the base class appends the children
         # and `append()` reads the flag.
         self.switch = kwargs.pop('switch', False)
         super(MathSchema, self).__init__(*children, **kwargs)

     def append(self, child):
         """Append `child`; put the children in order once the node is full.

         Return the new current node (see `math.append()`).
         """
         current_node = super(MathSchema, self).append(child)
         if not (self.switch and self.full()):
             return current_node
         # normalize order: exchange the last two children
         kids = self.children
         kids[-2], kids[-1] = kids[-1], kids[-2]
         self.switch = False
         return current_node


 class msub(MathSchema):
     """MathML <msub> element."""


 class msup(MathSchema):
     """MathML <msup> element."""


 class msubsup(MathSchema):
     """MathML <msubsup> element."""

     nchildren = 3

 # >>> msub(mi('x'), mo('-'))
 # msub(mi('x'), mo('-'))
 # >>> msubsup(mi('base'), mi('sub'), mi('super'))
 # msubsup(mi('base'), mi('sub'), mi('super'))
 # >>> msubsup(mi('base'), mi('super'), mi('sub'), switch=True)
 # msubsup(mi('base'), mi('sub'), mi('super'))

 class munder(msub):
     """MathML <munder> element."""


 class mover(msup):
     """MathML <mover> element."""

 # >>> munder(mi('lim'), mo('-'), accent=False)
 # munder(mi('lim'), mo('-'), accent=False)
 # >>> mu = munder(mo('-'), accent=False, switch=True)
 # >>> mu
 # munder(mo('-'), switch=True, accent=False)
 # >>> mu.append(mi('lim'))
 # >>> mu
 # munder(mi('lim'), mo('-'), accent=False)
 # >>> mu.append(mi('lim'))
 # Traceback (most recent call last):
 # SyntaxError: Node munder(mi('lim'), mo('-'), accent=False) already full!
 # >>> munder(mo('-'), mi('lim'), accent=False, switch=True).toprettyxml()
 # '<munder accent="false">\n  <mi>lim</mi>\n  <mo>-</mo>\n</munder>'

 class munderover(msubsup):
     """MathML <munderover> element."""


 class mroot(MathSchema):
     """MathML <mroot> element."""

     nchildren = 2


 class mfrac(math):
     """MathML <mfrac> element."""

     nchildren = 2


 class mspace(math):
     """MathML <mspace> element."""

     # no children are accepted
     nchildren = 0


 # LaTeX to MathML translation
 # ---------------------------

 # auxiliary functions
 # ~~~~~~~~~~~~~~~~~~~

 def tex_cmdname(string):
     """Return leading TeX command name and remainder of `string`.

     The command name is either a run of ASCII letters (spaces following
     the name are skipped) or, if `string` does not start with a letter,
     its first character.  The remainder is the complete unconsumed rest
     of `string`, including any line breaks.

     >>> tex_cmdname('mymacro2') # up to first non-letter
     ('mymacro', '2')
     >>> tex_cmdname('name 2') # strip trailing whitespace
     ('name', '2')
     >>> tex_cmdname('_2') # single non-letter character
     ('_', '2')

     """
     m = re.match(r'[a-zA-Z]+', string)
     if m is None:
         # no letters: the "name" is the first character (or empty)
         return string[:1], string[1:]
     # Slice the remainder instead of capturing it with "(.*)":
     # "." stops at a line break and would silently drop the following text.
     return m.group(), string[m.end():].lstrip(' ')

 # Test:
 #
 # >>> tex_cmdname('name_2') # first non-letter terminates
 # ('name', '_2')
 # >>> tex_cmdname(' next') # leading whitespace is returned
 # (' ', 'next')
 # >>> tex_cmdname('1 2') # whitespace after non-letter is kept
 # ('1', ' 2')
 # >>> tex_cmdname('') # empty string
 # ('', '')

 def tex_number(string):
     """Return leading number literal and remainder of `string`.

     A number literal is a run of digits, dots, and commas that ends with
     a digit.  If `string` does not start with one, the literal is empty
     and the remainder is the unchanged `string`.

     >>> tex_number('123.4')
     ('123.4', '')

     """
     m = re.match(r'[0-9.,]*[0-9]+', string)
     if m is None:
         return '', string
     # Slice the remainder instead of capturing it with "(.*)":
     # "." stops at a line break and would silently drop the following text.
     return m.group(), string[m.end():]

 # Test:
 #
 # >>> tex_number(' 23.4b') # leading whitespace -> no number
 # ('', ' 23.4b')
 # >>> tex_number('23,400/2') # comma separator included
 # ('23,400', '/2')
 # >>> tex_number('23. 4/2') # trailing separator not included
 # ('23', '. 4/2')
 # >>> tex_number('4, 2') # trailing separator not included
 # ('4', ', 2')
 # >>> tex_number('1 000.4')
 # ('1', ' 000.4')

 def tex_token(string):
     """Return first simple TeX token and remainder of `string`.

     A token is a TeX command (backslash + ASCII letters; whitespace after
     the command is skipped), a one-character command (backslash + any
     character), or the first character of `string`.
     For an empty `string`, token and remainder are empty.

     >>> tex_token('\\command{without argument}')
     ('\\command', '{without argument}')
     >>> tex_token('or first character')
     ('o', 'r first character')

     """
     # The remainder is sliced off after the match (instead of matching it
     # with ".*$") and "." may match a line break, so that strings
     # containing line breaks are split, too.  The last alternative matches
     # any string: `m` is never None.
     m = re.match(r"""(?P<cmd>\\[a-zA-Z]+)\s* # TeX command, skip whitespace
                      |(?P<chcmd>\\.)          # one-character TeX command
                      |(?P<ch>.?)              # first character (or empty)
                   """, string, re.VERBOSE | re.DOTALL)
     cmd, chcmd, ch = m.group('cmd', 'chcmd', 'ch')
     return cmd or chcmd or ch, string[m.end():]

 # Test:
 #
 # >>> tex_token('{opening bracket of group}')
 # ('{', 'opening bracket of group}')
 # >>> tex_token('\\skip whitespace after macro name')
 # ('\\skip', 'whitespace after macro name')
 # >>> tex_token('. but not after single char')
 # ('.', ' but not after single char')
 # >>> tex_token('') # empty string.
 # ('', '')
 # >>> tex_token('\{escaped bracket')
 # ('\\{', 'escaped bracket')

 def tex_group(string):
     """Return first TeX group or token and remainder of `string`.

     A group is text enclosed in (possibly nested) curly brackets; it is
     returned without the outer brackets.  Brackets escaped by a backslash
     are not counted.  If `string` does not start with "{", the first
     character is returned instead.

     Raise SyntaxError if the group is not closed.

     >>> tex_group('{first group} returned without brackets')
     ('first group', ' returned without brackets')

     """
     if string[:1] != '{':
         # special case: there is no group, return first token and remainder
         return string[:1], string[1:]

     depth = 0        # nesting level of {{nested} groups}
     escaped = False  # the current character follows a backslash
     for pos, char in enumerate(string):
         if escaped:
             escaped = False
         elif char == '\\':
             escaped = True
         elif char == '{':
             depth += 1
         elif char == '}':
             depth -= 1
         if depth == 0:
             # `char` is the bracket closing the outer group
             return string[1:pos], string[pos+1:]
     raise SyntaxError('Group without closing bracket')


 # >>> tex_group('{} empty group')
 # ('', ' empty group')
 # >>> tex_group('{group with {nested} group} ')
 # ('group with {nested} group', ' ')
 # >>> tex_group('{group with {nested group}} at the end')
 # ('group with {nested group}', ' at the end')
 # >>> tex_group('{{group} {with {{complex }nesting}} constructs}')
 # ('{group} {with {{complex }nesting}} constructs', '')
 # >>> tex_group('{group with \\{escaped\\} brackets}')
 # ('group with \\{escaped\\} brackets', '')
 # >>> tex_group('{group followed by closing bracket}} from outer group')
 # ('group followed by closing bracket', '} from outer group')
 # >>> tex_group('No group? Return first character.')
 # ('N', 'o group? Return first character.')
 # >>> tex_group(' {also whitespace}')
 # (' ', '{also whitespace}')


 def tex_token_or_group(string):
     """Return first TeX group or token and remainder of `string`.

     If the first token is an opening curly bracket, the complete group is
     returned (without the outer brackets), see `tex_group()`.
     Otherwise, the result is the one of `tex_token()`.

     >>> tex_token_or_group('\\command{without argument}')
     ('\\command', '{without argument}')
     >>> tex_token_or_group('first character')
     ('f', 'irst character')
     >>> tex_token_or_group(' also whitespace')
     (' ', 'also whitespace')
     >>> tex_token_or_group('{first group} keep rest')
     ('first group', ' keep rest')

     """
     token, rest = tex_token(string)
     if token != '{':
         return token, rest
     return tex_group(string.lstrip())

 # >>> tex_token_or_group('\{no group but left bracket')
 # ('\\{', 'no group but left bracket')

 def tex_optarg(string):
     """Return optional argument and remainder.

     The optional argument is text in square brackets (without nested
     square brackets), optionally preceded by whitespace.  It is returned
     without the brackets.  If `string` has no optional argument, the
     argument is empty and the remainder is the unchanged `string`.

     Raise SyntaxError if `string` starts with "[" but no optional argument
     can be extracted (missing right bracket or nested brackets).

     >>> tex_optarg('[optional argument] returned without brackets')
     ('optional argument', ' returned without brackets')
     >>> tex_optarg('{empty string, if there is no optional arg}')
     ('', '{empty string, if there is no optional arg}')

     """
     # The remainder is sliced off after the match: matching it with ".*$"
     # fails if it contains a line break.
     m = re.match(r"""\s*                       # leading whitespace
                  \[(?P<optarg>(\\]|[^\[\]])*)\] # [group] without nested groups
                  """, string, re.VERBOSE)
     if m is not None:
         return m.group('optarg'), string[m.end():]
     if string.startswith('['):
         raise SyntaxError('Could not extract optional argument from %r' % string)
     return '', string

 # Test:
 # >>> tex_optarg(' [optional argument] after whitespace')
 # ('optional argument', ' after whitespace')
 # >>> tex_optarg('[missing right bracket')
 # Traceback (most recent call last):
 # SyntaxError: Could not extract optional argument from '[missing right bracket'
 # >>> tex_optarg('[group with [nested group]]')
 # Traceback (most recent call last):
 # SyntaxError: Could not extract optional argument from '[group with [nested group]]'


 def parse_latex_math(node, string):
     r"""Append MathML conversion of `string` to `node` and return it.

     `string` is consumed from left to right.  Every character (or macro)
     is converted to a MathML element and appended to the current node.
     The current node changes while parsing: `append()` and `close()`
     return the node that takes the next element.

     Return the `node` originally passed in.

     Raise SyntaxError for unsupported characters.

     >>> parse_latex_math(math(), r'\alpha')
     math(mi('α'))
     >>> parse_latex_math(mrow(), r'x_{n}')
     mrow(msub(mi('x'), mi('n')))

     """
     # Normalize white-space:
     string = ' '.join(string.split())
     # keep a reference to the start node (`node` is re-assigned below)
     tree = node

     while len(string) > 0:
         # Take off first character:
         c, string = string[0], string[1:]

         if c == ' ':
             continue  # whitespace is ignored in LaTeX math mode
         if c == '\\': # start of a LaTeX macro
             cmdname, string = tex_cmdname(string)
             # handle_cmd() returns new current node and remaining string
             node, string = handle_cmd(cmdname, node, string)
         elif c in "_^":
             # sub-/superscript or limit, handled by a helper
             # that is defined outside this function
             node = handle_script_or_limit(node, c)
         elif c == '{':
             # start of a group: collect the content in a new <mrow>
             new_node = mrow()
             node.append(new_node)
             node = new_node
         elif c == '}':
             # end of a group: continue with the node returned by close()
             node = node.close()
         elif c == '&':
             # close the current node and
             # append a new <mtd> to the node returned by close()
             new_node = mtd()
             node.close().append(new_node)
             node = new_node
         elif c.isalpha():
             node = node.append(mi(c))
         elif c.isdigit():
             # fetch the remaining characters of the number literal
             number, string = tex_number(string)
             node = node.append(mn(c+number))
         elif c in anomalous_chars:
             # characters with a special meaning in LaTeX math mode
             # fix spacing before "unary" minus.
             attributes = {}
             if c == '-' and node.children:
                 previous_node = node.children[-1]
                 # The default '-' is not in '([=':
                 # nodes without `data` do not pass the first test.
                 if (getattr(previous_node, 'data', '-') in '([='
                     or previous_node.get('class') == 'mathopen'):
                     attributes['form'] = 'prefix'
             node = node.append(mo(anomalous_chars[c], **attributes))
         elif c in "/()[]|":
             # fences and slash are not stretched
             node = node.append(mo(c, stretchy=False))
         elif c in "+*=<>,.!?`';@":
             node = node.append(mo(c))
         else:
             raise SyntaxError(u'Unsupported character: "%s"' % c)
     return tree

 # Test:

 # >>> print(parse_latex_math(math(), ''))
 # math()
 # >>> parse_latex_math(math(), ' \\sqrt{ \\alpha}')
 # math(msqrt(mi('α')))
 # >>> parse_latex_math(math(), '23.4x')
 # math(mn('23.4'), mi('x'))
 # >>> parse_latex_math(math(), '\\sqrt 2 \\ne 3')
 # math(msqrt(mn('2')), mo('≠'), mn('3'))
 # >>> parse_latex_math(math(), '\\sqrt{2 + 3} < 3')
 # math(msqrt(mn('2'), mo('+'), mn('3')), mo('<'), mn('3'))
 # >>> parse_latex_math(math(), '\\sqrt[3]{2 + 3}')
 # math(mroot(mrow(mn('2'), mo('+'), mn('3')), mn('3')))
 # >>> parse_latex_math(math(), '\max_x') # function takes limits
 # math(munder(mo('max', movablelimits=True), mi('x')))
 # >>> parse_latex_math(math(), 'x^j_i') # ensure correct order: base, sub, sup
 # math(msubsup(mi('x'), mi('i'), mi('j')))
 # >>> parse_latex_math(math(), '\int^j_i') # ensure correct order
 # math(msubsup(mo('∫'), mi('i'), mi('j')))
 # >>> parse_latex_math(math(), 'x_{\\alpha}')
 # math(msub(mi('x'), mi('α')))
 # >>> parse_latex_math(math(), 'x_\\text{in}')
 # math(msub(mi('x'), mtext('in')))

 def handle_cmd(name, node, string):
     r"""Process LaTeX command `name` followed by `string`.

     Append result to `node`.
     If needed, parse `string` for command argument.
     Return new current node and remainder of `string`:

     >>> handle_cmd('hbar', math(), r' \frac')
     (math(mi('ℏ')), ' \\frac')
     >>> handle_cmd('hspace', math(), r'{1ex} (x)')
     (math(mspace(width='1ex')), ' (x)')

     `name` is the command name without the leading backslash.
     The returned node is where parsing continues: for commands that
     take arguments from the following code (``\sqrt``, ``\frac``,
     accents, ...) it is the newly created element.

     Raise `SyntaxError` for unknown commands, unsupported delimiters,
     operands that ``\not`` cannot negate, and misplaced style declarations.
     """

     # Token elements
     # ==============

     # identifier  ->  <mi>

     if name in letters:
         new_node = mi(letters[name])
         if name in greek_capitals:
             # upright in "TeX style" but MathML sets them italic ("ISO style").
             # CSS styling does not change the font style in Firefox 78.
             # Use 'mathvariant="normal"'?
             new_node['class'] = 'capital-greek'
         node = node.append(new_node)
         return node, string

     if name in functions:
         # use <mi> followed by invisible function applicator character
         # (see https://www.w3.org/TR/MathML3/chapter3.html#presm.mi)
         if name == 'operatorname':
             # custom function name, e.g. ``\operatorname{abs}(x)``
             # TODO: \operatorname* -> with limits
             arg, string = tex_token_or_group(string)
             new_node = mi(arg, mathvariant='normal')
         else:
             new_node = mi(functions[name])
         # embellished function names:
         if name == 'varliminf':    # \underline\lim
             new_node = munder(new_node, mo(u'_'))
         elif name == 'varlimsup':  # \overline\lim
             new_node = mover(new_node, mo(u'¯'), accent=False)
         elif name == 'varprojlim': # \underleftarrow\lim
             new_node = munder(new_node, mo(u'\u2190'))
         elif name == 'varinjlim':  # \underrightarrow\lim
             new_node = munder(new_node, mo(u'\u2192'))

         node = node.append(new_node)
         # add ApplyFunction when appropriate (not \sin^2(x), say)
         # cf. https://www.w3.org/TR/MathML3/chapter3.html#presm.mi
         if string and string[0] not in ('^', '_'):
             node = node.append(mo(u'\u2061')) # &ApplyFunction;
         return node, string

     if name in math_alphabets:
         if name == 'boldsymbol':
             attributes = {'class': 'boldsymbol'}
         else:
             attributes = {'mathvariant': math_alphabets[name]}
         if name == 'mathscr':
             attributes['class'] = 'mathscr'
         # Check for single symbol (letter, name, or ⅀)
         arg, remainder = tex_token_or_group(string)
         if arg.startswith('\\'):
             # convert single letters (so the isalpha() test below works).
             # TODO: convert all LICRs in a group (\mathrm{\mu\Omega})
             arg = letters.get(arg[1:], arg)
         if name == 'mathbb':
             # mathvariant="double-struck" is ignored for Greek letters
             # (tested in Firefox 78). Use literal Unicode characters.
             arg = mathbb.get(arg, arg)
         if arg.isalpha() or arg == u'\u2140':
             node = node.append(mi(arg, **attributes))
             return node, remainder
         # Wrap in <mstyle>: return the unconsumed `string` so that the
         # argument is parsed into the new style element by the caller.
         style = mstyle(**attributes)
         node.append(style)
         return style, string


     # operator, fence, or separator  ->  <mo>

     if name == 'colon': # trailing punctuation, not binary relation
         node = node.append(mo(':', form='postfix', lspace='0', rspace='0.28em'))
         return node, string

     if name == 'idotsint':
         # raw string: "\i" and "\d" are no valid string escape sequences
         node = parse_latex_math(node, r'\int\dotsi\int')
         return node, string

     if name in thick_operators:
         node = node.append(mo(thick_operators[name], style='font-weight: bold'))
         return node, string

     if name in small_operators:
         node = node.append(mo(small_operators[name], mathsize='75%'))
         return node, string

     if name in operators:
         attributes = {}
         if name in movablelimits and string and string[0] in ' _^':
             attributes['movablelimits'] = True
         elif name in ('lvert', 'lVert'):
             attributes['class'] = 'mathopen'
         node = node.append(mo(operators[name], **attributes))
         return node, string

     if name in bigdelimiters:
         delimiter_attributes = {}
         size = delimiter_sizes[bigdelimiters[name]]
         delimiter, string = tex_token_or_group(string)
         if delimiter not in '()[]/|.':
             try:
                 delimiter = stretchables[delimiter.lstrip('\\')]
             except KeyError:
                 raise SyntaxError(u'Unsupported "\\%s" delimiter "%s"!'
                                   % (name, delimiter))
         if size:
             delimiter_attributes['maxsize'] = size
             delimiter_attributes['minsize'] = size
             delimiter_attributes['symmetric'] = True
         # opening delimiters start a new <mrow> ...
         if name == 'left' or name.endswith('l'):
             row = mrow()
             node.append(row)
             node = row
         if delimiter != '.': # '.' stands for "empty delimiter"
             node.append(mo(delimiter, **delimiter_attributes))
         # ... that is closed again after the closing delimiter
         if name == 'right' or name.endswith('r'):
             node = node.close()
         return node, string

     if name == 'not':
         arg, string = tex_token(string)
         if arg == '{':
             # move the \not into the group and let the caller re-parse
             return node, '{\\not ' + string
         if arg.startswith('\\'): # LaTeX macro
             try:
                 arg = operators[arg[1:]]
             except KeyError:
                 raise SyntaxError(u'\\not: Cannot negate: "%s"!'%arg)
         # combine with U+0338 COMBINING LONG SOLIDUS OVERLAY,
         # use the precomposed character if there is one
         arg = unicodedata.normalize('NFC', arg+u'\u0338')
         node = node.append(mo(arg))
         return node, string

     # arbitrary text (usually comments)  ->  <mtext>
     if name in ('text', 'mbox', 'textrm'):
         arg, string = tex_token_or_group(string)
         parts = arg.split('$') # extract inline math
         for i, part in enumerate(parts):
             if i % 2 == 0: # i is even
                 # replace leading/trailing space with no-break space
                 part = re.sub('(^ | $)', u'\u00a0', part)
                 node = node.append(mtext(part))
             else:
                 parse_latex_math(node, part)
         return node, string

     # horizontal space -> <mspace>
     if name in spaces:
         node = node.append(mspace(width='%s'%spaces[name]))
         return node, string

     if name in ('hspace', 'mspace'):
         arg, string = tex_group(string)
         if arg.endswith('mu'):
             # convert math units to em (18 mu = 1 em)
             arg = '%sem' % (float(arg[:-2])/18)
         node = node.append(mspace(width='%s'%arg))
         return node, string

     if name == 'phantom':
         new_node = mphantom()
         node.append(new_node)
         return new_node, string

     if name == 'boxed':
         new_node = menclose(notation='box')
         node.append(new_node)
         return new_node, string


     # Complex elements (Layout schemata)
     # ==================================

     if name == 'sqrt':
         radix, string = tex_optarg(string)
         if radix:
             indexnode = mrow()
             new_node = mroot(indexnode, switch=True)
             parse_latex_math(indexnode, radix)
             indexnode.close()
         else:
             new_node = msqrt()
         node.append(new_node)
         return new_node, string

     if name in fractions:
         (style_atts, frac_atts) = fractions[name]
         if name == 'cfrac':
             optarg, string = tex_optarg(string)
             optargs = {'l': 'left', 'r': 'right'}
             if optarg in optargs:
                 # copy: do not modify the shared `fractions` entry
                 frac_atts = frac_atts.copy()
                 frac_atts['numalign'] = optargs[optarg] # "numalign" is deprecated
                 frac_atts['class'] = 'numalign-' + optargs[optarg]
         new_node = frac = mfrac(**frac_atts)
         if name.endswith('binom'):
             new_node = mrow(mo('('), new_node, mo(')'), CLASS='binom')
             new_node.nchildren = 3
         if style_atts:
             new_node = mstyle(new_node, **style_atts)
         node.append(new_node)
         # the (possibly wrapped) <mfrac> takes the following arguments
         return frac, string

     if name == '\\': # end of a row
         entry = mtd()
         new_node = mtr(entry)
         node.close().close().append(new_node)
         return entry, string

     if name in accents:
         new_node = mover(mo(accents[name][0], stretchy=False), switch=True)
         if name == 'vec':
             new_node.children[0]['accent'] = False # scale down arrow but drop i-dot
         new_node.tex_cmd = name # for HTML export
         node.append(new_node)
         return new_node, string

     if name in over:
         # set "accent" to False (otherwise dots on i and j are dropped)
         # but to True on accent node get "textstyle" (full size) symbols on top
         new_node = mover(mo(over[name][0], accent=True),
                          switch=True, accent=False)
         new_node.tex_cmd = name # for HTML export
         node.append(new_node)
         return new_node, string

     if name == 'overset':
         new_node = mover(switch=True)
         node.append(new_node)
         return new_node, string

     if name in under:
         new_node = munder(mo(under[name][0]), switch=True)
         new_node.tex_cmd = name # for HTML export
         node.append(new_node)
         return new_node, string

     if name == 'underset':
         new_node = munder(switch=True)
         node.append(new_node)
         return new_node, string

     if name in ('xleftarrow', 'xrightarrow'):
         subscript, string = tex_optarg(string)
         base = mo(operators['long'+name[1:]])
         if subscript:
             new_node = munderover(base)
             sub_node = parse_latex_math(mrow(), subscript)
             if len(sub_node) == 1:
                 # no <mrow> required around a single element
                 sub_node = sub_node.children[0]
             new_node.append(sub_node)
         else:
             new_node = mover(base)
         node.append(new_node)
         return new_node, string

     if name in layout_styles: # 'displaystyle', 'textstyle', ...
         new_node = mstyle(**layout_styles[name])
         new_node.nchildren = None
         if isinstance(node, mrow) and len(node) == 0:
             # replace node with new_node
             node.parent.children[node.parent.children.index(node)] = new_node
             new_node.parent = node.parent
         elif node.__class__.__name__ == 'math':
             node.append(new_node)
         else:
             raise SyntaxError(u'Declaration "\\%s" must be first command '
                               u'in a group.' % name)
         return new_node, string

     if name.endswith('limits'):
         arg, remainder = tex_token(string)
         # Compare with the exact characters: a substring test
         # (``arg in '_^'``) is also true for an empty `arg`.
         if arg in ('_', '^'): # else ignore
             string = remainder
             node = handle_script_or_limit(node, arg, limits=name)
         return node, string

     # Environments

     if name == 'begin':
         return begin_environment(node, string)

     if name == 'end':
         return end_environment(node, string)

     raise SyntaxError(u'Unknown LaTeX command: ' + name)

 # >>> handle_cmd('left', math(), '[a\\right]')
 # (mrow(mo('[')), 'a\\right]')
 # >>> handle_cmd('left', math(), '. a)') # empty \left
 # (mrow(), ' a)')
 # >>> handle_cmd('left', math(), '\\uparrow a)') # cmd
 # (mrow(mo('↑')), 'a)')
 # >>> handle_cmd('not', math(), '\\equiv \\alpha)') # cmd
 # (math(mo('≢')), '\\alpha)')
 # >>> handle_cmd('text', math(), '{ for } i>0') # group
 # (math(mtext('\xa0for\xa0')), ' i>0')
 # >>> handle_cmd('text', math(), '{B}T') # group
 # (math(mtext('B')), 'T')
 # >>> handle_cmd('text', math(), '{number of apples}}') # group
 # (math(mtext('number of apples')), '}')
 # >>> handle_cmd('text', math(), 'i \\sin(x)') # single char
 # (math(mtext('i')), ' \\sin(x)')
 # >>> handle_cmd('sin', math(), '(\\alpha)')
 # (math(mi('sin'), mo('\u2061')), '(\\alpha)')
 # >>> handle_cmd('sin', math(), ' \\alpha')
 # (math(mi('sin'), mo('\u2061')), ' \\alpha')
 # >>> handle_cmd('operatorname', math(), '{abs}(x)')
 # (math(mi('abs', mathvariant='normal'), mo('\u2061')), '(x)')
 # >>> handle_cmd('mathrm', math(), '\\alpha')
 # (math(mi('α', mathvariant='normal')), '')
 # >>> handle_cmd('mathrm', math(), '{out} = 3')
 # (math(mi('out', mathvariant='normal')), ' = 3')
 # >>> handle_cmd('overline', math(), '{981}')
 # (mover(mo('¯', accent=True), switch=True, accent=False), '{981}')
 # >>> handle_cmd('bar', math(), '{x}')
 # (mover(mo('ˉ', stretchy=False), switch=True), '{x}')
 # >>> handle_cmd('xleftarrow', math(), r'[\alpha]{10}')
 # (munderover(mo('⟵'), mi('α')), '{10}')
 # >>> handle_cmd('xleftarrow', math(), r'[\alpha=5]{10}')
 # (munderover(mo('⟵'), mrow(mi('α'), mo('='), mn('5'))), '{10}')

 def handle_script_or_limit(node, c, limits=''):
     """Append script or limit element to `node`.

     The last child of `node` is removed and becomes the base of a new
     sub-/superscript or under-/over-script element that is appended to
     `node` in its place.  If `node` has no children (script without
     base), an empty ``mrow`` is used as base.

     `c` is the script character: '_' (sub/under) or '^' (super/over).
     `limits` is the name of a preceding "limits" command or '':
     'limits' and 'movablelimits' select under/over placement, ''
     follows the "movablelimits" attribute of the base, any other
     value selects sub-/superscript placement.

     Return the new element.
     Raise `ValueError` if `c` is not a script character.
     """
     # Validate first, so that `node` is not modified on invalid input.
     if c not in ('_', '^'):
         raise ValueError(u'Script character must be "_" or "^", not "%s"'
                          % c)
     if node.children:
         child = node.children.pop()
     else:
         child = mrow()  # empty base
     if limits == 'limits':
         child['movablelimits'] = False
     elif (limits == 'movablelimits'
           or getattr(child, 'data', '') in movablelimits):
         child['movablelimits'] = True

     # place script below/above (True) or in index position (False)?
     as_limit = (limits in ('limits', 'movablelimits')
                 or limits == ''
                 and child.get('movablelimits', None) == True)  # noqa: E712

     if c == '_':
         # a base that already has an upper script gets a combined element
         if isinstance(child, mover):
             new_node = munderover(*child.children, switch=True)
         elif isinstance(child, msup):
             new_node = msubsup(*child.children, switch=True)
         elif as_limit:
             new_node = munder(child)
         else:
             new_node = msub(child)
     else:  # c == '^'
         # a base that already has a lower script gets a combined element
         if isinstance(child, munder):
             new_node = munderover(*child.children)
         elif isinstance(child, msub):
             new_node = msubsup(*child.children)
         elif as_limit:
             new_node = mover(child)
         else:
             new_node = msup(child)
     node.append(new_node)
     return new_node


 def begin_environment(node, string):
     """Start the environment named by the leading group of `string`.

     Append an ``mtable`` with one row and one entry to `node`,
     wrapped in an ``mrow`` with the left delimiter (if the matrix
     type has one) or in an ``mstyle`` (for "smallmatrix").

     Return the first table entry and the remainder of `string`.
     Raise `SyntaxError` if the environment is not in `matrices`.
     """
     name, string = tex_group(string)
     if name not in matrices:
         raise SyntaxError(u'Environment not supported!')

     table_atts = {}
     wrapper = None
     opening = matrices[name][0]
     if opening:
         if name == 'cases':
             wrapper = mrow(mo(opening, rspace='0.17em'))
             table_atts['columnalign'] = 'left'
         else:
             wrapper = mrow(mo(opening))
     elif name == 'smallmatrix':
         table_atts['rowspacing'] = '0.02em'
         table_atts['columnspacing'] = '0.333em'
         wrapper = mstyle(scriptlevel='1')
     if wrapper is not None:
         node.append(wrapper)
         node = wrapper
     # TODO: aligned, alignedat
     # take an optional [t], [b] or the default [c]
     first_entry = mtd()
     node.append(mtable(mtr(first_entry), **table_atts))
     return first_entry, string


 def end_environment(node, string):
     """Finish the environment named by the leading group of `string`.

     Close the current table entry, row, and table; append the right
     delimiter (if the matrix type has one) and close the wrapping row.

     Return the new current node and the remainder of `string`.
     Raise `SyntaxError` if the environment is not in `matrices`.
     """
     name, string = tex_group(string)
     if name not in matrices:
         raise SyntaxError(u'Environment not supported!')

     for _ in range(3):  # close: mtd, mtr, mtable
         node = node.close()
     closing = matrices[name][1]
     if closing:
         node = node.append(mo(closing)).close()
     elif name == 'cases':
         # no right delimiter, but the row with the left one is still open
         node = node.close()
     return node, string


 def tex_equation_columns(rows):
     """Return the number of "equation columns" in `rows`.

     `rows` is an iterable of strings with the TeX code of one table row
     each.  Alignment tabs are "&" characters that are not part of
     an escaped ampersand (``\\&``).  The row with most tabs counts.
     Return 0 if there are no rows or no tabs.

     cf. "alignat" in
     http://mirror.ctan.org/macros/latex/required/amsmath/amsldoc.pdf
     """
     # every r'\&' also matches '&', so subtract the escaped ones
     tab_counts = [row.count('&') - row.count(r'\&') for row in rows]
     tabs = max(tab_counts) if tab_counts else 0
     if tabs == 0:
         return 0
     # an equation column takes two table columns (i.e. two tabs,
     # except for the first one)
     return tabs // 2 + 1

 # >>> tex_equation_columns(['a = b'])
 # 0
 # >>> tex_equation_columns(['a &= b'])
 # 1
 # >>> tex_equation_columns([r'a &= b & a \in S'])
 # 2
 # >>> tex_equation_columns(['a &= b & c &= d'])
 # 2


 def align_attributes(rows):
     """Return attributes to style an <mtable> as "align" environment.

     `rows` is an iterable of strings with the TeX code of one table row
     each.  The result is a dictionary with "class" and "displaystyle".
     If a row contains alignment tabs ("&" that is not part of ``\\&``),
     "columnalign" and "columnspacing" are added: columns are alternately
     right and left aligned, with a 2em gap after each pair of columns.
     Without rows, only the basic attributes are returned.
     """
     atts = {'class': 'align',
             'displaystyle': True}
     # every r'\&' also matches '&', so subtract the escaped ones
     tab_counts = [row.count('&') - row.count(r'\&') for row in rows]
     tabs = max(tab_counts) if tab_counts else 0
     if tabs:
         aligns = ['right', 'left'] * tabs
         spacing = ['0', '2em'] * tabs
         # `tabs` tabs separate `tabs + 1` columns with `tabs` gaps
         atts['columnalign'] = ' '.join(aligns[:tabs+1])
         atts['columnspacing'] = ' '.join(spacing[:tabs])
     return atts

 # >>> align_attributes(['a = b'])
 # {'class': 'align', 'displaystyle': True}
 # >>> align_attributes(['a &= b'])
 # {'class': 'align', 'displaystyle': True, 'columnalign': 'right left', 'columnspacing': '0'}
 # >>> align_attributes([r'a &= b & a \in S'])
 # {'class': 'align', 'displaystyle': True, 'columnalign': 'right left right', 'columnspacing': '0 2em'}
 # >>> align_attributes(['a &= b & c &= d'])
 # {'class': 'align', 'displaystyle': True, 'columnalign': 'right left right left', 'columnspacing': '0 2em 0'}


 def tex2mathml(tex_math, inline=True):
     """Convert the LaTeX math code `tex_math` to a MathML string.

     With `inline` False, the formula is marked up as displayed math
     (``display="block"``).  Displayed math with more than one row
     (top-level ``\\\\``) is set in a table styled like an "align*"
     environment.
     """
     root = math(xmlns='http://www.w3.org/1998/Math/MathML')
     target = root  # the element that takes the parsed content
     if not inline:
         root['display'] = 'block'
         rows = toplevel_code(tex_math).split(r'\\')
         if len(rows) > 1:
             # emulate align* environment with a math table
             target = mtd()
             table = mtable(mtr(target), **align_attributes(rows))
             root.append(table)
     parse_latex_math(target, tex_math)
     return root.toprettyxml()

 # >>> print(tex2mathml('3'))
 # <math xmlns="http://www.w3.org/1998/Math/MathML">
 #   <mn>3</mn>
 # </math>
 # >>> print(tex2mathml('3', inline=False))
 # <math xmlns="http://www.w3.org/1998/Math/MathML" display="block">
 #   <mn>3</mn>
 # </math>
 # >>> print(tex2mathml(r'a & b \\ c & d', inline=False))
 # <math xmlns="http://www.w3.org/1998/Math/MathML" display="block">
 #   <mtable class="align" columnalign="right left" columnspacing="0" displaystyle="true">
 #     <mtr>
 #       <mtd>
 #         <mi>a</mi>
 #       </mtd>
 #       <mtd>
 #         <mi>b</mi>
 #       </mtd>
 #     </mtr>
 #     <mtr>
 #       <mtd>
 #         <mi>c</mi>
 #       </mtd>
 #       <mtd>
 #         <mi>d</mi>
 #       </mtd>
 #     </mtr>
 #   </mtable>
 # </math>
 # >>> print(tex2mathml(r'a \\ b', inline=False))
 # <math xmlns="http://www.w3.org/1998/Math/MathML" display="block">
 #   <mtable class="align" displaystyle="true">
 #     <mtr>
 #       <mtd>
 #         <mi>a</mi>
 #       </mtd>
 #     </mtr>
 #     <mtr>
 #       <mtd>
 #         <mi>b</mi>
 #       </mtd>
 #     </mtr>
 #   </mtable>
 # </math>


 # TODO: look up more symbols from tr25, e.g.
 #
 #
 # Table 2.8 Using Vertical Line or Solidus Overlay
 #   some of the negated forms of mathematical relations that can only be
 #   encoded by using either U+0338 COMBINING LONG SOLIDUS OVERLAY or U+20D2
 #   COMBINING LONG VERTICAL LINE OVERLAY. (For issues with using U+0338 in
 #   MathML, see Section 3.2.7, Combining Marks.)
 #
 # Table 2.9 Variants of Mathematical Symbols using VS1?
 #
 # Sequence      Description
 # 0030 + VS1    DIGIT ZERO - short diagonal stroke form
 # 2205 + VS1    EMPTY SET - zero with long diagonal stroke overlay form
 # 2229 + VS1    INTERSECTION - with serifs
 # 222A + VS1    UNION - with serifs
 # 2268 + VS1    LESS-THAN BUT NOT EQUAL TO - with vertical stroke
 # 2269 + VS1    GREATER-THAN BUT NOT EQUAL TO - with vertical stroke
 # 2272 + VS1    LESS-THAN OR EQUIVALENT TO - following the slant of the lower leg
 # 2273 + VS1    GREATER-THAN OR EQUIVALENT TO - following the slant of the lower leg
 # 228A + VS1    SUBSET OF WITH NOT EQUAL TO - variant with stroke through bottom members
 # 228B + VS1    SUPERSET OF WITH NOT EQUAL TO - variant with stroke through bottom members
 # 2293 + VS1    SQUARE CAP - with serifs
 # 2294 + VS1    SQUARE CUP - with serifs
 # 2295 + VS1    CIRCLED PLUS - with white rim
 # 2297 + VS1    CIRCLED TIMES - with white rim
 # 229C + VS1    CIRCLED EQUALS - equal sign inside and touching the circle
 # 22DA + VS1    LESS-THAN slanted EQUAL TO OR GREATER-THAN
 # 22DB + VS1    GREATER-THAN slanted EQUAL TO OR LESS-THAN
 # 2A3C + VS1    INTERIOR PRODUCT - tall variant with narrow foot
 # 2A3D + VS1    RIGHTHAND INTERIOR PRODUCT - tall variant with narrow foot
 # 2A9D + VS1    SIMILAR OR LESS-THAN - following the slant of the upper leg
 # 2A9E + VS1    SIMILAR OR GREATER-THAN - following the slant of the upper leg
 # 2AAC + VS1    SMALLER THAN OR slanted EQUAL
 # 2AAD + VS1    LARGER THAN OR slanted EQUAL
 # 2ACB + VS1    SUBSET OF ABOVE NOT EQUAL TO - variant with stroke through bottom members
 # 2ACC + VS1    SUPERSET OF ABOVE NOT EQUAL TO - variant with stroke through bottom members