| # -*- coding: utf-8 -*- |
| # |
| # Copyright (C) 2003-2009 Edgewall Software |
| # Copyright (C) 2003-2004 Jonas Borgström <jonas@edgewall.com> |
| # Copyright (C) 2006 Matthew Good <trac@matt-good.net> |
| # Copyright (C) 2005-2006 Christian Boos <cboos@edgewall.org> |
| # All rights reserved. |
| # |
| # This software is licensed as described in the file COPYING, which |
| # you should have received as part of this distribution. The terms |
| # are also available at http://trac.edgewall.org/wiki/TracLicense. |
| # |
| # This software consists of voluntary contributions made by many |
| # individuals. For the exact contribution history, see the revision |
| # history and logs, available at http://trac.edgewall.org/log/. |
| # |
| # Author: Jonas Borgström <jonas@edgewall.com> |
| # Matthew Good <trac@matt-good.net> |
| # Christian Boos <cboos@edgewall.org> |
| |
| import __builtin__ |
| import locale |
| import os |
| import re |
| import sys |
| import textwrap |
| from urllib import quote, quote_plus, unquote |
| from unicodedata import east_asian_width |
| |
| from trac.util.translation import _ |
| |
| |
| CRLF = '\r\n' |
| |
| class Empty(unicode): |
| """A special tag object evaluating to the empty string""" |
| __slots__ = [] |
| |
| empty = Empty() |
| |
| del Empty # shouldn't be used outside of Trac core |
| |
| |
| # -- Unicode |
| |
| def to_unicode(text, charset=None): |
| """Convert input to an `unicode` object. |
| |
| For a `str` object, we'll first try to decode the bytes using the given |
| `charset` encoding (or UTF-8 if none is specified), then we fall back to |
| the latin1 encoding which might be correct or not, but at least preserves |
| the original byte sequence by mapping each byte to the corresponding |
| unicode code point in the range U+0000 to U+00FF. |
| |
| For anything else, a simple `unicode()` conversion is attempted, |
| with special care taken with `Exception` objects. |
| """ |
| if isinstance(text, str): |
| try: |
| return unicode(text, charset or 'utf-8') |
| except UnicodeDecodeError: |
| return unicode(text, 'latin1') |
| elif isinstance(text, Exception): |
| # two possibilities for storing unicode strings in exception data: |
| try: |
| # custom __str__ method on the exception (e.g. PermissionError) |
| return unicode(text) |
| except UnicodeError: |
| # unicode arguments given to the exception (e.g. parse_date) |
| return ' '.join([to_unicode(arg) for arg in text.args]) |
| return unicode(text) |
| |
| |
| def exception_to_unicode(e, traceback=False): |
| """Convert an `Exception` to an `unicode` object. |
| |
| In addition to `to_unicode`, this representation of the exception |
| also contains the class name and optionally the traceback. |
| """ |
| message = '%s: %s' % (e.__class__.__name__, to_unicode(e)) |
| if traceback: |
| from trac.util import get_last_traceback |
| traceback_only = get_last_traceback().split('\n')[:-2] |
| message = '\n%s\n%s' % (to_unicode('\n'.join(traceback_only)), message) |
| return message |
| |
| |
| def path_to_unicode(path): |
| """Convert a filesystem path to unicode, using the filesystem encoding.""" |
| if isinstance(path, str): |
| try: |
| return unicode(path, sys.getfilesystemencoding()) |
| except UnicodeDecodeError: |
| return unicode(path, 'latin1') |
| return unicode(path) |
| |
| |
| _ws_leading_re = re.compile(ur'\A[\s\u200b]+', re.UNICODE) |
| _ws_trailing_re = re.compile(ur'[\s\u200b]+\Z', re.UNICODE) |
| |
| def stripws(text, leading=True, trailing=True): |
| """Strips unicode white-spaces and ZWSPs from ``text``. |
| |
| :param leading: strips leading spaces from ``text`` unless ``leading`` is |
| `False`. |
| :param trailing: strips trailing spaces from ``text`` unless ``trailing`` |
| is `False`. |
| """ |
| if leading: |
| text = _ws_leading_re.sub('', text) |
| if trailing: |
| text = _ws_trailing_re.sub('', text) |
| return text |
| |
| |
| def strip_line_ws(text, leading=True, trailing=True): |
| """Strips unicode white-spaces and ZWSPs from each line of ``text``. |
| |
| :param leading: strips leading spaces from ``text`` unless ``leading`` is |
| `False`. |
| :param trailing: strips trailing spaces from ``text`` unless ``trailing`` |
| is `False`. |
| """ |
| lines = re.compile(r'(\n|\r\n|\r)').split(text) |
| if leading: |
| lines[::2] = (_ws_leading_re.sub('', line) for line in lines[::2]) |
| if trailing: |
| lines[::2] = (_ws_trailing_re.sub('', line) for line in lines[::2]) |
| return ''.join(lines) |
| |
| |
| _js_quote = {'\\': '\\\\', '"': '\\"', '\b': '\\b', '\f': '\\f', |
| '\n': '\\n', '\r': '\\r', '\t': '\\t', "'": "\\'"} |
| for i in range(0x20) + [ord(c) for c in '&<>']: |
| _js_quote.setdefault(chr(i), '\\u%04x' % i) |
| _js_quote_re = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t\'&<>]') |
| _js_string_re = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t&<>]') |
| |
| |
| def javascript_quote(text): |
| """Quote strings for inclusion in single or double quote delimited |
| Javascript strings |
| """ |
| if not text: |
| return '' |
| def replace(match): |
| return _js_quote[match.group(0)] |
| return _js_quote_re.sub(replace, text) |
| |
| |
| def to_js_string(text): |
| """Embed the given string in a double quote delimited Javascript string |
| (conform to the JSON spec) |
| """ |
| if not text: |
| return '""' |
| def replace(match): |
| return _js_quote[match.group(0)] |
| return '"%s"' % _js_string_re.sub(replace, text) |
| |
| |
| def unicode_quote(value, safe='/'): |
| """A unicode aware version of `urllib.quote` |
| |
| :param value: anything that converts to a `str`. If `unicode` |
| input is given, it will be UTF-8 encoded. |
| :param safe: as in `quote`, the characters that would otherwise be |
| quoted but shouldn't here (defaults to '/') |
| """ |
| return quote(value.encode('utf-8') if isinstance(value, unicode) |
| else str(value), safe) |
| |
| |
| def unicode_quote_plus(value, safe=''): |
| """A unicode aware version of `urllib.quote_plus`. |
| |
| :param value: anything that converts to a `str`. If `unicode` |
| input is given, it will be UTF-8 encoded. |
| :param safe: as in `quote_plus`, the characters that would |
| otherwise be quoted but shouldn't here (defaults to |
| '/') |
| """ |
| return quote_plus(value.encode('utf-8') if isinstance(value, unicode) |
| else str(value), safe) |
| |
| |
| def unicode_unquote(value): |
| """A unicode aware version of `urllib.unquote`. |
| |
| :param str: UTF-8 encoded `str` value (for example, as obtained by |
| `unicode_quote`). |
| :rtype: `unicode` |
| """ |
| return unquote(value).decode('utf-8') |
| |
| |
| def unicode_urlencode(params, safe=''): |
| """A unicode aware version of `urllib.urlencode`. |
| |
| Values set to `empty` are converted to the key alone, without the |
| equal sign. |
| """ |
| if isinstance(params, dict): |
| params = params.iteritems() |
| l = [] |
| for k, v in params: |
| if v is empty: |
| l.append(unicode_quote_plus(k, safe)) |
| else: |
| l.append(unicode_quote_plus(k, safe) + '=' + |
| unicode_quote_plus(v, safe)) |
| return '&'.join(l) |
| |
| |
| _qs_quote_safe = ''.join(chr(c) for c in xrange(0x21, 0x7f)) |
| |
| def quote_query_string(text): |
| """Quote strings for query string |
| """ |
| return unicode_quote_plus(text, _qs_quote_safe) |
| |
| |
| def to_utf8(text, charset='latin1'): |
| """Convert a string to UTF-8, assuming the encoding is either UTF-8, ISO |
| Latin-1, or as specified by the optional `charset` parameter. |
| |
| .. deprecated :: 0.10 |
| You should use `unicode` strings only. |
| """ |
| try: |
| # Do nothing if it's already utf-8 |
| u = unicode(text, 'utf-8') |
| return text |
| except UnicodeError: |
| try: |
| # Use the user supplied charset if possible |
| u = unicode(text, charset) |
| except UnicodeError: |
| # This should always work |
| u = unicode(text, 'latin1') |
| return u.encode('utf-8') |
| |
| |
| class unicode_passwd(unicode): |
| """Conceal the actual content of the string when `repr` is called.""" |
| def __repr__(self): |
| return '*******' |
| |
| |
| def stream_encoding(stream): |
| """Return the appropriate encoding for the given stream.""" |
| encoding = getattr(stream, 'encoding', None) |
| # Windows returns 'cp0' to indicate no encoding |
| return encoding if encoding not in (None, 'cp0') else 'utf-8' |
| |
| |
| def console_print(out, *args, **kwargs): |
| """Output the given arguments to the console, encoding the output |
| as appropriate. |
| |
| :param kwargs: ``newline`` controls whether a newline will be appended |
| (defaults to `True`) |
| """ |
| cons_charset = stream_encoding(out) |
| out.write(' '.join([to_unicode(a).encode(cons_charset, 'replace') |
| for a in args])) |
| if kwargs.get('newline', True): |
| out.write('\n') |
| |
| |
| def printout(*args, **kwargs): |
| """Do a `console_print` on `sys.stdout`.""" |
| console_print(sys.stdout, *args, **kwargs) |
| |
| |
| def printerr(*args, **kwargs): |
| """Do a `console_print` on `sys.stderr`.""" |
| console_print(sys.stderr, *args, **kwargs) |
| |
| |
| def raw_input(prompt): |
| """Input one line from the console and converts it to unicode as |
| appropriate. |
| """ |
| printout(prompt, newline=False) |
| return to_unicode(__builtin__.raw_input(), sys.stdin.encoding) |
| |
| |
| _preferredencoding = locale.getpreferredencoding() |
| |
| def getpreferredencoding(): |
| """Return the encoding, which is retrieved on ahead, according to user |
| preference. |
| |
| We should use this instead of `locale.getpreferredencoding()` which |
| is not thread-safe.""" |
| return _preferredencoding |
| |
| |
| # -- Plain text formatting |
| |
| def text_width(text, ambiwidth=1): |
| """Determine the column width of `text` in Unicode characters. |
| |
| The characters in the East Asian Fullwidth (F) or East Asian Wide (W) |
| have a column width of 2. The other characters in the East Asian |
| Halfwidth (H) or East Asian Narrow (Na) have a column width of 1. |
| |
| That `ambiwidth` parameter is used for the column width of the East |
| Asian Ambiguous (A). If `1`, the same width as characters in US-ASCII. |
| This is expected by most users. If `2`, twice the width of US-ASCII |
| characters. This is expected by CJK users. |
| |
| cf. http://www.unicode.org/reports/tr11/. |
| """ |
| twice = 'FWA' if ambiwidth == 2 else 'FW' |
| return sum([2 if east_asian_width(chr) in twice else 1 |
| for chr in to_unicode(text)]) |
| |
| _default_ambiwidth = 1 # Default width of East Asian Ambiguous (A) |
| if os.name == 'nt': |
| try: |
| # `ctypes` is available since Python 2.5 |
| import ctypes |
| codepage = ctypes.windll.kernel32.GetConsoleOutputCP() |
| except ImportError: |
| # Try to retrieve the codepage from stderr and stdout |
| codepage = (sys.stderr.encoding or sys.stdout.encoding or '')[2:] |
| codepage = codepage.isdigit() and int(codepage) or 0 |
| |
| if codepage in (932, # Japanese (Shift-JIS) |
| 936, # Chinese Simplified (GB2312) |
| 949, # Korean (Unified Hangul Code) |
| 950): # Chinese Traditional (Big5) |
| _default_ambiwidth = 2 |
| del codepage |
| else: |
| if re.match(r'zh|ja|kr', os.environ.get('LANG') or '', re.IGNORECASE): |
| _default_ambiwidth = 2 |
| |
| |
| def print_table(data, headers=None, sep=' ', out=None, ambiwidth=None): |
| """Print data according to a tabular layout. |
| |
| :param data: a sequence of rows; assume all rows are of equal length. |
| :param headers: an optional row containing column headers; must be of |
| the same length as each row in `data`. |
| :param sep: column separator |
| :param out: output file descriptor (`None` means use `sys.stdout`) |
| :param ambiwidth: column width of the East Asian Ambiguous (A). If None, |
| detect ambiwidth with the locale settings. If others, |
| pass to the `ambiwidth` parameter of `text_width`. |
| """ |
| if out is None: |
| out = sys.stdout |
| charset = getattr(out, 'encoding', None) or 'utf-8' |
| if ambiwidth is None: |
| ambiwidth = _default_ambiwidth |
| data = list(data) |
| if headers: |
| data.insert(0, headers) |
| elif not data: |
| return |
| |
| # Convert to an unicode object with `to_unicode`. If None, convert to a |
| # empty string. |
| def to_text(val): |
| if val is None: |
| return u'' |
| return to_unicode(val) |
| |
| def tw(text): |
| return text_width(text, ambiwidth=ambiwidth) |
| |
| # Convert each cell to an unicode object |
| data = [[to_text(cell) for cell in row] for row in data] |
| |
| num_cols = len(data[0]) |
| col_width = [max(tw(row[idx]) for row in data) |
| for idx in xrange(num_cols)] |
| |
| out.write('\n') |
| for ridx, row in enumerate(data): |
| for cidx, cell in enumerate(row): |
| if headers and ridx == 0: |
| sp = '%*s' % (tw(sep), ' ') # No separator in header |
| else: |
| sp = sep |
| if cidx + 1 == num_cols: |
| sp = '' # No separator after last column |
| |
| line = u'%-*s%s' % (col_width[cidx] - tw(cell) + len(cell), |
| cell, sp) |
| line = line.encode(charset, 'replace') |
| out.write(line) |
| |
| out.write('\n') |
| if ridx == 0 and headers: |
| out.write('-' * (tw(sep) * cidx + sum(col_width))) |
| out.write('\n') |
| out.write('\n') |
| |
| |
| def shorten_line(text, maxlen=75): |
| """Truncates content to at most `maxlen` characters. |
| |
| This tries to be (a bit) clever and attempts to find a proper word |
| boundary for doing so. |
| """ |
| if len(text or '') < maxlen: |
| return text |
| cut = max(text.rfind(' ', 0, maxlen), text.rfind('\n', 0, maxlen)) |
| if cut < 0: |
| cut = maxlen |
| return text[:cut] + ' ...' |
| |
| |
| class UnicodeTextWrapper(textwrap.TextWrapper): |
| breakable_char_ranges = [ |
| (0x1100, 0x11FF), # Hangul Jamo |
| (0x2E80, 0x2EFF), # CJK Radicals Supplement |
| (0x3000, 0x303F), # CJK Symbols and Punctuation |
| (0x3040, 0x309F), # Hiragana |
| (0x30A0, 0x30FF), # Katakana |
| (0x3130, 0x318F), # Hangul Compatibility Jamo |
| (0x3190, 0x319F), # Kanbun |
| (0x31C0, 0x31EF), # CJK Strokes |
| (0x3200, 0x32FF), # Enclosed CJK Letters and Months |
| (0x3300, 0x33FF), # CJK Compatibility |
| (0x3400, 0x4DBF), # CJK Unified Ideographs Extension A |
| (0x4E00, 0x9FFF), # CJK Unified Ideographs |
| (0xA960, 0xA97F), # Hangul Jamo Extended-A |
| (0xAC00, 0xD7AF), # Hangul Syllables |
| (0xD7B0, 0xD7FF), # Hangul Jamo Extended-B |
| (0xF900, 0xFAFF), # CJK Compatibility Ideographs |
| (0xFE30, 0xFE4F), # CJK Compatibility Forms |
| (0xFF00, 0xFFEF), # Halfwidth and Fullwidth Forms |
| (0x20000, 0x2FFFF, u'[\uD840-\uD87F][\uDC00-\uDFFF]'), # Plane 2 |
| (0x30000, 0x3FFFF, u'[\uD880-\uD8BF][\uDC00-\uDFFF]'), # Plane 3 |
| ] |
| |
| split_re = None |
| breakable_re = None |
| |
| @classmethod |
| def _init_patterns(cls): |
| char_ranges = [] |
| surrogate_pairs = [] |
| for val in cls.breakable_char_ranges: |
| try: |
| high = unichr(val[0]) |
| low = unichr(val[1]) |
| char_ranges.append(u'%s-%s' % (high, low)) |
| except ValueError: |
| # Narrow build, `re` cannot use characters >= 0x10000 |
| surrogate_pairs.append(val[2]) |
| char_ranges = u''.join(char_ranges) |
| if surrogate_pairs: |
| pattern = u'(?:[%s]|%s)+' % (char_ranges, |
| u'|'.join(surrogate_pairs)) |
| else: |
| pattern = u'[%s]+' % char_ranges |
| |
| cls.split_re = re.compile( |
| ur'(\s+|' + # any whitespace |
| pattern + u'|' + # breakable text |
| ur'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|' + # hyphenated words |
| ur'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))', # em-dash |
| re.UNICODE) |
| cls.breakable_re = re.compile(ur'\A' + pattern, re.UNICODE) |
| |
| def __init__(self, cols, replace_whitespace=0, break_long_words=0, |
| initial_indent='', subsequent_indent='', ambiwidth=1): |
| textwrap.TextWrapper.__init__( |
| self, cols, replace_whitespace=0, break_long_words=0, |
| initial_indent=initial_indent, |
| subsequent_indent=subsequent_indent) |
| self.ambiwidth = ambiwidth |
| if self.split_re is None: |
| self._init_patterns() |
| |
| def _split(self, text): |
| chunks = self.split_re.split(to_unicode(text)) |
| chunks = filter(None, chunks) |
| return chunks |
| |
| def _text_width(self, text): |
| return text_width(text, ambiwidth=self.ambiwidth) |
| |
| def _wrap_chunks(self, chunks): |
| lines = [] |
| chunks.reverse() |
| text_width = self._text_width |
| |
| while chunks: |
| cur_line = [] |
| cur_width = 0 |
| |
| if lines: |
| indent = self.subsequent_indent |
| else: |
| indent = self.initial_indent |
| width = self.width - text_width(indent) |
| |
| if chunks[-1].strip() == '' and lines: |
| del chunks[-1] |
| |
| while chunks: |
| chunk = chunks[-1] |
| w = text_width(chunk) |
| if cur_width + w <= width: |
| cur_line.append(chunks.pop()) |
| cur_width += w |
| elif self.breakable_re.match(chunk): |
| left_space = width - cur_width |
| for i in xrange(len(chunk)): |
| w = text_width(chunk[i]) |
| if left_space < w: |
| break |
| left_space -= w |
| if i > 0: |
| cur_line.append(chunk[:i]) |
| chunk = chunk[i:] |
| chunks[-1] = chunk |
| w = text_width(chunk) |
| break |
| else: |
| break |
| |
| if chunks and w > width: |
| self._handle_long_word(chunks, cur_line, cur_width, width) |
| |
| if cur_line and cur_line[-1].strip() == '': |
| del cur_line[-1] |
| |
| if cur_line: |
| lines.append(indent + ''.join(cur_line)) |
| |
| return lines |
| |
| |
| def wrap(t, cols=75, initial_indent='', subsequent_indent='', |
| linesep=os.linesep, ambiwidth=1): |
| """Wraps the single paragraph in `t`, which contains unicode characters. |
| The every line is at most `cols` characters long. |
| |
| That `ambiwidth` parameter is used for the column width of the East |
| Asian Ambiguous (A). If `1`, the same width as characters in US-ASCII. |
| This is expected by most users. If `2`, twice the width of US-ASCII |
| characters. This is expected by CJK users. |
| """ |
| t = t.strip().replace('\r\n', '\n').replace('\r', '\n') |
| wrapper = UnicodeTextWrapper(cols, replace_whitespace=0, |
| break_long_words=0, |
| initial_indent=initial_indent, |
| subsequent_indent=subsequent_indent, |
| ambiwidth=ambiwidth) |
| wrappedLines = [] |
| for line in t.split('\n'): |
| wrappedLines += wrapper.wrap(line.rstrip()) or [''] |
| return linesep.join(wrappedLines) |
| |
| |
| def obfuscate_email_address(address): |
| """Replace anything looking like an e-mail address (``'@something'``) |
| with a trailing ellipsis (``'@…'``) |
| """ |
| if address: |
| at = address.find('@') |
| if at != -1: |
| return address[:at] + u'@\u2026' + \ |
| ('>' if address[-1] == '>' else '') |
| return address |
| |
| |
| def breakable_path(path): |
| """Make a path breakable after path separators, and conversely, avoid |
| breaking at spaces. |
| """ |
| if not path: |
| return path |
| prefix = '' |
| if path.startswith('/'): # Avoid breaking after a leading / |
| prefix = '/' |
| path = path[1:] |
| return prefix + path.replace('/', u'/\u200b').replace('\\', u'\\\u200b') \ |
| .replace(' ', u'\u00a0') |
| |
| |
| def normalize_whitespace(text, to_space=u'\u00a0', remove=u'\u200b'): |
| """Normalize whitespace in a string, by replacing special spaces by normal |
| spaces and removing zero-width spaces.""" |
| if not text: |
| return text |
| for each in to_space: |
| text = text.replace(each, ' ') |
| for each in remove: |
| text = text.replace(each, '') |
| return text |
| |
| def unquote_label(txt): |
| """Remove (one level of) enclosing single or double quotes. |
| |
| .. versionadded :: 1.0 |
| """ |
| return txt[1:-1] if txt and txt[0] in "'\"" and txt[0] == txt[-1] else txt |
| |
| # -- Conversion |
| |
| def pretty_size(size, format='%.1f'): |
| """Pretty print content size information with appropriate unit. |
| |
| :param size: number of bytes |
| :param format: can be used to adjust the precision shown |
| """ |
| if size is None: |
| return '' |
| |
| jump = 1024 |
| if size < jump: |
| return _('%(size)s bytes', size=size) |
| |
| units = ['KB', 'MB', 'GB', 'TB'] |
| i = 0 |
| while size >= jump and i < len(units): |
| i += 1 |
| size /= 1024. |
| |
| return (format + ' %s') % (size, units[i - 1]) |
| |
| |
| def expandtabs(s, tabstop=8, ignoring=None): |
| """Expand tab characters `'\\\\t'` into spaces. |
| |
| :param tabstop: number of space characters per tab |
| (defaults to the canonical 8) |
| |
| :param ignoring: if not `None`, the expansion will be "smart" and |
| go from one tabstop to the next. In addition, |
| this parameter lists characters which can be |
| ignored when computing the indent. |
| """ |
| if '\t' not in s: |
| return s |
| if ignoring is None: |
| return s.expandtabs(tabstop) |
| |
| outlines = [] |
| for line in s.split('\n'): |
| if '\t' not in line: |
| outlines.append(line) |
| continue |
| p = 0 |
| s = [] |
| for c in line: |
| if c == '\t': |
| n = tabstop - p % tabstop |
| s.append(' ' * n) |
| p += n |
| elif not ignoring or c not in ignoring: |
| p += 1 |
| s.append(c) |
| else: |
| s.append(c) |
| outlines.append(''.join(s)) |
| return '\n'.join(outlines) |
| |
| |
| def fix_eol(text, eol): |
| """Fix end-of-lines in a text.""" |
| lines = text.splitlines() |
| lines.append('') |
| return eol.join(lines) |
| |
| def unicode_to_base64(text, strip_newlines=True): |
| """Safe conversion of ``text`` to base64 representation using |
| utf-8 bytes. |
| |
| Strips newlines from output unless ``strip_newlines`` is `False`. |
| """ |
| text = to_unicode(text) |
| if strip_newlines: |
| return text.encode('utf-8').encode('base64').replace('\n', '') |
| return text.encode('utf-8').encode('base64') |
| |
| def unicode_from_base64(text): |
| """Safe conversion of ``text`` to unicode based on utf-8 bytes.""" |
| return text.decode('base64').decode('utf-8') |
| |
| |
| def levenshtein_distance(lhs, rhs): |
| """Return the Levenshtein distance between two strings.""" |
| if len(lhs) > len(rhs): |
| rhs, lhs = lhs, rhs |
| if not lhs: |
| return len(rhs) |
| |
| prev = range(len(rhs) + 1) |
| for lidx, lch in enumerate(lhs): |
| curr = [lidx + 1] |
| for ridx, rch in enumerate(rhs): |
| cost = (lch != rch) * 2 |
| curr.append(min(prev[ridx + 1] + 1, # deletion |
| curr[ridx] + 1, # insertion |
| prev[ridx] + cost)) # substitution |
| prev = curr |
| return prev[-1] |