| #!/usr/bin/env python |
| """ezt.py -- EaZy Templating |
| |
| For documentation, please see: http://code.google.com/p/ezt/wiki/Syntax |
| """ |
| # |
| # Copyright (C) 2001-2011 Greg Stein. All Rights Reserved. |
| # |
| # Redistribution and use in source and binary forms, with or without |
| # modification, are permitted provided that the following conditions are |
| # met: |
| # |
| # * Redistributions of source code must retain the above copyright |
| # notice, this list of conditions and the following disclaimer. |
| # |
| # * Redistributions in binary form must reproduce the above copyright |
| # notice, this list of conditions and the following disclaimer in the |
| # documentation and/or other materials provided with the distribution. |
| # |
| # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS |
| # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, |
| # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE |
| # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| # POSSIBILITY OF SUCH DAMAGE. |
| # |
| # |
| # This software is maintained by Greg and is available at: |
| # http://code.google.com/p/ezt/ |
| # |
| |
| __author__ = 'Greg Stein' |
| __version__ = '1.0' |
| __license__ = 'BSD' |
| |
| import re |
| from types import StringType, IntType, FloatType, LongType |
| import os |
| import urllib |
| try: |
| import cStringIO |
| except ImportError: |
| import StringIO |
| cStringIO = StringIO |
| |
| # |
| # Formatting types |
| # |
# Names of the output formats accepted by _parse_format() and by the
# [format "..."] directive.
FORMAT_RAW = 'raw'
FORMAT_HTML = 'html'
FORMAT_XML = 'xml'
FORMAT_JS = 'js'
FORMAT_URL = 'url'

#
# This regular expression matches four alternatives:
#   expr: NEWLINE | DIRECTIVE | BRACKET | COMMENT
#   DIRECTIVE: '[' ITEM (whitespace ITEM)* ']'
#   ITEM: STRING | NAME
#   STRING: '"' (not-slash-or-dquote | '\' anychar)* '"'
#   NAME: (alphanum | '_' | '-' | '.')+
#   BRACKET: '[[]'
#   COMMENT: '[#' not-rbracket* ']'
#
# When used with the split() method, the return value will be composed of
# non-matching text and the three paren groups (NEWLINE, DIRECTIVE and
# BRACKET). Since the COMMENT matches are not placed into a group, they are
# considered a "splitting" value and simply dropped.
#
# Note that the DIRECTIVE group captures the text between the square
# brackets; the brackets themselves are not part of the group.
#
_item = r'(?:"(?:[^\\"]|\\.)*"|[-\w.]+)'
_re_parse = re.compile(r'(\r?\n)|\[(%s(?: +%s)*)\]|(\[\[\])|\[#[^\]]*\]' %
                       (_item, _item))

# splits the inside of a DIRECTIVE into its individual ITEMs
_re_args = re.compile(r'"(?:[^\\"]|\\.)*"|[-\w.]+')

# block commands and their argument counts
_block_cmd_specs = { 'if-index':2, 'for':1, 'is':2, 'define':1, 'format':1 }
_block_cmds = _block_cmd_specs.keys()

# two regular expressions for compressing whitespace. the first is used to
# compress any whitespace including a newline into a single newline. the
# second regex is used to compress runs of whitespace into a single space.
_re_newline = re.compile('[ \t\r\f\v]*\n\\s*')
_re_whitespace = re.compile(r'\s\s+')

# this regex is used to substitute arguments into a value. we split the value,
# replace the relevant pieces, and then put it all back together. splitting
# will produce a list of: TEXT ( splitter TEXT )*. splitter will be '%' or
# an integer.
_re_subst = re.compile('%(%|[0-9]+)')
| |
class Template:
    """A parsed EZT template which can be rendered against a data object.

    A template is parsed once (parse() / parse_file()) into a 'program' and
    may then be rendered any number of times with generate().
    """

    def __init__(self, fname=None, compress_whitespace=1,
                 base_format=FORMAT_RAW):
        """fname -> optional pathname of a template file to parse right away.
        compress_whitespace -> when true, runs of whitespace and of newlines
          in the template text are collapsed while parsing.
        base_format -> format specifier used for printing values.
        """
        self.compress_whitespace = compress_whitespace
        if fname:
            self.parse_file(fname, base_format)

    def parse_file(self, fname, base_format=FORMAT_RAW):
        """fname -> a string object with pathname of file containing an EZT
        template.
        """
        self.parse(_FileReader(fname), base_format)

    def parse(self, text_or_reader, base_format=FORMAT_RAW):
        """Parse the template specified by text_or_reader.

        The argument should be a string containing the template, or it should
        specify a subclass of ezt.Reader which can read templates. The base
        format for printing values is given by base_format.
        """
        if not isinstance(text_or_reader, Reader):
            # assume the argument is a plain text string
            text_or_reader = _TextReader(text_or_reader)

        self.program = self._parse(text_or_reader,
                                   base_printer=_parse_format(base_format))

    def generate(self, fp, data):
        """Render the parsed template to fp.

        fp -> an object with a write() method.
        data -> an object whose attributes hold the template values, or a
          dictionary-like object (which is converted to such an object).
        """
        if (hasattr(data, '__getitem__')
                or callable(getattr(data, 'keys', None))):
            # a dictionary-like object was passed. convert it to an
            # attribute-based object.
            class _data_ob:
                def __init__(self, d):
                    vars(self).update(d)
            data = _data_ob(data)

        ctx = _context()
        ctx.data = data
        ctx.for_index = { }
        ctx.defines = { }
        self._execute(self.program, fp, ctx)

    def _parse(self, reader, for_names=None, file_args=(), base_printer=None):
        """reader -> a Reader instance providing the template text.
        for_names -> list of the names of the currently active [for] blocks.
        file_args -> prepared references passed as arguments to an [include].
        base_printer -> sequence of transform functions applied to printed
          values outside of any [format] block.

        This is a private helper function doing the real work for method
        parse. It returns the parsed template as a 'program'. This program is
        a sequence made out of strings or
        (function, argument, filename, line_number) 4-tuples.

        Raises ArgCountSyntaxError, UnmatchedEndError, UnclosedBlocksError and
        BadFormatConstantError for malformed templates.

        Note: comment directives [# ...] are automatically dropped by
        _re_parse.
        """

        filename = reader.filename()
        # parse the template program into:
        #   (TEXT NEWLINE DIRECTIVE BRACKET)* TEXT
        parts = _re_parse.split(reader.text)

        program = [ ]
        stack = [ ]
        if not for_names:
            for_names = [ ]

        if base_printer is None:
            base_printer = ()
        printers = [ base_printer ]

        one_newline_copied = False
        line_number = 1
        for i, piece in enumerate(parts):
            # discriminate between: TEXT NEWLINE DIRECTIVE BRACKET
            which = i % 4
            if which == 0:
                # TEXT. append if non-empty.
                if piece:
                    if self.compress_whitespace:
                        piece = _re_whitespace.sub(' ', piece)
                    program.append(piece)
                    one_newline_copied = False
            elif which == 1:
                # NEWLINE. append unless compress_whitespace requested
                if piece:
                    line_number += 1
                    if self.compress_whitespace:
                        # only the first newline of a run is kept
                        if not one_newline_copied:
                            program.append('\n')
                            one_newline_copied = True
                    else:
                        program.append(piece)
            elif which == 3:
                # BRACKET directive. append '[' if present.
                if piece:
                    program.append('[')
                    one_newline_copied = False
            elif piece:
                # DIRECTIVE is present.
                one_newline_copied = False
                args = _re_args.findall(piece)
                cmd = args[0]
                if cmd == 'else':
                    if len(args) > 1:
                        raise ArgCountSyntaxError(str(args[1:]), filename,
                                                  line_number)
                    if not stack:
                        # report a stray [else] as a template error rather
                        # than failing with an IndexError below
                        raise UnmatchedEndError('[else] without an open block',
                                                filename, line_number)
                    ### check: don't allow for 'for' cmd
                    idx = stack[-1][1]
                    true_section = program[idx:]
                    del program[idx:]
                    stack[-1][3] = true_section
                elif cmd == 'end':
                    if len(args) > 1:
                        raise ArgCountSyntaxError(str(args[1:]), filename,
                                                  line_number)
                    # note: true-section may be None
                    try:
                        (cmd, idx, args, true_section,
                         start_line_number) = stack.pop()
                    except IndexError:
                        raise UnmatchedEndError(None, filename, line_number)
                    else_section = program[idx:]
                    if cmd == 'format':
                        # the block's content stays inline in the program;
                        # only the active printer changes
                        printers.pop()
                    else:
                        func = getattr(self, '_cmd_' + cmd.replace('-', '_'))
                        program[idx:] = [
                            (func, (args, true_section, else_section),
                             filename, line_number) ]
                        if cmd == 'for':
                            for_names.pop()
                elif cmd in _block_cmds:
                    # each block command takes an exact number of arguments;
                    # too few would otherwise fail with an IndexError below
                    if len(args) != _block_cmd_specs[cmd] + 1:
                        raise ArgCountSyntaxError(str(args[1:]), filename,
                                                  line_number)
                    ### this assumes arg1 is always a ref unless cmd is
                    ### 'define'
                    if cmd != 'define':
                        args[1] = _prepare_ref(args[1], for_names, file_args)

                    # handle arg2 for the 'is' command
                    if cmd == 'is':
                        args[2] = _prepare_ref(args[2], for_names, file_args)
                    elif cmd == 'for':
                        for_names.append(args[1][0])  # append the refname
                    elif cmd == 'format':
                        # a string constant has no refname
                        if args[1][0]:
                            raise BadFormatConstantError(str(args[1:]),
                                                         filename, line_number)
                        printers.append(_parse_format(args[1][1]))

                    # remember the cmd, current pos, args, and a section
                    # placeholder
                    stack.append([cmd, len(program), args[1:], None,
                                  line_number])
                elif cmd == 'include' or cmd == 'insertfile':
                    is_insertfile = (cmd == 'insertfile')
                    # extra arguments are meaningless when using insertfile
                    if is_insertfile and len(args) != 2:
                        raise ArgCountSyntaxError(str(args), filename,
                                                  line_number)
                    # the file to include is mandatory
                    if len(args) < 2:
                        raise ArgCountSyntaxError(str(args), filename,
                                                  line_number)
                    if args[1][0] == '"':
                        # constant filename: resolve it at parse time
                        include_filename = args[1][1:-1]
                        if is_insertfile:
                            program.append(
                                reader.read_other(include_filename).text)
                        else:
                            f_args = [ ]
                            for arg in args[2:]:
                                f_args.append(_prepare_ref(arg, for_names,
                                                           file_args))
                            program.extend(
                                self._parse(reader.read_other(include_filename),
                                            for_names, f_args, printers[-1]))
                    else:
                        # filename given by reference: resolve it at
                        # execution time
                        if len(args) != 2:
                            raise ArgCountSyntaxError(str(args), filename,
                                                      line_number)
                        if is_insertfile:
                            cmd = self._cmd_insertfile
                        else:
                            cmd = self._cmd_include
                        program.append(
                            (cmd,
                             (_prepare_ref(args[1], for_names, file_args),
                              reader, printers[-1]),
                             filename, line_number))
                elif cmd == 'if-any':
                    f_args = [ ]
                    for arg in args[1:]:
                        f_args.append(_prepare_ref(arg, for_names, file_args))
                    stack.append(['if-any', len(program), f_args, None,
                                  line_number])
                else:
                    # implied PRINT command
                    if len(args) > 1:
                        f_args = [ ]
                        for arg in args:
                            f_args.append(_prepare_ref(arg, for_names,
                                                       file_args))
                        program.append((self._cmd_subst,
                                        (printers[-1], f_args[0], f_args[1:]),
                                        filename, line_number))
                    else:
                        valref = _prepare_ref(args[0], for_names, file_args)
                        program.append((self._cmd_print,
                                        (printers[-1], valref),
                                        filename, line_number))

        if stack:
            raise UnclosedBlocksError('Block opened at line %s' % stack[-1][4],
                                      filename=filename)
        return program

    def _execute(self, program, fp, ctx):
        """This private helper function takes a 'program' sequence as created
        by the method '_parse' and executes it step by step. strings are
        written to the file object 'fp' and functions are called.
        """
        for step in program:
            # commands are always tuples; everything else is literal text.
            # (testing for the tuple rather than for StringType lets unicode
            # template text through as well)
            if isinstance(step, tuple):
                method, method_args, filename, line_number = step
                method(method_args, fp, ctx, filename, line_number)
            else:
                fp.write(step)

    def _cmd_print(self, args, fp, ctx, filename, line_number):
        "args -> (transforms, valref). Write the transformed value to fp."
        transforms, valref = args
        value = _get_value(valref, ctx, filename, line_number)
        # if the value has a 'read' attribute, then it is a stream: copy it
        if hasattr(value, 'read'):
            while 1:
                chunk = value.read(16384)
                if not chunk:
                    break
                for t in transforms:
                    chunk = t(chunk)
                fp.write(chunk)
        else:
            for t in transforms:
                value = t(value)
            fp.write(value)

    def _cmd_subst(self, args, fp, ctx, filename, line_number):
        """args -> (transforms, valref, subst_args).

        The value of valref is a format in which %N is replaced by the value
        of the N'th substitution argument ('<undef>' if there is no such
        argument) and %% produces a single '%'.
        """
        transforms, valref, subst_args = args
        fmt = _get_value(valref, ctx, filename, line_number)
        # splitting produces: TEXT ( splitter TEXT )*
        parts = _re_subst.split(fmt)
        for i, piece in enumerate(parts):
            if i % 2 == 1 and piece != '%':
                idx = int(piece)
                if idx < len(subst_args):
                    piece = _get_value(subst_args[idx], ctx, filename,
                                       line_number)
                else:
                    piece = '<undef>'
            for t in transforms:
                piece = t(piece)
            fp.write(piece)

    def _cmd_include(self, args, fp, ctx, filename, line_number):
        "args -> (valref, reader, printer). Parse and execute the named file."
        valref, reader, printer = args
        fname = _get_value(valref, ctx, filename, line_number)
        ### note: we don't have the set of for_names to pass into this parse.
        ### I don't think there is anything to do but document it
        self._execute(self._parse(reader.read_other(fname),
                                  base_printer=printer),
                      fp, ctx)

    def _cmd_insertfile(self, args, fp, ctx, filename, line_number):
        "args -> (valref, reader, printer). Copy the named file to fp as-is."
        valref, reader, printer = args
        fname = _get_value(valref, ctx, filename, line_number)
        fp.write(reader.read_other(fname).text)

    def _cmd_if_any(self, args, fp, ctx, filename, line_number):
        "If any value is a non-empty string or non-empty list, then T else F."
        (valrefs, t_section, f_section) = args
        value = 0
        for valref in valrefs:
            if _get_value(valref, ctx, filename, line_number):
                value = 1
                break
        self._do_if(value, t_section, f_section, fp, ctx)

    def _cmd_if_index(self, args, fp, ctx, filename, line_number):
        """Test the position of the enclosing [for] loop.

        The second directive argument is 'even', 'odd', 'first', 'last' or an
        integer which is compared against the (zero-based) loop index.
        """
        ((valref, value), t_section, f_section) = args
        sequence, idx = ctx.for_index[valref[0]]
        if value == 'even':
            value = idx % 2 == 0
        elif value == 'odd':
            value = idx % 2 == 1
        elif value == 'first':
            value = idx == 0
        elif value == 'last':
            value = idx == len(sequence)-1
        else:
            value = idx == int(value)
        self._do_if(value, t_section, f_section, fp, ctx)

    def _cmd_is(self, args, fp, ctx, filename, line_number):
        "Compare two values for equality, ignoring case."
        ((left_ref, right_ref), t_section, f_section) = args
        right_value = _get_value(right_ref, ctx, filename, line_number)
        left_value = _get_value(left_ref, ctx, filename, line_number)
        value = left_value.lower() == right_value.lower()
        self._do_if(value, t_section, f_section, fp, ctx)

    def _do_if(self, value, t_section, f_section, fp, ctx):
        "Execute t_section if value is true, otherwise f_section (if any)."
        if t_section is None:
            # there was no [else]: the parser stored the block's only
            # section in the else slot
            t_section = f_section
            f_section = None
        if value:
            section = t_section
        else:
            section = f_section
        if section is not None:
            self._execute(section, fp, ctx)

    def _cmd_for(self, args, fp, ctx, filename, line_number):
        """Execute the section once for every item of the referenced sequence.

        Raises NeedSequenceError if the reference yields a string.
        """
        ((valref,), unused, section) = args
        sequence = _get_value(valref, ctx, filename, line_number)
        refname = valref[0]
        if isinstance(sequence, StringType):
            raise NeedSequenceError(refname, filename, line_number)
        # _get_value() and _cmd_if_index() read the current item through
        # this [sequence, index] pair
        ctx.for_index[refname] = idx = [ sequence, 0 ]
        for item in sequence:
            self._execute(section, fp, ctx)
            idx[1] = idx[1] + 1
        del ctx.for_index[refname]

    def _cmd_define(self, args, fp, ctx, filename, line_number):
        "Capture the output of the section and store it under the given name."
        ((name,), unused, section) = args
        valfp = cStringIO.StringIO()
        if section is not None:
            self._execute(section, valfp, ctx)
        ctx.defines[name] = valfp.getvalue()
| |
def boolean(value):
    """Map a Python truth value onto what [if-any bool_var] expects.

    Returns the string 'yes' for a true value and None otherwise.
    """
    if not value:
        return None
    return 'yes'
| |
| |
def _prepare_ref(refname, for_names, file_args):
    """refname -> a string containing a dotted identifier.
                  example: "foo.bar.bang"
    for_names -> a list of active for sequences.
    file_args -> prepared references passed as arguments to an include;
                 'argN' in refname refers to file_args[N].

    Returns a `value reference', a 3-tuple made out of (refname, start, rest),
    for fast access later. A string constant is returned as
    (None, <the string>, None).
    """
    # is the reference a string constant?
    if refname[0] == '"':
        return None, refname[1:-1], None

    parts = refname.split('.')
    start = parts[0]
    rest = parts[1:]

    # if this is an include-argument, then just return the prepared ref
    if start[:3] == 'arg':
        try:
            idx = int(start[3:])
        except ValueError:
            pass
        else:
            # names may contain '-', so 'arg-1' parses as a negative index.
            # such a name must not pick an argument from the end of file_args.
            if 0 <= idx < len(file_args):
                orig_refname, start, more_rest = file_args[idx]
                if more_rest is None:
                    # the include-argument was a string constant
                    return None, start, None

                # prepend the argument's "rest" for our further processing
                rest[:0] = more_rest

                # rewrite the refname to ensure that any potential 'for'
                # processing has the correct name
                ### this can make it hard for debugging include files since
                ### we lose the 'argNNN' names
                if not rest:
                    return start, start, [ ]
                refname = start + '.' + '.'.join(rest)

    if for_names:
        # From last to first part, check if this reference is part of a
        # for loop
        for i in range(len(parts), 0, -1):
            name = '.'.join(parts[:i])
            if name in for_names:
                return refname, name, parts[i:]

    return refname, start, rest
| |
def _get_value(valref, ctx, filename, line_number):
    """valref -> a prepared `value reference', i.e. the (refname, start, rest)
                 3-tuple returned by _prepare_ref.
    ctx -> an execution context instance.
    filename, line_number -> location of the directive, used for errors.

    Does a name space lookup within the template name space. Active
    for blocks take precedence over data dictionary members with the
    same name.

    Returns a string for string constants, numbers (converted with str())
    and None (converted to ''); any other object is returned unchanged.
    Raises UnknownReference if the name cannot be resolved.
    """
    # unpacked here rather than in the signature: tuple parameters are not
    # valid syntax in Python 3
    refname, start, rest = valref

    if rest is None:
        # it was a string constant
        return start

    # get the starting object
    if start in ctx.for_index:
        sequence, idx = ctx.for_index[start]
        ob = sequence[idx]
    elif start in ctx.defines:
        ob = ctx.defines[start]
    elif hasattr(ctx.data, start):
        ob = getattr(ctx.data, start)
    else:
        raise UnknownReference(refname, filename, line_number)

    # walk the rest of the dotted reference
    for attr in rest:
        try:
            ob = getattr(ob, attr)
        except AttributeError:
            raise UnknownReference(refname, filename, line_number)

    # make sure we return a string instead of some various Python types
    if isinstance(ob, (IntType, FloatType, LongType)):
        return str(ob)
    if ob is None:
        return ''

    # string or a sequence
    return ob
| |
def _replace(s, replace_map):
    """Apply every (original, replacement) pair of replace_map to s, in order.

    Each pair is applied to the result of the previous one, so the order of
    the pairs matters. Returns the resulting string.
    """
    text = s
    for needle, substitute in replace_map:
        text = text.replace(needle, substitute)
    return text
| |
# (original, replacement) pairs for embedding a value in JavaScript source.
# The backslash pair must stay first: the later replacements introduce
# backslashes which must not be escaped a second time.
REPLACE_JS_MAP = (
    ('\\', r'\\'), ('\t', r'\t'), ('\n', r'\n'), ('\r', r'\r'),
    ('"', r'\x22'), ('\'', r'\x27'), ('&', r'\x26'),
    ('<', r'\x3c'), ('>', r'\x3e'), ('=', r'\x3d'),
)

# Various unicode whitespace
REPLACE_JS_UNICODE_MAP = (
    (u'\u0085', r'\u0085'), (u'\u2028', r'\u2028'), (u'\u2029', r'\u2029'),
)

# Why not cgi.escape? It doesn't do single quotes which are occasionally
# used to contain HTML attributes and event handler definitions (unfortunately)
#
# The ampersand pair must stay first: the other replacements introduce
# ampersands which must not be escaped a second time.
REPLACE_HTML_MAP = (
    ('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;'),
    ('"', '&quot;'), ('\'', '&#39;'),
)
| |
def _js_escape(s):
    """Escape s for use inside JavaScript source.

    The pairs of REPLACE_JS_MAP are always applied; those of
    REPLACE_JS_UNICODE_MAP only when the value is a unicode object.
    """
    escaped = _replace(s, REPLACE_JS_MAP)
    ### perhaps attempt to coerce the string to unicode and then replace?
    if not isinstance(escaped, unicode):
        return escaped
    return _replace(escaped, REPLACE_JS_UNICODE_MAP)
| |
def _html_escape(s):
    "Escape s by applying the pairs of REPLACE_HTML_MAP."
    escaped = _replace(s, REPLACE_HTML_MAP)
    return escaped
| |
| def _url_escape(s): |
| ### quote_plus barfs on non-ASCII characters. According to |
| ### http://www.w3.org/International/O-URL-code.html URIs should be |
| ### UTF-8 encoded first. |
| if isinstance(s, unicode): |
| s = s.encode('utf8') |
| return urllib.quote_plus(s) |
| |
# Maps each format name to the function which escapes a value for that
# format. None means the value is written without any transformation
# (_parse_format skips such entries).
FORMATTERS = {
    FORMAT_RAW: None,
    FORMAT_HTML: _html_escape,
    FORMAT_XML: _html_escape,  ### use the same quoting as HTML for now
    FORMAT_JS: _js_escape,
    FORMAT_URL: _url_escape,
}
| |
def _parse_format(format_string=FORMAT_RAW):
    """Turn a comma-separated list of format names into transform functions.

    Returns the list of functions registered in FORMATTERS for the given
    names, in order; names registered as None contribute nothing.
    Raises UnknownFormatConstantError if a name is not in FORMATTERS.
    """
    transforms = []
    for name in format_string.split(','):
        try:
            escaper = FORMATTERS[name]
        except KeyError:
            raise UnknownFormatConstantError(format_string)
        if escaper is not None:
            transforms.append(escaper)
    return transforms
| |
class _context:
    """A container for the execution context.

    Template.generate() creates one instance per rendering and stores the
    data object ('data'), the state of the active for loops ('for_index')
    and the values captured by [define] ('defines') on it.
    """
| |
| |
class Reader:
    """Abstract class which allows EZT to detect Reader objects.

    Template.parse() treats any argument which is not an instance of this
    class as plain template text.
    """

    def filename(self):
        "Return a placeholder; subclasses return the name of their source."
        description = repr(self)
        return '(%s does not provide filename() method)' % (description,)
| |
class _FileReader(Reader):
    """Reads templates from the filesystem."""

    def __init__(self, fname):
        "fname -> pathname of the template file; it is read immediately."
        # close the file explicitly instead of leaving it to the garbage
        # collector
        fp = open(fname, 'rb')
        try:
            self.text = fp.read()
        finally:
            fp.close()
        self._dir = os.path.dirname(fname)
        self.fname = fname

    def read_other(self, relative):
        "Return a reader for a file named relative to this file's directory."
        return _FileReader(os.path.join(self._dir, relative))

    def filename(self):
        "Return the pathname this reader was created with."
        return self.fname
| |
class _TextReader(Reader):
    """'Reads' a template from provided text."""

    def __init__(self, text):
        # text -> the template itself
        self.text = text

    def read_other(self, relative):
        # plain text has no location which 'relative' could be resolved
        # against, so other files cannot be read
        raise BaseUnavailableError()

    def filename(self):
        # placeholder used as the filename of text templates
        return '(text)'
| |
| |
class EZTException(Exception):
    """Parent class of all EZT exceptions.

    Every instance carries an optional message, filename and line_number;
    str() joins the ones that are not None.
    """

    def __init__(self, message=None, filename=None, line_number=None):
        self.message = message
        self.filename = filename
        self.line_number = line_number

    def __str__(self):
        fragments = []
        if self.message is not None:
            fragments.append(self.message)
        location = (('in file ', self.filename),
                    ('at line ', self.line_number))
        for label, detail in location:
            if detail is not None:
                fragments.append(label + str(detail))
        return ' '.join(fragments)
| |
class ArgCountSyntaxError(EZTException):
    """A bracket directive got the wrong number of arguments.

    Raised while parsing a template.
    """
| |
class UnknownReference(EZTException):
    """The template references an object not contained in the data dictionary.

    Raised by _get_value() when a name, or one of the attributes of a dotted
    name, cannot be found.
    """
| |
class NeedSequenceError(EZTException):
    """The object dereferenced by the template is not a sequence (tuple or
    list).

    Raised when a [for] directive is given a string.
    """
| |
class UnclosedBlocksError(EZTException):
    """The template ended while a block was still open.

    This error may be simply a missing [end].
    """
| |
class UnmatchedEndError(EZTException):
    """An [end] was found while no block was open.

    This error may be caused by a misspelled if directive.
    """
| |
class BaseUnavailableError(EZTException):
    """Base location is unavailable, which disables includes.

    Raised by _TextReader.read_other(): a template given as plain text has
    no location to resolve other files against.
    """
| |
class BadFormatConstantError(EZTException):
    """Format specifiers must be string constants.

    Raised while parsing when the argument of a [format] directive is a
    reference instead of a quoted string.
    """
| |
class UnknownFormatConstantError(EZTException):
    """The format specifier is an unknown value.

    Raised by _parse_format() for a name which is not a key of FORMATTERS.
    """
| |
| |
| # --- standard test environment --- |
def test_parse():
    """Check how _re_parse splits template text.

    Each match contributes three items to the result, one per paren group
    (NEWLINE, DIRECTIVE, BRACKET); groups which did not take part in the
    match are None. The DIRECTIVE group holds the text between the square
    brackets, without the brackets themselves.
    """
    assert _re_parse.split('[a]') == ['', None, 'a', None, '']
    assert _re_parse.split('[a] [b]') == \
           ['', None, 'a', None, ' ', None, 'b', None, '']
    assert _re_parse.split('[a c] [b]') == \
           ['', None, 'a c', None, ' ', None, 'b', None, '']
    assert _re_parse.split('x [a] y [b] z') == \
           ['x ', None, 'a', None, ' y ', None, 'b', None, ' z']
    assert _re_parse.split('[a "b" c "d"]') == \
           ['', None, 'a "b" c "d"', None, '']
    assert _re_parse.split(r'["a \"b[foo]" c.d f]') == \
           ['', None, '"a \\"b[foo]" c.d f', None, '']
    # NEWLINE and BRACKET fill their own groups; a COMMENT fills none
    assert _re_parse.split('a\n[[]b[# c]d') == \
           ['a', '\n', None, None, '', None, None, '[[]',
            'b', None, None, None, 'd']
| |
def _test(argv):
    """Run the doctests of the ezt module.

    argv -> command line arguments; '-v' selects verbose output.
    Returns the result of doctest.testmod().
    """
    import doctest
    import ezt
    return doctest.testmod(ezt, verbose=("-v" in argv))
| |
if __name__ == "__main__":
    # invoke unit test for this module: the first element of the result of
    # doctest.testmod() (the number of failed tests) becomes the exit status
    import sys
    sys.exit(_test(sys.argv)[0])