| import re |
| import logging |
| from urlparse import urljoin |
| |
| from tg import config |
| from pylons import request |
| from BeautifulSoup import BeautifulSoup |
| |
| import markdown |
| import feedparser |
| |
| from . import macro |
| from . import helpers as h |
| from allura import model as M |
| |
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Matches one ``[plain]...[/plain]`` block.  DOTALL lets the body span several
# lines; the non-greedy ``code`` group stops at the first closing tag.
PLAINTEXT_BLOCK_RE = re.compile(
    r'(?P<bplain>\[plain\])(?P<code>.*?)(?P<eplain>\[\/plain\])',
    re.MULTILINE | re.DOTALL,
)

# Matches a ``[[...]]`` macro call whose body contains no square brackets.
MACRO_PATTERN = r'\[\[([^\]\[]+)\]\]'
| |
| |
class ForgeExtension(markdown.Extension):
    '''Markdown extension that installs the Forge processors on a Markdown
    instance.

    :param wiki: stored as ``_use_wiki``; ForgeLinkPattern reads it when
        deciding whether an unresolved link gets the ``notfound`` class.
    :param email: stored as ``_is_email``; when true, RelativeLinkRewriter is
        created with ``make_absolute=True``.
    :param macro_context: stored as ``_macro_context``; ForgeMacroPattern
        passes it to ``macro.parse``.
    '''

    def __init__(self, wiki=False, email=False, macro_context=None):
        markdown.Extension.__init__(self)
        self._macro_context = macro_context
        self._is_email = email
        self._use_wiki = wiki

    def extendMarkdown(self, md, md_globals):
        '''Register the Forge pre-, inline-, tree- and post-processors on ``md``.

        ``md_globals`` is accepted but not used here.  The registration order
        below is significant and must not be rearranged.
        '''
        md.registerExtension(self)

        preprocessors = md.preprocessors
        # allow markdown within e.g. <div markdown>...</div>  More info at:
        # https://github.com/waylan/Python-Markdown/issues/52
        preprocessors['html_block'].markdown_in_raw = True
        preprocessors['fenced-code'] = FencedCodeProcessor()
        preprocessors.add('plain_text_block', PlainTextPreprocessor(md), "_begin")

        inline_patterns = md.inlinePatterns
        inline_patterns['autolink_1'] = AutolinkPattern(
            r'(http(?:s?)://[a-zA-Z0-9./\-_0%?&=+#;~:]+)')
        # replace the link pattern with our extended version
        inline_patterns['link'] = ForgeLinkPattern(
            markdown.inlinepatterns.LINK_RE, md, ext=self)
        inline_patterns['short_reference'] = ForgeLinkPattern(
            markdown.inlinepatterns.SHORT_REF_RE, md, ext=self)
        # macro must be processed before links
        inline_patterns.add(
            'macro', ForgeMacroPattern(MACRO_PATTERN, md, ext=self), '<link')

        # Keep a handle on the tree processor so reset() and the link pattern
        # can reach its ``alinks`` list.
        tree_processor = ForgeLinkTreeProcessor(md)
        self.forge_link_tree_processor = tree_processor
        md.treeprocessors['links'] = tree_processor

        postprocessors = md.postprocessors
        # Sanitize HTML
        postprocessors['sanitize_html'] = HTMLSanitizer()
        # Rewrite all relative links that don't start with . to have a '../' prefix
        postprocessors['rewrite_relative_links'] = RelativeLinkRewriter(
            make_absolute=self._is_email)
        # Put a class around markdown content for custom css
        postprocessors['add_custom_class'] = AddCustomClass()
        postprocessors['mark_safe'] = MarkAsSafe()

    def reset(self):
        '''Clear the artifact links collected by the link tree processor.'''
        self.forge_link_tree_processor.reset()
| |
| |
class ForgeLinkPattern(markdown.inlinepatterns.LinkPattern):
    '''Link pattern that also resolves artifact shortlinks.

    Requires an ``ext`` keyword argument (the owning extension); it is used to
    read ``_use_wiki`` and to record resolved shortlinks on the extension's
    ``forge_link_tree_processor``.
    '''

    artifact_re = re.compile(r'((.*?):)?((.*?):)?(.+)')

    def __init__(self, *args, **kwargs):
        # ``ext`` is ours; the remaining arguments belong to LinkPattern.
        self.ext = kwargs.pop('ext')
        markdown.inlinepatterns.LinkPattern.__init__(self, *args, **kwargs)

    def handleMatch(self, m):
        '''Build the ``<a>`` element for a matched link.

        Returns the literal string ``'[TOC]'`` when the target is ``TOC`` so
        that the marker is left alone; otherwise returns the element.
        '''
        el = markdown.util.etree.Element('a')
        el.text = m.group(2)
        is_link_with_brackets = False
        try:
            href = m.group(9)
        except IndexError:
            # The pattern has no separate href group (bare ``[link]`` form):
            # the bracket text itself is the target.
            href = m.group(2)
            is_link_with_brackets = True
        try:
            title = m.group(13)
        except IndexError:
            title = None

        if href:
            if href == 'TOC':
                return '[TOC]'  # skip TOC
            # Default to no classes so that an href the artifact regex rejects
            # (e.g. one starting with a newline) cannot leave ``classes``
            # unbound below.
            classes = ''
            if self.artifact_re.match(href):
                href, classes = self._expand_alink(href, is_link_with_brackets)
            el.set('href', self.sanitize_url(self.unescape(href.strip())))
            el.set('class', classes)
        else:
            el.set('href', '')

        if title:
            title = markdown.inlinepatterns.dequote(self.unescape(title))
            el.set('title', title)

        return el

    def _expand_alink(self, link, is_link_with_brackets):
        '''Return (href, classes) for an artifact link'''
        classes = 'alink' if is_link_with_brackets else ''
        href = link
        shortlink = M.Shortlink.lookup(link)
        if shortlink:
            href = shortlink.url
            self.ext.forge_link_tree_processor.alinks.append(shortlink)
        elif self.ext._use_wiki and is_link_with_brackets:
            href = h.urlquote(link)
            classes += ' notfound'
        # ``<artifact>/attachment/<filename>``: flag the link when the artifact
        # resolves but has no attachment with that filename.
        attach_link = link.split('/attachment/')
        if len(attach_link) == 2 and self.ext._use_wiki:
            shortlink = M.Shortlink.lookup(attach_link[0])
            if shortlink:
                wanted = attach_link[1]
                attachments = shortlink.ref.artifact.attachments
                if not any(attach.filename == wanted for attach in attachments):
                    classes += ' notfound'
        return href, classes
| |
| |
class PlainTextPreprocessor(markdown.preprocessors.Preprocessor):
    '''
    This was used earlier for [plain] tags that the Blog tool's rss importer
    created, before html2text did good escaping of all special markdown chars.
    Can be deprecated.
    '''

    def run(self, lines):
        '''Replace every ``[plain]...[/plain]`` block with an htmlStash
        placeholder holding the HTML-escaped block content.

        Takes and returns a list of lines.
        '''
        text = "\n".join(lines)

        def stash(m):
            # The content is stored as safe, so it must be escaped first.
            code = self._escape(m.group('code'))
            return self.markdown.htmlStash.store(code, safe=True)

        # One left-to-right pass; matches are replaced in document order.
        return PLAINTEXT_BLOCK_RE.sub(stash, text).split("\n")

    def _escape(self, txt):
        """ basic html escaping """
        # '&' must be handled first so the entities added below are not
        # escaped a second time.
        txt = txt.replace('&', '&amp;')
        txt = txt.replace('<', '&lt;')
        txt = txt.replace('>', '&gt;')
        txt = txt.replace('"', '&quot;')
        return txt
| |
| |
class FencedCodeProcessor(markdown.preprocessors.Preprocessor):
    '''Turn ``~~~~`` fenced blocks into indented Markdown code blocks.

    Fence lines are dropped and every line between a pair of fences is
    indented.  An unclosed fence indents everything up to the end of input.
    '''
    pattern = '~~~~'
    # Markdown only recognises a code block when lines are indented by at
    # least four spaces (or one tab); a single space leaves them as plain text.
    indent = '    '

    def run(self, lines):
        '''Return ``lines`` with fence markers removed and fenced lines
        indented.'''
        in_block = False
        new_lines = []
        for line in lines:
            if line.lstrip().startswith(self.pattern):
                # A fence toggles the state and is not emitted itself.
                in_block = not in_block
                continue
            if in_block:
                new_lines.append(self.indent + line)
            else:
                new_lines.append(line)
        return new_lines
| |
| |
class ForgeMacroPattern(markdown.inlinepatterns.Pattern):
    '''Inline pattern that expands a macro and stashes the resulting HTML.

    Requires an ``ext`` keyword argument; its ``_macro_context`` is passed to
    ``macro.parse`` to build the callable used for expansion.
    '''

    def __init__(self, *args, **kwargs):
        extension = kwargs.pop('ext')
        self.ext = extension
        self.macro = macro.parse(extension._macro_context)
        markdown.inlinepatterns.Pattern.__init__(self, *args, **kwargs)

    def handleMatch(self, m):
        '''Expand the macro text in group 2 and return an htmlStash
        placeholder for the result.'''
        rendered = self.macro(m.group(2))
        return self.markdown.htmlStash.store(rendered)
| |
| |
class ForgeLinkTreeProcessor(markdown.treeprocessors.Treeprocessor):
    '''Wraps artifact links with []'''

    def __init__(self, parent):
        self.alinks = []
        self.parent = parent

    def run(self, root):
        '''Put square brackets around the text of every ``<a>`` element that
        has the ``alink`` class and non-empty text; return ``root``.'''
        for anchor in root.getiterator('a'):
            css_classes = anchor.get('class', '').split()
            if anchor.text and 'alink' in css_classes:
                anchor.text = '[%s]' % anchor.text
        return root

    def reset(self):
        '''Forget the artifact links collected so far.'''
        self.alinks = []
| |
| |
class MarkAsSafe(markdown.postprocessors.Postprocessor):
    '''Postprocessor that wraps the rendered text with ``h.html.literal``.'''

    def run(self, text):
        literal = h.html.literal
        return literal(text)
| |
| |
class AddCustomClass(markdown.postprocessors.Postprocessor):
    '''Postprocessor that wraps the rendered text in a
    ``markdown_content`` div so it can be targeted by custom CSS.'''

    def run(self, text):
        wrapper = '<div class="markdown_content">%s</div>'
        return wrapper % text
| |
| |
class RelativeLinkRewriter(markdown.postprocessors.Postprocessor):
    '''Rewrite ``<a href>`` and ``<img src>`` values in the rendered HTML.

    Relative targets get a ``../`` prefix; external links get
    ``rel="nofollow"``.  With ``make_absolute=True`` every target is also
    joined onto the configured ``base_url``.
    '''

    def __init__(self, make_absolute=False):
        self._make_absolute = make_absolute

    def run(self, text):
        '''Return ``text`` with link targets rewritten.

        The text is returned untouched when the current request path does not
        end with ``/``.
        '''
        try:
            if not request.path_info.endswith('/'):
                return text
        except Exception:
            # Must be being called outside the request context.  Only
            # ordinary errors are ignored; SystemExit and KeyboardInterrupt
            # still propagate.
            pass
        soup = BeautifulSoup(text)
        rewrite = self._rewrite_abs if self._make_absolute else self._rewrite
        for link in soup.findAll('a'):
            rewrite(link, 'href')
        for link in soup.findAll('img'):
            rewrite(link, 'src')
        return unicode(soup)

    def _rewrite(self, tag, attr):
        '''Normalise ``tag[attr]`` in place; tags without ``attr`` are left
        alone.'''
        val = tag.get(attr)
        if val is None:
            return
        if ' ' in val:
            # Don't urllib.quote to avoid possible double-quoting
            # just make sure no spaces
            val = val.replace(' ', '%20')
            tag[attr] = val
        if '://' in val:
            # Links that mention sf.net / sourceforge.net are left as they
            # are; any other URL with a scheme is marked nofollow.
            if 'sf.net' not in val and 'sourceforge.net' not in val:
                tag['rel'] = 'nofollow'
            return
        # Absolute paths, explicitly relative paths, mail links and
        # in-page anchors are already correct.
        if val.startswith(('/', '.', 'mailto:', '#')):
            return
        tag[attr] = '../' + val

    def _rewrite_abs(self, tag, attr):
        '''Like ``_rewrite``, then join the result onto ``base_url``.'''
        self._rewrite(tag, attr)
        val = tag.get(attr)
        if val is None:
            # urljoin(base, None) returns the base URL, which would attach a
            # bogus target to tags that never had one (e.g. <a name="...">).
            return
        tag[attr] = urljoin(config.get('base_url', 'http://sourceforge.net/'), val)
| |
| |
class HTMLSanitizer(markdown.postprocessors.Postprocessor):
    '''Postprocessor that passes the rendered HTML through feedparser's
    ``_HTMLSanitizer``.'''

    def run(self, text):
        '''Return the sanitized text as unicode.'''
        encoded = text.encode('utf-8')
        try:
            sanitizer = feedparser._HTMLSanitizer('utf-8')
        except TypeError:  # $@%## pre-released versions from SOG
            # Those versions take a second positional argument.
            sanitizer = feedparser._HTMLSanitizer('utf-8', '')
        sanitizer.feed(encoded)
        cleaned = sanitizer.output()
        return unicode(cleaned, 'utf-8')
| |
| |
class AutolinkPattern(markdown.inlinepatterns.LinkPattern):
    '''Inline pattern that turns the matched text into a link to itself.'''

    def handleMatch(self, mo):
        '''Return an ``<a>`` element whose text and ``href`` are both the
        text captured in group 2.'''
        url = mo.group(2)
        anchor = markdown.util.etree.Element('a')
        anchor.text = url
        anchor.set('href', url)
        return anchor