blob: 22c18ade49be9069550ef1142f7e4292e5bc953d [file] [log] [blame]
Generates Table of Contents for markdown.
Only generates a ToC for the headers FOLLOWING the [TOC] tag,
so you can insert it after a specific section that need not be
included in the ToC.
from __future__ import unicode_literals
import logging
import re
from bs4 import BeautifulSoup, Comment
from pelican import contents, signals
from pelican.utils import slugify
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
# Matches ids that already end in a numeric "_N" suffix:
# group 1 is the base id, group 2 is the counter N.
IDCOUNT_RE = re.compile(r'^(.*)_([0-9]+)$')


def unique(id, ids):
    """Return an id that is not yet in ``ids`` and record it there.

    If ``id`` is empty or already taken, a numeric suffix is appended
    ('_1'), or an existing suffix is incremented ('_1' -> '_2'), until
    the result is unused.

    Args:
        id: Candidate id string; may be empty.
        ids: Mutable set of ids already in use. The returned id is added
            to it so that later calls see it as taken.

    Returns:
        The unique id string.
    """
    while id in ids or not id:
        m = IDCOUNT_RE.match(id)
        if m:
            # Already has a counter: bump it.
            id = '%s_%d' % (m.group(1), int(m.group(2)) + 1)
        else:
            # No counter yet (or empty id): start at 1.
            id = '%s_%d' % (id, 1)
    # Without this the set never grows and duplicates go undetected.
    ids.add(id)
    return id
class HtmlTreeNode(object):
    """One entry in the table-of-contents tree.

    The root node has ``parent`` set to None and renders the surrounding
    ``<div id='toc'>``; every other node renders as an ``<li>`` holding a
    link to its header plus, if present, a nested list of its children.
    """

    def __init__(self, parent, header, level, id):
        """Create a node.

        Args:
            parent: Parent HtmlTreeNode, or None for the root.
            header: Text displayed for this entry.
            level: Tag name of the header (e.g. 'h2'); levels are compared
                as plain strings to decide nesting.
            id: Anchor id the entry links to.
        """
        self.children = []
        self.parent = parent
        self.header = header
        self.level = level
        self.id = id

    def add(self, new_header, ids):
        """Insert ``new_header`` into the tree relative to this node.

        The header gets an ``id`` attribute (its own, or one slugified
        from its text) that is made unique against ``ids``.

        Args:
            new_header: Header tag object exposing ``name``, ``string``,
                ``attrs`` and ``find_all``.
            ids: Set of ids already in use; passed on to ``unique``.

        Returns:
            Tuple ``(new_node, new_header)``: the node created for the
            header and the header itself with its ``id`` attribute set.
        """
        new_level = new_header.name
        new_string = new_header.string
        new_id = new_header.attrs.get('id')

        if not new_string:
            # No single string available: join every non-comment text
            # node found below the header instead.
            new_string = new_header.find_all(
                text=lambda t: not isinstance(t, Comment),
                recursive=True)
            new_string = "".join(new_string)

        if not new_id:
            new_id = slugify(new_string, ())

        new_id = unique(new_id, ids)  # make sure id is unique
        new_header.attrs['id'] = new_id

        if self.level < new_level:
            # Deeper header: becomes a child of this node.
            new_node = HtmlTreeNode(self, new_string, new_level, new_id)
            self.children += [new_node]
            return new_node, new_header
        if self.level == new_level:
            # Same depth: becomes a sibling under the shared parent.
            new_node = HtmlTreeNode(self.parent, new_string, new_level, new_id)
            self.parent.children += [new_node]
            return new_node, new_header
        # Shallower header: climb until a suitable ancestor is found.
        return self.parent.add(new_header, ids)

    def __str__(self):
        """Render this node and its whole subtree as an HTML string."""
        # NOTE(review): header text and id are interpolated without HTML
        # escaping; a header containing quotes or markup characters may
        # produce malformed output - confirm whether escaping is needed.
        is_root = self.parent is None
        ret = ""
        if not is_root:
            ret = "<a class='toc-href' href='#{0}' title='{1}'>{1}</a>".format(
                self.id, self.header)

        if self.children:
            ret += "<ul>{}</ul>".format(
                "".join("{}".format(child) for child in self.children))

        if is_root:
            return "<div id='toc' style='border-radius: 3px; border: 1px solid #999; background-color: #EEE; padding: 4px;'><h4>Table of Contents:</h4><ul>{}</ul></div>".format(ret)
        return "<li>{}</li>".format(ret)
def init_default_config(pelican):
    """Register the plugin's default ``TOC`` settings.

    The defaults are installed into ``DEFAULT_CONFIG`` and, when a
    Pelican instance is supplied, into that instance's settings.
    ``setdefault`` is used so a user-provided ``TOC`` setting is never
    overwritten.

    Args:
        pelican: Object exposing a ``settings`` mapping, or a falsy value
            to only update ``DEFAULT_CONFIG``.
    """
    from pelican.settings import DEFAULT_CONFIG

    TOC_DEFAULT = {
        # Regular expression matched against tag names to pick headers.
        'TOC_HEADERS': '^h[1-6]',
        'TOC_RUN': 'true',
    }

    DEFAULT_CONFIG.setdefault('TOC', TOC_DEFAULT)
    if pelican:
        pelican.settings.setdefault('TOC', TOC_DEFAULT)
def generate_toc(content):
    """Build a table of contents for ``content`` and embed it at ``[TOC]``.

    Only headers that FOLLOW a ``<p>[TOC]</p>`` marker are collected.
    Each collected header gets a unique ``id`` attribute. When at least
    one header is found, the rendered ToC is stored on ``content.toc``
    and ``content._content`` is rewritten from the modified document.
    Static content is ignored.

    Args:
        content: Pelican content object providing ``metadata``,
            ``settings``, ``slug`` and ``_content``.

    Raises:
        re.error: If the configured header pattern is not a valid
            regular expression.
    """
    if isinstance(content, contents.Static):
        return

    # NOTE(review): the defaults declare a 'TOC_RUN' setting, but nothing
    # in this function consults it - confirm whether a 'toc_run' opt-out
    # check belongs here.

    all_ids = set()
    title = content.metadata.get('title', 'Title')
    # 'h0' sorts before every real header level, so the root is always
    # an ancestor of whatever header is added.
    tree = node = HtmlTreeNode(None, title, 'h0', '')
    soup = BeautifulSoup(content._content, 'html.parser')
    settoc = False

    # Per-content metadata overrides the global setting.
    header_pattern = content.metadata.get(
        'toc_headers', content.settings['TOC']['TOC_HEADERS'])
    try:
        header_re = re.compile(header_pattern)
    except re.error as e:
        # Report the pattern actually used along with the parser's reason.
        logger.error("TOC_HEADERS '%s' is not a valid re\n%s",
                     header_pattern, e)
        raise

    # Find TOC tag
    tocTag = soup.find('p', text='[TOC]')
    if not tocTag:
        # No marker: leave the content untouched.
        return

    for header in tocTag.findAllNext(header_re):
        settoc = True
        node, new_header = node.add(header, all_ids)
        header.replaceWith(new_header)  # to get our ids back into soup

    if settoc:
        logger.info("Generating ToC for %s", content.slug)
        tree_string = '{}'.format(tree)
        tree_soup = BeautifulSoup(tree_string, 'html.parser')
        content.toc = tree_soup.decode(formatter='html')
        # NOTE(review): replacing the marker paragraph with the rendered
        # ToC is the presumed intent of the original 'if itoc:' branch,
        # whose body was not recoverable - confirm.
        tocTag.replaceWith(tree_soup)
        content._content = soup.decode(formatter='html')
def register():