| ''' |
| asfgenid |
| =================================== |
| Generates HeadingIDs, ElementID, and PermaLinks |
| First find all specified IDs and classes. Assure unique ID and permalink |
| Next find all headings missing IDs. Assure unique ID and permalink |
Generates a Table of Contents
| ''' |
| |
| # from __future__ import unicode_literals |
| |
| import sys |
| import traceback |
| import re |
| import unicodedata |
| |
| from bs4 import BeautifulSoup, Comment |
| |
| import pelican.contents |
| import pelican.plugins.signals |
| |
| ''' |
| Based on |
| https://github.com/waylan/Python-Markdown/blob/master/markdown/extensions/headerid.py |
| Which is BSD licensed, but is very much rewritten. |
| ''' |
| |
# Default plugin configuration.  Installed into Pelican's settings by
# init_default_config(); every key may be overridden by an ASF_GENID entry
# in the site's pelicanconf, and 'headings' may additionally be overridden
# per-page via 'asf_headings' metadata (see generate_id).
ASF_GENID = {
    'unsafe_tags': True,  # fix script, style, and iframe html that gfm filters as unsafe
    'metadata': True,  # {{ metadata }} inclusion of data in the html.
    'elements': True,  # {#id} and {.class} annotations.
    'headings': True,  # add slugified id to headings missing id. Can be overridden by page metadata.
    'headings_re': r'^h[1-6]',  # regex for which headings to check.
    'permalinks': True,  # add permalinks to elements and headings when id is added.
    'toc': True,  # check for [TOC] and add Table of Content if present.
    'toc_headers': r'h[1-6]',  # regex for which headings to include in the [TOC]
    'tables': True,  # add class="table" for tables missing class.
    'debug': False
}
| |
# Fixup tuples for HTML that GFM escapes into text.
# cmark-gfm's tagfilter extension renders <script>, <style>, and <iframe>
# as '&lt;script' etc.; these patterns turn them back into real tags.
# (As previously written each pattern was identical to its replacement —
# the '&lt;' entities had been lost — making the whole table a no-op.)
# Also fix up the [ and ] that download templates use for ezt.
FIXUP_UNSAFE = [
    (re.compile(r'&lt;script'), '<script'),
    (re.compile(r'&lt;/script'), '</script'),
    (re.compile(r'&lt;style'), '<style'),
    (re.compile(r'&lt;/style'), '</style'),
    (re.compile(r'&lt;iframe'), '<iframe'),
    (re.compile(r'&lt;/iframe'), '</iframe'),
    (re.compile(r'%5B'), '['),
    (re.compile(r'%5D'), ']'),
]
| |
# Find {{ metadata }} inclusions
METADATA_RE = re.compile(r'{{\s*(?P<meta>[-_:a-zA-Z0-9]+)\s*}}')

# Find {#id} or {.class} elementid annotations
ELEMENTID_RE = re.compile(r'(?:[ \t]*[{\[][ \t]*(?P<type>[#.])(?P<id>[-._:a-zA-Z0-9 ]+)[}\]])(\n|$)')

# ID duplicates match
IDCOUNT_RE = re.compile(r'^(.*)_([0-9]+)$')

# For permalinks: a single pilcrow sign (U+00B6), written as an escape so a
# bad file encoding cannot garble it again.  (The previous mojibake'd
# two-character value made the ord() call below raise TypeError at import.)
LINK_CHAR = '\u00b6'

# strip permalink chars from headings for ToC
PARA_MAP = {
    ord(LINK_CHAR): None
}

# Find table tags - to check for ones without class attribute.
TABLE_RE = re.compile(r'^table')
| |
| |
# An item in a Table of Contents - from toc.py
class HtmlTreeNode(object):
    """One node of the ToC tree; renders itself and its subtree as nested html."""

    def __init__(self, parent, header, level, tag_id):
        # parent: enclosing HtmlTreeNode, or None for the root node
        # header: display text of the heading
        # level: heading tag name ('h1'..'h6'); the root uses 'h0'
        # tag_id: id attribute the ToC entry links to ('' for the root)
        self.children = []
        self.parent = parent
        self.header = header
        self.level = level
        self.tag_id = tag_id

    def add(self, new_header):
        """Insert a bs4 heading tag at the right depth relative to this node.

        Returns (node, new_header) where node is the position for the next
        insertion.  Levels are tag names compared as strings — 'h1' < 'h2'
        < ... < 'h6' — which orders h1-h6 correctly (lexicographic compare).
        """
        new_level = new_header.name
        new_string = new_header.string
        new_id = new_header.attrs.get('id')

        if not new_string:
            # heading contains nested markup: join all non-comment text fragments
            new_string = new_header.find_all(
                text=lambda t: not isinstance(t, Comment),
                recursive=True)
            new_string = ''.join(new_string)
        # strip any permalink pilcrow picked up from the heading text
        new_string = new_string.translate(PARA_MAP)

        if self.level < new_level:
            # deeper heading: becomes a child of this node
            new_node = HtmlTreeNode(self, new_string, new_level, new_id)
            self.children += [new_node]
            return new_node, new_header
        elif self.level == new_level:
            # same depth: attach as a sibling, under the shared parent
            new_node = HtmlTreeNode(self.parent, new_string, new_level, new_id)
            self.parent.children += [new_node]
            return new_node, new_header
        elif self.level > new_level:
            # shallower heading: climb until an ancestor can accept it
            return self.parent.add(new_header)

    def __str__(self):
        """Render as html: the root becomes <div id='toc'>, others <li> anchors."""
        ret = ''
        if self.parent:
            ret = "<a class='toc-href' href='#{0}' title='{1}'>{1}</a>".format(
                self.tag_id, self.header)

        if self.children:
            # build one '{}' placeholder per child, then format the children
            # in — each child is rendered recursively via this __str__
            ret += "<ul>{}</ul>".format('{}' * len(self.children)).format(
                *self.children)

        if self.parent:
            ret = "<li>{}</li>".format(ret)

        if not self.parent:
            ret = "<div id='toc'>{}</div>".format(ret)

        return ret
| |
| |
# assure configuration
def init_default_config(pelican):
    """Seed the ASF_GENID defaults into Pelican's global and instance settings.

    Note: the parameter deliberately shadows the module-level 'pelican'
    import; it is the Pelican instance delivered by the initialized signal.
    """
    from pelican.settings import DEFAULT_CONFIG

    DEFAULT_CONFIG.setdefault('ASF_GENID', ASF_GENID)
    if pelican:
        pelican.settings.setdefault('ASF_GENID', ASF_GENID)
| |
| |
# from Apache CMS markdown/extensions/headerid.py - slugify in the same way as the Apache CMS
def slugify(value, separator):
    """Make a string URL friendly: fold to ASCII, drop punctuation,
    lowercase, and collapse whitespace/separator runs into the separator."""
    ascii_text = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii')
    cleaned = re.sub(r'[^\w\s-]', '', ascii_text).strip().lower()
    return re.sub(r'[%s\s]+' % separator, separator, cleaned)
| |
| |
# Ensure an id is unique in a set of ids. Append '_1', '_2'... if not
def unique(tag_id, ids):
    """Return tag_id made unique against ids (suffixing _1, _2, ...) and record it."""
    while tag_id in ids or not tag_id:
        print(f'WARNING: id="{tag_id}" is a duplicate')
        m = re.match(r'^(.*)_([0-9]+)$', tag_id)
        if m:
            # already numbered: bump the trailing counter
            tag_id = f'{m.group(1)}_{int(m.group(2)) + 1}'
        else:
            # first collision (or empty id): start the counter at 1
            tag_id = f'{tag_id}_1'
    ids.add(tag_id)
    return tag_id
| |
| |
# append a permalink
def permalink(soup, mod_element):
    """Append a pilcrow anchor to mod_element that links to its own id."""
    anchor = soup.new_tag('a', href='#' + mod_element['id'])
    anchor['class'] = 'headerlink'
    anchor['title'] = 'Permalink'
    anchor.string = LINK_CHAR
    mod_element.append(anchor)
| |
| |
# fixup cmark content - note that this may be too hungry. It may need to occur later and skipped in codeblock and pre tags.
def fixup_content(content):
    """Reverse the FIXUP_UNSAFE escapings in the raw html of content.

    Writes content._content back only if at least one pattern matched.
    """
    text = content._content
    changed = False
    for pattern, replacement in FIXUP_UNSAFE:
        if pattern.search(text):
            text = pattern.sub(replacement, text)
            changed = True
    if changed:
        content._content = text
| |
| |
# expand metadata found in {{ key }}
def expand_metadata(tag, metadata, debug):
    """Expand every {{ key }} reference in tag's text from the page metadata.

    Keys that cannot be resolved are left behind as '{key}' so the failure
    is visible in the generated page.  tag.string is replaced only when at
    least one {{ key }} pattern was processed.
    """
    this_string = str(tag.string)
    modified = False
    while True:
        m = METADATA_RE.search(this_string)
        if not m:
            break
        this_data = m.group(1).strip()
        format_string = '{{{0}}}'.format(this_data)
        try:
            new_string = format_string.format(**metadata)
            if debug:
                print(f'{{{{{m.group(1)}}}}} -> {new_string}')
        except Exception:
            # the data expression was not found in the metadata
            print(f'{{{{{m.group(1)}}}}} is not found')
            new_string = format_string
        # Replace only the first occurrence.  Use a callable replacement so
        # that backslashes or '\g<...>' sequences in the metadata value are
        # inserted literally; passing new_string directly to re.sub would
        # treat it as a replacement template and raise 'bad escape' (or
        # corrupt the output) for values containing backslashes.
        this_string = METADATA_RE.sub(lambda _m: new_string, this_string, count=1)
        modified = True
    if modified:
        tag.string.replace_with(this_string)
| |
| |
# do elementid transformation for {#id} and {.class} attribute annotations.
def elementid_transform(ids, soup, tag, permalinks, perma_set, debug):
    """Turn a trailing {#id} / {.class} text annotation into a real
    attribute on the annotated text's parent element."""
    target = tag.parent
    text = str(tag.string)
    if debug:
        print(f'name = {target.name}, string = {text}')
    # never rewrite annotations at document level or inside code/pre
    if target.name in ('[document]', 'code', 'pre'):
        return
    m = ELEMENTID_RE.search(tag.string)
    if not m:
        return
    # strip the annotation text; this truncates at the match and will
    # drop any additional annotations that followed it
    tag.string.replace_with(text[:m.start()])
    if m.group('type') == '#':
        # {#id} annotation: assign a unique id, optionally with permalink
        target['id'] = unique(m.group('id'), ids)
        if permalinks:
            permalink(soup, target)
            unique(target['id'], perma_set)
        if debug:
            print(f'# insertion {target}')
    else:
        # {.class} annotation (the regex only admits '#' and '.')
        target['class'] = m.group('id')
        if debug:
            print(f'Class {tag.name} : {target["class"]}')
| |
| |
# generate id for a heading
def headingid_transform(ids, soup, tag, permalinks, perma_set):
    """Give a heading that lacks an id a slugified one; optionally permalink it."""
    heading_text = tag.string
    if not heading_text:
        # no single string child: concatenate all non-comment text fragments
        fragments = tag.find_all(
            text=lambda t: not isinstance(t, Comment),
            recursive=True)
        heading_text = ''.join(fragments)

    # derive the id from the heading text and make it unique
    tag['id'] = unique(slugify(heading_text, '-'), ids)
    if permalinks:
        permalink(soup, tag)
        # inform if there is a duplicate permalink
        unique(tag['id'], perma_set)
| |
| |
# generate table of contents from headings after [TOC] content
def generate_toc(content, tags, title, toc_headers, debug):
    """Replace [TOC] paragraphs with a table of contents built from the
    headings that follow the last [TOC] marker."""
    heading_re = re.compile(toc_headers)
    tree = node = HtmlTreeNode(None, title, 'h0', '')
    # only headings after the final [TOC] are collected
    last_tag = tags[-1]
    have_headings = False
    for header in last_tag.findAllNext(heading_re):
        have_headings = True
        node, _ = node.add(header)

    soup_fragment = ''
    if have_headings:
        if debug:
            print(' ToC')
        # render the HtmlTreeNode tree and re-parse it as a soup fragment
        soup_fragment = BeautifulSoup('{}'.format(tree), 'html.parser')
        # make the ToC available to the theme's template
        content.toc = soup_fragment.decode(formatter='html')
    # the first [TOC] becomes the table of contents; later ones are emptied
    for tag in tags:
        tag.replaceWith(soup_fragment)
        soup_fragment = ''
| |
| |
# create breadcrumb html
def make_breadcrumbs(rel_source_path, title):
    """Build breadcrumb html from a page's relative source path.

    Each directory component becomes a capitalized link to its cumulative
    url; the final crumb is the page title linked to '#'.  The separator is
    a right guillemet '»' (previously a mojibake'd two-character sequence).
    """
    parts = rel_source_path.split('/')
    crumbs = ['<a href="/">Home</a> \u00bb ']
    url = '/'
    # every component except the filename itself becomes a crumb
    for part in parts[:-1]:
        url = f'{url}{part}/'
        crumbs.append(f'<a href="{url}">{part.capitalize()}</a> \u00bb ')
    crumbs.append(f'<a href="#">{title}</a>')
    return ''.join(crumbs)
| |
| |
# add the asfdata metadata into GFM content.
def add_data(content):
    """Mix in ASF data as metadata (skipped when the asf reader already did)."""
    # the 'asf' reader injects this metadata itself during asfreader plugin processing
    if content.metadata.get('reader') == 'asf':
        return
    asf_metadata = content.settings.get('ASF_DATA', {}).get('metadata')
    if asf_metadata:
        content.metadata.update(asf_metadata)
| |
| |
# main worker transforming the html
def generate_id(content):
    """Transform a content object's html in place: fix unsafe tags, expand
    {{ metadata }}, apply {#id}/{.class} annotations, give headings ids and
    permalinks, add table classes, and expand [TOC] (steps 1-10 below).

    Static content is left untouched.  Configuration comes from the
    ASF_GENID settings dict; 'headings' may be overridden per-page via
    'asf_headings' metadata.
    """
    if isinstance(content, pelican.contents.Static):
        return

    # get plugin settings
    asf_genid = content.settings['ASF_GENID']
    # asf_headings setting may be overridden; metadata values are strings,
    # so the default is str()-ified and compared against 'True' below
    asf_headings = content.metadata.get('asf_headings', str(asf_genid['headings']))

    # show active plugins
    if asf_genid['debug']:
        print('asfgenid:\nshow plugins in case one is processing before this one')
        for name in content.settings['PLUGINS']:
            print(f'plugin: {name}')

    # track the id tags assigned so far (for uniqueness)
    ids = set()
    # track permalinks (duplicate detection only)
    permalinks = set()

    # step 1 - fixup html that cmark marks unsafe - move to later?
    if asf_genid['unsafe_tags']:
        fixup_content(content)

    # step 2 - prepare for genid processes
    # parse html content into BeautifulSoup4
    soup = BeautifulSoup(content._content, 'html.parser')
    # page title
    title = content.metadata.get('title', 'Title')
    # assure relative source path is in the metadata
    content.metadata['relative_source_path'] = rel_source_path = content.relative_source_path
    # create breadcrumb html
    content.metadata['breadcrumbs'] = breadcrumbs = make_breadcrumbs(rel_source_path, title)
    # display output path and title
    print(f'{content.relative_source_path} - {title}')
    # if debug display breadcrumb html
    if asf_genid['debug']:
        print(f' {breadcrumbs}')
    # enhance metadata if done by asfreader
    add_data(content)

    # step 3 - metadata expansion
    if asf_genid['metadata']:
        if asf_genid['debug']:
            print(f'metadata expansion: {content.relative_source_path}')
        for tag in soup.findAll(string=METADATA_RE):
            expand_metadata(tag, content.metadata, asf_genid['debug'])

    # step 4 - find all id attributes already present
    for tag in soup.findAll(id=True):
        unique(tag['id'], ids)
        # don't change existing ids

    # step 5 - find all {#id} and {.class} text and assign attributes
    if asf_genid['elements']:
        if asf_genid['debug']:
            print(f'elementid: {content.relative_source_path}')
        for tag in soup.findAll(string=ELEMENTID_RE):
            elementid_transform(ids, soup, tag, asf_genid['permalinks'], permalinks, asf_genid['debug'])

    # step 6 - find all headings w/o ids already present or assigned with {#id} text
    if asf_headings == 'True':
        if asf_genid['debug']:
            print(f'headings: {content.relative_source_path}')
        # Find heading tags (id=False restricts to headings without an id)
        HEADING_RE = re.compile(asf_genid['headings_re'])
        for tag in soup.findAll(HEADING_RE, id=False):
            headingid_transform(ids, soup, tag, asf_genid['permalinks'], permalinks)

    # step 7 - find all tables without class
    if asf_genid['tables']:
        if asf_genid['debug']:
            print(f'tables: {content.relative_source_path}')
        # NOTE(review): '_class=False' filters on an attribute literally
        # named '_class'; bs4's filter for "no class attribute" is
        # 'class_=False' — confirm whether this matches the intent.
        for tag in soup.findAll(TABLE_RE, _class=False):
            tag['class'] = 'table'

    # step 8 - find TOC tag and generate Table of Contents
    if asf_genid['toc']:
        tags = soup('p', text='[TOC]')
        if tags:
            generate_toc(content, tags, title, asf_genid['toc_headers'], asf_genid['debug'])

    # step 9 - reset the html content from the modified soup
    content._content = soup.decode(formatter='html')

    # step 10 - output all of the permalinks created
    if asf_genid['debug']:
        for tag in permalinks:
            print(f' #{tag}')
| |
| |
def tb_connect(pel_ob):
    """Print any exception, before Pelican chews it into nothingness."""
    try:
        generate_id(pel_ob)
    except Exception:
        err = sys.stderr
        print('-----', file=err)
        print(f'FATAL: {pel_ob.relative_source_path}', file=err)
        traceback.print_exc()
        # quit immediately: continuing after an error here could erase the site
        sys.exit(4)
| |
| |
def register():
    """Pelican plugin entry point: seed default settings at initialization."""
    pelican.plugins.signals.initialized.connect(init_default_config)


# NOTE(review): connected at import time rather than inside register() —
# the html transform runs for every content object as it is initialized.
pelican.plugins.signals.content_object_init.connect(tb_connect)