| #!/usr/bin/env python3 |
| |
| """ |
| Convert board minutes to HTML with anchors and index |
| |
| Processes minutes to add the following: |
| - anchors for internal sections |
| - links to internal sections |
| - links to external http(s) URLs |
| - links to board_minute references |
| - index to sections (excluding committee report sections which are just references) |
| |
| N.B. The naming convention for internal anchors is: |
| section-xx or attachment-xx |
| These anchors are intended to be referenced externally, so the format must not be changed |
| """ |
| |
| import sys |
| import re |
| from html import escape |
| |
| MINUTES = 'https://www.apache.org/foundation/records/minutes/' |
| |
| def pod_anchor(podling): |
| """convert podling name to anchor""" |
| return podling.strip().lower().replace(' ', '') |
| |
| # <a class="selflink" id="section-10" href="#section-10">10</a> |
| def add_anchor(current_s, line, links, info): |
| """Add anchors""" |
| # main section |
| mat = re.match(r'^([ \d]\d)(\. .+)', line) |
| if mat: |
| sect = mat.group(1) |
| off = '' |
| if sect.startswith(' '): |
| off = '' |
| sid = sect.replace(' ','') |
| rest = mat.group(2) |
| sname = f"section-{sid}" |
| line = f'{off}<a class="selflink" id="{sname}" href="#{sname}">{sid}{rest}</a>\n' |
| links[sname] = rest.lstrip('. ') |
| # flag when in committee reports |
| if 'Committee Reports' in rest: |
| info['crsection'] = sid |
| else: |
| info.pop('crsection', None) |
| return sid, line # return the updated section number |
| |
| # subsections |
| mat = re.match(r'^( {3,4})([A-Z]+)(\. .+)', line) |
| if mat: |
| off = mat.group(1) |
| sect = mat.group(2) |
| sid = current_s + sect.lstrip(' ') |
| sname = f"section-{sid}" |
| rest = mat.group(3) |
| line = f'{off}<a class="selflink" id="{sname}" href="#{sname}">{sect}{rest}</a>\n' |
| links[sname] = rest.lstrip('. ') |
| return current_s, line |
| |
| # Attachments |
| mat = re.match(r'^Attachment (\w+)(: .+)', line) |
| if mat: |
| sect = mat.group(1) |
| sname = 'attachment-' + sect |
| rest = mat.group(2) |
| info['sname'] = rest |
| line = f'<a class="selflink" id="{sname}" href="#{sname}">Attachment {sect}{rest}</a>\n' |
| links[sname] = rest.lstrip(':') |
| return current_s, line |
| |
| # Links to attachments |
| mat = re.match(r'^ +(See Attachment (\w+))', line) |
| if mat: |
| ref = mat.group(1) |
| sect = mat.group(2) |
| line = line.replace(ref, f'<a href="#attachment-{sect}">{ref}</a>') |
| # drop link to CR section if there is an attachment |
| crsect = info.get('crsection') |
| if crsect: |
| links.pop(f'section-{crsect}{sect}') |
| return current_s, line |
| |
| # board minutes |
| mat = re.search(r' (board_minutes_(\d\d\d\d)_\d\d_\d\d.txt)', line) |
| if mat: |
| minutes = mat.group(1) |
| year = mat.group(2) |
| line = line.replace(minutes, f'<a href="{MINUTES}{year}/{minutes}">{minutes}</a>') |
| return current_s, line |
| |
| # external URLs TODO: tighten matching .. |
| mat = re.search(r'(https?://[^\s,)]+)', line) |
| if mat: |
| url = mat.group(1).rstrip(".") |
| line = line.replace(url, f'<a href="{url}">{url}</a>') |
| return current_s, line |
| |
| # Podling ToC? |
| # [Podling](#podling) |
| mat = re.match(r'\[[^]]+\]\((#[^)]+)\)', line) |
| if mat: |
| anchor = mat.group(1) |
| line = line.replace(anchor, f'<a href="{pod_anchor(anchor)}">{anchor}</a>') |
| return current_s, line |
| |
| # we are in a podling report |
| if info['podhdr'] and line.strip() != '': |
| info['podhdr'] = False |
| pod = line.lstrip('# ').strip() |
| anchor = pod_anchor(pod) |
| if not pod.startswith('---'): # --- indicates end of podlings |
| line = f'<a class="selflink" id="{anchor}" href="#{anchor}">{line.strip()}</a>\n' |
| links[anchor] = "-- " + pod |
| return current_s, line |
| |
| # Start of a podling section? |
| if line.strip() == '--------------------' and 'Incubator Project' in info['sname']: |
| info['podhdr'] = True |
| |
| # anything else |
| return current_s, line |
| |
| HDR="""<html> |
| <head> |
| <meta charset="UTF-8"> |
| <style> |
| .selflink {text-decoration: none} |
| </style> |
| </head> |
| <body> |
| <a href="#index">Index</a> |
| """ |
| |
| FTR="""</body> |
| </html> |
| """ |
| |
| def text2html(inp, out, extrahdr=''): |
| """html-ise text""" |
| links = {} |
| info = {} |
| # init entries |
| info['sname'] = '' |
| info['podhdr'] = False |
| out.write(HDR) |
| out.write(extrahdr) |
| out.write('<pre>') |
| cur_s = None |
| for line in inp: |
| line = escape(line, quote=False) # probably don't need to escape quotes |
| cur_s, line = add_anchor(cur_s, line, links, info) |
| out.write(line) |
| out.write('</pre>\n') |
| out.write('<h2 id="index">Index</h2>\n') |
| out.write('<ul>\n') |
| level = 1 |
| for link, text in links.items(): |
| if re.search(r'\d[A-Z]{1,2}$', link): # second level link |
| if level == 1: |
| out.write('<ul>\n') |
| level = 2 |
| else: |
| if level == 2: |
| out.write('</ul>\n') |
| level = 1 |
| out.write(f'<li><a href="#{link}">{text}</a></li>\n') |
| if level == 2: |
| out.write('</ul>\n') |
| level = 1 |
| out.write('</ul>\n') |
| out.write(FTR) |
| |
| def process_files(infile, outfile): |
| with open(infile, 'r', encoding='utf8') as inp: |
| with open(outfile, 'w', encoding='utf8') as out: |
| text2html(inp,out) |
| |
| def main(): |
| """Main""" |
| infile = sys.argv[1] |
| outfile = sys.argv[2] |
| process_files(infile, outfile) |
| |
| if __name__ == '__main__': |
| main() |