blob: f0876163ce3c132c0ed36b6f5be79fa603066845 [file] [log] [blame]
#!/usr/bin/env python
"""Manipulate code blocks in a markdown file"""
import argparse
import re
import os
import codecs
import json
# Lower-case names of the code-block languages this tool recognises.
_LANGS = {'python', 'r', 'scala', 'julia', 'perl', 'cpp'}
# Start or end fence of a code block.  Group 1 captures the leading spaces
# (the block's indentation); group 2 captures the language tag that follows
# the three backticks, which is empty on a bare fence.
# A raw string is used so that ``\w`` reaches the regex engine unchanged.
_CODE_MARK = re.compile(r'^([ ]*)```([\w]*)')
class CodeBlocks(object):
    """Fenced code blocks of one markdown file, selected by language.

    The file is read once at construction time; ``write`` then derives one
    of several outputs (extracted code, filtered markdown, Jupyter notebook,
    markdown with a language button bar) from the buffered lines.
    """

    def __init__(self, fname, lang):
        """Read ``fname`` and remember the language to operate on.

        Args:
            fname: path of the markdown file; it is decoded as UTF-8.
            lang: language name; it is lower-cased before any comparison.
        """
        with codecs.open(fname, 'r', 'utf-8') as f:
            self.data = f.readlines()
        self.lang = lang.lower()
        self.cells = []

    def _parse_lines(self):
        """Yield ``(line, in_code, lang, indent)`` for every buffered line.

        ``in_code`` is true from an opening fence through its closing fence
        (both fence lines included).  ``lang`` is the lower-cased fence tag
        and ``indent`` the number of spaces before the opening fence; both
        are ``None`` outside a code block.  A fence whose tag is not in
        ``_LANGS`` does not open a block, so its lines are reported as
        ordinary text.
        """
        in_code = False
        lang = None
        indent = None
        for line in self.data:
            mark = _CODE_MARK.match(line)
            if mark is None:
                yield (line, in_code, lang, indent)
            elif not in_code:
                spaces, tag = mark.groups()
                tag = tag.lower()
                if tag in _LANGS:
                    lang = tag
                    indent = len(spaces)
                    in_code = True
                yield (line, in_code, lang, indent)
            else:
                # The closing fence still belongs to the block it closes,
                # so report it before resetting the state.
                yield (line, in_code, lang, indent)
                in_code = False
                lang = None
                indent = None

    def _add_jupyter_block(self, lines, is_code):
        """Append ``lines`` to ``self.cells`` as one notebook cell.

        Args:
            lines: the lines of the block; for a code block the first and
                last entries are expected to be the fence lines.
            is_code: truthy for a code cell, falsy for a markdown cell.

        Blank lines at both ends are dropped and the last line loses its
        trailing whitespace.  Nothing is appended when no content remains.
        The caller's list is left unmodified.
        """
        if is_code and len(lines) >= 2:
            lines = lines[1:-1]  # remove the ``` fences
        # Locate the non-blank span with two indices instead of repeatedly
        # popping from the front of the list.
        first = 0
        last = len(lines)
        while first < last and not lines[first].rstrip():
            first += 1
        while last > first and not lines[last - 1].rstrip():
            last -= 1
        lines = lines[first:last]
        if not lines:
            return
        lines[-1] = lines[-1].rstrip()
        cell = {
            "cell_type": "code" if is_code else "markdown",
            "metadata": {},
            "source": lines,
        }
        if is_code:
            cell.update({
                "outputs": [],
                "execution_count": None,
            })
        self.cells.append(cell)

    def write(self, action, ofname):
        """Write the output selected by ``action`` to ``ofname``.

        Args:
            action: one of
                ``'get'``     - only the code of blocks in ``self.lang``,
                                with the block indentation removed;
                ``'keep'``    - the markdown with code blocks of other
                                recognised languages removed;
                ``'convert'`` - a Jupyter notebook (JSON) built from the
                                text and the ``self.lang`` code blocks;
                ``'add_btn'`` - the markdown with every line containing
                                ``ENABLE LANGUAGE BAR`` replaced by a
                                language button group.
                Any other value writes nothing.
            ofname: path of the output file.
        """
        handlers = {
            'get': self._write_code,
            'keep': self._write_filtered_markdown,
            'convert': self._write_notebook,
            'add_btn': self._write_with_buttons,
        }
        handler = handlers.get(action)
        if handler is not None:
            handler(ofname)

    def _write_code(self, ofname):
        """Write the de-indented code lines of all ``self.lang`` blocks."""
        # The input was decoded as UTF-8, so encode the output the same way.
        with codecs.open(ofname, 'w', 'utf-8') as f:
            for (line, in_code, lang, indent) in self._parse_lines():
                if not in_code or lang != self.lang:
                    continue
                if line[indent:indent + 3] != '```':  # skip the fences
                    f.write(line[indent:])

    def _write_filtered_markdown(self, ofname):
        """Write the markdown without code blocks of other languages."""
        with codecs.open(ofname, 'w', 'utf-8') as f:
            for (line, in_code, lang, _) in self._parse_lines():
                if not in_code or lang == self.lang:
                    f.write(line)

    def _write_notebook(self, ofname):
        """Write a Jupyter notebook of text and ``self.lang`` code cells."""
        # Start from scratch so a repeated call does not duplicate cells.
        self.cells = []
        cur_block = []
        pre_in_code = None
        pre_lang = None
        for (line, in_code, lang, _) in self._parse_lines():
            # A change of state or language closes the current cell.
            if in_code != pre_in_code or lang != pre_lang:
                self._add_jupyter_block(cur_block, pre_in_code)
                cur_block = []
            if not in_code or lang == self.lang:
                cur_block.append(line)
            (pre_in_code, pre_lang) = (in_code, lang)
        self._add_jupyter_block(cur_block, pre_in_code)
        ipynb = {
            "nbformat": 4,
            "nbformat_minor": 2,
            "metadata": {"language": self.lang, "display_name": '',
                         "name": ''},
            "cells": self.cells,
        }
        # json.dump escapes non-ASCII characters by default, so the
        # output is plain ASCII and needs no explicit encoding.
        with open(ofname, 'w') as f:
            json.dump(ipynb, f)

    def _write_with_buttons(self, ofname):
        """Write the markdown with the language button group inserted."""
        langs = set(lang for (_, _, lang, _) in self._parse_lines()
                    if lang is not None)
        print(langs)
        parts = ["""<div class="text-center">
<div class="btn-group opt-group" role="group">
"""]
        # Sort so that the button order, and therefore which button is
        # marked active, does not depend on set iteration order.
        for pos, lang in enumerate(sorted(langs)):
            parts.append(
                "<button type=\"button\" class=\"btn btn-default opt %s\">"
                "%s</button>\n" % ('active' if pos == 0 else '',
                                   lang[0].upper() + lang[1:].lower()))
        parts.append("""</div>
</div>
<script type="text/javascript" src='../../_static/js/options.js'></script>
""")
        btngroup = ''.join(parts)
        with codecs.open(ofname, 'w', 'utf-8') as f:
            for line in self.data:
                if 'ENABLE LANGUAGE BAR' in line:
                    f.write(btngroup)
                else:
                    f.write(line)
def _build_parser():
    """Return the argument parser of the command-line interface."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="""Manipulate code blocks in markdown files.
Sample usage:
- extract all python code blocks in example
./mdcode.py get python example.md example.py
- remove all code blocks except for python
./mdcode.py keep python example.md example_py.md
- remove all code blocks except for python and then convert into jupyter notebook
./mdcode.py convert python example.md example.ipynb
- add the language selection button group into example.md
./mdcode.py add_btn all example.md example.md
""")
    parser.add_argument('action', help='action',
                        choices=['get', 'keep', 'convert', 'add_btn'])
    parser.add_argument('lang', help='code language')
    parser.add_argument('input', help='input markdown filename')
    parser.add_argument('output', help='output file')
    return parser


def main(argv=None):
    """Run the command-line interface.

    Args:
        argv: list of argument strings; ``None`` (the default) makes
            argparse read them from ``sys.argv``.
    """
    args = _build_parser().parse_args(argv)
    code_blocks = CodeBlocks(args.input, args.lang)
    code_blocks.write(args.action, args.output)


if __name__ == '__main__':
    main()