docs/tools/mdcode.py - mxnet-test - Git at Google

 #!/usr/bin/env python
 """Manipulate code blocks in a markdown file"""
 import argparse
 import re
 import os
 import codecs
 import json

 # language names and the according file extensions
 _LANGS = {'python', 'r', 'scala', 'julia', 'perl', 'cpp'}

 # start or end of a code block
 _CODE_MARK = re.compile('^([ ]*)```([\w]*)')

 class CodeBlocks(object):
     """The lines of a markdown file plus operations on its fenced code blocks.

     The file is read once, as UTF-8, when the object is created. ``write``
     then produces one of several outputs depending on the requested action.
     """

     def __init__(self, fname, lang):
         """Read the markdown file and remember the language to act on.

         Args:
             fname: path of the markdown file; it is decoded as UTF-8.
             lang: language name; it is lower-cased before any comparison.
         """
         with codecs.open(fname, 'r', 'utf-8') as f:
             self.data = f.readlines()
         self.lang = lang.lower()
         self.cells = []

     def _parse_lines(self):
         """Yield ``(line, in_code, lang, indent)`` for every line of the file.

         A fence opens a code block only when its language tag is in
         ``_LANGS``; any other fence is reported like ordinary text. Both the
         opening and the closing fence of a recognised block are yielded with
         ``in_code`` set to True. ``indent`` is the number of spaces in front
         of the opening fence.
         """
         in_code = False
         lang = None
         indent = None
         for line in self.data:
             m = _CODE_MARK.match(line)
             if m is None:
                 yield (line, in_code, lang, indent)
             elif in_code:
                 # closing fence: it still belongs to the block it terminates
                 yield (line, in_code, lang, indent)
                 in_code, lang, indent = False, None, None
             else:
                 tag = m.group(2).lower()
                 if tag in _LANGS:
                     in_code, lang, indent = True, tag, len(m.group(1))
                 yield (line, in_code, lang, indent)

     def _add_jupyter_block(self, lines, is_code):
         """Append ``lines`` to ``self.cells`` as one notebook cell.

         For a code block the first and last line (the fences) are dropped.
         Blank lines at both ends are trimmed and the trailing whitespace of
         the last remaining line is removed. Nothing is appended when no
         content is left. The caller's list is not modified.

         Args:
             lines: the lines of the block, fences included for code blocks.
             is_code: truthy for a code cell, falsy for a markdown cell.
         """
         if is_code and len(lines) >= 2:
             lines = lines[1:-1]  # remove ```
         # Trim by index instead of repeated pop(0), which is quadratic and
         # would also empty the list object owned by the caller.
         start, end = 0, len(lines)
         while start < end and not lines[start].rstrip():
             start += 1
         while end > start and not lines[end - 1].rstrip():
             end -= 1
         if start == end:
             return
         lines = lines[start:end]
         lines[-1] = lines[-1].rstrip()
         cell = {
             "cell_type": "code" if is_code else "markdown",
             "metadata": {},
             "source": lines
         }
         if is_code:
             cell.update({
                 "outputs": [],
                 "execution_count": None,
             })
         self.cells.append(cell)

     def write(self, action, ofname):
         """Write the result of ``action`` to the file ``ofname``.

         Text outputs are encoded as UTF-8, matching how the input was read.

         Args:
             action: one of
                 'get'     - only the body of the code blocks in ``self.lang``,
                             with the fences and the block indent removed;
                 'keep'    - the whole document minus the code blocks written
                             in another recognised language;
                 'convert' - a Jupyter notebook (JSON) made of markdown cells
                             and of the code blocks in ``self.lang``;
                 'add_btn' - the document with every line that contains
                             'ENABLE LANGUAGE BAR' replaced by an HTML button
                             group listing the languages found in the file.
                 Any other value writes nothing.
             ofname: path of the output file.
         """
         if action == 'get':
             with codecs.open(ofname, 'w', 'utf-8') as f:
                 for (l, in_code, lang, indent) in self._parse_lines():
                     if in_code and lang == self.lang and l[indent:indent+3] != '```':
                         f.write(l[indent:])
             return
         if action == 'keep':
             with codecs.open(ofname, 'w', 'utf-8') as f:
                 for (l, in_code, lang, _) in self._parse_lines():
                     if not in_code or lang == self.lang:
                         f.write(l)
             return
         if action == 'convert':
             # start from a clean slate so that a second call does not append
             # the same cells again
             self.cells = []
             cur_block = []
             pre_in_code = None
             pre_lang = None
             for (l, in_code, lang, _) in self._parse_lines():
                 # a change of state or language closes the current cell
                 if in_code != pre_in_code or lang != pre_lang:
                     self._add_jupyter_block(cur_block, pre_in_code)
                     cur_block = []
                 if not in_code or lang == self.lang:
                     cur_block.append(l)
                 (pre_in_code, pre_lang) = (in_code, lang)
             self._add_jupyter_block(cur_block, pre_in_code)

             ipynb = {"nbformat":4, "nbformat_minor":2,
                      "metadata":{"language":self.lang, "display_name":'', "name":''}, "cells":self.cells}
             # json.dump escapes non-ASCII characters by default, so a plain
             # text file is sufficient here
             with open(ofname, 'w') as f:
                 json.dump(ipynb, f)
             return
         if action == 'add_btn':
             langs = set(lang for (_, _, lang, _) in self._parse_lines() if lang is not None)
             print(langs)
             buttons = []
             active = True
             # sorted: set order is arbitrary, which made the choice of the
             # initially active button unpredictable
             for name in sorted(langs):
                 buttons.append("<button type=\"button\" class=\"btn btn-default opt %s\">%s</button>\n" % (
                     'active' if active else '', name[0].upper()+name[1:].lower()))
                 active = False
             btngroup = """<div class="text-center">
 <div class="btn-group opt-group" role="group">
 """ + "".join(buttons) + """</div>
 </div>
 <script type="text/javascript" src='../../_static/js/options.js'></script>
 """
             with codecs.open(ofname, 'w', 'utf-8') as f:
                 for l in self.data:
                     if 'ENABLE LANGUAGE BAR' in l:
                         f.write(btngroup)
                     else:
                         f.write(l)

 if __name__ == '__main__':
     # Command-line entry point: <action> <lang> <input> <output>.
     usage_text = """Manipulate code blocks in markdown files.
 Sample usage:
 - extract all python code blocks in example
   ./mdcode.py get python example.md example.py
 - remove all codes blocks except for python
   ./mdcode.py keep python example.md example_py.md
 - remove all codes blocks except for python and then convert into jupyter notebook
   ./mdcode.py convert python example.md example.ipynb
 - add the language selection botton group into example.md
   ./mdcode.py add_btn all example.md example.md
     """
     cli = argparse.ArgumentParser(
         description=usage_text,
         # raw formatter: keep the line breaks of the usage text in --help
         formatter_class=argparse.RawDescriptionHelpFormatter)
     cli.add_argument('action',
                      choices=['get', 'keep', 'convert', 'add_btn'],
                      help='action')
     # the three remaining positionals only differ by name and help text
     for arg_name, arg_help in (('lang', 'code language'),
                                ('input', 'input markdown filename'),
                                ('output', 'output file')):
         cli.add_argument(arg_name, help=arg_help)
     opts = cli.parse_args()

     CodeBlocks(opts.input, opts.lang).write(opts.action, opts.output)
	#!/usr/bin/env python
	"""Manipulate code blocks in a markdown file"""
	import argparse
	import re
	import os
	import codecs
	import json

	# language names and the according file extensions
	_LANGS = {'python', 'r', 'scala', 'julia', 'perl', 'cpp'}

	# start or end of a code block
	_CODE_MARK = re.compile('^([ ])```([\w])')

	class CodeBlocks(object):
	    """The lines of a markdown file plus operations on its fenced code blocks.

	    The file is read once, as UTF-8, when the object is created. ``write``
	    then produces one of several outputs depending on the requested action.
	    """

	    def __init__(self, fname, lang):
	        """Read the markdown file and remember the language to act on.

	        Args:
	            fname: path of the markdown file; it is decoded as UTF-8.
	            lang: language name; it is lower-cased before any comparison.
	        """
	        with codecs.open(fname, 'r', 'utf-8') as f:
	            self.data = f.readlines()
	        self.lang = lang.lower()
	        self.cells = []

	    def _parse_lines(self):
	        """Yield ``(line, in_code, lang, indent)`` for every line of the file.

	        A fence opens a code block only when its language tag is in
	        ``_LANGS``; any other fence is reported like ordinary text. Both the
	        opening and the closing fence of a recognised block are yielded with
	        ``in_code`` set to True. ``indent`` is the length of the first
	        regex group matched on the opening fence.
	        """
	        in_code = False
	        lang = None
	        indent = None
	        for line in self.data:
	            m = _CODE_MARK.match(line)
	            if m is None:
	                yield (line, in_code, lang, indent)
	            elif in_code:
	                # closing fence: it still belongs to the block it terminates
	                yield (line, in_code, lang, indent)
	                in_code, lang, indent = False, None, None
	            else:
	                tag = m.group(2).lower()
	                if tag in _LANGS:
	                    in_code, lang, indent = True, tag, len(m.group(1))
	                yield (line, in_code, lang, indent)

	    def _add_jupyter_block(self, lines, is_code):
	        """Append ``lines`` to ``self.cells`` as one notebook cell.

	        For a code block the first and last line (the fences) are dropped.
	        Blank lines at both ends are trimmed and the trailing whitespace of
	        the last remaining line is removed. Nothing is appended when no
	        content is left. The caller's list is not modified.

	        Args:
	            lines: the lines of the block, fences included for code blocks.
	            is_code: truthy for a code cell, falsy for a markdown cell.
	        """
	        if is_code and len(lines) >= 2:
	            lines = lines[1:-1]  # remove ```
	        # Trim by index instead of repeated pop(0), which is quadratic and
	        # would also empty the list object owned by the caller.
	        start, end = 0, len(lines)
	        while start < end and not lines[start].rstrip():
	            start += 1
	        while end > start and not lines[end - 1].rstrip():
	            end -= 1
	        if start == end:
	            return
	        lines = lines[start:end]
	        lines[-1] = lines[-1].rstrip()
	        cell = {
	            "cell_type": "code" if is_code else "markdown",
	            "metadata": {},
	            "source": lines
	        }
	        if is_code:
	            cell.update({
	                "outputs": [],
	                "execution_count": None,
	            })
	        self.cells.append(cell)

	    def write(self, action, ofname):
	        """Write the result of ``action`` to the file ``ofname``.

	        Text outputs are encoded as UTF-8, matching how the input was read.

	        Args:
	            action: one of
	                'get'     - only the body of the code blocks in ``self.lang``,
	                            with the fences and the block indent removed;
	                'keep'    - the whole document minus the code blocks written
	                            in another recognised language;
	                'convert' - a Jupyter notebook (JSON) made of markdown cells
	                            and of the code blocks in ``self.lang``;
	                'add_btn' - the document with every line that contains
	                            'ENABLE LANGUAGE BAR' replaced by an HTML button
	                            group listing the languages found in the file.
	                Any other value writes nothing.
	            ofname: path of the output file.
	        """
	        if action == 'get':
	            with codecs.open(ofname, 'w', 'utf-8') as f:
	                for (l, in_code, lang, indent) in self._parse_lines():
	                    if in_code and lang == self.lang and l[indent:indent+3] != '```':
	                        f.write(l[indent:])
	            return
	        if action == 'keep':
	            with codecs.open(ofname, 'w', 'utf-8') as f:
	                for (l, in_code, lang, _) in self._parse_lines():
	                    if not in_code or lang == self.lang:
	                        f.write(l)
	            return
	        if action == 'convert':
	            # start from a clean slate so that a second call does not append
	            # the same cells again
	            self.cells = []
	            cur_block = []
	            pre_in_code = None
	            pre_lang = None
	            for (l, in_code, lang, _) in self._parse_lines():
	                # a change of state or language closes the current cell
	                if in_code != pre_in_code or lang != pre_lang:
	                    self._add_jupyter_block(cur_block, pre_in_code)
	                    cur_block = []
	                if not in_code or lang == self.lang:
	                    cur_block.append(l)
	                (pre_in_code, pre_lang) = (in_code, lang)
	            self._add_jupyter_block(cur_block, pre_in_code)

	            ipynb = {"nbformat":4, "nbformat_minor":2,
	                     "metadata":{"language":self.lang, "display_name":'', "name":''}, "cells":self.cells}
	            # json.dump escapes non-ASCII characters by default, so a plain
	            # text file is sufficient here
	            with open(ofname, 'w') as f:
	                json.dump(ipynb, f)
	            return
	        if action == 'add_btn':
	            langs = set(lang for (_, _, lang, _) in self._parse_lines() if lang is not None)
	            print(langs)
	            buttons = []
	            active = True
	            # sorted: set order is arbitrary, which made the choice of the
	            # initially active button unpredictable
	            for name in sorted(langs):
	                buttons.append("<button type=\"button\" class=\"btn btn-default opt %s\">%s</button>\n" % (
	                    'active' if active else '', name[0].upper()+name[1:].lower()))
	                active = False
	            btngroup = """<div class="text-center">
	<div class="btn-group opt-group" role="group">
	""" + "".join(buttons) + """</div>
	</div>
	<script type="text/javascript" src='../../_static/js/options.js'></script>
	"""
	            with codecs.open(ofname, 'w', 'utf-8') as f:
	                for l in self.data:
	                    if 'ENABLE LANGUAGE BAR' in l:
	                        f.write(btngroup)
	                    else:
	                        f.write(l)

	if __name__ == '__main__':
	    # Command-line entry point: <action> <lang> <input> <output>.
	    usage_text = """Manipulate code blocks in markdown files.
	Sample usage:
	- extract all python code blocks in example
	./mdcode.py get python example.md example.py
	- remove all codes blocks except for python
	./mdcode.py keep python example.md example_py.md
	- remove all codes blocks except for python and then convert into jupyter notebook
	./mdcode.py convert python example.md example.ipynb
	- add the language selection botton group into example.md
	./mdcode.py add_btn all example.md example.md
	"""
	    cli = argparse.ArgumentParser(
	        description=usage_text,
	        # raw formatter: keep the line breaks of the usage text in --help
	        formatter_class=argparse.RawDescriptionHelpFormatter)
	    cli.add_argument('action',
	                     choices=['get', 'keep', 'convert', 'add_btn'],
	                     help='action')
	    # the three remaining positionals only differ by name and help text
	    for arg_name, arg_help in (('lang', 'code language'),
	                               ('input', 'input markdown filename'),
	                               ('output', 'output file')):
	        cli.add_argument(arg_name, help=arg_help)
	    opts = cli.parse_args()

	    CodeBlocks(opts.input, opts.lang).write(opts.action, opts.output)