gfm.py - infrastructure-pelican - Git at Google

 #!/usr/bin/python
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 #
 #
 # gfm_reader.py -- GitHub-Flavored Markdown reader for Pelican
 #

 import sys
 import os.path
 import ctypes
 import time
 import re
 import platform

 import pelican.utils
 import pelican.plugins.signals
 import pelican.readers

 # Directory that holds the cmark-gfm shared libraries. The variable is read
 # by indexing os.environ, so an unset LIBCMARKDIR surfaces as a KeyError at
 # import time (not as an ImportError like the load failures below).
 _LIBDIR = os.environ['LIBCMARKDIR']

 # Shared-library suffix: '.dylib' when platform.system() reports 'Darwin',
 # '.so' on every other platform.
 _LIBEXT = '.dylib' if platform.system() == 'Darwin' else '.so'

 # Load the core library.
 _LIBCMARK = f'libcmark-gfm{_LIBEXT}'
 try:
     cmark = ctypes.CDLL(os.path.join(_LIBDIR, _LIBCMARK))
 except OSError:
     raise ImportError('%s not found. see build-cmark.sh' % (_LIBCMARK,))

 # Load the extensions library. Newer releases use a different file name
 # (and a different registration symbol) than older ones, so the newer name
 # is tried first and the older name is the fallback. ENSURE_REGISTERED
 # records the symbol that goes with whichever library was loaded.
 try:
     cmark_ext = ctypes.CDLL(
         os.path.join(_LIBDIR, f'libcmark-gfm-extensions{_LIBEXT}'))
 except OSError:
     try:
         cmark_ext = ctypes.CDLL(
             os.path.join(_LIBDIR, f'libcmark-gfmextensions{_LIBEXT}'))
     except OSError:
         raise ImportError('GFM Extensions not found. See build-cmark.sh')
     else:
         ENSURE_REGISTERED = 'core_extensions_ensure_registered'
 else:
     ENSURE_REGISTERED = 'cmark_gfm_core_extensions_ensure_registered'


 # Use ctypes to access the functions in libcmark-gfm.
 #
 # Every binding below is given an explicit return type and argument types.
 # Pointers are declared as c_void_p, so on the Python side they are plain
 # integers (or None for NULL).
 def _declare(func, restype, *argtypes):
     "Attach RESTYPE and ARGTYPES to the ctypes function FUNC; return FUNC."
     func.restype = restype
     func.argtypes = argtypes
     return func


 # (int options) -> parser pointer
 F_cmark_parser_new = _declare(
     cmark.cmark_parser_new,
     ctypes.c_void_p,
     ctypes.c_int)

 # (parser pointer, char buffer, size_t length) -> nothing
 F_cmark_parser_feed = _declare(
     cmark.cmark_parser_feed,
     None,
     ctypes.c_void_p, ctypes.c_char_p, ctypes.c_size_t)

 # (parser pointer) -> document node pointer
 F_cmark_parser_finish = _declare(
     cmark.cmark_parser_finish,
     ctypes.c_void_p,
     ctypes.c_void_p)

 # (parser pointer, extension pointer) -> int status
 F_cmark_parser_attach_syntax_extension = _declare(
     cmark.cmark_parser_attach_syntax_extension,
     ctypes.c_int,
     ctypes.c_void_p, ctypes.c_void_p)

 # (parser pointer) -> pointer to the parser's extension list
 F_cmark_parser_get_syntax_extensions = _declare(
     cmark.cmark_parser_get_syntax_extensions,
     ctypes.c_void_p,
     ctypes.c_void_p)

 # (parser pointer) -> nothing
 F_cmark_parser_free = _declare(
     cmark.cmark_parser_free,
     None,
     ctypes.c_void_p)

 # (node pointer) -> nothing
 F_cmark_node_free = _declare(
     cmark.cmark_node_free,
     None,
     ctypes.c_void_p)

 # (char* extension name) -> extension pointer
 F_cmark_find_syntax_extension = _declare(
     cmark.cmark_find_syntax_extension,
     ctypes.c_void_p,
     ctypes.c_char_p)

 # (node pointer, int options, extension list pointer) -> char* HTML
 # NOTE(review): with a c_char_p return type ctypes hands back a bytes copy
 # and the original pointer is not available to the caller. If cmark expects
 # the caller to free that buffer it is never released -- confirm against
 # the cmark-gfm API documentation.
 F_cmark_render_html = _declare(
     cmark.cmark_render_html,
     ctypes.c_char_p,
     ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p)


 # Options value handed to both the parser constructor and the HTML renderer.
 ### this could be moved into SETTINGS or somesuch, but meh. not needed now.
 OPTS = 0

 # Names of the GFM extensions that get attached to every parser.
 EXTENSIONS = ('autolink', 'table', 'strikethrough', 'tagfilter')

 # Set up the libcmark-gfm library and its extensions: look up the
 # registration function under the symbol name chosen when the extensions
 # library was loaded, declare it as taking no arguments and returning
 # nothing, then call it once at import time.
 F_register = getattr(cmark_ext, ENSURE_REGISTERED)
 F_register.argtypes = ()
 F_register.restype = None
 F_register()

 ### technically, maybe install an atexit() to release the plugins


 class GFMReader(pelican.readers.BaseReader):
     """GFM-flavored Reader for the Pelican system.

     Pelican looks for all subclasses of BaseReader, and automatically
     registers them for the file extensions listed below. Thus, nothing
     further is required by users of this Reader.
     """

     # Declared after the docstring so that the string above really is the
     # class docstring (a string that follows another statement is discarded).
     enabled = True

     # NOTE: the builtin MarkdownReader must be disabled. Otherwise, it will be
     #       non-deterministic which Reader will be used for these files.
     file_extensions = ['md', 'markdown', 'mkd', 'mdown']

     # Metadata is specified as a single, colon-separated line, such as:
     #
     # Title: this is the title
     #
     # Note: name starts in column 0, no whitespace before colon, will be
     #       made lower-case, and value will be stripped
     #
     # The character class was previously written "A-za-z". The "A-z" range
     # also covers the ASCII punctuation between "Z" and "a" ([ \ ] ^ _ `), so
     # a Markdown link reference such as "[name]: http://..." was swallowed as
     # metadata. Letters are what is intended; "_" is kept so that names such
     # as "save_as", which the old pattern accepted, keep working.
     RE_METADATA = re.compile(r'^([A-Za-z_]+): (.*)$')

     def read_source(self, source_path):
         """Read metadata and content from the source.

         The leading run of "Name: value" lines (blank lines in between are
         skipped) is parsed into the metadata dict; everything from the first
         other non-blank line onward is the Markdown body.

         Args:
             source_path: path of the Markdown file; it is made relative to
                 self.settings['PATH'] to derive the slug.

         Returns:
             A (text, metadata) tuple. TEXT is the body with the header
             removed ('' when the file holds nothing but header lines).
             METADATA always contains a 'slug' entry.
         """
         # Prepare the "slug", which is the target file name. It will be the
         # same as the source file, minus the leading ".../content/(articles|pages)"
         # and with the extension removed (Pelican will add .html)
         relpath = os.path.relpath(source_path, self.settings['PATH'])
         parts = relpath.split(os.sep)
         parts[-1] = os.path.splitext(parts[-1])[0]  # split off ext, keep base
         metadata = {
             'slug': os.sep.join(parts[1:]),
         }

         # Fetch the source content
         with pelican.utils.pelican_open(source_path) as text:
             lines = text.splitlines()

         # Extract the metadata from the header of the text. BODY_START ends up
         # as the index of the first content line; it stays at len(lines) when
         # the file is empty or consists of header lines only, so that no
         # header line leaks into the body and an empty file does not crash.
         body_start = len(lines)
         for index, line in enumerate(lines):
             match = GFMReader.RE_METADATA.match(line)
             if match:
                 name = match.group(1).strip().lower()
                 if name == 'slug':
                     # The slug is derived from the path/title; a "slug:"
                     # header is ignored rather than stored. (Previously this
                     # stored the value of the preceding header line, or
                     # failed when it was the first header line.)
                     continue
                 value = match.group(2).strip()
                 if name == 'date':
                     value = pelican.utils.get_date(value)
                 metadata[name] = value
             elif line.strip():
                 # reached actual content
                 body_start = index
                 break
             # else: blank line inside the header, keep scanning

         # Redo the slug for articles.
         # depending on pelicanconf.py this will change the output filename
         if parts[0] == 'articles' and 'title' in metadata:
             metadata['slug'] = pelican.utils.slugify(
                 metadata['title'],
                 self.settings.get('SLUG_SUBSTITUTIONS', ()))

         # Reassemble content, minus the metadata
         return '\n'.join(lines[body_start:]), metadata

     def read(self, source_path):
         """Read metadata and content then render into HTML.

         Args:
             source_path: path of the Markdown file to read.

         Returns:
             A (content, metadata) tuple; CONTENT is the rendered HTML
             fragment as str. A source without any body is rendered from
             an empty input instead of being rejected.
         """
         text, metadata = self.read_source(source_path)

         # The ctypes binding takes a char buffer, so hand over UTF-8 bytes
         # and decode what comes back. (This module already requires
         # Python 3 -- it uses f-strings -- so no Python 2 branch is needed.)
         html = self.render(text.encode('utf-8'))
         return html.decode('utf-8'), metadata

     def render(self, text):
         """Use cmark-gfm to render the Markdown into an HTML fragment.

         Args:
             text: the Markdown source as bytes.

         Returns:
             The rendered HTML as bytes.

         Raises:
             RuntimeError: if the parser cannot be created, an extension
                 cannot be found or attached, or no document/output is
                 produced. Explicit checks are used instead of assert so
                 that a NULL pointer is never passed on under "python -O".
         """
         parser = F_cmark_parser_new(OPTS)
         if not parser:
             raise RuntimeError('cmark_parser_new() returned NULL')

         doc = None
         try:
             for name in EXTENSIONS:
                 ext = F_cmark_find_syntax_extension(name.encode('utf-8'))
                 if not ext:
                     raise RuntimeError(f'GFM extension not found: {name}')
                 if not F_cmark_parser_attach_syntax_extension(parser, ext):
                     raise RuntimeError(
                         f'GFM extension could not be attached: {name}')

             # Fetched before the parser is freed; the renderer needs it.
             exts = F_cmark_parser_get_syntax_extensions(parser)

             F_cmark_parser_feed(parser, text, len(text))
             doc = F_cmark_parser_finish(parser)
             if not doc:
                 raise RuntimeError('cmark_parser_finish() returned NULL')

             output = F_cmark_render_html(doc, OPTS, exts)
             if output is None:
                 raise RuntimeError('cmark_render_html() returned NULL')
             return output
         finally:
             # Release the native objects on every path, including failures.
             F_cmark_parser_free(parser)
             if doc:
                 F_cmark_node_free(doc)


 def add_readers(readers):
     """Make GFMReader the reader class for the 'md' extension.

     READERS is the object passed to this handler; only its reader_classes
     mapping is touched.
     """
     registry = readers.reader_classes
     registry['md'] = GFMReader


 def register():
     """Connect add_readers to Pelican's readers_init signal."""
     readers_init = pelican.plugins.signals.readers_init
     readers_init.connect(add_readers)
	#!/usr/bin/python
	#
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	#
	#
	# gfm_reader.py -- GitHub-Flavored Markdown reader for Pelican
	#

	import sys
	import os.path
	import ctypes
	import time
	import re
	import platform

	import pelican.utils
	import pelican.plugins.signals
	import pelican.readers

	# Directory that holds the cmark-gfm shared libraries. The variable is read
	# by indexing os.environ, so an unset LIBCMARKDIR surfaces as a KeyError at
	# import time (not as an ImportError like the load failures below).
	_LIBDIR = os.environ['LIBCMARKDIR']

	# Shared-library suffix: '.dylib' when platform.system() reports 'Darwin',
	# '.so' on every other platform.
	_LIBEXT = '.dylib' if platform.system() == 'Darwin' else '.so'

	# Load the core library.
	_LIBCMARK = f'libcmark-gfm{_LIBEXT}'
	try:
	    cmark = ctypes.CDLL(os.path.join(_LIBDIR, _LIBCMARK))
	except OSError:
	    raise ImportError('%s not found. see build-cmark.sh' % (_LIBCMARK,))

	# Load the extensions library. Newer releases use a different file name
	# (and a different registration symbol) than older ones, so the newer name
	# is tried first and the older name is the fallback. ENSURE_REGISTERED
	# records the symbol that goes with whichever library was loaded.
	try:
	    cmark_ext = ctypes.CDLL(
	        os.path.join(_LIBDIR, f'libcmark-gfm-extensions{_LIBEXT}'))
	except OSError:
	    try:
	        cmark_ext = ctypes.CDLL(
	            os.path.join(_LIBDIR, f'libcmark-gfmextensions{_LIBEXT}'))
	    except OSError:
	        raise ImportError('GFM Extensions not found. See build-cmark.sh')
	    else:
	        ENSURE_REGISTERED = 'core_extensions_ensure_registered'
	else:
	    ENSURE_REGISTERED = 'cmark_gfm_core_extensions_ensure_registered'


	# Use ctypes to access the functions in libcmark-gfm.
	#
	# Every binding below is given an explicit return type and argument types.
	# Pointers are declared as c_void_p, so on the Python side they are plain
	# integers (or None for NULL).
	def _declare(func, restype, *argtypes):
	    "Attach RESTYPE and ARGTYPES to the ctypes function FUNC; return FUNC."
	    func.restype = restype
	    func.argtypes = argtypes
	    return func


	# (int options) -> parser pointer
	F_cmark_parser_new = _declare(
	    cmark.cmark_parser_new,
	    ctypes.c_void_p,
	    ctypes.c_int)

	# (parser pointer, char buffer, size_t length) -> nothing
	F_cmark_parser_feed = _declare(
	    cmark.cmark_parser_feed,
	    None,
	    ctypes.c_void_p, ctypes.c_char_p, ctypes.c_size_t)

	# (parser pointer) -> document node pointer
	F_cmark_parser_finish = _declare(
	    cmark.cmark_parser_finish,
	    ctypes.c_void_p,
	    ctypes.c_void_p)

	# (parser pointer, extension pointer) -> int status
	F_cmark_parser_attach_syntax_extension = _declare(
	    cmark.cmark_parser_attach_syntax_extension,
	    ctypes.c_int,
	    ctypes.c_void_p, ctypes.c_void_p)

	# (parser pointer) -> pointer to the parser's extension list
	F_cmark_parser_get_syntax_extensions = _declare(
	    cmark.cmark_parser_get_syntax_extensions,
	    ctypes.c_void_p,
	    ctypes.c_void_p)

	# (parser pointer) -> nothing
	F_cmark_parser_free = _declare(
	    cmark.cmark_parser_free,
	    None,
	    ctypes.c_void_p)

	# (node pointer) -> nothing
	F_cmark_node_free = _declare(
	    cmark.cmark_node_free,
	    None,
	    ctypes.c_void_p)

	# (char* extension name) -> extension pointer
	F_cmark_find_syntax_extension = _declare(
	    cmark.cmark_find_syntax_extension,
	    ctypes.c_void_p,
	    ctypes.c_char_p)

	# (node pointer, int options, extension list pointer) -> char* HTML
	# NOTE(review): with a c_char_p return type ctypes hands back a bytes copy
	# and the original pointer is not available to the caller. If cmark expects
	# the caller to free that buffer it is never released -- confirm against
	# the cmark-gfm API documentation.
	F_cmark_render_html = _declare(
	    cmark.cmark_render_html,
	    ctypes.c_char_p,
	    ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p)


	# Options value handed to both the parser constructor and the HTML renderer.
	### this could be moved into SETTINGS or somesuch, but meh. not needed now.
	OPTS = 0

	# Names of the GFM extensions that get attached to every parser.
	EXTENSIONS = ('autolink', 'table', 'strikethrough', 'tagfilter')

	# Set up the libcmark-gfm library and its extensions: look up the
	# registration function under the symbol name chosen when the extensions
	# library was loaded, declare it as taking no arguments and returning
	# nothing, then call it once at import time.
	F_register = getattr(cmark_ext, ENSURE_REGISTERED)
	F_register.argtypes = ()
	F_register.restype = None
	F_register()

	### technically, maybe install an atexit() to release the plugins


	class GFMReader(pelican.readers.BaseReader):
	    """GFM-flavored Reader for the Pelican system.

	    Pelican looks for all subclasses of BaseReader, and automatically
	    registers them for the file extensions listed below. Thus, nothing
	    further is required by users of this Reader.
	    """

	    # Declared after the docstring so that the string above really is the
	    # class docstring (a string that follows another statement is discarded).
	    enabled = True

	    # NOTE: the builtin MarkdownReader must be disabled. Otherwise, it will be
	    #       non-deterministic which Reader will be used for these files.
	    file_extensions = ['md', 'markdown', 'mkd', 'mdown']

	    # Metadata is specified as a single, colon-separated line, such as:
	    #
	    # Title: this is the title
	    #
	    # Note: name starts in column 0, no whitespace before colon, will be
	    #       made lower-case, and value will be stripped
	    #
	    # The character class was previously written "A-za-z". The "A-z" range
	    # also covers the ASCII punctuation between "Z" and "a" ([ \ ] ^ _ `), so
	    # a Markdown link reference such as "[name]: http://..." was swallowed as
	    # metadata. Letters are what is intended; "_" is kept so that names such
	    # as "save_as", which the old pattern accepted, keep working.
	    RE_METADATA = re.compile(r'^([A-Za-z_]+): (.*)$')

	    def read_source(self, source_path):
	        """Read metadata and content from the source.

	        The leading run of "Name: value" lines (blank lines in between are
	        skipped) is parsed into the metadata dict; everything from the first
	        other non-blank line onward is the Markdown body.

	        Args:
	            source_path: path of the Markdown file; it is made relative to
	                self.settings['PATH'] to derive the slug.

	        Returns:
	            A (text, metadata) tuple. TEXT is the body with the header
	            removed ('' when the file holds nothing but header lines).
	            METADATA always contains a 'slug' entry.
	        """
	        # Prepare the "slug", which is the target file name. It will be the
	        # same as the source file, minus the leading ".../content/(articles|pages)"
	        # and with the extension removed (Pelican will add .html)
	        relpath = os.path.relpath(source_path, self.settings['PATH'])
	        parts = relpath.split(os.sep)
	        parts[-1] = os.path.splitext(parts[-1])[0]  # split off ext, keep base
	        metadata = {
	            'slug': os.sep.join(parts[1:]),
	        }

	        # Fetch the source content
	        with pelican.utils.pelican_open(source_path) as text:
	            lines = text.splitlines()

	        # Extract the metadata from the header of the text. BODY_START ends up
	        # as the index of the first content line; it stays at len(lines) when
	        # the file is empty or consists of header lines only, so that no
	        # header line leaks into the body and an empty file does not crash.
	        body_start = len(lines)
	        for index, line in enumerate(lines):
	            match = GFMReader.RE_METADATA.match(line)
	            if match:
	                name = match.group(1).strip().lower()
	                if name == 'slug':
	                    # The slug is derived from the path/title; a "slug:"
	                    # header is ignored rather than stored. (Previously this
	                    # stored the value of the preceding header line, or
	                    # failed when it was the first header line.)
	                    continue
	                value = match.group(2).strip()
	                if name == 'date':
	                    value = pelican.utils.get_date(value)
	                metadata[name] = value
	            elif line.strip():
	                # reached actual content
	                body_start = index
	                break
	            # else: blank line inside the header, keep scanning

	        # Redo the slug for articles.
	        # depending on pelicanconf.py this will change the output filename
	        if parts[0] == 'articles' and 'title' in metadata:
	            metadata['slug'] = pelican.utils.slugify(
	                metadata['title'],
	                self.settings.get('SLUG_SUBSTITUTIONS', ()))

	        # Reassemble content, minus the metadata
	        return '\n'.join(lines[body_start:]), metadata

	    def read(self, source_path):
	        """Read metadata and content then render into HTML.

	        Args:
	            source_path: path of the Markdown file to read.

	        Returns:
	            A (content, metadata) tuple; CONTENT is the rendered HTML
	            fragment as str. A source without any body is rendered from
	            an empty input instead of being rejected.
	        """
	        text, metadata = self.read_source(source_path)

	        # The ctypes binding takes a char buffer, so hand over UTF-8 bytes
	        # and decode what comes back. (This module already requires
	        # Python 3 -- it uses f-strings -- so no Python 2 branch is needed.)
	        html = self.render(text.encode('utf-8'))
	        return html.decode('utf-8'), metadata

	    def render(self, text):
	        """Use cmark-gfm to render the Markdown into an HTML fragment.

	        Args:
	            text: the Markdown source as bytes.

	        Returns:
	            The rendered HTML as bytes.

	        Raises:
	            RuntimeError: if the parser cannot be created, an extension
	                cannot be found or attached, or no document/output is
	                produced. Explicit checks are used instead of assert so
	                that a NULL pointer is never passed on under "python -O".
	        """
	        parser = F_cmark_parser_new(OPTS)
	        if not parser:
	            raise RuntimeError('cmark_parser_new() returned NULL')

	        doc = None
	        try:
	            for name in EXTENSIONS:
	                ext = F_cmark_find_syntax_extension(name.encode('utf-8'))
	                if not ext:
	                    raise RuntimeError(f'GFM extension not found: {name}')
	                if not F_cmark_parser_attach_syntax_extension(parser, ext):
	                    raise RuntimeError(
	                        f'GFM extension could not be attached: {name}')

	            # Fetched before the parser is freed; the renderer needs it.
	            exts = F_cmark_parser_get_syntax_extensions(parser)

	            F_cmark_parser_feed(parser, text, len(text))
	            doc = F_cmark_parser_finish(parser)
	            if not doc:
	                raise RuntimeError('cmark_parser_finish() returned NULL')

	            output = F_cmark_render_html(doc, OPTS, exts)
	            if output is None:
	                raise RuntimeError('cmark_render_html() returned NULL')
	            return output
	        finally:
	            # Release the native objects on every path, including failures.
	            F_cmark_parser_free(parser)
	            if doc:
	                F_cmark_node_free(doc)


	def add_readers(readers):
	    """Make GFMReader the reader class for the 'md' extension.

	    READERS is the object passed to this handler; only its reader_classes
	    mapping is touched.
	    """
	    registry = readers.reader_classes
	    registry['md'] = GFMReader


	def register():
	    """Connect add_readers to Pelican's readers_init signal."""
	    readers_init = pelican.plugins.signals.readers_init
	    readers_init.connect(add_readers)